From 44502fcc3e45f23a8a6b7627ffaba0538f0197cd8876125cd3b65f7b636ea9a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ismail=20D=C3=B6nmez?= Date: Tue, 10 Dec 2013 19:38:49 +0000 Subject: [PATCH] Accepting request 210407 from home:sumski:branches:multimedia:libs Update to 1.3.0 OBS-URL: https://build.opensuse.org/request/show/210407 OBS-URL: https://build.opensuse.org/package/show/multimedia:libs/libvpx?expand=0&rev=47 --- libvpx-1.3.0.tar.xz | 3 + libvpx-armv7-use-hard-float.patch | 20 +- libvpx-configure-add-s390.patch | 22 +- libvpx-v1.1.0.tar.bz2 | 3 - libvpx.changes | 48 + libvpx.spec | 14 +- version_1.1.0_to_1.2.0.diff | 45439 ---------------------------- 7 files changed, 78 insertions(+), 45471 deletions(-) create mode 100644 libvpx-1.3.0.tar.xz delete mode 100644 libvpx-v1.1.0.tar.bz2 delete mode 100644 version_1.1.0_to_1.2.0.diff diff --git a/libvpx-1.3.0.tar.xz b/libvpx-1.3.0.tar.xz new file mode 100644 index 0000000..969a450 --- /dev/null +++ b/libvpx-1.3.0.tar.xz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4087c5195c35ff1de6d9449919c35207822c1b5dc78a6684d88d64f93ec7c6a +size 1752356 diff --git a/libvpx-armv7-use-hard-float.patch b/libvpx-armv7-use-hard-float.patch index 8fb72a5..da417e1 100644 --- a/libvpx-armv7-use-hard-float.patch +++ b/libvpx-armv7-use-hard-float.patch @@ -1,13 +1,13 @@ -Index: build/make/configure.sh -=================================================================== ---- build/make/configure.sh.orig -+++ build/make/configure.sh -@@ -789,8 +789,8 @@ process_common_toolchain() { - check_add_asflags --defsym ARCHITECTURE=${arch_int} - tune_cflags="-mtune=" - if [ ${tgt_isa} == "armv7" ]; then -- check_add_cflags -march=armv7-a -mfloat-abi=softfp -- check_add_asflags -march=armv7-a -mfloat-abi=softfp +diff --git a/build/make/configure.sh b/build/make/configure.sh +index cf78764..3213d97 100755 +--- a/build/make/configure.sh ++++ b/build/make/configure.sh +@@ -820,8 +820,8 @@ process_common_toolchain() { + #endif + EOF + fi +- check_add_cflags -march=armv7-a -mfloat-abi=${float_abi} +- check_add_asflags -march=armv7-a -mfloat-abi=${float_abi} + check_add_cflags -march=armv7-a -mfloat-abi=hard + check_add_asflags -march=armv7-a -mfloat-abi=hard diff --git a/libvpx-configure-add-s390.patch b/libvpx-configure-add-s390.patch index ca07651..3ec702e 100644 --- a/libvpx-configure-add-s390.patch +++ b/libvpx-configure-add-s390.patch @@ -1,8 +1,8 @@ -Index: build/make/configure.sh -=================================================================== ---- build/make/configure.sh.orig -+++ build/make/configure.sh -@@ -614,6 +614,12 @@ process_common_toolchain() { +diff --git a/build/make/configure.sh b/build/make/configure.sh +index 8dcb9bb..cf78764 100755 +--- a/build/make/configure.sh ++++ b/build/make/configure.sh +@@ -627,6 +627,12 @@ process_common_toolchain() { *powerpc*) tgt_isa=ppc32 ;; @@ -15,16 +15,16 @@ Index: build/make/configure.sh *sparc*) tgt_isa=sparc ;; -Index: configure -=================================================================== ---- configure.orig -+++ configure -@@ -104,6 +104,8 @@ all_platforms="${all_platforms} ppc32-li +diff --git a/configure b/configure +index 4ff3fc7..c590057 100755 +--- a/configure ++++ b/configure +@@ -108,6 +108,8 @@ all_platforms="${all_platforms} ppc32-linux-gcc" all_platforms="${all_platforms} ppc64-darwin8-gcc" all_platforms="${all_platforms} ppc64-darwin9-gcc" all_platforms="${all_platforms} ppc64-linux-gcc" +all_platforms="${all_platforms} s390-linux-gcc" +all_platforms="${all_platforms} 
s390x-linux-gcc" all_platforms="${all_platforms} sparc-solaris-gcc" + all_platforms="${all_platforms} x86-android-gcc" all_platforms="${all_platforms} x86-darwin8-gcc" - all_platforms="${all_platforms} x86-darwin8-icc" diff --git a/libvpx-v1.1.0.tar.bz2 b/libvpx-v1.1.0.tar.bz2 deleted file mode 100644 index 42c2e70..0000000 --- a/libvpx-v1.1.0.tar.bz2 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9ce074cf4b3bcd9a49ff93e05485b71c273bfc3685a305e55a0e7fa51beb72c5 -size 1653485 diff --git a/libvpx.changes b/libvpx.changes index 51d3aee..f14d91d 100644 --- a/libvpx.changes +++ b/libvpx.changes @@ -1,3 +1,51 @@ +------------------------------------------------------------------- +Tue Dec 10 16:26:06 UTC 2013 - hrvoje.senjan@gmail.com + +- Update to version 1.3.0 + + This release is ABI and API compatible with Duclair (v1.0.0). + + This release introduces the VP9 codec in a backward-compatible + way. All existing users of VP8 can continue to use the library + without modification. However, some VP8 options do not map to + VP9 in the same manner. + + Enhancements: + - Get rid of bashisms in the main build scripts + - Added usage info on command line options + - Add lossless compression mode + - Dll build of libvpx + - Add additional Mac OS X targets: 10.7, 10.8 and 10.9 + - Add option to disable documentation + - configure: add --enable-external-build support + - make: support V=1 as short form of verbose=yes + - configure: support mingw-w64 + - configure: support hardfloat armv7 CHOSTS + - configure: add support for android x86 + - Add estimated completion time to vpxenc + - Don't exit on decode errors in vpxenc + - vpxenc: support scaling prior to encoding + - vpxdec: support scaling output + - vpxenc: improve progress indicators with --skip + - msvs: Don't link to winmm.lib + - Add a new script for producing vcxproj files + - Produce Visual Studio 10 and 11 project files + - Produce Windows Phone project files + - msvs-build: use msbuild for vs >= 2005 + - configure: default configure log to config.log + - Add encoding option --static-thresh + + Speed: + - Miscellaneous speed optimizations for VP8 and VP9. + + Quality: + - In general, quality is consistent with the Eider release. + + Bug Fixes: + - This release represents approximately a year of engineering + effort, and contains multiple bug fixes. Please refer to git + history for details. +- Rebase libvpx-armv7-use-hard-float.patch and + libvpx-configure-add-s390.patch to this release +- Droped version_1.1.0_to_1.2.0.diff +- Added xz BuildRequires +- Remove Source URL, as the tarball is not available at that place +- Tarball is generated from v1.3.0 tag + ------------------------------------------------------------------- Mon Jan 28 17:07:51 UTC 2013 - adrian@suse.de diff --git a/libvpx.spec b/libvpx.spec index b6d5720..6ee54ab 100644 --- a/libvpx.spec +++ b/libvpx.spec @@ -17,15 +17,13 @@ Name: libvpx -Version: 1.2.0 +Version: 1.3.0 Release: 0 Summary: VP8 codec library License: BSD-3-Clause and GPL-2.0+ Group: Productivity/Multimedia/Other Url: http://www.webmproject.org/ -Source0: http://webm.googlecode.com/files/%{name}-v1.1.0.tar.bz2 -# The upstream project did not release a tar ball, just a git tag of version 1.2.0 :/ -Patch0: version_1.1.0_to_1.2.0.diff +Source0: %{name}-%{version}.tar.xz # PATCH-FIX-UPSTREAM libvpx-define-config_pic.patch dimstar@opensuse.org -- For older compilers, CONFIG_PIC need to be defined. 
Patch1: libvpx-define-config_pic.patch Patch2: libvpx-configure-add-s390.patch @@ -33,6 +31,7 @@ Patch3: libvpx-disable-cross-for-arm.patch Patch4: libvpx-armv7-use-hard-float.patch # Needed to be able to create pkgconfig() provides. BuildRequires: pkg-config +BuildRequires: xz BuildRequires: yasm BuildRoot: %{_tmppath}/%{name}-%{version}-build @@ -90,12 +89,11 @@ and audio streams compressed with the Vorbis audio codec. The WebM file structure is based on the Matroska container. %prep -%setup -q -n %name-v1.1.0 -%patch0 -p1 +%setup -q %patch1 -p1 -%patch2 +%patch2 -p1 %patch3 -%patch4 +%patch4 -p1 %build cd build diff --git a/version_1.1.0_to_1.2.0.diff b/version_1.1.0_to_1.2.0.diff deleted file mode 100644 index 1e40126..0000000 --- a/version_1.1.0_to_1.2.0.diff +++ /dev/null @@ -1,45439 +0,0 @@ -diff --git a/.gitignore b/.gitignore -index 110146d..4074b0b 100644 ---- a/.gitignore -+++ b/.gitignore -@@ -32,6 +32,8 @@ - /ivfdec.dox - /ivfenc - /ivfenc.dox -+/libvpx.so* -+/libvpx.ver - /obj_int_extract - /postproc - /postproc.c -@@ -43,6 +45,7 @@ - /simple_encoder - /simple_encoder.c - /simple_encoder.dox -+/test_libvpx - /twopass_encoder - /twopass_encoder.c - /twopass_encoder.dox -@@ -55,7 +58,14 @@ - /vp8cx_set_ref - /vp8cx_set_ref.c - /vp8cx_set_ref.dox -+/vpx.pc - /vpx_config.c - /vpx_config.h -+/vpx_rtcd.h - /vpx_version.h -+/vpxdec -+/vpxenc - TAGS -+.cproject -+.project -+.settings -diff --git a/CHANGELOG b/CHANGELOG -index dcb9f73..ef64a96 100644 ---- a/CHANGELOG -+++ b/CHANGELOG -@@ -1,3 +1,32 @@ -+2012-12-21 v1.2.0 -+ This release acts as a checkpoint for a large amount of internal refactoring -+ and testing. It also contains a number of small bugfixes, so all users are -+ encouraged to upgrade. -+ -+ - Upgrading: -+ This release is ABI and API compatible with Duclair (v1.0.0). Users -+ of older releases should refer to the Upgrading notes in this -+ document for that release. -+ -+ - Enhancements: -+ VP8 optimizations for MIPS dspr2 -+ vpxenc: add -quiet option -+ -+ - Speed: -+ Encoder and decoder speed is consistent with the Eider release. -+ -+ - Quality: -+ In general, quality is consistent with the Eider release. -+ -+ Minor tweaks to ARNR filtering -+ Minor improvements to real time encoding with multiple temporal layers -+ -+ - Bug Fixes: -+ Fixes multithreaded encoder race condition in loopfilter -+ Fixes multi-resolution threaded encoding -+ Fix potential encoder dead-lock after picture resize -+ -+ - 2012-05-09 v1.1.0 "Eider" - This introduces a number of enhancements, mostly focused on real-time - encoding. In addition, it fixes a decoder bug (first introduced in -diff --git a/README b/README -index 0dfb0fe..0475dad 100644 ---- a/README -+++ b/README -@@ -1,5 +1,5 @@ - vpx Multi-Format Codec SDK --README - 19 May 2010 -+README - 21 June 2012 - - Welcome to the WebM VP8 Codec SDK! - -@@ -15,11 +15,19 @@ COMPILING THE APPLICATIONS/LIBRARIES: - * Building the documentation requires PHP[3] and Doxygen[4]. If you do not - have these packages, you must pass --disable-install-docs to the - configure script. -+ * Downloading the data for the unit tests requires curl[5] and sha1sum. -+ sha1sum is provided via the GNU coreutils, installed by default on -+ many *nix platforms, as well as MinGW and Cygwin. If coreutils is not -+ available, a compatible version of sha1sum can be built from -+ source[6]. These requirements are optional if not running the unit -+ tests. 
- - [1]: http://www.tortall.net/projects/yasm - [2]: http://www.cygwin.com - [3]: http://php.net - [4]: http://www.doxygen.org -+ [5]: http://curl.haxx.se -+ [6]: http://www.microbrew.org/tools/md5sha1sum/ - - 2. Out-of-tree builds - Out of tree builds are a supported method of building the application. For -@@ -94,5 +102,5 @@ COMPILING THE APPLICATIONS/LIBRARIES: - - SUPPORT - This library is an open source project supported by its community. Please -- please email webm-users@webmproject.org for help. -+ please email webm-discuss@webmproject.org for help. - -diff --git a/build/make/Android.mk b/build/make/Android.mk -index 6fcd4ae..c6b9cf9 100644 ---- a/build/make/Android.mk -+++ b/build/make/Android.mk -@@ -27,15 +27,22 @@ - # Android.mk file in the libvpx directory: - # LOCAL_PATH := $(call my-dir) - # include $(CLEAR_VARS) --# include libvpx/build/make/Android.mk -+# include jni/libvpx/build/make/Android.mk - # - # There are currently two TARGET_ARCH_ABI targets for ARM. - # armeabi and armeabi-v7a. armeabi-v7a is selected by creating an - # Application.mk in the jni directory that contains: - # APP_ABI := armeabi-v7a - # -+# By default libvpx will detect at runtime the existance of NEON extension. -+# For this we import the 'cpufeatures' module from the NDK sources. -+# libvpx can also be configured without this runtime detection method. -+# Configuring with --disable-runtime-cpu-detect will assume presence of NEON. -+# Configuring with --disable-runtime-cpu-detect --disable-neon will remove any -+# NEON dependency. -+ - # To change to building armeabi, run ./libvpx/configure again, but with --# --target=arm5te-android-gcc and and modify the Application.mk file to -+# --target=arm5te-android-gcc and modify the Application.mk file to - # set APP_ABI := armeabi - # - # Running ndk-build will build libvpx and include it in your project. -@@ -166,7 +173,9 @@ LOCAL_MODULE := libvpx - - LOCAL_LDLIBS := -llog - --LOCAL_STATIC_LIBRARIES := cpufeatures -+ifeq ($(CONFIG_RUNTIME_CPU_DETECT),yes) -+ LOCAL_STATIC_LIBRARIES := cpufeatures -+endif - - $(foreach file, $(LOCAL_SRC_FILES), $(LOCAL_PATH)/$(file)): vpx_rtcd.h - -@@ -196,4 +205,7 @@ ifeq ($(CONFIG_VP8_ENCODER), yes) - $(LIBVPX_PATH)/vp8/encoder/asm_enc_offsets.c)) - endif - -+ifeq ($(CONFIG_RUNTIME_CPU_DETECT),yes) - $(call import-module,cpufeatures) -+endif -+ -diff --git a/build/make/Makefile b/build/make/Makefile -index b6cf320..1088c84 100644 ---- a/build/make/Makefile -+++ b/build/make/Makefile -@@ -21,6 +21,7 @@ all: .DEFAULT - clean:: .DEFAULT - install:: .DEFAULT - test:: .DEFAULT -+testdata:: .DEFAULT - - - # Note: md5sum is not installed on OS X, but openssl is. Openssl may not be -@@ -66,6 +67,7 @@ endif - BUILD_ROOT?=. 
- VPATH=$(SRC_PATH_BARE) - CFLAGS+=-I$(BUILD_PFX)$(BUILD_ROOT) -I$(SRC_PATH) -+CXXFLAGS+=-I$(BUILD_PFX)$(BUILD_ROOT) -I$(SRC_PATH) - ASFLAGS+=-I$(BUILD_PFX)$(BUILD_ROOT)/ -I$(SRC_PATH)/ - DIST_DIR?=dist - HOSTCC?=gcc -@@ -98,6 +100,8 @@ dist: - install:: - .PHONY: test - test:: -+.PHONY: testdata -+testdata:: - - $(BUILD_PFX)%.c.d: %.c - $(if $(quiet),@echo " [DEP] $@") -@@ -111,11 +115,11 @@ $(BUILD_PFX)%.c.o: %.c - $(BUILD_PFX)%.cc.d: %.cc - $(if $(quiet),@echo " [DEP] $@") - $(qexec)mkdir -p $(dir $@) -- $(qexec)g++ $(INTERNAL_CFLAGS) $(CFLAGS) -M $< | $(fmt_deps) > $@ -+ $(qexec)$(CXX) $(INTERNAL_CFLAGS) $(CXXFLAGS) -M $< | $(fmt_deps) > $@ - - $(BUILD_PFX)%.cc.o: %.cc - $(if $(quiet),@echo " [CXX] $@") -- $(qexec)g++ $(INTERNAL_CFLAGS) $(CFLAGS) -c -o $@ $< -+ $(qexec)$(CXX) $(INTERNAL_CFLAGS) $(CXXFLAGS) -c -o $@ $< - - $(BUILD_PFX)%.asm.d: %.asm - $(if $(quiet),@echo " [DEP] $@") -@@ -213,7 +217,7 @@ define linkerxx_template - $(1): $(filter-out -%,$(2)) - $(1): - $(if $(quiet),@echo " [LD] $$@") -- $(qexec)g++ $$(strip $$(INTERNAL_LDFLAGS) $$(LDFLAGS) -o $$@ $(2) $(3) $$(extralibs)) -+ $(qexec)$$(CXX) $$(strip $$(INTERNAL_LDFLAGS) $$(LDFLAGS) -o $$@ $(2) $(3) $$(extralibs)) - endef - # make-3.80 has a bug with expanding large input strings to the eval function, - # which was triggered in some cases by the following component of -diff --git a/build/make/ads2gas.pl b/build/make/ads2gas.pl -index c55ed0f..95be467 100755 ---- a/build/make/ads2gas.pl -+++ b/build/make/ads2gas.pl -@@ -26,12 +26,22 @@ print "\t.equ DO1STROUNDING, 0\n"; - - while () - { -+ undef $comment; -+ undef $line; -+ $comment_char = ";"; -+ $comment_sub = "@"; -+ -+ # Handle comments. -+ if (/$comment_char/) -+ { -+ $comment = ""; -+ ($line, $comment) = /(.*?)$comment_char(.*)/; -+ $_ = $line; -+ } -+ - # Load and store alignment - s/@/,:/g; - -- # Comment character -- s/;/@/g; -- - # Hexadecimal constants prefaced by 0x - s/#&/#0x/g; - -@@ -51,16 +61,27 @@ while () - s/:SHR:/ >> /g; - - # Convert ELSE to .else -- s/ELSE/.else/g; -+ s/\bELSE\b/.else/g; - - # Convert ENDIF to .endif -- s/ENDIF/.endif/g; -+ s/\bENDIF\b/.endif/g; - - # Convert ELSEIF to .elseif -- s/ELSEIF/.elseif/g; -+ s/\bELSEIF\b/.elseif/g; - - # Convert LTORG to .ltorg -- s/LTORG/.ltorg/g; -+ s/\bLTORG\b/.ltorg/g; -+ -+ # Convert endfunc to nothing. -+ s/\bendfunc\b//ig; -+ -+ # Convert FUNCTION to nothing. -+ s/\bFUNCTION\b//g; -+ s/\bfunction\b//g; -+ -+ s/\bENTRY\b//g; -+ s/\bMSARMASM\b/0/g; -+ s/^\s+end\s+$//g; - - # Convert IF :DEF:to .if - # gcc doesn't have the ability to do a conditional -@@ -106,6 +127,7 @@ while () - if (s/RN\s+([Rr]\d+|lr)/.req $1/) - { - print; -+ print "$comment_sub$comment\n" if defined $comment; - next; - } - -@@ -114,6 +136,9 @@ while () - s/EXPORT\s+\|([\$\w]*)\|/.global $1 \n\t.type $1, function/; - s/IMPORT\s+\|([\$\w]*)\|/.global $1/; - -+ s/EXPORT\s+([\$\w]*)/.global $1/; -+ s/export\s+([\$\w]*)/.global $1/; -+ - # No vertical bars required; make additional symbol with prepended - # underscore - s/^\|(\$?\w+)\|/_$1\n\t$1:/g; -@@ -124,11 +149,19 @@ while () - s/^([a-zA-Z_0-9\$]+)/$1:/ if !/EQU/; - - # ALIGN directive -- s/ALIGN/.balign/g; -+ s/\bALIGN\b/.balign/g; - - # ARM code - s/\sARM/.arm/g; - -+ # push/pop -+ s/(push\s+)(r\d+)/stmdb sp\!, \{$2\}/g; -+ s/(pop\s+)(r\d+)/ldmia sp\!, \{$2\}/g; -+ -+ # NEON code -+ s/(vld1.\d+\s+)(q\d+)/$1\{$2\}/g; -+ s/(vtbl.\d+\s+[^,]+),([^,]+)/$1,\{$2\}/g; -+ - # eabi_attributes numerical equivalents can be found in the - # "ARM IHI 0045C" document. 
- -@@ -157,10 +190,10 @@ while () - } - - # EQU directive -- s/(.*)EQU(.*)/.equ $1, $2/; -+ s/(\S+\s+)EQU(\s+\S+)/.equ $1, $2/; - - # Begin macro definition -- if (/MACRO/) { -+ if (/\bMACRO\b/) { - $_ = ; - s/^/.macro/; - s/\$//g; # remove formal param reference -@@ -169,9 +202,10 @@ while () - - # For macros, use \ to reference formal params - s/\$/\\/g; # End macro definition -- s/MEND/.endm/; # No need to tell it where to stop assembling -+ s/\bMEND\b/.endm/; # No need to tell it where to stop assembling - next if /^\s*END\s*$/; - print; -+ print "$comment_sub$comment\n" if defined $comment; - } - - # Mark that this object doesn't need an executable stack. -diff --git a/build/make/configure.sh b/build/make/configure.sh -index 3c772e5..c99a01c 100755 ---- a/build/make/configure.sh -+++ b/build/make/configure.sh -@@ -166,6 +166,17 @@ is_in(){ - - add_cflags() { - CFLAGS="${CFLAGS} $@" -+ CXXFLAGS="${CXXFLAGS} $@" -+} -+ -+ -+add_cflags_only() { -+ CFLAGS="${CFLAGS} $@" -+} -+ -+ -+add_cxxflags_only() { -+ CXXFLAGS="${CXXFLAGS} $@" - } - - -@@ -277,6 +288,13 @@ check_cc() { - check_cmd ${CC} ${CFLAGS} "$@" -c -o ${TMP_O} ${TMP_C} - } - -+check_cxx() { -+ log check_cxx "$@" -+ cat >${TMP_C} -+ log_file ${TMP_C} -+ check_cmd ${CXX} ${CXXFLAGS} "$@" -c -o ${TMP_O} ${TMP_C} -+} -+ - check_cpp() { - log check_cpp "$@" - cat > ${TMP_C} -@@ -310,8 +328,25 @@ int x; - EOF - } - -+check_cxxflags() { -+ log check_cxxflags "$@" -+ -+ # Catch CFLAGS that trigger CXX warnings -+ case "$CXX" in -+ *g++*) check_cxx -Werror "$@" <> $1 << EOF -@@ -379,6 +416,7 @@ TOOLCHAIN=${toolchain} - ASM_CONVERSION=${asm_conversion_cmd:-${source_path}/build/make/ads2gas.pl} - - CC=${CC} -+CXX=${CXX} - AR=${AR} - LD=${LD} - AS=${AS} -@@ -386,6 +424,7 @@ STRIP=${STRIP} - NM=${NM} - - CFLAGS = ${CFLAGS} -+CXXFLAGS = ${CXXFLAGS} - ARFLAGS = -rus\$(if \$(quiet),c,v) - LDFLAGS = ${LDFLAGS} - ASFLAGS = ${ASFLAGS} -@@ -538,6 +577,7 @@ post_process_cmdline() { - - setup_gnu_toolchain() { - CC=${CC:-${CROSS}gcc} -+ CXX=${CXX:-${CROSS}g++} - AR=${AR:-${CROSS}ar} - LD=${LD:-${CROSS}${link_with_cc:-ld}} - AS=${AS:-${CROSS}as} -@@ -549,10 +589,19 @@ setup_gnu_toolchain() { - - process_common_toolchain() { - if [ -z "$toolchain" ]; then -- gcctarget="$(gcc -dumpmachine 2> /dev/null)" -+ gcctarget="${CHOST:-$(gcc -dumpmachine 2> /dev/null)}" - - # detect tgt_isa - case "$gcctarget" in -+ armv6*) -+ tgt_isa=armv6 -+ ;; -+ armv7*) -+ tgt_isa=armv7 -+ ;; -+ armv5te*) -+ tgt_isa=armv5te -+ ;; - *x86_64*|*amd64*) - tgt_isa=x86_64 - ;; -@@ -718,6 +767,7 @@ process_common_toolchain() { - ;; - armv5te) - soft_enable edsp -+ disable fast_unaligned - ;; - esac - -@@ -733,17 +783,23 @@ process_common_toolchain() { - check_add_asflags --defsym ARCHITECTURE=${arch_int} - tune_cflags="-mtune=" - if [ ${tgt_isa} == "armv7" ]; then -+ check_add_cflags -march=armv7-a -mfloat-abi=softfp -+ check_add_asflags -march=armv7-a -mfloat-abi=softfp -+ - if enabled neon - then - check_add_cflags -mfpu=neon #-ftree-vectorize - check_add_asflags -mfpu=neon - fi -- check_add_cflags -march=armv7-a -mcpu=cortex-a8 -mfloat-abi=softfp -- check_add_asflags -mcpu=cortex-a8 -mfloat-abi=softfp #-march=armv7-a -+ -+ if [ -z "${tune_cpu}" ]; then -+ tune_cpu=cortex-a8 -+ fi - else - check_add_cflags -march=${tgt_isa} - check_add_asflags -march=${tgt_isa} - fi -+ - enabled debug && add_asflags -g - asm_conversion_cmd="${source_path}/build/make/ads2gas.pl" - ;; -@@ -792,6 +848,7 @@ process_common_toolchain() { - -name "arm-linux-androideabi-gcc*" -print -quit` - 
TOOLCHAIN_PATH=${COMPILER_LOCATION%/*}/arm-linux-androideabi- - CC=${TOOLCHAIN_PATH}gcc -+ CXX=${TOOLCHAIN_PATH}g++ - AR=${TOOLCHAIN_PATH}ar - LD=${TOOLCHAIN_PATH}gcc - AS=${TOOLCHAIN_PATH}as -@@ -810,12 +867,17 @@ process_common_toolchain() { - add_cflags "--sysroot=${alt_libc}" - add_ldflags "--sysroot=${alt_libc}" - -- add_cflags "-I${SDK_PATH}/sources/android/cpufeatures/" -+ # linker flag that routes around a CPU bug in some -+ # Cortex-A8 implementations (NDK Dev Guide) -+ add_ldflags "-Wl,--fix-cortex-a8" - - enable pic - soft_enable realtime_only - if [ ${tgt_isa} == "armv7" ]; then -- enable runtime_cpu_detect -+ soft_enable runtime_cpu_detect -+ fi -+ if enabled runtime_cpu_detect; then -+ add_cflags "-I${SDK_PATH}/sources/android/cpufeatures" - fi - ;; - -@@ -827,6 +889,7 @@ process_common_toolchain() { - SDK_PATH=${sdk_path} - fi - TOOLCHAIN_PATH=${SDK_PATH}/usr/bin -+ CXX=${TOOLCHAIN_PATH}/g++ - CC=${TOOLCHAIN_PATH}/gcc - AR=${TOOLCHAIN_PATH}/ar - LD=${TOOLCHAIN_PATH}/arm-apple-darwin10-llvm-gcc-4.2 -@@ -890,13 +953,16 @@ process_common_toolchain() { - esac - ;; - mips*) -- CROSS=${CROSS:-mipsel-linux-uclibc-} - link_with_cc=gcc - setup_gnu_toolchain - tune_cflags="-mtune=" -+ if enabled dspr2; then -+ check_add_cflags -mips32r2 -mdspr2 -+ disable fast_unaligned -+ fi - check_add_cflags -march=${tgt_isa} -- check_add_asflags -march=${tgt_isa} -- check_add_asflags -KPIC -+ check_add_asflags -march=${tgt_isa} -+ check_add_asflags -KPIC - ;; - ppc*) - enable ppc -@@ -924,6 +990,11 @@ process_common_toolchain() { - x86*) - bits=32 - enabled x86_64 && bits=64 -+ check_cpp </dev/null 2>&1 && AS=yasm - [ "${AS}" = auto -o -z "${AS}" ] \ - && die "Neither yasm nor nasm have been found" -- ;; -+ ;; - esac - log_echo " using $AS" - [ "${AS##*/}" = nasm ] && add_asflags -Ox -@@ -1065,7 +1143,7 @@ process_common_toolchain() { - - # Work around longjmp interception on glibc >= 2.11, to improve binary - # compatibility. See http://code.google.com/p/webm/issues/detail?id=166 -- enabled linux && check_add_cflags -D_FORTIFY_SOURCE=0 -+ enabled linux && check_add_cflags -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0 - - # Check for strip utility variant - ${STRIP} -V 2>/dev/null | grep GNU >/dev/null && enable gnu_strip -@@ -1080,12 +1158,24 @@ EOF - # Almost every platform uses pthreads. - if enabled multithread; then - case ${toolchain} in -- *-win*);; -+ *-win*-vs*);; - *-android-gcc);; - *) check_header pthread.h && add_extralibs -lpthread - esac - fi - -+ # only for MIPS platforms -+ case ${toolchain} in -+ mips*) -+ if enabled dspr2; then -+ if enabled big_endian; then -+ echo "dspr2 optimizations are available only for little endian platforms" -+ disable dspr2 -+ fi -+ fi -+ ;; -+ esac -+ - # for sysconf(3) and friends. 
- check_header unistd.h - -diff --git a/build/make/gen_asm_deps.sh b/build/make/gen_asm_deps.sh -index 717f870..0b4e3aa 100755 ---- a/build/make/gen_asm_deps.sh -+++ b/build/make/gen_asm_deps.sh -@@ -42,7 +42,7 @@ done - - [ -n "$srcfile" ] || show_help - sfx=${sfx:-asm} --includes=$(LC_ALL=C egrep -i "include +\"?+[a-z0-9_/]+\.${sfx}" $srcfile | -+includes=$(LC_ALL=C egrep -i "include +\"?[a-z0-9_/]+\.${sfx}" $srcfile | - perl -p -e "s;.*?([a-z0-9_/]+.${sfx}).*;\1;") - #" restore editor state - for inc in ${includes}; do -diff --git a/build/make/obj_int_extract.c b/build/make/obj_int_extract.c -index 04e14a6..bf317bd 100644 ---- a/build/make/obj_int_extract.c -+++ b/build/make/obj_int_extract.c -@@ -680,7 +680,7 @@ int parse_coff(uint8_t *buf, size_t sz) - uint32_t symoffset; - - char **sectionlist; //this array holds all section names in their correct order. -- //it is used to check if the symbol is in .bss or .data section. -+ //it is used to check if the symbol is in .bss or .rdata section. - - nsections = get_le16(buf + 2); - symtab_ptr = get_le32(buf + 8); -@@ -725,15 +725,15 @@ int parse_coff(uint8_t *buf, size_t sz) - } - strcpy(sectionlist[i], sectionname); - -- if (!strcmp(sectionname, ".data")) sectionrawdata_ptr = get_le32(ptr + 20); -+ if (!strcmp(sectionname, ".rdata")) sectionrawdata_ptr = get_le32(ptr + 20); - - ptr += 40; - } - - //log_msg("COFF: Symbol table at offset %u\n", symtab_ptr); -- //log_msg("COFF: raw data pointer ofset for section .data is %u\n", sectionrawdata_ptr); -+ //log_msg("COFF: raw data pointer ofset for section .rdata is %u\n", sectionrawdata_ptr); - -- /* The compiler puts the data with non-zero offset in .data section, but puts the data with -+ /* The compiler puts the data with non-zero offset in .rdata section, but puts the data with - zero offset in .bss section. So, if the data in in .bss section, set offset=0. - Note from Wiki: In an object module compiled from C, the bss section contains - the local variables (but not functions) that were declared with the static keyword, -diff --git a/build/make/rtcd.sh b/build/make/rtcd.sh -index 1dffde5..ddf9e09 100755 ---- a/build/make/rtcd.sh -+++ b/build/make/rtcd.sh -@@ -211,6 +211,8 @@ common_top() { - $(process_forward_decls) - - $(declare_function_pointers c $ALL_ARCHS) -+ -+void ${symbol:-rtcd}(void); - EOF - } - -@@ -231,11 +233,10 @@ x86() { - - cat <planes[plane]; - - for(y=0; y < (plane ? (img->d_h + 1) >> 1 : img->d_h); y++) { -- if(fwrite(buf, 1, (plane ? (img->d_w + 1) >> 1 : img->d_w), -- outfile)); -+ (void) fwrite(buf, 1, (plane ? 
(img->d_w + 1) >> 1 : img->d_w), -+ outfile); - buf += img->stride[plane]; - } - } -diff --git a/examples/encoder_tmpl.c b/examples/encoder_tmpl.c -index cc70b00..e2b65ec 100644 ---- a/examples/encoder_tmpl.c -+++ b/examples/encoder_tmpl.c -@@ -85,7 +85,7 @@ static void write_ivf_file_header(FILE *outfile, - mem_put_le32(header+24, frame_cnt); /* length */ - mem_put_le32(header+28, 0); /* unused */ - -- if(fwrite(header, 1, 32, outfile)); -+ (void) fwrite(header, 1, 32, outfile); - } - - -@@ -103,7 +103,7 @@ static void write_ivf_frame_header(FILE *outfile, - mem_put_le32(header+4, pts&0xFFFFFFFF); - mem_put_le32(header+8, pts >> 32); - -- if(fwrite(header, 1, 12, outfile)); -+ (void) fwrite(header, 1, 12, outfile); - } - - int main(int argc, char **argv) { -diff --git a/examples/encoder_tmpl.txt b/examples/encoder_tmpl.txt -index 0042071..1afbd8b 100644 ---- a/examples/encoder_tmpl.txt -+++ b/examples/encoder_tmpl.txt -@@ -61,13 +61,14 @@ if(vpx_codec_encode(&codec, frame_avail? &raw : NULL, frame_cnt, - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PROCESS_FRAME - case VPX_CODEC_CX_FRAME_PKT: - write_ivf_frame_header(outfile, pkt); -- if(fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, -- outfile)); -+ (void) fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, -+ outfile); - break; - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PROCESS_FRAME - - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DESTROY -+vpx_img_free(&raw); - if(vpx_codec_destroy(&codec)) - die_codec(&codec, "Failed to destroy codec"); - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DESTROY -diff --git a/examples/twopass_encoder.txt b/examples/twopass_encoder.txt -index 4683bc7..2f81a90 100644 ---- a/examples/twopass_encoder.txt -+++ b/examples/twopass_encoder.txt -@@ -71,5 +71,17 @@ Pass Progress Reporting - It's sometimes helpful to see when each pass completes. - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ TWOPASS_LOOP_END - printf("Pass %d complete.\n", pass+1); -+ if(vpx_codec_destroy(&codec)) -+ die_codec(&codec, "Failed to destroy codec"); - } - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ TWOPASS_LOOP_END -+ -+ -+Clean-up -+----------------------------- -+Destruction of the encoder instance must be done on each pass. The -+raw image should be destroyed at the end as usual. -+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DESTROY -+vpx_img_free(&raw); -+free(stats.buf); -+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DESTROY -diff --git a/libmkv/EbmlIDs.h b/libmkv/EbmlIDs.h -index 3418e36..e3ce585 100644 ---- a/libmkv/EbmlIDs.h -+++ b/libmkv/EbmlIDs.h -@@ -1,16 +1,16 @@ --// Copyright (c) 2010 The WebM project authors. All Rights Reserved. --// --// Use of this source code is governed by a BSD-style license --// that can be found in the LICENSE file in the root of the source --// tree. An additional intellectual property rights grant can be found --// in the file PATENTS. All contributing project authors may --// be found in the AUTHORS file in the root of the source tree. -- -- -+/* -+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved. -+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. 
-+ */ - #ifndef MKV_DEFS_HPP - #define MKV_DEFS_HPP 1 - --//Commenting out values not available in webm, but available in matroska -+/* Commenting out values not available in webm, but available in matroska */ - - enum mkv - { -@@ -22,7 +22,7 @@ enum mkv - DocType = 0x4282, - DocTypeVersion = 0x4287, - DocTypeReadVersion = 0x4285, --// CRC_32 = 0xBF, -+/* CRC_32 = 0xBF, */ - Void = 0xEC, - SignatureSlot = 0x1B538667, - SignatureAlgo = 0x7E8A, -@@ -32,61 +32,61 @@ enum mkv - SignatureElements = 0x7E5B, - SignatureElementList = 0x7E7B, - SignedElement = 0x6532, -- //segment -+ /* segment */ - Segment = 0x18538067, -- //Meta Seek Information -+ /* Meta Seek Information */ - SeekHead = 0x114D9B74, - Seek = 0x4DBB, - SeekID = 0x53AB, - SeekPosition = 0x53AC, -- //Segment Information -+ /* Segment Information */ - Info = 0x1549A966, --// SegmentUID = 0x73A4, --// SegmentFilename = 0x7384, --// PrevUID = 0x3CB923, --// PrevFilename = 0x3C83AB, --// NextUID = 0x3EB923, --// NextFilename = 0x3E83BB, --// SegmentFamily = 0x4444, --// ChapterTranslate = 0x6924, --// ChapterTranslateEditionUID = 0x69FC, --// ChapterTranslateCodec = 0x69BF, --// ChapterTranslateID = 0x69A5, -+/* SegmentUID = 0x73A4, */ -+/* SegmentFilename = 0x7384, */ -+/* PrevUID = 0x3CB923, */ -+/* PrevFilename = 0x3C83AB, */ -+/* NextUID = 0x3EB923, */ -+/* NextFilename = 0x3E83BB, */ -+/* SegmentFamily = 0x4444, */ -+/* ChapterTranslate = 0x6924, */ -+/* ChapterTranslateEditionUID = 0x69FC, */ -+/* ChapterTranslateCodec = 0x69BF, */ -+/* ChapterTranslateID = 0x69A5, */ - TimecodeScale = 0x2AD7B1, - Segment_Duration = 0x4489, - DateUTC = 0x4461, --// Title = 0x7BA9, -+/* Title = 0x7BA9, */ - MuxingApp = 0x4D80, - WritingApp = 0x5741, -- //Cluster -+ /* Cluster */ - Cluster = 0x1F43B675, - Timecode = 0xE7, --// SilentTracks = 0x5854, --// SilentTrackNumber = 0x58D7, --// Position = 0xA7, -+/* SilentTracks = 0x5854, */ -+/* SilentTrackNumber = 0x58D7, */ -+/* Position = 0xA7, */ - PrevSize = 0xAB, - BlockGroup = 0xA0, - Block = 0xA1, --// BlockVirtual = 0xA2, --// BlockAdditions = 0x75A1, --// BlockMore = 0xA6, --// BlockAddID = 0xEE, --// BlockAdditional = 0xA5, -+/* BlockVirtual = 0xA2, */ -+/* BlockAdditions = 0x75A1, */ -+/* BlockMore = 0xA6, */ -+/* BlockAddID = 0xEE, */ -+/* BlockAdditional = 0xA5, */ - BlockDuration = 0x9B, --// ReferencePriority = 0xFA, -+/* ReferencePriority = 0xFA, */ - ReferenceBlock = 0xFB, --// ReferenceVirtual = 0xFD, --// CodecState = 0xA4, --// Slices = 0x8E, --// TimeSlice = 0xE8, -+/* ReferenceVirtual = 0xFD, */ -+/* CodecState = 0xA4, */ -+/* Slices = 0x8E, */ -+/* TimeSlice = 0xE8, */ - LaceNumber = 0xCC, --// FrameNumber = 0xCD, --// BlockAdditionID = 0xCB, --// MkvDelay = 0xCE, --// Cluster_Duration = 0xCF, -+/* FrameNumber = 0xCD, */ -+/* BlockAdditionID = 0xCB, */ -+/* MkvDelay = 0xCE, */ -+/* Cluster_Duration = 0xCF, */ - SimpleBlock = 0xA3, --// EncryptedBlock = 0xAF, -- //Track -+/* EncryptedBlock = 0xAF, */ -+ /* Track */ - Tracks = 0x1654AE6B, - TrackEntry = 0xAE, - TrackNumber = 0xD7, -@@ -96,28 +96,28 @@ enum mkv - FlagDefault = 0x88, - FlagForced = 0x55AA, - FlagLacing = 0x9C, --// MinCache = 0x6DE7, --// MaxCache = 0x6DF8, -+/* MinCache = 0x6DE7, */ -+/* MaxCache = 0x6DF8, */ - DefaultDuration = 0x23E383, --// TrackTimecodeScale = 0x23314F, --// TrackOffset = 0x537F, --// MaxBlockAdditionID = 0x55EE, -+/* TrackTimecodeScale = 0x23314F, */ -+/* TrackOffset = 0x537F, */ -+/* MaxBlockAdditionID = 0x55EE, */ - Name = 0x536E, - Language = 0x22B59C, - CodecID = 0x86, - CodecPrivate = 
0x63A2, - CodecName = 0x258688, --// AttachmentLink = 0x7446, --// CodecSettings = 0x3A9697, --// CodecInfoURL = 0x3B4040, --// CodecDownloadURL = 0x26B240, --// CodecDecodeAll = 0xAA, --// TrackOverlay = 0x6FAB, --// TrackTranslate = 0x6624, --// TrackTranslateEditionUID = 0x66FC, --// TrackTranslateCodec = 0x66BF, --// TrackTranslateTrackID = 0x66A5, -- //video -+/* AttachmentLink = 0x7446, */ -+/* CodecSettings = 0x3A9697, */ -+/* CodecInfoURL = 0x3B4040, */ -+/* CodecDownloadURL = 0x26B240, */ -+/* CodecDecodeAll = 0xAA, */ -+/* TrackOverlay = 0x6FAB, */ -+/* TrackTranslate = 0x6624, */ -+/* TrackTranslateEditionUID = 0x66FC, */ -+/* TrackTranslateCodec = 0x66BF, */ -+/* TrackTranslateTrackID = 0x66A5, */ -+ /* video */ - Video = 0xE0, - FlagInterlaced = 0x9A, - StereoMode = 0x53B8, -@@ -131,101 +131,101 @@ enum mkv - DisplayHeight = 0x54BA, - DisplayUnit = 0x54B2, - AspectRatioType = 0x54B3, --// ColourSpace = 0x2EB524, --// GammaValue = 0x2FB523, -+/* ColourSpace = 0x2EB524, */ -+/* GammaValue = 0x2FB523, */ - FrameRate = 0x2383E3, -- //end video -- //audio -+ /* end video */ -+ /* audio */ - Audio = 0xE1, - SamplingFrequency = 0xB5, - OutputSamplingFrequency = 0x78B5, - Channels = 0x9F, --// ChannelPositions = 0x7D7B, -+/* ChannelPositions = 0x7D7B, */ - BitDepth = 0x6264, -- //end audio -- //content encoding --// ContentEncodings = 0x6d80, --// ContentEncoding = 0x6240, --// ContentEncodingOrder = 0x5031, --// ContentEncodingScope = 0x5032, --// ContentEncodingType = 0x5033, --// ContentCompression = 0x5034, --// ContentCompAlgo = 0x4254, --// ContentCompSettings = 0x4255, --// ContentEncryption = 0x5035, --// ContentEncAlgo = 0x47e1, --// ContentEncKeyID = 0x47e2, --// ContentSignature = 0x47e3, --// ContentSigKeyID = 0x47e4, --// ContentSigAlgo = 0x47e5, --// ContentSigHashAlgo = 0x47e6, -- //end content encoding -- //Cueing Data -+ /* end audio */ -+ /* content encoding */ -+/* ContentEncodings = 0x6d80, */ -+/* ContentEncoding = 0x6240, */ -+/* ContentEncodingOrder = 0x5031, */ -+/* ContentEncodingScope = 0x5032, */ -+/* ContentEncodingType = 0x5033, */ -+/* ContentCompression = 0x5034, */ -+/* ContentCompAlgo = 0x4254, */ -+/* ContentCompSettings = 0x4255, */ -+/* ContentEncryption = 0x5035, */ -+/* ContentEncAlgo = 0x47e1, */ -+/* ContentEncKeyID = 0x47e2, */ -+/* ContentSignature = 0x47e3, */ -+/* ContentSigKeyID = 0x47e4, */ -+/* ContentSigAlgo = 0x47e5, */ -+/* ContentSigHashAlgo = 0x47e6, */ -+ /* end content encoding */ -+ /* Cueing Data */ - Cues = 0x1C53BB6B, - CuePoint = 0xBB, - CueTime = 0xB3, - CueTrackPositions = 0xB7, - CueTrack = 0xF7, - CueClusterPosition = 0xF1, -- CueBlockNumber = 0x5378, --// CueCodecState = 0xEA, --// CueReference = 0xDB, --// CueRefTime = 0x96, --// CueRefCluster = 0x97, --// CueRefNumber = 0x535F, --// CueRefCodecState = 0xEB, -- //Attachment --// Attachments = 0x1941A469, --// AttachedFile = 0x61A7, --// FileDescription = 0x467E, --// FileName = 0x466E, --// FileMimeType = 0x4660, --// FileData = 0x465C, --// FileUID = 0x46AE, --// FileReferral = 0x4675, -- //Chapters --// Chapters = 0x1043A770, --// EditionEntry = 0x45B9, --// EditionUID = 0x45BC, --// EditionFlagHidden = 0x45BD, --// EditionFlagDefault = 0x45DB, --// EditionFlagOrdered = 0x45DD, --// ChapterAtom = 0xB6, --// ChapterUID = 0x73C4, --// ChapterTimeStart = 0x91, --// ChapterTimeEnd = 0x92, --// ChapterFlagHidden = 0x98, --// ChapterFlagEnabled = 0x4598, --// ChapterSegmentUID = 0x6E67, --// ChapterSegmentEditionUID = 0x6EBC, --// ChapterPhysicalEquiv = 0x63C3, --// 
ChapterTrack = 0x8F, --// ChapterTrackNumber = 0x89, --// ChapterDisplay = 0x80, --// ChapString = 0x85, --// ChapLanguage = 0x437C, --// ChapCountry = 0x437E, --// ChapProcess = 0x6944, --// ChapProcessCodecID = 0x6955, --// ChapProcessPrivate = 0x450D, --// ChapProcessCommand = 0x6911, --// ChapProcessTime = 0x6922, --// ChapProcessData = 0x6933, -- //Tagging --// Tags = 0x1254C367, --// Tag = 0x7373, --// Targets = 0x63C0, --// TargetTypeValue = 0x68CA, --// TargetType = 0x63CA, --// Tagging_TrackUID = 0x63C5, --// Tagging_EditionUID = 0x63C9, --// Tagging_ChapterUID = 0x63C4, --// AttachmentUID = 0x63C6, --// SimpleTag = 0x67C8, --// TagName = 0x45A3, --// TagLanguage = 0x447A, --// TagDefault = 0x4484, --// TagString = 0x4487, --// TagBinary = 0x4485, -+ CueBlockNumber = 0x5378 -+/* CueCodecState = 0xEA, */ -+/* CueReference = 0xDB, */ -+/* CueRefTime = 0x96, */ -+/* CueRefCluster = 0x97, */ -+/* CueRefNumber = 0x535F, */ -+/* CueRefCodecState = 0xEB, */ -+ /* Attachment */ -+/* Attachments = 0x1941A469, */ -+/* AttachedFile = 0x61A7, */ -+/* FileDescription = 0x467E, */ -+/* FileName = 0x466E, */ -+/* FileMimeType = 0x4660, */ -+/* FileData = 0x465C, */ -+/* FileUID = 0x46AE, */ -+/* FileReferral = 0x4675, */ -+ /* Chapters */ -+/* Chapters = 0x1043A770, */ -+/* EditionEntry = 0x45B9, */ -+/* EditionUID = 0x45BC, */ -+/* EditionFlagHidden = 0x45BD, */ -+/* EditionFlagDefault = 0x45DB, */ -+/* EditionFlagOrdered = 0x45DD, */ -+/* ChapterAtom = 0xB6, */ -+/* ChapterUID = 0x73C4, */ -+/* ChapterTimeStart = 0x91, */ -+/* ChapterTimeEnd = 0x92, */ -+/* ChapterFlagHidden = 0x98, */ -+/* ChapterFlagEnabled = 0x4598, */ -+/* ChapterSegmentUID = 0x6E67, */ -+/* ChapterSegmentEditionUID = 0x6EBC, */ -+/* ChapterPhysicalEquiv = 0x63C3, */ -+/* ChapterTrack = 0x8F, */ -+/* ChapterTrackNumber = 0x89, */ -+/* ChapterDisplay = 0x80, */ -+/* ChapString = 0x85, */ -+/* ChapLanguage = 0x437C, */ -+/* ChapCountry = 0x437E, */ -+/* ChapProcess = 0x6944, */ -+/* ChapProcessCodecID = 0x6955, */ -+/* ChapProcessPrivate = 0x450D, */ -+/* ChapProcessCommand = 0x6911, */ -+/* ChapProcessTime = 0x6922, */ -+/* ChapProcessData = 0x6933, */ -+ /* Tagging */ -+/* Tags = 0x1254C367, */ -+/* Tag = 0x7373, */ -+/* Targets = 0x63C0, */ -+/* TargetTypeValue = 0x68CA, */ -+/* TargetType = 0x63CA, */ -+/* Tagging_TrackUID = 0x63C5, */ -+/* Tagging_EditionUID = 0x63C9, */ -+/* Tagging_ChapterUID = 0x63C4, */ -+/* AttachmentUID = 0x63C6, */ -+/* SimpleTag = 0x67C8, */ -+/* TagName = 0x45A3, */ -+/* TagLanguage = 0x447A, */ -+/* TagDefault = 0x4484, */ -+/* TagString = 0x4487, */ -+/* TagBinary = 0x4485, */ - }; - #endif -diff --git a/libmkv/EbmlWriter.c b/libmkv/EbmlWriter.c -index fbf2c66..d70f06e 100644 ---- a/libmkv/EbmlWriter.c -+++ b/libmkv/EbmlWriter.c -@@ -1,12 +1,12 @@ --// Copyright (c) 2010 The WebM project authors. All Rights Reserved. --// --// Use of this source code is governed by a BSD-style license --// that can be found in the LICENSE file in the root of the source --// tree. An additional intellectual property rights grant can be found --// in the file PATENTS. All contributing project authors may --// be found in the AUTHORS file in the root of the source tree. -- -- -+/* -+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved. -+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. 
All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. -+ */ - #include "EbmlWriter.h" - #include - #include -@@ -18,11 +18,13 @@ - #define LITERALU64(n) n##LLU - #endif - --void Ebml_WriteLen(EbmlGlobal *glob, long long val) -+void Ebml_WriteLen(EbmlGlobal *glob, int64_t val) - { -- //TODO check and make sure we are not > than 0x0100000000000000LLU -- unsigned char size = 8; //size in bytes to output -- unsigned long long minVal = LITERALU64(0x00000000000000ff); //mask to compare for byte size -+ /* TODO check and make sure we are not > than 0x0100000000000000LLU */ -+ unsigned char size = 8; /* size in bytes to output */ -+ -+ /* mask to compare for byte size */ -+ int64_t minVal = 0xff; - - for (size = 1; size < 8; size ++) - { -@@ -32,7 +34,7 @@ void Ebml_WriteLen(EbmlGlobal *glob, long long val) - minVal = (minVal << 7); - } - -- val |= (LITERALU64(0x000000000000080) << ((size - 1) * 7)); -+ val |= (((uint64_t)0x80) << ((size - 1) * 7)); - - Ebml_Serialize(glob, (void *) &val, sizeof(val), size); - } -@@ -40,23 +42,25 @@ void Ebml_WriteLen(EbmlGlobal *glob, long long val) - void Ebml_WriteString(EbmlGlobal *glob, const char *str) - { - const size_t size_ = strlen(str); -- const unsigned long long size = size_; -+ const uint64_t size = size_; - Ebml_WriteLen(glob, size); -- //TODO: it's not clear from the spec whether the nul terminator -- //should be serialized too. For now we omit the null terminator. -- Ebml_Write(glob, str, size); -+ /* TODO: it's not clear from the spec whether the nul terminator -+ * should be serialized too. For now we omit the null terminator. -+ */ -+ Ebml_Write(glob, str, (unsigned long)size); - } - - void Ebml_WriteUTF8(EbmlGlobal *glob, const wchar_t *wstr) - { - const size_t strlen = wcslen(wstr); - -- //TODO: it's not clear from the spec whether the nul terminator -- //should be serialized too. For now we include it. -- const unsigned long long size = strlen; -+ /* TODO: it's not clear from the spec whether the nul terminator -+ * should be serialized too. For now we include it. 
-+ */ -+ const uint64_t size = strlen; - - Ebml_WriteLen(glob, size); -- Ebml_Write(glob, wstr, size); -+ Ebml_Write(glob, wstr, (unsigned long)size); - } - - void Ebml_WriteID(EbmlGlobal *glob, unsigned long class_id) -@@ -85,12 +89,12 @@ void Ebml_SerializeUnsigned64(EbmlGlobal *glob, unsigned long class_id, uint64_t - - void Ebml_SerializeUnsigned(EbmlGlobal *glob, unsigned long class_id, unsigned long ui) - { -- unsigned char size = 8; //size in bytes to output -+ unsigned char size = 8; /* size in bytes to output */ - unsigned char sizeSerialized = 0; - unsigned long minVal; - - Ebml_WriteID(glob, class_id); -- minVal = 0x7fLU; //mask to compare for byte size -+ minVal = 0x7fLU; /* mask to compare for byte size */ - - for (size = 1; size < 4; size ++) - { -@@ -106,7 +110,7 @@ void Ebml_SerializeUnsigned(EbmlGlobal *glob, unsigned long class_id, unsigned l - Ebml_Serialize(glob, &sizeSerialized, sizeof(sizeSerialized), 1); - Ebml_Serialize(glob, &ui, sizeof(ui), size); - } --//TODO: perhaps this is a poor name for this id serializer helper function -+/* TODO: perhaps this is a poor name for this id serializer helper function */ - void Ebml_SerializeBinary(EbmlGlobal *glob, unsigned long class_id, unsigned long bin) - { - int size; -@@ -168,4 +172,4 @@ void Ebml_WriteVoid(EbmlGlobal *glob, unsigned long vSize) - } - } - --//TODO Serialize Date -+/* TODO Serialize Date */ -diff --git a/libmkv/EbmlWriter.h b/libmkv/EbmlWriter.h -index 324c9bc..b94f757 100644 ---- a/libmkv/EbmlWriter.h -+++ b/libmkv/EbmlWriter.h -@@ -1,26 +1,30 @@ -+/* -+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved. -+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. -+ */ - #ifndef EBMLWRITER_HPP - #define EBMLWRITER_HPP -- --// Copyright (c) 2010 The WebM project authors. All Rights Reserved. --// --// Use of this source code is governed by a BSD-style license --// that can be found in the LICENSE file in the root of the source --// tree. An additional intellectual property rights grant can be found --// in the file PATENTS. All contributing project authors may --// be found in the AUTHORS file in the root of the source tree. 
-- --//note: you must define write and serialize functions as well as your own EBML_GLOBAL --//These functions MUST be implemented - #include - #include "vpx/vpx_integer.h" - -+/* note: you must define write and serialize functions as well as your own -+ * EBML_GLOBAL -+ * -+ * These functions MUST be implemented -+ */ -+ - typedef struct EbmlGlobal EbmlGlobal; - void Ebml_Serialize(EbmlGlobal *glob, const void *, int, unsigned long); - void Ebml_Write(EbmlGlobal *glob, const void *, unsigned long); --///// - -+/*****/ - --void Ebml_WriteLen(EbmlGlobal *glob, long long val); -+void Ebml_WriteLen(EbmlGlobal *glob, int64_t val); - void Ebml_WriteString(EbmlGlobal *glob, const char *str); - void Ebml_WriteUTF8(EbmlGlobal *glob, const wchar_t *wstr); - void Ebml_WriteID(EbmlGlobal *glob, unsigned long class_id); -@@ -28,11 +32,11 @@ void Ebml_SerializeUnsigned64(EbmlGlobal *glob, unsigned long class_id, uint64_t - void Ebml_SerializeUnsigned(EbmlGlobal *glob, unsigned long class_id, unsigned long ui); - void Ebml_SerializeBinary(EbmlGlobal *glob, unsigned long class_id, unsigned long ui); - void Ebml_SerializeFloat(EbmlGlobal *glob, unsigned long class_id, double d); --//TODO make this more generic to signed -+/* TODO make this more generic to signed */ - void Ebml_WriteSigned16(EbmlGlobal *glob, short val); - void Ebml_SerializeString(EbmlGlobal *glob, unsigned long class_id, const char *s); - void Ebml_SerializeUTF8(EbmlGlobal *glob, unsigned long class_id, wchar_t *s); - void Ebml_SerializeData(EbmlGlobal *glob, unsigned long class_id, unsigned char *data, unsigned long data_length); - void Ebml_WriteVoid(EbmlGlobal *glob, unsigned long vSize); --//TODO need date function -+/* TODO need date function */ - #endif -diff --git a/libs.mk b/libs.mk -index e2ba737..4115dd8 100644 ---- a/libs.mk -+++ b/libs.mk -@@ -20,8 +20,16 @@ endif - CODEC_SRCS-yes += CHANGELOG - CODEC_SRCS-yes += libs.mk - -+# If this is a universal (fat) binary, then all the subarchitectures have -+# already been built and our job is to stitch them together. The -+# BUILD_LIBVPX variable indicates whether we should be building -+# (compiling, linking) the library. The LIPO_LIBVPX variable indicates -+# that we're stitching. 
-+$(eval $(if $(filter universal%,$(TOOLCHAIN)),LIPO_LIBVPX,BUILD_LIBVPX):=yes) -+ - include $(SRC_PATH_BARE)/vpx/vpx_codec.mk - CODEC_SRCS-yes += $(addprefix vpx/,$(call enabled,API_SRCS)) -+CODEC_DOC_SRCS += $(addprefix vpx/,$(call enabled,API_DOC_SRCS)) - - include $(SRC_PATH_BARE)/vpx_mem/vpx_mem.mk - CODEC_SRCS-yes += $(addprefix vpx_mem/,$(call enabled,MEM_SRCS)) -@@ -29,17 +37,17 @@ CODEC_SRCS-yes += $(addprefix vpx_mem/,$(call enabled,MEM_SRCS)) - include $(SRC_PATH_BARE)/vpx_scale/vpx_scale.mk - CODEC_SRCS-yes += $(addprefix vpx_scale/,$(call enabled,SCALE_SRCS)) - -+include $(SRC_PATH_BARE)/vpx_ports/vpx_ports.mk -+CODEC_SRCS-yes += $(addprefix vpx_ports/,$(call enabled,PORTS_SRCS)) -+ - - ifeq ($(CONFIG_VP8_ENCODER),yes) - VP8_PREFIX=vp8/ - include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8cx.mk - CODEC_SRCS-yes += $(addprefix $(VP8_PREFIX),$(call enabled,VP8_CX_SRCS)) - CODEC_EXPORTS-yes += $(addprefix $(VP8_PREFIX),$(VP8_CX_EXPORTS)) -- CODEC_SRCS-yes += $(VP8_PREFIX)vp8cx.mk vpx/vp8.h vpx/vp8cx.h -- CODEC_SRCS-$(ARCH_ARM) += $(VP8_PREFIX)vp8cx_arm.mk - INSTALL-LIBS-yes += include/vpx/vp8.h include/vpx/vp8cx.h - INSTALL_MAPS += include/vpx/% $(SRC_PATH_BARE)/$(VP8_PREFIX)/% -- CODEC_DOC_SRCS += vpx/vp8.h vpx/vp8cx.h - CODEC_DOC_SECTIONS += vp8 vp8_encoder - endif - -@@ -48,10 +56,8 @@ ifeq ($(CONFIG_VP8_DECODER),yes) - include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8dx.mk - CODEC_SRCS-yes += $(addprefix $(VP8_PREFIX),$(call enabled,VP8_DX_SRCS)) - CODEC_EXPORTS-yes += $(addprefix $(VP8_PREFIX),$(VP8_DX_EXPORTS)) -- CODEC_SRCS-yes += $(VP8_PREFIX)vp8dx.mk vpx/vp8.h vpx/vp8dx.h - INSTALL-LIBS-yes += include/vpx/vp8.h include/vpx/vp8dx.h - INSTALL_MAPS += include/vpx/% $(SRC_PATH_BARE)/$(VP8_PREFIX)/% -- CODEC_DOC_SRCS += vpx/vp8.h vpx/vp8dx.h - CODEC_DOC_SECTIONS += vp8 vp8_decoder - endif - -@@ -66,6 +72,7 @@ endif - - ifeq ($(CONFIG_MSVS),yes) - CODEC_LIB=$(if $(CONFIG_STATIC_MSVCRT),vpxmt,vpxmd) -+GTEST_LIB=$(if $(CONFIG_STATIC_MSVCRT),gtestmt,gtestmd) - # This variable uses deferred expansion intentionally, since the results of - # $(wildcard) may change during the course of the Make. - VS_PLATFORMS = $(foreach d,$(wildcard */Release/$(CODEC_LIB).lib),$(word 1,$(subst /, ,$(d)))) -@@ -82,29 +89,10 @@ INSTALL_MAPS += $(foreach p,$(VS_PLATFORMS),$(LIBSUBDIR)/$(p)/% $(p)/Release/%) - INSTALL_MAPS += $(foreach p,$(VS_PLATFORMS),$(LIBSUBDIR)/$(p)/% $(p)/Debug/%) - endif - --# If this is a universal (fat) binary, then all the subarchitectures have --# already been built and our job is to stitch them together. The --# BUILD_LIBVPX variable indicates whether we should be building --# (compiling, linking) the library. The LIPO_LIBVPX variable indicates --# that we're stitching. 
--$(eval $(if $(filter universal%,$(TOOLCHAIN)),LIPO_LIBVPX,BUILD_LIBVPX):=yes) -- - CODEC_SRCS-$(BUILD_LIBVPX) += build/make/version.sh - CODEC_SRCS-$(BUILD_LIBVPX) += build/make/rtcd.sh --CODEC_SRCS-$(BUILD_LIBVPX) += vpx/vpx_integer.h --CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/asm_offsets.h --CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/vpx_timer.h --CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/mem.h - CODEC_SRCS-$(BUILD_LIBVPX) += $(BUILD_PFX)vpx_config.c - INSTALL-SRCS-no += $(BUILD_PFX)vpx_config.c --ifeq ($(ARCH_X86)$(ARCH_X86_64),yes) --CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/emms.asm --CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/x86.h --CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/x86_abi_support.asm --CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/x86_cpuid.c --endif --CODEC_SRCS-$(ARCH_ARM) += vpx_ports/arm_cpudetect.c --CODEC_SRCS-$(ARCH_ARM) += vpx_ports/arm.h - CODEC_EXPORTS-$(BUILD_LIBVPX) += vpx/exports_com - CODEC_EXPORTS-$(CONFIG_ENCODERS) += vpx/exports_enc - CODEC_EXPORTS-$(CONFIG_DECODERS) += vpx/exports_dec -@@ -146,7 +134,7 @@ ifeq ($(CONFIG_MSVS),yes) - obj_int_extract.vcproj: $(SRC_PATH_BARE)/build/make/obj_int_extract.c - @cp $(SRC_PATH_BARE)/build/x86-msvs/obj_int_extract.bat . - @echo " [CREATE] $@" -- $(SRC_PATH_BARE)/build/make/gen_msvs_proj.sh \ -+ $(qexec)$(SRC_PATH_BARE)/build/make/gen_msvs_proj.sh \ - --exe \ - --target=$(TOOLCHAIN) \ - --name=obj_int_extract \ -@@ -162,14 +150,14 @@ PROJECTS-$(BUILD_LIBVPX) += obj_int_extract.bat - - vpx.def: $(call enabled,CODEC_EXPORTS) - @echo " [CREATE] $@" -- $(SRC_PATH_BARE)/build/make/gen_msvs_def.sh\ -+ $(qexec)$(SRC_PATH_BARE)/build/make/gen_msvs_def.sh\ - --name=vpx\ - --out=$@ $^ - CLEAN-OBJS += vpx.def - - vpx.vcproj: $(CODEC_SRCS) vpx.def - @echo " [CREATE] $@" -- $(SRC_PATH_BARE)/build/make/gen_msvs_proj.sh \ -+ $(qexec)$(SRC_PATH_BARE)/build/make/gen_msvs_proj.sh \ - --lib \ - --target=$(TOOLCHAIN) \ - $(if $(CONFIG_STATIC_MSVCRT),--static-crt) \ -@@ -242,6 +230,7 @@ vpx.pc: config.mk libs.mk - $(qexec)echo 'Requires:' >> $@ - $(qexec)echo 'Conflicts:' >> $@ - $(qexec)echo 'Libs: -L$${libdir} -lvpx' >> $@ -+ $(qexec)echo 'Libs.private: -lm -lpthread' >> $@ - $(qexec)echo 'Cflags: -I$${includedir}' >> $@ - INSTALL-LIBS-yes += $(LIBSUBDIR)/pkgconfig/vpx.pc - INSTALL_MAPS += $(LIBSUBDIR)/pkgconfig/%.pc %.pc -@@ -284,38 +273,44 @@ OFFSET_PATTERN:='^[a-zA-Z0-9_]* EQU' - - ifeq ($(filter icc gcc,$(TGT_CC)), $(TGT_CC)) - $(BUILD_PFX)asm_com_offsets.asm: $(BUILD_PFX)$(VP8_PREFIX)common/asm_com_offsets.c.S -- LC_ALL=C grep $(OFFSET_PATTERN) $< | tr -d '$$\#' $(ADS2GAS) > $@ -+ @echo " [CREATE] $@" -+ $(qexec)LC_ALL=C grep $(OFFSET_PATTERN) $< | tr -d '$$\#' $(ADS2GAS) > $@ - $(BUILD_PFX)$(VP8_PREFIX)common/asm_com_offsets.c.S: $(VP8_PREFIX)common/asm_com_offsets.c - CLEAN-OBJS += $(BUILD_PFX)asm_com_offsets.asm $(BUILD_PFX)$(VP8_PREFIX)common/asm_com_offsets.c.S - - $(BUILD_PFX)asm_enc_offsets.asm: $(BUILD_PFX)$(VP8_PREFIX)encoder/asm_enc_offsets.c.S -- LC_ALL=C grep $(OFFSET_PATTERN) $< | tr -d '$$\#' $(ADS2GAS) > $@ -+ @echo " [CREATE] $@" -+ $(qexec)LC_ALL=C grep $(OFFSET_PATTERN) $< | tr -d '$$\#' $(ADS2GAS) > $@ - $(BUILD_PFX)$(VP8_PREFIX)encoder/asm_enc_offsets.c.S: $(VP8_PREFIX)encoder/asm_enc_offsets.c - CLEAN-OBJS += $(BUILD_PFX)asm_enc_offsets.asm $(BUILD_PFX)$(VP8_PREFIX)encoder/asm_enc_offsets.c.S - - $(BUILD_PFX)asm_dec_offsets.asm: $(BUILD_PFX)$(VP8_PREFIX)decoder/asm_dec_offsets.c.S -- LC_ALL=C grep $(OFFSET_PATTERN) $< | tr -d '$$\#' $(ADS2GAS) > $@ -+ @echo " [CREATE] $@" -+ $(qexec)LC_ALL=C grep $(OFFSET_PATTERN) $< | tr -d 
'$$\#' $(ADS2GAS) > $@ - $(BUILD_PFX)$(VP8_PREFIX)decoder/asm_dec_offsets.c.S: $(VP8_PREFIX)decoder/asm_dec_offsets.c - CLEAN-OBJS += $(BUILD_PFX)asm_dec_offsets.asm $(BUILD_PFX)$(VP8_PREFIX)decoder/asm_dec_offsets.c.S - else - ifeq ($(filter rvct,$(TGT_CC)), $(TGT_CC)) - asm_com_offsets.asm: obj_int_extract - asm_com_offsets.asm: $(VP8_PREFIX)common/asm_com_offsets.c.o -- ./obj_int_extract rvds $< $(ADS2GAS) > $@ -+ @echo " [CREATE] $@" -+ $(qexec)./obj_int_extract rvds $< $(ADS2GAS) > $@ - OBJS-yes += $(VP8_PREFIX)common/asm_com_offsets.c.o - CLEAN-OBJS += asm_com_offsets.asm - $(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)asm_com_offsets.asm - - asm_enc_offsets.asm: obj_int_extract - asm_enc_offsets.asm: $(VP8_PREFIX)encoder/asm_enc_offsets.c.o -- ./obj_int_extract rvds $< $(ADS2GAS) > $@ -+ @echo " [CREATE] $@" -+ $(qexec)./obj_int_extract rvds $< $(ADS2GAS) > $@ - OBJS-yes += $(VP8_PREFIX)encoder/asm_enc_offsets.c.o - CLEAN-OBJS += asm_enc_offsets.asm - $(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)asm_enc_offsets.asm - - asm_dec_offsets.asm: obj_int_extract - asm_dec_offsets.asm: $(VP8_PREFIX)decoder/asm_dec_offsets.c.o -- ./obj_int_extract rvds $< $(ADS2GAS) > $@ -+ @echo " [CREATE] $@" -+ $(qexec)./obj_int_extract rvds $< $(ADS2GAS) > $@ - OBJS-yes += $(VP8_PREFIX)decoder/asm_dec_offsets.c.o - CLEAN-OBJS += asm_dec_offsets.asm - $(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)asm_dec_offsets.asm -@@ -328,7 +323,6 @@ CLEAN-OBJS += $(BUILD_PFX)vpx_version.h - # - # Rule to generate runtime cpu detection files - # --$(OBJS-yes:.o=.d): $(BUILD_PFX)vpx_rtcd.h - $(BUILD_PFX)vpx_rtcd.h: $(SRC_PATH_BARE)/$(sort $(filter %rtcd_defs.sh,$(CODEC_SRCS))) - @echo " [CREATE] $@" - $(qexec)$(SRC_PATH_BARE)/build/make/rtcd.sh --arch=$(TGT_ISA) \ -@@ -337,25 +331,43 @@ $(BUILD_PFX)vpx_rtcd.h: $(SRC_PATH_BARE)/$(sort $(filter %rtcd_defs.sh,$(CODEC_S - $(RTCD_OPTIONS) $^ > $@ - CLEAN-OBJS += $(BUILD_PFX)vpx_rtcd.h - --CODEC_DOC_SRCS += vpx/vpx_codec.h \ -- vpx/vpx_decoder.h \ -- vpx/vpx_encoder.h \ -- vpx/vpx_image.h -- - ## - ## libvpx test directives - ## -- - ifeq ($(CONFIG_UNIT_TESTS),yes) -+LIBVPX_TEST_DATA_PATH ?= . 
-+ -+include $(SRC_PATH_BARE)/test/test.mk -+LIBVPX_TEST_SRCS=$(addprefix test/,$(call enabled,LIBVPX_TEST_SRCS)) -+LIBVPX_TEST_BINS=./test_libvpx -+LIBVPX_TEST_DATA=$(addprefix $(LIBVPX_TEST_DATA_PATH)/,\ -+ $(call enabled,LIBVPX_TEST_DATA)) -+libvpx_test_data_url=http://downloads.webmproject.org/test_data/libvpx/$(1) -+ -+$(LIBVPX_TEST_DATA): -+ @echo " [DOWNLOAD] $@" -+ $(qexec)trap 'rm -f $@' INT TERM &&\ -+ curl -L -o $@ $(call libvpx_test_data_url,$(@F)) -+ -+testdata:: $(LIBVPX_TEST_DATA) -+ $(qexec)if [ -x "$$(which sha1sum)" ]; then\ -+ echo "Checking test data:";\ -+ if [ -n "$(LIBVPX_TEST_DATA)" ]; then\ -+ for f in $(call enabled,LIBVPX_TEST_DATA); do\ -+ grep $$f $(SRC_PATH_BARE)/test/test-data.sha1 |\ -+ (cd $(LIBVPX_TEST_DATA_PATH); sha1sum -c);\ -+ done; \ -+ fi; \ -+ else\ -+ echo "Skipping test data integrity check, sha1sum not found.";\ -+ fi -+ - ifeq ($(CONFIG_EXTERNAL_BUILD),yes) - ifeq ($(CONFIG_MSVS),yes) - --LIBVPX_TEST_SRCS=$(filter %_test.cc,$(call enabled,CODEC_SRCS)) --LIBVPX_TEST_BINS=$(sort $(LIBVPX_TEST_SRCS:.cc.o=)) -- - gtest.vcproj: $(SRC_PATH_BARE)/third_party/googletest/src/src/gtest-all.cc - @echo " [CREATE] $@" -- $(SRC_PATH_BARE)/build/make/gen_msvs_proj.sh \ -+ $(qexec)$(SRC_PATH_BARE)/build/make/gen_msvs_proj.sh \ - --lib \ - --target=$(TOOLCHAIN) \ - $(if $(CONFIG_STATIC_MSVCRT),--static-crt) \ -@@ -368,27 +380,22 @@ gtest.vcproj: $(SRC_PATH_BARE)/third_party/googletest/src/src/gtest-all.cc - - PROJECTS-$(CONFIG_MSVS) += gtest.vcproj - --define unit_test_vcproj_template --$(notdir $(1:.cc=.vcproj)): $(SRC_PATH_BARE)/$(1) -- @echo " [vcproj] $$@" -- $$(SRC_PATH_BARE)/build/make/gen_msvs_proj.sh\ -- --exe\ -- --target=$$(TOOLCHAIN)\ -- --name=$(notdir $(1:.cc=))\ -- --ver=$$(CONFIG_VS_VERSION)\ -- $$(if $$(CONFIG_STATIC_MSVCRT),--static-crt) \ -- --out=$$@ $$(INTERNAL_CFLAGS) $$(CFLAGS) \ -+test_libvpx.vcproj: $(LIBVPX_TEST_SRCS) -+ @echo " [CREATE] $@" -+ $(qexec)$(SRC_PATH_BARE)/build/make/gen_msvs_proj.sh \ -+ --exe \ -+ --target=$(TOOLCHAIN) \ -+ --name=test_libvpx \ -+ --proj-guid=CD837F5F-52D8-4314-A370-895D614166A7 \ -+ --ver=$(CONFIG_VS_VERSION) \ -+ $(if $(CONFIG_STATIC_MSVCRT),--static-crt) \ -+ --out=$@ $(INTERNAL_CFLAGS) $(CFLAGS) \ - -I. -I"$(SRC_PATH_BARE)/third_party/googletest/src/include" \ -- -L. -lvpxmt -lwinmm -lgtestmt $$^ --endef -+ -L. 
-l$(CODEC_LIB) -lwinmm -l$(GTEST_LIB) $^ - --$(foreach proj,$(LIBVPX_TEST_BINS),\ -- $(eval $(call unit_test_vcproj_template,$(proj)))) -+PROJECTS-$(CONFIG_MSVS) += test_libvpx.vcproj - --PROJECTS-$(CONFIG_MSVS) += $(foreach proj,$(LIBVPX_TEST_BINS),\ -- $(notdir $(proj:.cc=.vcproj))) -- --test:: -+test:: testdata - @set -e; for t in $(addprefix Win32/Release/,$(notdir $(LIBVPX_TEST_BINS:.cc=.exe))); do $$t; done - endif - else -@@ -396,28 +403,35 @@ else - include $(SRC_PATH_BARE)/third_party/googletest/gtest.mk - GTEST_SRCS := $(addprefix third_party/googletest/src/,$(call enabled,GTEST_SRCS)) - GTEST_OBJS=$(call objs,$(GTEST_SRCS)) --$(GTEST_OBJS) $(GTEST_OBJS:.o=.d): CFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src --$(GTEST_OBJS) $(GTEST_OBJS:.o=.d): CFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src/include -+$(GTEST_OBJS) $(GTEST_OBJS:.o=.d): CXXFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src -+$(GTEST_OBJS) $(GTEST_OBJS:.o=.d): CXXFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src/include - OBJS-$(BUILD_LIBVPX) += $(GTEST_OBJS) - LIBS-$(BUILD_LIBVPX) += $(BUILD_PFX)libgtest.a $(BUILD_PFX)libgtest_g.a - $(BUILD_PFX)libgtest_g.a: $(GTEST_OBJS) - --LIBVPX_TEST_SRCS=$(filter %_test.cc,$(call enabled,CODEC_SRCS)) --LIBVPX_TEST_OBJS=$(call objs,$(LIBVPX_TEST_SRCS)) --$(LIBVPX_TEST_OBJS) $(LIBVPX_TEST_OBJS:.o=.d): CFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src --$(LIBVPX_TEST_OBJS) $(LIBVPX_TEST_OBJS:.o=.d): CFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src/include --LIBVPX_TEST_BINS=$(sort $(LIBVPX_TEST_OBJS:.cc.o=)) -+LIBVPX_TEST_OBJS=$(sort $(call objs,$(LIBVPX_TEST_SRCS))) -+$(LIBVPX_TEST_OBJS) $(LIBVPX_TEST_OBJS:.o=.d): CXXFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src -+$(LIBVPX_TEST_OBJS) $(LIBVPX_TEST_OBJS:.o=.d): CXXFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src/include - OBJS-$(BUILD_LIBVPX) += $(LIBVPX_TEST_OBJS) -+BINS-$(BUILD_LIBVPX) += $(LIBVPX_TEST_BINS) -+ -+# Install test sources only if codec source is included -+INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += $(patsubst $(SRC_PATH_BARE)/%,%,\ -+ $(shell find $(SRC_PATH_BARE)/third_party/googletest -type f)) -+INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += $(LIBVPX_TEST_SRCS) - -+CODEC_LIB=$(if $(CONFIG_DEBUG_LIBS),vpx_g,vpx) -+CODEC_LIB_SUF=$(if $(CONFIG_SHARED),.so,.a) - $(foreach bin,$(LIBVPX_TEST_BINS),\ -- $(if $(BUILD_LIBVPX),$(eval $(bin): libvpx.a libgtest.a ))\ -+ $(if $(BUILD_LIBVPX),$(eval $(bin): \ -+ lib$(CODEC_LIB)$(CODEC_LIB_SUF) libgtest.a ))\ - $(if $(BUILD_LIBVPX),$(eval $(call linkerxx_template,$(bin),\ -- $(bin).cc.o \ -+ $(LIBVPX_TEST_OBJS) \ - -L. -lvpx -lgtest -lpthread -lm)\ - )))\ - $(if $(LIPO_LIBS),$(eval $(call lipo_bin_template,$(bin))))\ - --test:: $(LIBVPX_TEST_BINS) -+test:: $(LIBVPX_TEST_BINS) testdata - @set -e; for t in $(LIBVPX_TEST_BINS); do $$t; done - - endif -@@ -435,3 +449,6 @@ libs.doxy: $(CODEC_DOC_SRCS) - @echo "PREDEFINED = VPX_CODEC_DISABLE_COMPAT" >> $@ - @echo "INCLUDE_PATH += ." 
>> $@; - @echo "ENABLED_SECTIONS += $(sort $(CODEC_DOC_SECTIONS))" >> $@ -+ -+## Generate vpx_rtcd.h for all objects -+$(OBJS-yes:.o=.d): $(BUILD_PFX)vpx_rtcd.h -diff --git a/nestegg/src/nestegg.c b/nestegg/src/nestegg.c -index 63a0e83..cc87788 100644 ---- a/nestegg/src/nestegg.c -+++ b/nestegg/src/nestegg.c -@@ -1272,7 +1272,7 @@ ne_read_block(nestegg * ctx, uint64_t block_id, uint64_t block_size, nestegg_pac - if (total > block_size) - return -1; - -- entry = ne_find_track_entry(ctx, track - 1); -+ entry = ne_find_track_entry(ctx, (unsigned int)(track - 1)); - if (!entry) - return -1; - -@@ -1291,7 +1291,7 @@ ne_read_block(nestegg * ctx, uint64_t block_id, uint64_t block_size, nestegg_pac - - pkt = ne_alloc(sizeof(*pkt)); - pkt->track = track - 1; -- pkt->timecode = abs_timecode * tc_scale * track_scale; -+ pkt->timecode = (uint64_t)(abs_timecode * tc_scale * track_scale); - - ctx->log(ctx, NESTEGG_LOG_DEBUG, "%sblock t %lld pts %f f %llx frames: %llu", - block_id == ID_BLOCK ? "" : "simple", pkt->track, pkt->timecode / 1e9, flags, frames); -@@ -1774,35 +1774,35 @@ nestegg_track_video_params(nestegg * ctx, unsigned int track, - - if (ne_get_uint(entry->video.pixel_width, &value) != 0) - return -1; -- params->width = value; -+ params->width = (unsigned int)value; - - if (ne_get_uint(entry->video.pixel_height, &value) != 0) - return -1; -- params->height = value; -+ params->height = (unsigned int)value; - - value = 0; - ne_get_uint(entry->video.pixel_crop_bottom, &value); -- params->crop_bottom = value; -+ params->crop_bottom = (unsigned int)value; - - value = 0; - ne_get_uint(entry->video.pixel_crop_top, &value); -- params->crop_top = value; -+ params->crop_top = (unsigned int)value; - - value = 0; - ne_get_uint(entry->video.pixel_crop_left, &value); -- params->crop_left = value; -+ params->crop_left = (unsigned int)value; - - value = 0; - ne_get_uint(entry->video.pixel_crop_right, &value); -- params->crop_right = value; -+ params->crop_right = (unsigned int)value; - - value = params->width; - ne_get_uint(entry->video.display_width, &value); -- params->display_width = value; -+ params->display_width = (unsigned int)value; - - value = params->height; - ne_get_uint(entry->video.display_height, &value); -- params->display_height = value; -+ params->display_height = (unsigned int)value; - - return 0; - } -@@ -1828,11 +1828,11 @@ nestegg_track_audio_params(nestegg * ctx, unsigned int track, - - value = 1; - ne_get_uint(entry->audio.channels, &value); -- params->channels = value; -+ params->channels = (unsigned int)value; - - value = 16; - ne_get_uint(entry->audio.bit_depth, &value); -- params->depth = value; -+ params->depth = (unsigned int)value; - - return 0; - } -@@ -1888,7 +1888,7 @@ nestegg_free_packet(nestegg_packet * pkt) - int - nestegg_packet_track(nestegg_packet * pkt, unsigned int * track) - { -- *track = pkt->track; -+ *track = (unsigned int)pkt->track; - return 0; - } - -diff --git a/solution.mk b/solution.mk -index 2de1d8d..948305f 100644 ---- a/solution.mk -+++ b/solution.mk -@@ -8,18 +8,19 @@ - ## be found in the AUTHORS file in the root of the source tree. 
- ## - -+# libvpx reverse dependencies (targets that depend on libvpx) -+VPX_NONDEPS=$(addsuffix .vcproj,vpx gtest obj_int_extract) -+VPX_RDEPS=$(foreach vcp,\ -+ $(filter-out $(VPX_NONDEPS),$^), --dep=$(vcp:.vcproj=):vpx) - - vpx.sln: $(wildcard *.vcproj) - @echo " [CREATE] $@" - $(SRC_PATH_BARE)/build/make/gen_msvs_sln.sh \ -- $(if $(filter %vpx.vcproj,$^),\ -- $(foreach vcp,$(filter-out %vpx.vcproj %gtest.vcproj %obj_int_extract.vcproj,$^),\ -- --dep=$(vcp:.vcproj=):vpx) \ -- $(foreach vcp,$(filter %_test.vcproj,$^),\ -- --dep=$(vcp:.vcproj=):gtest)) \ -- --dep=vpx:obj_int_extract \ -- --ver=$(CONFIG_VS_VERSION)\ -- --out=$@ $^ -+ $(if $(filter vpx.vcproj,$^),$(VPX_RDEPS)) \ -+ --dep=vpx:obj_int_extract \ -+ --dep=test_libvpx:gtest \ -+ --ver=$(CONFIG_VS_VERSION)\ -+ --out=$@ $^ - vpx.sln.mk: vpx.sln - @true - -diff --git a/test/acm_random.h b/test/acm_random.h -new file mode 100644 -index 0000000..514894e ---- /dev/null -+++ b/test/acm_random.h -@@ -0,0 +1,53 @@ -+/* -+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. -+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. -+ */ -+ -+#ifndef LIBVPX_TEST_ACM_RANDOM_H_ -+#define LIBVPX_TEST_ACM_RANDOM_H_ -+ -+#include -+ -+#include "vpx/vpx_integer.h" -+ -+namespace libvpx_test { -+ -+class ACMRandom { -+ public: -+ ACMRandom() { -+ Reset(DeterministicSeed()); -+ } -+ -+ explicit ACMRandom(int seed) { -+ Reset(seed); -+ } -+ -+ void Reset(int seed) { -+ srand(seed); -+ } -+ -+ uint8_t Rand8(void) { -+ return (rand() >> 8) & 0xff; -+ } -+ -+ int PseudoUniform(int range) { -+ return (rand() >> 8) % range; -+ } -+ -+ int operator()(int n) { -+ return PseudoUniform(n); -+ } -+ -+ static int DeterministicSeed(void) { -+ return 0xbaba; -+ } -+}; -+ -+} // namespace libvpx_test -+ -+#endif // LIBVPX_TEST_ACM_RANDOM_H_ -diff --git a/test/altref_test.cc b/test/altref_test.cc -new file mode 100644 -index 0000000..ca05577 ---- /dev/null -+++ b/test/altref_test.cc -@@ -0,0 +1,71 @@ -+/* -+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. -+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. -+ */ -+#include "third_party/googletest/src/include/gtest/gtest.h" -+#include "test/encode_test_driver.h" -+#include "test/i420_video_source.h" -+ -+namespace { -+ -+// lookahead range: [kLookAheadMin, kLookAheadMax). 
-+const int kLookAheadMin = 5; -+const int kLookAheadMax = 26; -+ -+class AltRefTest : public libvpx_test::EncoderTest, -+ public ::testing::TestWithParam { -+ protected: -+ AltRefTest() : altref_count_(0) {} -+ virtual ~AltRefTest() {} -+ -+ virtual void SetUp() { -+ InitializeConfig(); -+ SetMode(libvpx_test::kTwoPassGood); -+ } -+ -+ virtual void BeginPassHook(unsigned int pass) { -+ altref_count_ = 0; -+ } -+ -+ virtual bool Continue() const { -+ return !HasFatalFailure() && !abort_; -+ } -+ -+ virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video, -+ libvpx_test::Encoder *encoder) { -+ if (video->frame() == 1) { -+ encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1); -+ encoder->Control(VP8E_SET_CPUUSED, 3); -+ } -+ } -+ -+ virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) { -+ if (pkt->data.frame.flags & VPX_FRAME_IS_INVISIBLE) ++altref_count_; -+ } -+ -+ int altref_count() const { return altref_count_; } -+ -+ private: -+ int altref_count_; -+}; -+ -+TEST_P(AltRefTest, MonotonicTimestamps) { -+ const vpx_rational timebase = { 33333333, 1000000000 }; -+ cfg_.g_timebase = timebase; -+ cfg_.rc_target_bitrate = 1000; -+ cfg_.g_lag_in_frames = GetParam(); -+ -+ libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, -+ timebase.den, timebase.num, 0, 30); -+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); -+ EXPECT_GE(altref_count(), 1); -+} -+ -+INSTANTIATE_TEST_CASE_P(NonZeroLag, AltRefTest, -+ ::testing::Range(kLookAheadMin, kLookAheadMax)); -+} // namespace -diff --git a/test/boolcoder_test.cc b/test/boolcoder_test.cc -new file mode 100644 -index 0000000..4e21be8 ---- /dev/null -+++ b/test/boolcoder_test.cc -@@ -0,0 +1,90 @@ -+/* -+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. -+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. -+ */ -+ -+extern "C" { -+#include "vp8/encoder/boolhuff.h" -+#include "vp8/decoder/dboolhuff.h" -+} -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "test/acm_random.h" -+#include "third_party/googletest/src/include/gtest/gtest.h" -+#include "vpx/vpx_integer.h" -+ -+namespace { -+const int num_tests = 10; -+} // namespace -+ -+using libvpx_test::ACMRandom; -+ -+TEST(VP8, TestBitIO) { -+ ACMRandom rnd(ACMRandom::DeterministicSeed()); -+ for (int n = 0; n < num_tests; ++n) { -+ for (int method = 0; method <= 7; ++method) { // we generate various proba -+ const int bits_to_test = 1000; -+ uint8_t probas[bits_to_test]; -+ -+ for (int i = 0; i < bits_to_test; ++i) { -+ const int parity = i & 1; -+ probas[i] = -+ (method == 0) ? 0 : (method == 1) ? 255 : -+ (method == 2) ? 128 : -+ (method == 3) ? rnd.Rand8() : -+ (method == 4) ? (parity ? 0 : 255) : -+ // alternate between low and high proba: -+ (method == 5) ? (parity ? rnd(128) : 255 - rnd(128)) : -+ (method == 6) ? -+ (parity ? rnd(64) : 255 - rnd(64)) : -+ (parity ? rnd(32) : 255 - rnd(32)); -+ } -+ for (int bit_method = 0; bit_method <= 3; ++bit_method) { -+ const int random_seed = 6432; -+ const int buffer_size = 10000; -+ ACMRandom bit_rnd(random_seed); -+ BOOL_CODER bw; -+ uint8_t bw_buffer[buffer_size]; -+ vp8_start_encode(&bw, bw_buffer, bw_buffer + buffer_size); -+ -+ int bit = (bit_method == 0) ? 0 : (bit_method == 1) ? 
1 : 0; -+ for (int i = 0; i < bits_to_test; ++i) { -+ if (bit_method == 2) { -+ bit = (i & 1); -+ } else if (bit_method == 3) { -+ bit = bit_rnd(2); -+ } -+ vp8_encode_bool(&bw, bit, static_cast(probas[i])); -+ } -+ -+ vp8_stop_encode(&bw); -+ -+ BOOL_DECODER br; -+ vp8dx_start_decode(&br, bw_buffer, buffer_size); -+ bit_rnd.Reset(random_seed); -+ for (int i = 0; i < bits_to_test; ++i) { -+ if (bit_method == 2) { -+ bit = (i & 1); -+ } else if (bit_method == 3) { -+ bit = bit_rnd(2); -+ } -+ GTEST_ASSERT_EQ(vp8dx_decode_bool(&br, probas[i]), bit) -+ << "pos: "<< i << " / " << bits_to_test -+ << " bit_method: " << bit_method -+ << " method: " << method; -+ } -+ } -+ } -+ } -+} -diff --git a/test/config_test.cc b/test/config_test.cc -new file mode 100644 -index 0000000..c4da46e ---- /dev/null -+++ b/test/config_test.cc -@@ -0,0 +1,61 @@ -+/* -+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. -+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. -+ */ -+#include "third_party/googletest/src/include/gtest/gtest.h" -+#include "test/encode_test_driver.h" -+#include "test/video_source.h" -+ -+namespace { -+ -+class ConfigTest : public ::libvpx_test::EncoderTest, -+ public ::testing::TestWithParam { -+ public: -+ ConfigTest() : frame_count_in_(0), frame_count_out_(0), frame_count_max_(0) {} -+ -+ protected: -+ virtual void SetUp() { -+ InitializeConfig(); -+ SetMode(GetParam()); -+ } -+ -+ virtual void BeginPassHook(unsigned int /*pass*/) { -+ frame_count_in_ = 0; -+ frame_count_out_ = 0; -+ } -+ -+ virtual void PreEncodeFrameHook(libvpx_test::VideoSource* /*video*/) { -+ ++frame_count_in_; -+ abort_ |= (frame_count_in_ >= frame_count_max_); -+ } -+ -+ virtual void FramePktHook(const vpx_codec_cx_pkt_t* /*pkt*/) { -+ ++frame_count_out_; -+ } -+ -+ virtual bool Continue() const { -+ return !HasFatalFailure() && !abort_; -+ } -+ -+ unsigned int frame_count_in_; -+ unsigned int frame_count_out_; -+ unsigned int frame_count_max_; -+}; -+ -+TEST_P(ConfigTest, LagIsDisabled) { -+ frame_count_max_ = 2; -+ cfg_.g_lag_in_frames = 15; -+ -+ libvpx_test::DummyVideoSource video; -+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); -+ -+ EXPECT_EQ(frame_count_in_, frame_count_out_); -+} -+ -+INSTANTIATE_TEST_CASE_P(OnePassModes, ConfigTest, ONE_PASS_TEST_MODES); -+} // namespace -diff --git a/test/cq_test.cc b/test/cq_test.cc -new file mode 100644 -index 0000000..42ee2a2 ---- /dev/null -+++ b/test/cq_test.cc -@@ -0,0 +1,106 @@ -+/* -+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. -+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. -+ */ -+#include -+#include "third_party/googletest/src/include/gtest/gtest.h" -+#include "test/encode_test_driver.h" -+#include "test/i420_video_source.h" -+ -+// CQ level range: [kCQLevelMin, kCQLevelMax). 
-+const int kCQLevelMin = 4; -+const int kCQLevelMax = 63; -+const int kCQLevelStep = 8; -+const int kCQTargetBitrate = 2000; -+ -+namespace { -+ -+class CQTest : public libvpx_test::EncoderTest, -+ public ::testing::TestWithParam { -+ protected: -+ CQTest() : cq_level_(GetParam()) { init_flags_ = VPX_CODEC_USE_PSNR; } -+ virtual ~CQTest() {} -+ -+ virtual void SetUp() { -+ InitializeConfig(); -+ SetMode(libvpx_test::kTwoPassGood); -+ } -+ -+ virtual void BeginPassHook(unsigned int /*pass*/) { -+ file_size_ = 0; -+ psnr_ = 0.0; -+ n_frames_ = 0; -+ } -+ -+ virtual bool Continue() const { -+ return !HasFatalFailure() && !abort_; -+ } -+ -+ virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video, -+ libvpx_test::Encoder *encoder) { -+ if (video->frame() == 1) { -+ if (cfg_.rc_end_usage == VPX_CQ) { -+ encoder->Control(VP8E_SET_CQ_LEVEL, cq_level_); -+ } -+ encoder->Control(VP8E_SET_CPUUSED, 3); -+ } -+ } -+ -+ virtual void PSNRPktHook(const vpx_codec_cx_pkt_t *pkt) { -+ psnr_ += pow(10.0, pkt->data.psnr.psnr[0] / 10.0); -+ n_frames_++; -+ } -+ -+ virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) { -+ file_size_ += pkt->data.frame.sz; -+ } -+ -+ double GetLinearPSNROverBitrate() const { -+ double avg_psnr = log10(psnr_ / n_frames_) * 10.0; -+ return pow(10.0, avg_psnr / 10.0) / file_size_; -+ } -+ -+ int file_size() const { return file_size_; } -+ int n_frames() const { return n_frames_; } -+ -+ private: -+ int cq_level_; -+ int file_size_; -+ double psnr_; -+ int n_frames_; -+}; -+ -+int prev_actual_bitrate = kCQTargetBitrate; -+TEST_P(CQTest, LinearPSNRIsHigherForCQLevel) { -+ const vpx_rational timebase = { 33333333, 1000000000 }; -+ cfg_.g_timebase = timebase; -+ cfg_.rc_target_bitrate = kCQTargetBitrate; -+ cfg_.g_lag_in_frames = 25; -+ -+ cfg_.rc_end_usage = VPX_CQ; -+ libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, -+ timebase.den, timebase.num, 0, 30); -+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); -+ const double cq_psnr_lin = GetLinearPSNROverBitrate(); -+ const int cq_actual_bitrate = file_size() * 8 * 30 / (n_frames() * 1000); -+ EXPECT_LE(cq_actual_bitrate, kCQTargetBitrate); -+ EXPECT_LE(cq_actual_bitrate, prev_actual_bitrate); -+ prev_actual_bitrate = cq_actual_bitrate; -+ -+ // try targeting the approximate same bitrate with VBR mode -+ cfg_.rc_end_usage = VPX_VBR; -+ cfg_.rc_target_bitrate = cq_actual_bitrate; -+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); -+ const double vbr_psnr_lin = GetLinearPSNROverBitrate(); -+ EXPECT_GE(cq_psnr_lin, vbr_psnr_lin); -+} -+ -+INSTANTIATE_TEST_CASE_P(CQLevelRange, CQTest, -+ ::testing::Range(kCQLevelMin, kCQLevelMax, -+ kCQLevelStep)); -+} // namespace -diff --git a/test/datarate_test.cc b/test/datarate_test.cc -new file mode 100644 -index 0000000..6fbcb64 ---- /dev/null -+++ b/test/datarate_test.cc -@@ -0,0 +1,178 @@ -+/* -+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. -+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. 
-+ */ -+#include "test/encode_test_driver.h" -+#include "test/i420_video_source.h" -+#include "third_party/googletest/src/include/gtest/gtest.h" -+namespace { -+ -+class DatarateTest : public ::libvpx_test::EncoderTest, -+ public ::testing::TestWithParam { -+ protected: -+ virtual void SetUp() { -+ InitializeConfig(); -+ SetMode(GetParam()); -+ ResetModel(); -+ } -+ -+ virtual void ResetModel() { -+ last_pts_ = 0; -+ bits_in_buffer_model_ = cfg_.rc_target_bitrate * cfg_.rc_buf_initial_sz; -+ frame_number_ = 0; -+ first_drop_ = 0; -+ bits_total_ = 0; -+ duration_ = 0.0; -+ } -+ -+ virtual bool Continue() const { -+ return !HasFatalFailure() && !abort_; -+ } -+ -+ virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video, -+ ::libvpx_test::Encoder *encoder) { -+ const vpx_rational_t tb = video->timebase(); -+ timebase_ = static_cast(tb.num) / tb.den; -+ duration_ = 0; -+ } -+ -+ virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) { -+ // Time since last timestamp = duration. -+ vpx_codec_pts_t duration = pkt->data.frame.pts - last_pts_; -+ -+ // TODO(jimbankoski): Remove these lines when the issue: -+ // http://code.google.com/p/webm/issues/detail?id=496 is fixed. -+ // For now the codec assumes buffer starts at starting buffer rate -+ // plus one frame's time. -+ if (last_pts_ == 0) -+ duration = 1; -+ -+ // Add to the buffer the bits we'd expect from a constant bitrate server. -+ bits_in_buffer_model_ += duration * timebase_ * cfg_.rc_target_bitrate -+ * 1000; -+ -+ /* Test the buffer model here before subtracting the frame. Do so because -+ * the way the leaky bucket model works in libvpx is to allow the buffer to -+ * empty - and then stop showing frames until we've got enough bits to -+ * show one. As noted in comment below (issue 495), this does not currently -+ * apply to key frames. For now exclude key frames in condition below. */ -+ bool key_frame = (pkt->data.frame.flags & VPX_FRAME_IS_KEY) ? true: false; -+ if (!key_frame) { -+ ASSERT_GE(bits_in_buffer_model_, 0) << "Buffer Underrun at frame " -+ << pkt->data.frame.pts; -+ } -+ -+ const int frame_size_in_bits = pkt->data.frame.sz * 8; -+ -+ // Subtract from the buffer the bits associated with a played back frame. -+ bits_in_buffer_model_ -= frame_size_in_bits; -+ -+ // Update the running total of bits for end of test datarate checks. -+ bits_total_ += frame_size_in_bits ; -+ -+ // If first drop not set and we have a drop set it to this time. -+ if (!first_drop_ && duration > 1) -+ first_drop_ = last_pts_ + 1; -+ -+ // Update the most recent pts. -+ last_pts_ = pkt->data.frame.pts; -+ -+ // We update this so that we can calculate the datarate minus the last -+ // frame encoded in the file. -+ bits_in_last_frame_ = frame_size_in_bits; -+ -+ ++frame_number_; -+ } -+ -+ virtual void EndPassHook(void) { -+ if (bits_total_) { -+ const double file_size_in_kb = bits_total_ / 1000; /* bits per kilobit */ -+ -+ duration_ = (last_pts_ + 1) * timebase_; -+ -+ // Effective file datarate includes the time spent prebuffering. 
-+ effective_datarate_ = (bits_total_ - bits_in_last_frame_) / 1000.0 -+ / (cfg_.rc_buf_initial_sz / 1000.0 + duration_); -+ -+ file_datarate_ = file_size_in_kb / duration_; -+ } -+ } -+ -+ vpx_codec_pts_t last_pts_; -+ int bits_in_buffer_model_; -+ double timebase_; -+ int frame_number_; -+ vpx_codec_pts_t first_drop_; -+ int64_t bits_total_; -+ double duration_; -+ double file_datarate_; -+ double effective_datarate_; -+ int bits_in_last_frame_; -+}; -+ -+TEST_P(DatarateTest, BasicBufferModel) { -+ cfg_.rc_buf_initial_sz = 500; -+ cfg_.rc_dropframe_thresh = 1; -+ cfg_.rc_max_quantizer = 56; -+ cfg_.rc_end_usage = VPX_CBR; -+ // 2 pass cbr datarate control has a bug hidden by the small # of -+ // frames selected in this encode. The problem is that even if the buffer is -+ // negative we produce a keyframe on a cutscene. Ignoring datarate -+ // constraints -+ // TODO(jimbankoski): ( Fix when issue -+ // http://code.google.com/p/webm/issues/detail?id=495 is addressed. ) -+ ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, -+ 30, 1, 0, 140); -+ -+ // There is an issue for low bitrates in real-time mode, where the -+ // effective_datarate slightly overshoots the target bitrate. -+ // This is same the issue as noted about (#495). -+ // TODO(jimbankoski/marpan): Update test to run for lower bitrates (< 100), -+ // when the issue is resolved. -+ for (int i = 100; i < 800; i += 200) { -+ cfg_.rc_target_bitrate = i; -+ ResetModel(); -+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); -+ ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_) -+ << " The datarate for the file exceeds the target!"; -+ -+ ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.3) -+ << " The datarate for the file missed the target!"; -+ } -+} -+ -+TEST_P(DatarateTest, ChangingDropFrameThresh) { -+ cfg_.rc_buf_initial_sz = 500; -+ cfg_.rc_max_quantizer = 36; -+ cfg_.rc_end_usage = VPX_CBR; -+ cfg_.rc_target_bitrate = 200; -+ cfg_.kf_mode = VPX_KF_DISABLED; -+ -+ const int frame_count = 40; -+ ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, -+ 30, 1, 0, frame_count); -+ -+ // Here we check that the first dropped frame gets earlier and earlier -+ // as the drop frame threshold is increased. -+ -+ const int kDropFrameThreshTestStep = 30; -+ vpx_codec_pts_t last_drop = frame_count; -+ for (int i = 1; i < 91; i += kDropFrameThreshTestStep) { -+ cfg_.rc_dropframe_thresh = i; -+ ResetModel(); -+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); -+ ASSERT_LE(first_drop_, last_drop) -+ << " The first dropped frame for drop_thresh " << i -+ << " > first dropped frame for drop_thresh " -+ << i - kDropFrameThreshTestStep; -+ last_drop = first_drop_; -+ } -+} -+ -+INSTANTIATE_TEST_CASE_P(AllModes, DatarateTest, ALL_TEST_MODES); -+} // namespace -diff --git a/test/decode_test_driver.cc b/test/decode_test_driver.cc -new file mode 100644 -index 0000000..84afe7f ---- /dev/null -+++ b/test/decode_test_driver.cc -@@ -0,0 +1,48 @@ -+/* -+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. -+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. 
-+ */ -+#include "test/decode_test_driver.h" -+#include "third_party/googletest/src/include/gtest/gtest.h" -+#include "test/register_state_check.h" -+#include "test/video_source.h" -+ -+namespace libvpx_test { -+#if CONFIG_VP8_DECODER -+void Decoder::DecodeFrame(const uint8_t *cxdata, int size) { -+ if (!decoder_.priv) { -+ const vpx_codec_err_t res_init = vpx_codec_dec_init(&decoder_, -+ &vpx_codec_vp8_dx_algo, -+ &cfg_, 0); -+ ASSERT_EQ(VPX_CODEC_OK, res_init) << DecodeError(); -+ } -+ -+ vpx_codec_err_t res_dec; -+ REGISTER_STATE_CHECK(res_dec = vpx_codec_decode(&decoder_, -+ cxdata, size, NULL, 0)); -+ ASSERT_EQ(VPX_CODEC_OK, res_dec) << DecodeError(); -+} -+ -+void DecoderTest::RunLoop(CompressedVideoSource *video) { -+ vpx_codec_dec_cfg_t dec_cfg = {0}; -+ Decoder decoder(dec_cfg, 0); -+ -+ // Decode frames. -+ for (video->Begin(); video->cxdata(); video->Next()) { -+ decoder.DecodeFrame(video->cxdata(), video->frame_size()); -+ -+ DxDataIterator dec_iter = decoder.GetDxData(); -+ const vpx_image_t *img = NULL; -+ -+ // Get decompressed data -+ while ((img = dec_iter.Next())) -+ DecompressedFrameHook(*img, video->frame_number()); -+ } -+} -+#endif -+} // namespace libvpx_test -diff --git a/test/decode_test_driver.h b/test/decode_test_driver.h -new file mode 100644 -index 0000000..6408bee ---- /dev/null -+++ b/test/decode_test_driver.h -@@ -0,0 +1,97 @@ -+/* -+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. -+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. -+ */ -+ -+#ifndef TEST_DECODE_TEST_DRIVER_H_ -+#define TEST_DECODE_TEST_DRIVER_H_ -+#include -+#include "third_party/googletest/src/include/gtest/gtest.h" -+#include "vpx_config.h" -+#include "vpx/vpx_decoder.h" -+#include "vpx/vp8dx.h" -+ -+namespace libvpx_test { -+ -+class CompressedVideoSource; -+ -+// Provides an object to handle decoding output -+class DxDataIterator { -+ public: -+ explicit DxDataIterator(vpx_codec_ctx_t *decoder) -+ : decoder_(decoder), iter_(NULL) {} -+ -+ const vpx_image_t *Next() { -+ return vpx_codec_get_frame(decoder_, &iter_); -+ } -+ -+ private: -+ vpx_codec_ctx_t *decoder_; -+ vpx_codec_iter_t iter_; -+}; -+ -+// Provides a simplified interface to manage one video decoding. -+// -+// TODO: similar to Encoder class, the exact services should be -+// added as more tests are added. -+class Decoder { -+ public: -+ Decoder(vpx_codec_dec_cfg_t cfg, unsigned long deadline) -+ : cfg_(cfg), deadline_(deadline) { -+ memset(&decoder_, 0, sizeof(decoder_)); -+ } -+ -+ ~Decoder() { -+ vpx_codec_destroy(&decoder_); -+ } -+ -+ void DecodeFrame(const uint8_t *cxdata, int size); -+ -+ DxDataIterator GetDxData() { -+ return DxDataIterator(&decoder_); -+ } -+ -+ void set_deadline(unsigned long deadline) { -+ deadline_ = deadline; -+ } -+ -+ void Control(int ctrl_id, int arg) { -+ const vpx_codec_err_t res = vpx_codec_control_(&decoder_, ctrl_id, arg); -+ ASSERT_EQ(VPX_CODEC_OK, res) << DecodeError(); -+ } -+ -+ protected: -+ const char *DecodeError() { -+ const char *detail = vpx_codec_error_detail(&decoder_); -+ return detail ? 
detail : vpx_codec_error(&decoder_); -+ } -+ -+ vpx_codec_ctx_t decoder_; -+ vpx_codec_dec_cfg_t cfg_; -+ unsigned int deadline_; -+}; -+ -+// Common test functionality for all Decoder tests. -+class DecoderTest { -+ public: -+ // Main loop. -+ virtual void RunLoop(CompressedVideoSource *video); -+ -+ // Hook to be called on every decompressed frame. -+ virtual void DecompressedFrameHook(const vpx_image_t& img, -+ const unsigned int frame_number) {} -+ -+ protected: -+ DecoderTest() {} -+ -+ virtual ~DecoderTest() {} -+}; -+ -+} // namespace libvpx_test -+ -+#endif // TEST_DECODE_TEST_DRIVER_H_ -diff --git a/test/encode_test_driver.cc b/test/encode_test_driver.cc -new file mode 100644 -index 0000000..56339ca ---- /dev/null -+++ b/test/encode_test_driver.cc -@@ -0,0 +1,206 @@ -+/* -+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. -+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. -+ */ -+#include "vpx_config.h" -+#include "test/encode_test_driver.h" -+#if CONFIG_VP8_DECODER -+#include "test/decode_test_driver.h" -+#endif -+#include "test/register_state_check.h" -+#include "test/video_source.h" -+#include "third_party/googletest/src/include/gtest/gtest.h" -+ -+namespace libvpx_test { -+void Encoder::EncodeFrame(VideoSource *video, const unsigned long frame_flags) { -+ if (video->img()) -+ EncodeFrameInternal(*video, frame_flags); -+ else -+ Flush(); -+ -+ // Handle twopass stats -+ CxDataIterator iter = GetCxData(); -+ -+ while (const vpx_codec_cx_pkt_t *pkt = iter.Next()) { -+ if (pkt->kind != VPX_CODEC_STATS_PKT) -+ continue; -+ -+ stats_->Append(*pkt); -+ } -+} -+ -+void Encoder::EncodeFrameInternal(const VideoSource &video, -+ const unsigned long frame_flags) { -+ vpx_codec_err_t res; -+ const vpx_image_t *img = video.img(); -+ -+ // Handle first frame initialization -+ if (!encoder_.priv) { -+ cfg_.g_w = img->d_w; -+ cfg_.g_h = img->d_h; -+ cfg_.g_timebase = video.timebase(); -+ cfg_.rc_twopass_stats_in = stats_->buf(); -+ res = vpx_codec_enc_init(&encoder_, &vpx_codec_vp8_cx_algo, &cfg_, -+ init_flags_); -+ ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); -+ } -+ -+ // Handle frame resizing -+ if (cfg_.g_w != img->d_w || cfg_.g_h != img->d_h) { -+ cfg_.g_w = img->d_w; -+ cfg_.g_h = img->d_h; -+ res = vpx_codec_enc_config_set(&encoder_, &cfg_); -+ ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); -+ } -+ -+ // Encode the frame -+ REGISTER_STATE_CHECK( -+ res = vpx_codec_encode(&encoder_, -+ video.img(), video.pts(), video.duration(), -+ frame_flags, deadline_)); -+ ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); -+} -+ -+void Encoder::Flush() { -+ const vpx_codec_err_t res = vpx_codec_encode(&encoder_, NULL, 0, 0, 0, -+ deadline_); -+ ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); -+} -+ -+void EncoderTest::SetMode(TestMode mode) { -+ switch (mode) { -+ case kRealTime: -+ deadline_ = VPX_DL_REALTIME; -+ break; -+ -+ case kOnePassGood: -+ case kTwoPassGood: -+ deadline_ = VPX_DL_GOOD_QUALITY; -+ break; -+ -+ case kOnePassBest: -+ case kTwoPassBest: -+ deadline_ = VPX_DL_BEST_QUALITY; -+ break; -+ -+ default: -+ ASSERT_TRUE(false) << "Unexpected mode " << mode; -+ } -+ -+ if (mode == kTwoPassGood || mode == kTwoPassBest) -+ passes_ = 2; -+ else -+ passes_ = 1; -+} -+// The function 
should return "true" most of the time, therefore no early -+// break-out is implemented within the match checking process. -+static bool compare_img(const vpx_image_t *img1, -+ const vpx_image_t *img2) { -+ bool match = (img1->fmt == img2->fmt) && -+ (img1->d_w == img2->d_w) && -+ (img1->d_h == img2->d_h); -+ -+ const unsigned int width_y = img1->d_w; -+ const unsigned int height_y = img1->d_h; -+ unsigned int i; -+ for (i = 0; i < height_y; ++i) -+ match = ( memcmp(img1->planes[VPX_PLANE_Y] + i * img1->stride[VPX_PLANE_Y], -+ img2->planes[VPX_PLANE_Y] + i * img2->stride[VPX_PLANE_Y], -+ width_y) == 0) && match; -+ const unsigned int width_uv = (img1->d_w + 1) >> 1; -+ const unsigned int height_uv = (img1->d_h + 1) >> 1; -+ for (i = 0; i < height_uv; ++i) -+ match = ( memcmp(img1->planes[VPX_PLANE_U] + i * img1->stride[VPX_PLANE_U], -+ img2->planes[VPX_PLANE_U] + i * img2->stride[VPX_PLANE_U], -+ width_uv) == 0) && match; -+ for (i = 0; i < height_uv; ++i) -+ match = ( memcmp(img1->planes[VPX_PLANE_V] + i * img1->stride[VPX_PLANE_V], -+ img2->planes[VPX_PLANE_V] + i * img2->stride[VPX_PLANE_V], -+ width_uv) == 0) && match; -+ return match; -+} -+ -+void EncoderTest::RunLoop(VideoSource *video) { -+#if CONFIG_VP8_DECODER -+ vpx_codec_dec_cfg_t dec_cfg = {0}; -+#endif -+ -+ stats_.Reset(); -+ -+ for (unsigned int pass = 0; pass < passes_; pass++) { -+ last_pts_ = 0; -+ -+ if (passes_ == 1) -+ cfg_.g_pass = VPX_RC_ONE_PASS; -+ else if (pass == 0) -+ cfg_.g_pass = VPX_RC_FIRST_PASS; -+ else -+ cfg_.g_pass = VPX_RC_LAST_PASS; -+ -+ BeginPassHook(pass); -+ Encoder encoder(cfg_, deadline_, init_flags_, &stats_); -+#if CONFIG_VP8_DECODER -+ Decoder decoder(dec_cfg, 0); -+ bool has_cxdata = false; -+#endif -+ bool again; -+ for (again = true, video->Begin(); again; video->Next()) { -+ again = video->img() != NULL; -+ -+ PreEncodeFrameHook(video); -+ PreEncodeFrameHook(video, &encoder); -+ encoder.EncodeFrame(video, frame_flags_); -+ -+ CxDataIterator iter = encoder.GetCxData(); -+ -+ while (const vpx_codec_cx_pkt_t *pkt = iter.Next()) { -+ again = true; -+ -+ switch (pkt->kind) { -+ case VPX_CODEC_CX_FRAME_PKT: -+#if CONFIG_VP8_DECODER -+ has_cxdata = true; -+ decoder.DecodeFrame((const uint8_t*)pkt->data.frame.buf, -+ pkt->data.frame.sz); -+#endif -+ ASSERT_GE(pkt->data.frame.pts, last_pts_); -+ last_pts_ = pkt->data.frame.pts; -+ FramePktHook(pkt); -+ break; -+ -+ case VPX_CODEC_PSNR_PKT: -+ PSNRPktHook(pkt); -+ break; -+ -+ default: -+ break; -+ } -+ } -+ -+#if CONFIG_VP8_DECODER -+ if (has_cxdata) { -+ const vpx_image_t *img_enc = encoder.GetPreviewFrame(); -+ DxDataIterator dec_iter = decoder.GetDxData(); -+ const vpx_image_t *img_dec = dec_iter.Next(); -+ if(img_enc && img_dec) { -+ const bool res = compare_img(img_enc, img_dec); -+ ASSERT_TRUE(res)<< "Encoder/Decoder mismatch found."; -+ } -+ } -+#endif -+ if (!Continue()) -+ break; -+ } -+ -+ EndPassHook(); -+ -+ if (!Continue()) -+ break; -+ } -+} -+} // namespace libvpx_test -diff --git a/test/encode_test_driver.h b/test/encode_test_driver.h -new file mode 100644 -index 0000000..0141fa9 ---- /dev/null -+++ b/test/encode_test_driver.h -@@ -0,0 +1,197 @@ -+/* -+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. -+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. 
All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. -+ */ -+#ifndef TEST_ENCODE_TEST_DRIVER_H_ -+#define TEST_ENCODE_TEST_DRIVER_H_ -+#include -+#include -+#include "third_party/googletest/src/include/gtest/gtest.h" -+#include "vpx/vpx_encoder.h" -+#include "vpx/vp8cx.h" -+ -+namespace libvpx_test { -+ -+class VideoSource; -+ -+enum TestMode { -+ kRealTime, -+ kOnePassGood, -+ kOnePassBest, -+ kTwoPassGood, -+ kTwoPassBest -+}; -+#define ALL_TEST_MODES ::testing::Values(::libvpx_test::kRealTime, \ -+ ::libvpx_test::kOnePassGood, \ -+ ::libvpx_test::kOnePassBest, \ -+ ::libvpx_test::kTwoPassGood, \ -+ ::libvpx_test::kTwoPassBest) -+ -+#define ONE_PASS_TEST_MODES ::testing::Values(::libvpx_test::kRealTime, \ -+ ::libvpx_test::kOnePassGood, \ -+ ::libvpx_test::kOnePassBest) -+ -+ -+// Provides an object to handle the libvpx get_cx_data() iteration pattern -+class CxDataIterator { -+ public: -+ explicit CxDataIterator(vpx_codec_ctx_t *encoder) -+ : encoder_(encoder), iter_(NULL) {} -+ -+ const vpx_codec_cx_pkt_t *Next() { -+ return vpx_codec_get_cx_data(encoder_, &iter_); -+ } -+ -+ private: -+ vpx_codec_ctx_t *encoder_; -+ vpx_codec_iter_t iter_; -+}; -+ -+// Implements an in-memory store for libvpx twopass statistics -+class TwopassStatsStore { -+ public: -+ void Append(const vpx_codec_cx_pkt_t &pkt) { -+ buffer_.append(reinterpret_cast(pkt.data.twopass_stats.buf), -+ pkt.data.twopass_stats.sz); -+ } -+ -+ vpx_fixed_buf_t buf() { -+ const vpx_fixed_buf_t buf = { &buffer_[0], buffer_.size() }; -+ return buf; -+ } -+ -+ void Reset() { -+ buffer_.clear(); -+ } -+ -+ protected: -+ std::string buffer_; -+}; -+ -+ -+// Provides a simplified interface to manage one video encoding pass, given -+// a configuration and video source. -+// -+// TODO(jkoleszar): The exact services it provides and the appropriate -+// level of abstraction will be fleshed out as more tests are written. -+class Encoder { -+ public: -+ Encoder(vpx_codec_enc_cfg_t cfg, unsigned long deadline, -+ const unsigned long init_flags, TwopassStatsStore *stats) -+ : cfg_(cfg), deadline_(deadline), init_flags_(init_flags), stats_(stats) { -+ memset(&encoder_, 0, sizeof(encoder_)); -+ } -+ -+ ~Encoder() { -+ vpx_codec_destroy(&encoder_); -+ } -+ -+ CxDataIterator GetCxData() { -+ return CxDataIterator(&encoder_); -+ } -+ -+ const vpx_image_t *GetPreviewFrame() { -+ return vpx_codec_get_preview_frame(&encoder_); -+ } -+ // This is a thin wrapper around vpx_codec_encode(), so refer to -+ // vpx_encoder.h for its semantics. -+ void EncodeFrame(VideoSource *video, const unsigned long frame_flags); -+ -+ // Convenience wrapper for EncodeFrame() -+ void EncodeFrame(VideoSource *video) { -+ EncodeFrame(video, 0); -+ } -+ -+ void Control(int ctrl_id, int arg) { -+ const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg); -+ ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); -+ } -+ -+ void set_deadline(unsigned long deadline) { -+ deadline_ = deadline; -+ } -+ -+ protected: -+ const char *EncoderError() { -+ const char *detail = vpx_codec_error_detail(&encoder_); -+ return detail ? 
detail : vpx_codec_error(&encoder_); -+ } -+ -+ // Encode an image -+ void EncodeFrameInternal(const VideoSource &video, -+ const unsigned long frame_flags); -+ -+ // Flush the encoder on EOS -+ void Flush(); -+ -+ vpx_codec_ctx_t encoder_; -+ vpx_codec_enc_cfg_t cfg_; -+ unsigned long deadline_; -+ unsigned long init_flags_; -+ TwopassStatsStore *stats_; -+}; -+ -+// Common test functionality for all Encoder tests. -+// -+// This class is a mixin which provides the main loop common to all -+// encoder tests. It provides hooks which can be overridden by subclasses -+// to implement each test's specific behavior, while centralizing the bulk -+// of the boilerplate. Note that it doesn't inherit the gtest testing -+// classes directly, so that tests can be parameterized differently. -+class EncoderTest { -+ protected: -+ EncoderTest() : abort_(false), init_flags_(0), frame_flags_(0), -+ last_pts_(0) {} -+ -+ virtual ~EncoderTest() {} -+ -+ // Initialize the cfg_ member with the default configuration. -+ void InitializeConfig() { -+ const vpx_codec_err_t res = vpx_codec_enc_config_default( -+ &vpx_codec_vp8_cx_algo, &cfg_, 0); -+ ASSERT_EQ(VPX_CODEC_OK, res); -+ } -+ -+ // Map the TestMode enum to the deadline_ and passes_ variables. -+ void SetMode(TestMode mode); -+ -+ // Main loop. -+ virtual void RunLoop(VideoSource *video); -+ -+ // Hook to be called at the beginning of a pass. -+ virtual void BeginPassHook(unsigned int pass) {} -+ -+ // Hook to be called at the end of a pass. -+ virtual void EndPassHook() {} -+ -+ // Hook to be called before encoding a frame. -+ virtual void PreEncodeFrameHook(VideoSource *video) {} -+ virtual void PreEncodeFrameHook(VideoSource *video, Encoder *encoder) {} -+ -+ // Hook to be called on every compressed data packet. -+ virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {} -+ -+ // Hook to be called on every PSNR packet. -+ virtual void PSNRPktHook(const vpx_codec_cx_pkt_t *pkt) {} -+ -+ // Hook to determine whether the encode loop should continue. -+ virtual bool Continue() const { return !abort_; } -+ -+ bool abort_; -+ vpx_codec_enc_cfg_t cfg_; -+ unsigned int passes_; -+ unsigned long deadline_; -+ TwopassStatsStore stats_; -+ unsigned long init_flags_; -+ unsigned long frame_flags_; -+ vpx_codec_pts_t last_pts_; -+}; -+ -+} // namespace libvpx_test -+ -+#endif // TEST_ENCODE_TEST_DRIVER_H_ -diff --git a/test/error_resilience_test.cc b/test/error_resilience_test.cc -new file mode 100644 -index 0000000..25c6731 ---- /dev/null -+++ b/test/error_resilience_test.cc -@@ -0,0 +1,90 @@ -+/* -+ Copyright (c) 2012 The WebM project authors. All Rights Reserved. -+ -+ Use of this source code is governed by a BSD-style license -+ that can be found in the LICENSE file in the root of the source -+ tree. An additional intellectual property rights grant can be found -+ in the file PATENTS. All contributing project authors may -+ be found in the AUTHORS file in the root of the source tree. 
-+*/ -+#include "third_party/googletest/src/include/gtest/gtest.h" -+#include "test/encode_test_driver.h" -+#include "test/i420_video_source.h" -+ -+namespace { -+ -+class ErrorResilienceTest : public libvpx_test::EncoderTest, -+ public ::testing::TestWithParam { -+ protected: -+ ErrorResilienceTest() { -+ psnr_ = 0.0; -+ nframes_ = 0; -+ encoding_mode_ = static_cast(GetParam()); -+ } -+ virtual ~ErrorResilienceTest() {} -+ -+ virtual void SetUp() { -+ InitializeConfig(); -+ SetMode(encoding_mode_); -+ } -+ -+ virtual void BeginPassHook(unsigned int /*pass*/) { -+ psnr_ = 0.0; -+ nframes_ = 0; -+ } -+ -+ virtual bool Continue() const { -+ return !HasFatalFailure() && !abort_; -+ } -+ -+ virtual void PSNRPktHook(const vpx_codec_cx_pkt_t *pkt) { -+ psnr_ += pkt->data.psnr.psnr[0]; -+ nframes_++; -+ } -+ -+ double GetAveragePsnr() const { -+ if (nframes_) -+ return psnr_ / nframes_; -+ return 0.0; -+ } -+ -+ private: -+ double psnr_; -+ unsigned int nframes_; -+ libvpx_test::TestMode encoding_mode_; -+}; -+ -+TEST_P(ErrorResilienceTest, OnVersusOff) { -+ const vpx_rational timebase = { 33333333, 1000000000 }; -+ cfg_.g_timebase = timebase; -+ cfg_.rc_target_bitrate = 2000; -+ cfg_.g_lag_in_frames = 25; -+ -+ init_flags_ = VPX_CODEC_USE_PSNR; -+ -+ libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, -+ timebase.den, timebase.num, 0, 30); -+ -+ // Error resilient mode OFF. -+ cfg_.g_error_resilient = 0; -+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); -+ const double psnr_resilience_off = GetAveragePsnr(); -+ EXPECT_GT(psnr_resilience_off, 25.0); -+ -+ // Error resilient mode ON. -+ cfg_.g_error_resilient = 1; -+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); -+ const double psnr_resilience_on = GetAveragePsnr(); -+ EXPECT_GT(psnr_resilience_on, 25.0); -+ -+ // Test that turning on error resilient mode hurts by 10% at most. -+ if (psnr_resilience_off > 0.0) { -+ const double psnr_ratio = psnr_resilience_on / psnr_resilience_off; -+ EXPECT_GE(psnr_ratio, 0.9); -+ EXPECT_LE(psnr_ratio, 1.1); -+ } -+} -+ -+INSTANTIATE_TEST_CASE_P(OnOffTest, ErrorResilienceTest, -+ ONE_PASS_TEST_MODES); -+} // namespace -diff --git a/test/fdct4x4_test.cc b/test/fdct4x4_test.cc -new file mode 100644 -index 0000000..619b23d ---- /dev/null -+++ b/test/fdct4x4_test.cc -@@ -0,0 +1,169 @@ -+/* -+* Copyright (c) 2012 The WebM project authors. All Rights Reserved. -+* -+* Use of this source code is governed by a BSD-style license -+* that can be found in the LICENSE file in the root of the source -+* tree. An additional intellectual property rights grant can be found -+* in the file PATENTS. All contributing project authors may -+* be found in the AUTHORS file in the root of the source tree. 
-+*/ -+ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+ -+extern "C" { -+#include "vpx_rtcd.h" -+} -+ -+#include "test/acm_random.h" -+#include "third_party/googletest/src/include/gtest/gtest.h" -+#include "vpx/vpx_integer.h" -+ -+ -+namespace { -+ -+const int cospi8sqrt2minus1 = 20091; -+const int sinpi8sqrt2 = 35468; -+ -+void reference_idct4x4(const int16_t *input, int16_t *output) { -+ const int16_t *ip = input; -+ int16_t *op = output; -+ -+ for (int i = 0; i < 4; ++i) { -+ const int a1 = ip[0] + ip[8]; -+ const int b1 = ip[0] - ip[8]; -+ const int temp1 = (ip[4] * sinpi8sqrt2) >> 16; -+ const int temp2 = ip[12] + ((ip[12] * cospi8sqrt2minus1) >> 16); -+ const int c1 = temp1 - temp2; -+ const int temp3 = ip[4] + ((ip[4] * cospi8sqrt2minus1) >> 16); -+ const int temp4 = (ip[12] * sinpi8sqrt2) >> 16; -+ const int d1 = temp3 + temp4; -+ op[0] = a1 + d1; -+ op[12] = a1 - d1; -+ op[4] = b1 + c1; -+ op[8] = b1 - c1; -+ ++ip; -+ ++op; -+ } -+ ip = output; -+ op = output; -+ for (int i = 0; i < 4; ++i) { -+ const int a1 = ip[0] + ip[2]; -+ const int b1 = ip[0] - ip[2]; -+ const int temp1 = (ip[1] * sinpi8sqrt2) >> 16; -+ const int temp2 = ip[3] + ((ip[3] * cospi8sqrt2minus1) >> 16); -+ const int c1 = temp1 - temp2; -+ const int temp3 = ip[1] + ((ip[1] * cospi8sqrt2minus1) >> 16); -+ const int temp4 = (ip[3] * sinpi8sqrt2) >> 16; -+ const int d1 = temp3 + temp4; -+ op[0] = (a1 + d1 + 4) >> 3; -+ op[3] = (a1 - d1 + 4) >> 3; -+ op[1] = (b1 + c1 + 4) >> 3; -+ op[2] = (b1 - c1 + 4) >> 3; -+ ip += 4; -+ op += 4; -+ } -+} -+ -+using libvpx_test::ACMRandom; -+ -+TEST(Vp8FdctTest, SignBiasCheck) { -+ ACMRandom rnd(ACMRandom::DeterministicSeed()); -+ int16_t test_input_block[16]; -+ int16_t test_output_block[16]; -+ const int pitch = 8; -+ int count_sign_block[16][2]; -+ const int count_test_block = 1000000; -+ -+ memset(count_sign_block, 0, sizeof(count_sign_block)); -+ -+ for (int i = 0; i < count_test_block; ++i) { -+ // Initialize a test block with input range [-255, 255]. -+ for (int j = 0; j < 16; ++j) -+ test_input_block[j] = rnd.Rand8() - rnd.Rand8(); -+ -+ vp8_short_fdct4x4_c(test_input_block, test_output_block, pitch); -+ -+ for (int j = 0; j < 16; ++j) { -+ if (test_output_block[j] < 0) -+ ++count_sign_block[j][0]; -+ else if (test_output_block[j] > 0) -+ ++count_sign_block[j][1]; -+ } -+ } -+ -+ bool bias_acceptable = true; -+ for (int j = 0; j < 16; ++j) -+ bias_acceptable = bias_acceptable && -+ (abs(count_sign_block[j][0] - count_sign_block[j][1]) < 10000); -+ -+ EXPECT_EQ(true, bias_acceptable) -+ << "Error: 4x4 FDCT has a sign bias > 1% for input range [-255, 255]"; -+ -+ memset(count_sign_block, 0, sizeof(count_sign_block)); -+ -+ for (int i = 0; i < count_test_block; ++i) { -+ // Initialize a test block with input range [-15, 15]. 
-+ for (int j = 0; j < 16; ++j) -+ test_input_block[j] = (rnd.Rand8() >> 4) - (rnd.Rand8() >> 4); -+ -+ vp8_short_fdct4x4_c(test_input_block, test_output_block, pitch); -+ -+ for (int j = 0; j < 16; ++j) { -+ if (test_output_block[j] < 0) -+ ++count_sign_block[j][0]; -+ else if (test_output_block[j] > 0) -+ ++count_sign_block[j][1]; -+ } -+ } -+ -+ bias_acceptable = true; -+ for (int j = 0; j < 16; ++j) -+ bias_acceptable = bias_acceptable && -+ (abs(count_sign_block[j][0] - count_sign_block[j][1]) < 100000); -+ -+ EXPECT_EQ(true, bias_acceptable) -+ << "Error: 4x4 FDCT has a sign bias > 10% for input range [-15, 15]"; -+}; -+ -+TEST(Vp8FdctTest, RoundTripErrorCheck) { -+ ACMRandom rnd(ACMRandom::DeterministicSeed()); -+ int max_error = 0; -+ double total_error = 0; -+ const int count_test_block = 1000000; -+ for (int i = 0; i < count_test_block; ++i) { -+ int16_t test_input_block[16]; -+ int16_t test_temp_block[16]; -+ int16_t test_output_block[16]; -+ -+ // Initialize a test block with input range [-255, 255]. -+ for (int j = 0; j < 16; ++j) -+ test_input_block[j] = rnd.Rand8() - rnd.Rand8(); -+ -+ const int pitch = 8; -+ vp8_short_fdct4x4_c(test_input_block, test_temp_block, pitch); -+ reference_idct4x4(test_temp_block, test_output_block); -+ -+ for (int j = 0; j < 16; ++j) { -+ const int diff = test_input_block[j] - test_output_block[j]; -+ const int error = diff * diff; -+ if (max_error < error) -+ max_error = error; -+ total_error += error; -+ } -+ } -+ -+ EXPECT_GE(1, max_error ) -+ << "Error: FDCT/IDCT has an individual roundtrip error > 1"; -+ -+ EXPECT_GE(count_test_block, total_error) -+ << "Error: FDCT/IDCT has average roundtrip error > 1 per block"; -+}; -+ -+} // namespace -diff --git a/test/i420_video_source.h b/test/i420_video_source.h -new file mode 100644 -index 0000000..219bd33 ---- /dev/null -+++ b/test/i420_video_source.h -@@ -0,0 +1,117 @@ -+/* -+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. -+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. -+ */ -+#ifndef TEST_I420_VIDEO_SOURCE_H_ -+#define TEST_I420_VIDEO_SOURCE_H_ -+#include -+#include -+ -+#include "test/video_source.h" -+ -+namespace libvpx_test { -+ -+// This class extends VideoSource to allow parsing of raw yv12 -+// so that we can do actual file encodes. -+class I420VideoSource : public VideoSource { -+ public: -+ I420VideoSource(const std::string &file_name, -+ unsigned int width, unsigned int height, -+ int rate_numerator, int rate_denominator, -+ unsigned int start, int limit) -+ : file_name_(file_name), -+ input_file_(NULL), -+ img_(NULL), -+ start_(start), -+ limit_(limit), -+ frame_(0), -+ width_(0), -+ height_(0), -+ framerate_numerator_(rate_numerator), -+ framerate_denominator_(rate_denominator) { -+ -+ // This initializes raw_sz_, width_, height_ and allocates an img. -+ SetSize(width, height); -+ } -+ -+ virtual ~I420VideoSource() { -+ vpx_img_free(img_); -+ if (input_file_) -+ fclose(input_file_); -+ } -+ -+ virtual void Begin() { -+ if (input_file_) -+ fclose(input_file_); -+ input_file_ = OpenTestDataFile(file_name_); -+ ASSERT_TRUE(input_file_) << "Input file open failed. 
Filename: " -+ << file_name_; -+ if (start_) { -+ fseek(input_file_, raw_sz_ * start_, SEEK_SET); -+ } -+ -+ frame_ = start_; -+ FillFrame(); -+ } -+ -+ virtual void Next() { -+ ++frame_; -+ FillFrame(); -+ } -+ -+ virtual vpx_image_t *img() const { return (frame_ < limit_) ? img_ : NULL; } -+ -+ // Models a stream where Timebase = 1/FPS, so pts == frame. -+ virtual vpx_codec_pts_t pts() const { return frame_; } -+ -+ virtual unsigned long duration() const { return 1; } -+ -+ virtual vpx_rational_t timebase() const { -+ const vpx_rational_t t = { framerate_denominator_, framerate_numerator_ }; -+ return t; -+ } -+ -+ virtual unsigned int frame() const { return frame_; } -+ -+ virtual unsigned int limit() const { return limit_; } -+ -+ void SetSize(unsigned int width, unsigned int height) { -+ if (width != width_ || height != height_) { -+ vpx_img_free(img_); -+ img_ = vpx_img_alloc(NULL, VPX_IMG_FMT_VPXI420, width, height, 1); -+ ASSERT_TRUE(img_ != NULL); -+ width_ = width; -+ height_ = height; -+ raw_sz_ = width * height * 3 / 2; -+ } -+ } -+ -+ virtual void FillFrame() { -+ // Read a frame from input_file. -+ if (fread(img_->img_data, raw_sz_, 1, input_file_) == 0) { -+ limit_ = frame_; -+ } -+ } -+ -+ protected: -+ std::string file_name_; -+ FILE *input_file_; -+ vpx_image_t *img_; -+ size_t raw_sz_; -+ unsigned int start_; -+ unsigned int limit_; -+ unsigned int frame_; -+ unsigned int width_; -+ unsigned int height_; -+ unsigned int framerate_numerator_; -+ unsigned int framerate_denominator_; -+}; -+ -+} // namespace libvpx_test -+ -+#endif // TEST_I420_VIDEO_SOURCE_H_ -diff --git a/test/idctllm_test.cc b/test/idctllm_test.cc -new file mode 100644 -index 0000000..1be5fa0 ---- /dev/null -+++ b/test/idctllm_test.cc -@@ -0,0 +1,126 @@ -+/* -+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved. -+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. 
-+ */ -+ -+ -+extern "C" { -+#include "vpx_config.h" -+#include "vpx_rtcd.h" -+} -+#include "test/register_state_check.h" -+#include "third_party/googletest/src/include/gtest/gtest.h" -+ -+typedef void (*idct_fn_t)(short *input, unsigned char *pred_ptr, -+ int pred_stride, unsigned char *dst_ptr, -+ int dst_stride); -+namespace { -+class IDCTTest : public ::testing::TestWithParam -+{ -+ protected: -+ virtual void SetUp() -+ { -+ int i; -+ -+ UUT = GetParam(); -+ memset(input, 0, sizeof(input)); -+ /* Set up guard blocks */ -+ for(i=0; i<256; i++) -+ output[i] = ((i&0xF)<4&&(i<64))?0:-1; -+ } -+ -+ idct_fn_t UUT; -+ short input[16]; -+ unsigned char output[256]; -+ unsigned char predict[256]; -+}; -+ -+TEST_P(IDCTTest, TestGuardBlocks) -+{ -+ int i; -+ -+ for(i=0; i<256; i++) -+ if((i&0xF) < 4 && i<64) -+ EXPECT_EQ(0, output[i]) << i; -+ else -+ EXPECT_EQ(255, output[i]); -+} -+ -+TEST_P(IDCTTest, TestAllZeros) -+{ -+ int i; -+ -+ REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16)); -+ -+ for(i=0; i<256; i++) -+ if((i&0xF) < 4 && i<64) -+ EXPECT_EQ(0, output[i]) << "i==" << i; -+ else -+ EXPECT_EQ(255, output[i]) << "i==" << i; -+} -+ -+TEST_P(IDCTTest, TestAllOnes) -+{ -+ int i; -+ -+ input[0] = 4; -+ REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16)); -+ -+ for(i=0; i<256; i++) -+ if((i&0xF) < 4 && i<64) -+ EXPECT_EQ(1, output[i]) << "i==" << i; -+ else -+ EXPECT_EQ(255, output[i]) << "i==" << i; -+} -+ -+TEST_P(IDCTTest, TestAddOne) -+{ -+ int i; -+ -+ for(i=0; i<256; i++) -+ predict[i] = i; -+ -+ input[0] = 4; -+ REGISTER_STATE_CHECK(UUT(input, predict, 16, output, 16)); -+ -+ for(i=0; i<256; i++) -+ if((i&0xF) < 4 && i<64) -+ EXPECT_EQ(i+1, output[i]) << "i==" << i; -+ else -+ EXPECT_EQ(255, output[i]) << "i==" << i; -+} -+ -+TEST_P(IDCTTest, TestWithData) -+{ -+ int i; -+ -+ for(i=0; i<16; i++) -+ input[i] = i; -+ -+ REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16)); -+ -+ for(i=0; i<256; i++) -+ if((i&0xF) > 3 || i>63) -+ EXPECT_EQ(255, output[i]) << "i==" << i; -+ else if(i == 0) -+ EXPECT_EQ(11, output[i]) << "i==" << i; -+ else if(i == 34) -+ EXPECT_EQ(1, output[i]) << "i==" << i; -+ else if(i == 2 || i == 17 || i == 32) -+ EXPECT_EQ(3, output[i]) << "i==" << i; -+ else -+ EXPECT_EQ(0, output[i]) << "i==" << i; -+} -+ -+INSTANTIATE_TEST_CASE_P(C, IDCTTest, -+ ::testing::Values(vp8_short_idct4x4llm_c)); -+#if HAVE_MMX -+INSTANTIATE_TEST_CASE_P(MMX, IDCTTest, -+ ::testing::Values(vp8_short_idct4x4llm_mmx)); -+#endif -+} -diff --git a/test/intrapred_test.cc b/test/intrapred_test.cc -new file mode 100644 -index 0000000..4c16c3f ---- /dev/null -+++ b/test/intrapred_test.cc -@@ -0,0 +1,357 @@ -+/* -+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. -+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. 
-+ */ -+ -+ -+#include -+#include "test/acm_random.h" -+#include "test/register_state_check.h" -+#include "third_party/googletest/src/include/gtest/gtest.h" -+extern "C" { -+#include "vpx_config.h" -+#include "vpx_rtcd.h" -+#include "vp8/common/blockd.h" -+#include "vpx_mem/vpx_mem.h" -+} -+ -+namespace { -+ -+using libvpx_test::ACMRandom; -+ -+class IntraPredBase { -+ protected: -+ void SetupMacroblock(uint8_t *data, int block_size, int stride, -+ int num_planes) { -+ memset(&mb_, 0, sizeof(mb_)); -+ memset(&mi_, 0, sizeof(mi_)); -+ mb_.up_available = 1; -+ mb_.left_available = 1; -+ mb_.mode_info_context = &mi_; -+ stride_ = stride; -+ block_size_ = block_size; -+ num_planes_ = num_planes; -+ for (int p = 0; p < num_planes; p++) -+ data_ptr_[p] = data + stride * (block_size + 1) * p + -+ stride + block_size; -+ } -+ -+ void FillRandom() { -+ // Fill edges with random data -+ ACMRandom rnd(ACMRandom::DeterministicSeed()); -+ for (int p = 0; p < num_planes_; p++) { -+ for (int x = -1 ; x <= block_size_; x++) -+ data_ptr_[p][x - stride_] = rnd.Rand8(); -+ for (int y = 0; y < block_size_; y++) -+ data_ptr_[p][y * stride_ - 1] = rnd.Rand8(); -+ } -+ } -+ -+ virtual void Predict(MB_PREDICTION_MODE mode) = 0; -+ -+ void SetLeftUnavailable() { -+ mb_.left_available = 0; -+ for (int p = 0; p < num_planes_; p++) -+ for (int i = -1; i < block_size_; ++i) -+ data_ptr_[p][stride_ * i - 1] = 129; -+ } -+ -+ void SetTopUnavailable() { -+ mb_.up_available = 0; -+ for (int p = 0; p < num_planes_; p++) -+ memset(&data_ptr_[p][-1 - stride_], 127, block_size_ + 2); -+ } -+ -+ void SetTopLeftUnavailable() { -+ SetLeftUnavailable(); -+ SetTopUnavailable(); -+ } -+ -+ int BlockSizeLog2Min1() const { -+ switch (block_size_) { -+ case 16: -+ return 3; -+ case 8: -+ return 2; -+ default: -+ return 0; -+ } -+ } -+ -+ // check DC prediction output against a reference -+ void CheckDCPrediction() const { -+ for (int p = 0; p < num_planes_; p++) { -+ // calculate expected DC -+ int expected; -+ if (mb_.up_available || mb_.left_available) { -+ int sum = 0, shift = BlockSizeLog2Min1() + mb_.up_available + -+ mb_.left_available; -+ if (mb_.up_available) -+ for (int x = 0; x < block_size_; x++) -+ sum += data_ptr_[p][x - stride_]; -+ if (mb_.left_available) -+ for (int y = 0; y < block_size_; y++) -+ sum += data_ptr_[p][y * stride_ - 1]; -+ expected = (sum + (1 << (shift - 1))) >> shift; -+ } else -+ expected = 0x80; -+ -+ // check that all subsequent lines are equal to the first -+ for (int y = 1; y < block_size_; ++y) -+ ASSERT_EQ(0, memcmp(data_ptr_[p], &data_ptr_[p][y * stride_], -+ block_size_)); -+ // within the first line, ensure that each pixel has the same value -+ for (int x = 1; x < block_size_; ++x) -+ ASSERT_EQ(data_ptr_[p][0], data_ptr_[p][x]); -+ // now ensure that that pixel has the expected (DC) value -+ ASSERT_EQ(expected, data_ptr_[p][0]); -+ } -+ } -+ -+ // check V prediction output against a reference -+ void CheckVPrediction() const { -+ // check that all lines equal the top border -+ for (int p = 0; p < num_planes_; p++) -+ for (int y = 0; y < block_size_; y++) -+ ASSERT_EQ(0, memcmp(&data_ptr_[p][-stride_], -+ &data_ptr_[p][y * stride_], block_size_)); -+ } -+ -+ // check H prediction output against a reference -+ void CheckHPrediction() const { -+ // for each line, ensure that each pixel is equal to the left border -+ for (int p = 0; p < num_planes_; p++) -+ for (int y = 0; y < block_size_; y++) -+ for (int x = 0; x < block_size_; x++) -+ ASSERT_EQ(data_ptr_[p][-1 + y * stride_], -+ data_ptr_[p][x 
+ y * stride_]); -+ } -+ -+ static int ClipByte(int value) { -+ if (value > 255) -+ return 255; -+ else if (value < 0) -+ return 0; -+ return value; -+ } -+ -+ // check TM prediction output against a reference -+ void CheckTMPrediction() const { -+ for (int p = 0; p < num_planes_; p++) -+ for (int y = 0; y < block_size_; y++) -+ for (int x = 0; x < block_size_; x++) { -+ const int expected = ClipByte(data_ptr_[p][x - stride_] -+ + data_ptr_[p][stride_ * y - 1] -+ - data_ptr_[p][-1 - stride_]); -+ ASSERT_EQ(expected, data_ptr_[p][y * stride_ + x]); -+ } -+ } -+ -+ // Actual test -+ void RunTest() { -+ { -+ SCOPED_TRACE("DC_PRED"); -+ FillRandom(); -+ Predict(DC_PRED); -+ CheckDCPrediction(); -+ } -+ { -+ SCOPED_TRACE("DC_PRED LEFT"); -+ FillRandom(); -+ SetLeftUnavailable(); -+ Predict(DC_PRED); -+ CheckDCPrediction(); -+ } -+ { -+ SCOPED_TRACE("DC_PRED TOP"); -+ FillRandom(); -+ SetTopUnavailable(); -+ Predict(DC_PRED); -+ CheckDCPrediction(); -+ } -+ { -+ SCOPED_TRACE("DC_PRED TOP_LEFT"); -+ FillRandom(); -+ SetTopLeftUnavailable(); -+ Predict(DC_PRED); -+ CheckDCPrediction(); -+ } -+ { -+ SCOPED_TRACE("H_PRED"); -+ FillRandom(); -+ Predict(H_PRED); -+ CheckHPrediction(); -+ } -+ { -+ SCOPED_TRACE("V_PRED"); -+ FillRandom(); -+ Predict(V_PRED); -+ CheckVPrediction(); -+ } -+ { -+ SCOPED_TRACE("TM_PRED"); -+ FillRandom(); -+ Predict(TM_PRED); -+ CheckTMPrediction(); -+ } -+ } -+ -+ MACROBLOCKD mb_; -+ MODE_INFO mi_; -+ uint8_t *data_ptr_[2]; // in the case of Y, only [0] is used -+ int stride_; -+ int block_size_; -+ int num_planes_; -+}; -+ -+typedef void (*intra_pred_y_fn_t)(MACROBLOCKD *x, -+ uint8_t *yabove_row, -+ uint8_t *yleft, -+ int left_stride, -+ uint8_t *ypred_ptr, -+ int y_stride); -+ -+class IntraPredYTest : public ::testing::TestWithParam, -+ protected IntraPredBase { -+ public: -+ static void SetUpTestCase() { -+ data_array_ = reinterpret_cast( -+ vpx_memalign(kDataAlignment, kDataBufferSize)); -+ } -+ -+ static void TearDownTestCase() { -+ vpx_free(data_array_); -+ data_array_ = NULL; -+ } -+ -+ protected: -+ static const int kBlockSize = 16; -+ static const int kDataAlignment = 16; -+ static const int kStride = kBlockSize * 3; -+ // We use 48 so that the data pointer of the first pixel in each row of -+ // each macroblock is 16-byte aligned, and this gives us access to the -+ // top-left and top-right corner pixels belonging to the top-left/right -+ // macroblocks. -+ // We use 17 lines so we have one line above us for top-prediction. 
-+ static const int kDataBufferSize = kStride * (kBlockSize + 1); -+ -+ virtual void SetUp() { -+ pred_fn_ = GetParam(); -+ SetupMacroblock(data_array_, kBlockSize, kStride, 1); -+ } -+ -+ virtual void Predict(MB_PREDICTION_MODE mode) { -+ mb_.mode_info_context->mbmi.mode = mode; -+ REGISTER_STATE_CHECK(pred_fn_(&mb_, -+ data_ptr_[0] - kStride, -+ data_ptr_[0] - 1, kStride, -+ data_ptr_[0], kStride)); -+ } -+ -+ intra_pred_y_fn_t pred_fn_; -+ static uint8_t* data_array_; -+}; -+ -+uint8_t* IntraPredYTest::data_array_ = NULL; -+ -+TEST_P(IntraPredYTest, IntraPredTests) { -+ RunTest(); -+} -+ -+INSTANTIATE_TEST_CASE_P(C, IntraPredYTest, -+ ::testing::Values( -+ vp8_build_intra_predictors_mby_s_c)); -+#if HAVE_SSE2 -+INSTANTIATE_TEST_CASE_P(SSE2, IntraPredYTest, -+ ::testing::Values( -+ vp8_build_intra_predictors_mby_s_sse2)); -+#endif -+#if HAVE_SSSE3 -+INSTANTIATE_TEST_CASE_P(SSSE3, IntraPredYTest, -+ ::testing::Values( -+ vp8_build_intra_predictors_mby_s_ssse3)); -+#endif -+ -+typedef void (*intra_pred_uv_fn_t)(MACROBLOCKD *x, -+ uint8_t *uabove_row, -+ uint8_t *vabove_row, -+ uint8_t *uleft, -+ uint8_t *vleft, -+ int left_stride, -+ uint8_t *upred_ptr, -+ uint8_t *vpred_ptr, -+ int pred_stride); -+ -+class IntraPredUVTest : public ::testing::TestWithParam, -+ protected IntraPredBase { -+ public: -+ static void SetUpTestCase() { -+ data_array_ = reinterpret_cast( -+ vpx_memalign(kDataAlignment, kDataBufferSize)); -+ } -+ -+ static void TearDownTestCase() { -+ vpx_free(data_array_); -+ data_array_ = NULL; -+ } -+ -+ protected: -+ static const int kBlockSize = 8; -+ static const int kDataAlignment = 8; -+ static const int kStride = kBlockSize * 3; -+ // We use 24 so that the data pointer of the first pixel in each row of -+ // each macroblock is 8-byte aligned, and this gives us access to the -+ // top-left and top-right corner pixels belonging to the top-left/right -+ // macroblocks. -+ // We use 9 lines so we have one line above us for top-prediction. -+ // [0] = U, [1] = V -+ static const int kDataBufferSize = 2 * kStride * (kBlockSize + 1); -+ -+ virtual void SetUp() { -+ pred_fn_ = GetParam(); -+ SetupMacroblock(data_array_, kBlockSize, kStride, 2); -+ } -+ -+ virtual void Predict(MB_PREDICTION_MODE mode) { -+ mb_.mode_info_context->mbmi.uv_mode = mode; -+ pred_fn_(&mb_, data_ptr_[0] - kStride, data_ptr_[1] - kStride, -+ data_ptr_[0] - 1, data_ptr_[1] - 1, kStride, -+ data_ptr_[0], data_ptr_[1], kStride); -+ } -+ -+ intra_pred_uv_fn_t pred_fn_; -+ // We use 24 so that the data pointer of the first pixel in each row of -+ // each macroblock is 8-byte aligned, and this gives us access to the -+ // top-left and top-right corner pixels belonging to the top-left/right -+ // macroblocks. -+ // We use 9 lines so we have one line above us for top-prediction. 
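The typedef/INSTANTIATE_TEST_CASE_P pairs above all express the same idea: one correctness check, parameterized over a function pointer, so the C routine and every SIMD variant enabled by the build run through identical test code. A rough gtest-free sketch of that dispatch pattern, with a deliberately trivial kernel and made-up names:

#include <cstdint>
#include <cstdio>

// Any implementation under test must match this signature.
typedef int (*row_sum_fn_t)(const uint8_t *row, int width);

static int RowSumC(const uint8_t *row, int width) {  // portable reference
  int sum = 0;
  for (int i = 0; i < width; ++i) sum += row[i];
  return sum;
}

// A SIMD build would append e.g. a RowSumSse2 entry here under #if HAVE_SSE2.
static const row_sum_fn_t kImplementations[] = { RowSumC };

int main() {
  uint8_t row[16];
  for (int i = 0; i < 16; ++i) row[i] = static_cast<uint8_t>(i + 1);
  const int expected = RowSumC(row, 16);  // reference result
  for (row_sum_fn_t fn : kImplementations)
    printf("%s\n", fn(row, 16) == expected ? "match" : "MISMATCH");
  return 0;
}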
-+ // [0] = U, [1] = V -+ static uint8_t* data_array_; -+}; -+ -+uint8_t* IntraPredUVTest::data_array_ = NULL; -+ -+TEST_P(IntraPredUVTest, IntraPredTests) { -+ RunTest(); -+} -+ -+INSTANTIATE_TEST_CASE_P(C, IntraPredUVTest, -+ ::testing::Values( -+ vp8_build_intra_predictors_mbuv_s_c)); -+#if HAVE_SSE2 -+INSTANTIATE_TEST_CASE_P(SSE2, IntraPredUVTest, -+ ::testing::Values( -+ vp8_build_intra_predictors_mbuv_s_sse2)); -+#endif -+#if HAVE_SSSE3 -+INSTANTIATE_TEST_CASE_P(SSSE3, IntraPredUVTest, -+ ::testing::Values( -+ vp8_build_intra_predictors_mbuv_s_ssse3)); -+#endif -+ -+} // namespace -diff --git a/test/ivf_video_source.h b/test/ivf_video_source.h -new file mode 100644 -index 0000000..48c3a7d ---- /dev/null -+++ b/test/ivf_video_source.h -@@ -0,0 +1,109 @@ -+/* -+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. -+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. -+ */ -+#ifndef TEST_IVF_VIDEO_SOURCE_H_ -+#define TEST_IVF_VIDEO_SOURCE_H_ -+#include -+#include -+#include -+#include -+#include "test/video_source.h" -+ -+namespace libvpx_test { -+const unsigned int kCodeBufferSize = 256 * 1024; -+const unsigned int kIvfFileHdrSize = 32; -+const unsigned int kIvfFrameHdrSize = 12; -+ -+static unsigned int MemGetLe32(const uint8_t *mem) { -+ return (mem[3] << 24) | (mem[2] << 16) | (mem[1] << 8) | (mem[0]); -+} -+ -+// This class extends VideoSource to allow parsing of ivf files, -+// so that we can do actual file decodes. -+class IVFVideoSource : public CompressedVideoSource { -+ public: -+ IVFVideoSource(const std::string &file_name) -+ : file_name_(file_name), -+ input_file_(NULL), -+ compressed_frame_buf_(NULL), -+ frame_sz_(0), -+ frame_(0), -+ end_of_file_(false) { -+ } -+ -+ virtual ~IVFVideoSource() { -+ delete[] compressed_frame_buf_; -+ -+ if (input_file_) -+ fclose(input_file_); -+ } -+ -+ virtual void Init() { -+ // Allocate a buffer for read in the compressed video frame. -+ compressed_frame_buf_ = new uint8_t[libvpx_test::kCodeBufferSize]; -+ ASSERT_TRUE(compressed_frame_buf_) << "Allocate frame buffer failed"; -+ } -+ -+ virtual void Begin() { -+ input_file_ = OpenTestDataFile(file_name_); -+ ASSERT_TRUE(input_file_) << "Input file open failed. Filename: " -+ << file_name_; -+ -+ // Read file header -+ uint8_t file_hdr[kIvfFileHdrSize]; -+ ASSERT_EQ(kIvfFileHdrSize, fread(file_hdr, 1, kIvfFileHdrSize, input_file_)) -+ << "File header read failed."; -+ // Check file header -+ ASSERT_TRUE(file_hdr[0] == 'D' && file_hdr[1] == 'K' && file_hdr[2] == 'I' -+ && file_hdr[3] == 'F') << "Input is not an IVF file."; -+ -+ FillFrame(); -+ } -+ -+ virtual void Next() { -+ ++frame_; -+ FillFrame(); -+ } -+ -+ void FillFrame() { -+ uint8_t frame_hdr[kIvfFrameHdrSize]; -+ // Check frame header and read a frame from input_file. 
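MemGetLe32() above assembles the little-endian frame size from the first four bytes of each 12-byte IVF frame header (the remaining eight bytes hold the timestamp). The same helper in isolation, fed a fabricated header:

#include <cstdint>
#include <cstdio>

static unsigned int MemGetLe32(const uint8_t *mem) {
  return (mem[3] << 24) | (mem[2] << 16) | (mem[1] << 8) | mem[0];
}

int main() {
  // A made-up 12-byte IVF frame header: frame size (LE32), then a 64-bit pts.
  const uint8_t frame_hdr[12] = { 0x10, 0x27, 0x00, 0x00,  // 0x2710 = 10000
                                  0, 0, 0, 0, 0, 0, 0, 0 };
  printf("frame size = %u bytes\n", MemGetLe32(frame_hdr));
  return 0;
}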
-+ if (fread(frame_hdr, 1, kIvfFrameHdrSize, input_file_) -+ != kIvfFrameHdrSize) { -+ end_of_file_ = true; -+ } else { -+ end_of_file_ = false; -+ -+ frame_sz_ = MemGetLe32(frame_hdr); -+ ASSERT_LE(frame_sz_, kCodeBufferSize) -+ << "Frame is too big for allocated code buffer"; -+ ASSERT_EQ(frame_sz_, -+ fread(compressed_frame_buf_, 1, frame_sz_, input_file_)) -+ << "Failed to read complete frame"; -+ } -+ } -+ -+ virtual const uint8_t *cxdata() const { -+ return end_of_file_ ? NULL : compressed_frame_buf_; -+ } -+ virtual const unsigned int frame_size() const { return frame_sz_; } -+ virtual const unsigned int frame_number() const { return frame_; } -+ -+ protected: -+ std::string file_name_; -+ FILE *input_file_; -+ uint8_t *compressed_frame_buf_; -+ unsigned int frame_sz_; -+ unsigned int frame_; -+ bool end_of_file_; -+}; -+ -+} // namespace libvpx_test -+ -+#endif // TEST_IVF_VIDEO_SOURCE_H_ -diff --git a/test/keyframe_test.cc b/test/keyframe_test.cc -new file mode 100644 -index 0000000..d0c81df ---- /dev/null -+++ b/test/keyframe_test.cc -@@ -0,0 +1,145 @@ -+/* -+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. -+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. -+ */ -+#include -+#include -+#include "test/encode_test_driver.h" -+#include "test/i420_video_source.h" -+#include "third_party/googletest/src/include/gtest/gtest.h" -+ -+namespace { -+ -+class KeyframeTest : public ::libvpx_test::EncoderTest, -+ public ::testing::TestWithParam { -+ protected: -+ virtual void SetUp() { -+ InitializeConfig(); -+ SetMode(GetParam()); -+ kf_count_ = 0; -+ kf_count_max_ = INT_MAX; -+ kf_do_force_kf_ = false; -+ set_cpu_used_ = 0; -+ } -+ -+ virtual bool Continue() const { -+ return !HasFatalFailure() && !abort_; -+ } -+ -+ virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video, -+ ::libvpx_test::Encoder *encoder) { -+ if (kf_do_force_kf_) -+ frame_flags_ = (video->frame() % 3) ? 0 : VPX_EFLAG_FORCE_KF; -+ if (set_cpu_used_ && video->frame() == 1) -+ encoder->Control(VP8E_SET_CPUUSED, set_cpu_used_); -+ } -+ -+ virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) { -+ if (pkt->data.frame.flags & VPX_FRAME_IS_KEY) { -+ kf_pts_list_.push_back(pkt->data.frame.pts); -+ kf_count_++; -+ abort_ |= kf_count_ > kf_count_max_; -+ } -+ } -+ -+ bool kf_do_force_kf_; -+ int kf_count_; -+ int kf_count_max_; -+ std::vector kf_pts_list_; -+ int set_cpu_used_; -+}; -+ -+TEST_P(KeyframeTest, TestRandomVideoSource) { -+ // Validate that encoding the RandomVideoSource produces multiple keyframes. -+ // This validates the results of the TestDisableKeyframes test. -+ kf_count_max_ = 2; // early exit successful tests. -+ -+ ::libvpx_test::RandomVideoSource video; -+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); -+ -+ // In realtime mode - auto placed keyframes are exceedingly rare, don't -+ // bother with this check if(GetParam() > 0) -+ if(GetParam() > 0) -+ EXPECT_GT(kf_count_, 1); -+} -+ -+TEST_P(KeyframeTest, TestDisableKeyframes) { -+ cfg_.kf_mode = VPX_KF_DISABLED; -+ kf_count_max_ = 1; // early exit failed tests. 
-+ -+ ::libvpx_test::RandomVideoSource video; -+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); -+ -+ EXPECT_EQ(1, kf_count_); -+} -+ -+TEST_P(KeyframeTest, TestForceKeyframe) { -+ cfg_.kf_mode = VPX_KF_DISABLED; -+ kf_do_force_kf_ = true; -+ -+ ::libvpx_test::DummyVideoSource video; -+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); -+ -+ // verify that every third frame is a keyframe. -+ for (std::vector::const_iterator iter = kf_pts_list_.begin(); -+ iter != kf_pts_list_.end(); ++iter) { -+ ASSERT_EQ(0, *iter % 3) << "Unexpected keyframe at frame " << *iter; -+ } -+} -+ -+TEST_P(KeyframeTest, TestKeyframeMaxDistance) { -+ cfg_.kf_max_dist = 25; -+ -+ ::libvpx_test::DummyVideoSource video; -+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); -+ -+ // verify that keyframe interval matches kf_max_dist -+ for (std::vector::const_iterator iter = kf_pts_list_.begin(); -+ iter != kf_pts_list_.end(); ++iter) { -+ ASSERT_EQ(0, *iter % 25) << "Unexpected keyframe at frame " << *iter; -+ } -+} -+ -+TEST_P(KeyframeTest, TestAutoKeyframe) { -+ cfg_.kf_mode = VPX_KF_AUTO; -+ kf_do_force_kf_ = false; -+ -+ // Force a deterministic speed step in Real Time mode, as the faster modes -+ // may not produce a keyframe like we expect. This is necessary when running -+ // on very slow environments (like Valgrind). The step -11 was determined -+ // experimentally as the fastest mode that still throws the keyframe. -+ if (deadline_ == VPX_DL_REALTIME) -+ set_cpu_used_ = -11; -+ -+ // This clip has a cut scene every 30 frames -> Frame 0, 30, 60, 90, 120. -+ // I check only the first 40 frames to make sure there's a keyframe at frame -+ // 0 and 30. -+ ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, -+ 30, 1, 0, 40); -+ -+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); -+ -+ // In realtime mode - auto placed keyframes are exceedingly rare, don't -+ // bother with this check -+ if(GetParam() > 0) -+ EXPECT_EQ(2u, kf_pts_list_.size()) << " Not the right number of keyframes "; -+ -+ // Verify that keyframes match the file keyframes in the file. -+ for (std::vector::const_iterator iter = kf_pts_list_.begin(); -+ iter != kf_pts_list_.end(); ++iter) { -+ -+ if (deadline_ == VPX_DL_REALTIME && *iter > 0) -+ EXPECT_EQ(0, (*iter - 1) % 30) << "Unexpected keyframe at frame " -+ << *iter; -+ else -+ EXPECT_EQ(0, *iter % 30) << "Unexpected keyframe at frame " << *iter; -+ } -+} -+ -+INSTANTIATE_TEST_CASE_P(AllModes, KeyframeTest, ALL_TEST_MODES); -+} // namespace -diff --git a/test/pp_filter_test.cc b/test/pp_filter_test.cc -new file mode 100644 -index 0000000..9227449 ---- /dev/null -+++ b/test/pp_filter_test.cc -@@ -0,0 +1,107 @@ -+/* -+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. -+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. 
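All of the keyframe tests above reduce to the same bookkeeping: record the pts of every packet flagged VPX_FRAME_IS_KEY, then check the list against an expected cadence (every third frame when forcing, multiples of kf_max_dist, or the 30-frame scene cuts). A tiny sketch of that final check, using a hand-written pts list instead of a real encoder run:

#include <cstdio>
#include <vector>

int main() {
  // Pretend key-frame pts values collected by a FramePktHook-style callback.
  const std::vector<long> kf_pts = { 0, 25, 50, 75 };
  const long kf_max_dist = 25;  // the cadence the encoder was configured for
  bool ok = true;
  for (long pts : kf_pts) {
    if (pts % kf_max_dist != 0) {
      printf("Unexpected keyframe at frame %ld\n", pts);
      ok = false;
    }
  }
  printf("%s\n", ok ? "cadence ok" : "cadence violated");
  return 0;
}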
-+ */ -+#include "test/register_state_check.h" -+#include "third_party/googletest/src/include/gtest/gtest.h" -+extern "C" { -+#include "vpx_config.h" -+#include "vpx_rtcd.h" -+#include "vpx/vpx_integer.h" -+#include "vpx_mem/vpx_mem.h" -+} -+ -+typedef void (*post_proc_func_t)(unsigned char *src_ptr, -+ unsigned char *dst_ptr, -+ int src_pixels_per_line, -+ int dst_pixels_per_line, -+ int cols, -+ unsigned char *flimit, -+ int size); -+ -+namespace { -+ -+class Vp8PostProcessingFilterTest -+ : public ::testing::TestWithParam {}; -+ -+// Test routine for the VP8 post-processing function -+// vp8_post_proc_down_and_across_mb_row_c. -+ -+TEST_P(Vp8PostProcessingFilterTest, FilterOutputCheck) { -+ // Size of the underlying data block that will be filtered. -+ const int block_width = 16; -+ const int block_height = 16; -+ -+ // 5-tap filter needs 2 padding rows above and below the block in the input. -+ const int input_width = block_width; -+ const int input_height = block_height + 4; -+ const int input_stride = input_width; -+ const int input_size = input_width * input_height; -+ -+ // Filter extends output block by 8 samples at left and right edges. -+ const int output_width = block_width + 16; -+ const int output_height = block_height; -+ const int output_stride = output_width; -+ const int output_size = output_width * output_height; -+ -+ uint8_t *const src_image = -+ reinterpret_cast(vpx_calloc(input_size, 1)); -+ uint8_t *const dst_image = -+ reinterpret_cast(vpx_calloc(output_size, 1)); -+ -+ // Pointers to top-left pixel of block in the input and output images. -+ uint8_t *const src_image_ptr = src_image + (input_stride << 1); -+ uint8_t *const dst_image_ptr = dst_image + 8; -+ uint8_t *const flimits = reinterpret_cast(vpx_memalign(16, block_width)); -+ (void)vpx_memset(flimits, 255, block_width); -+ -+ // Initialize pixels in the input: -+ // block pixels to value 1, -+ // border pixels to value 10. -+ (void)vpx_memset(src_image, 10, input_size); -+ uint8_t *pixel_ptr = src_image_ptr; -+ for (int i = 0; i < block_height; ++i) { -+ for (int j = 0; j < block_width; ++j) { -+ pixel_ptr[j] = 1; -+ } -+ pixel_ptr += input_stride; -+ } -+ -+ // Initialize pixels in the output to 99. -+ (void)vpx_memset(dst_image, 99, output_size); -+ -+ REGISTER_STATE_CHECK(GetParam()(src_image_ptr, dst_image_ptr, input_stride, -+ output_stride, block_width, flimits, 16)); -+ -+ static const uint8_t expected_data[block_height] = { -+ 4, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 4 -+ }; -+ -+ pixel_ptr = dst_image_ptr; -+ for (int i = 0; i < block_height; ++i) { -+ for (int j = 0; j < block_width; ++j) { -+ EXPECT_EQ(expected_data[i], pixel_ptr[j]) -+ << "Vp8PostProcessingFilterTest failed with invalid filter output"; -+ } -+ pixel_ptr += output_stride; -+ } -+ -+ vpx_free(src_image); -+ vpx_free(dst_image); -+ vpx_free(flimits); -+}; -+ -+INSTANTIATE_TEST_CASE_P(C, Vp8PostProcessingFilterTest, -+ ::testing::Values(vp8_post_proc_down_and_across_mb_row_c)); -+ -+#if HAVE_SSE2 -+INSTANTIATE_TEST_CASE_P(SSE2, Vp8PostProcessingFilterTest, -+ ::testing::Values(vp8_post_proc_down_and_across_mb_row_sse2)); -+#endif -+ -+} // namespace -diff --git a/test/register_state_check.h b/test/register_state_check.h -new file mode 100644 -index 0000000..fb3f53b ---- /dev/null -+++ b/test/register_state_check.h -@@ -0,0 +1,95 @@ -+/* -+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. 
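Much of the post-processing test above is buffer geometry: the 5-tap down-and-across filter needs two padding rows above and below the 16x16 block, and the output block is widened by 8 samples on each side. The pointer arithmetic it depends on, pulled out on its own (sizes only, no filtering):

#include <cstdio>

int main() {
  const int block_width = 16, block_height = 16;
  const int input_stride = block_width;                       // no horizontal padding
  const int input_size = input_stride * (block_height + 4);   // 2 padding rows each side
  const int output_stride = block_width + 16;                 // 8 extra samples each side
  const int output_size = output_stride * block_height;
  // Top-left pixel of the block inside each buffer:
  const int src_offset = input_stride * 2;                    // skip the 2 padding rows
  const int dst_offset = 8;                                   // skip the left extension
  printf("in %d bytes (block at %d), out %d bytes (block at %d)\n",
         input_size, src_offset, output_size, dst_offset);
  return 0;
}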
-+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. -+ */ -+ -+#ifndef LIBVPX_TEST_REGISTER_STATE_CHECK_H_ -+#define LIBVPX_TEST_REGISTER_STATE_CHECK_H_ -+ -+#ifdef _WIN64 -+ -+#define _WIN32_LEAN_AND_MEAN -+#include -+#include -+ -+#include "third_party/googletest/src/include/gtest/gtest.h" -+ -+namespace testing { -+namespace internal { -+ -+inline bool operator==(const M128A& lhs, const M128A& rhs) { -+ return (lhs.Low == rhs.Low && lhs.High == rhs.High); -+} -+ -+} // namespace internal -+} // namespace testing -+ -+namespace libvpx_test { -+ -+// Compares the state of xmm[6-15] at construction with their state at -+// destruction. These registers should be preserved by the callee on -+// Windows x64. -+// Usage: -+// { -+// RegisterStateCheck reg_check; -+// FunctionToVerify(); -+// } -+class RegisterStateCheck { -+ public: -+ RegisterStateCheck() { initialized_ = StoreRegisters(&pre_context_); } -+ ~RegisterStateCheck() { EXPECT_TRUE(Check()); } -+ -+ private: -+ static bool StoreRegisters(CONTEXT* const context) { -+ const HANDLE this_thread = GetCurrentThread(); -+ EXPECT_TRUE(this_thread != NULL); -+ context->ContextFlags = CONTEXT_FLOATING_POINT; -+ const bool context_saved = GetThreadContext(this_thread, context) == TRUE; -+ EXPECT_TRUE(context_saved) << "GetLastError: " << GetLastError(); -+ return context_saved; -+ } -+ -+ // Compares the register state. Returns true if the states match. -+ bool Check() const { -+ if (!initialized_) return false; -+ CONTEXT post_context; -+ if (!StoreRegisters(&post_context)) return false; -+ -+ const M128A* xmm_pre = &pre_context_.Xmm6; -+ const M128A* xmm_post = &post_context.Xmm6; -+ for (int i = 6; i <= 15; ++i) { -+ EXPECT_EQ(*xmm_pre, *xmm_post) << "xmm" << i << " has been modified!"; -+ ++xmm_pre; -+ ++xmm_post; -+ } -+ return !testing::Test::HasNonfatalFailure(); -+ } -+ -+ bool initialized_; -+ CONTEXT pre_context_; -+}; -+ -+#define REGISTER_STATE_CHECK(statement) do { \ -+ libvpx_test::RegisterStateCheck reg_check; \ -+ statement; \ -+} while (false) -+ -+} // namespace libvpx_test -+ -+#else // !_WIN64 -+ -+namespace libvpx_test { -+ -+class RegisterStateCheck {}; -+#define REGISTER_STATE_CHECK(statement) statement -+ -+} // namespace libvpx_test -+ -+#endif // _WIN64 -+ -+#endif // LIBVPX_TEST_REGISTER_STATE_CHECK_H_ -diff --git a/test/resize_test.cc b/test/resize_test.cc -new file mode 100644 -index 0000000..c846157 ---- /dev/null -+++ b/test/resize_test.cc -@@ -0,0 +1,104 @@ -+/* -+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. -+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. 
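RegisterStateCheck above is a scope guard: the constructor snapshots callee-saved state (xmm6 through xmm15 on Win64) and the destructor verifies it when the REGISTER_STATE_CHECK block ends. The same shape with the Win64 CONTEXT plumbing stripped out, guarding an ordinary variable instead of registers (illustrative only):

#include <cassert>
#include <cstdio>

// Stand-in for state a callee is required to preserve.
static int g_preserved_state = 42;

class StateCheck {            // snapshot on entry, verify on scope exit
 public:
  StateCheck() : saved_(g_preserved_state) {}
  ~StateCheck() { assert(g_preserved_state == saved_ && "state clobbered"); }
 private:
  int saved_;
};

#define STATE_CHECK(statement) do { StateCheck check; statement; } while (false)

static void WellBehaved() { /* leaves g_preserved_state alone */ }

int main() {
  STATE_CHECK(WellBehaved());  // passes; a clobbering callee would assert
  printf("state preserved\n");
  return 0;
}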
-+ */ -+#include -+#include -+#include "test/encode_test_driver.h" -+#include "test/video_source.h" -+#include "third_party/googletest/src/include/gtest/gtest.h" -+ -+namespace { -+ -+const unsigned int kInitialWidth = 320; -+const unsigned int kInitialHeight = 240; -+ -+unsigned int ScaleForFrameNumber(unsigned int frame, unsigned int val) { -+ if (frame < 10) -+ return val; -+ if (frame < 20) -+ return val / 2; -+ if (frame < 30) -+ return val * 2 / 3; -+ if (frame < 40) -+ return val / 4; -+ if (frame < 50) -+ return val * 7 / 8; -+ return val; -+} -+ -+class ResizingVideoSource : public ::libvpx_test::DummyVideoSource { -+ public: -+ ResizingVideoSource() { -+ SetSize(kInitialWidth, kInitialHeight); -+ limit_ = 60; -+ } -+ -+ protected: -+ virtual void Next() { -+ ++frame_; -+ SetSize(ScaleForFrameNumber(frame_, kInitialWidth), -+ ScaleForFrameNumber(frame_, kInitialHeight)); -+ FillFrame(); -+ } -+}; -+ -+class ResizeTest : public ::libvpx_test::EncoderTest, -+ public ::testing::TestWithParam { -+ protected: -+ struct FrameInfo { -+ FrameInfo(vpx_codec_pts_t _pts, unsigned int _w, unsigned int _h) -+ : pts(_pts), w(_w), h(_h) {} -+ -+ vpx_codec_pts_t pts; -+ unsigned int w; -+ unsigned int h; -+ }; -+ -+ virtual void SetUp() { -+ InitializeConfig(); -+ SetMode(GetParam()); -+ } -+ -+ virtual bool Continue() const { -+ return !HasFatalFailure() && !abort_; -+ } -+ -+ virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) { -+ if (pkt->data.frame.flags & VPX_FRAME_IS_KEY) { -+ const unsigned char *buf = -+ reinterpret_cast(pkt->data.frame.buf); -+ const unsigned int w = (buf[6] | (buf[7] << 8)) & 0x3fff; -+ const unsigned int h = (buf[8] | (buf[9] << 8)) & 0x3fff; -+ -+ frame_info_list_.push_back(FrameInfo(pkt->data.frame.pts, w, h)); -+ } -+ } -+ -+ std::vector< FrameInfo > frame_info_list_; -+}; -+ -+TEST_P(ResizeTest, TestExternalResizeWorks) { -+ ResizingVideoSource video; -+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); -+ -+ for (std::vector::iterator info = frame_info_list_.begin(); -+ info != frame_info_list_.end(); ++info) { -+ const vpx_codec_pts_t pts = info->pts; -+ const unsigned int expected_w = ScaleForFrameNumber(pts, kInitialWidth); -+ const unsigned int expected_h = ScaleForFrameNumber(pts, kInitialHeight); -+ -+ EXPECT_EQ(expected_w, info->w) -+ << "Frame " << pts << "had unexpected width"; -+ EXPECT_EQ(expected_h, info->h) -+ << "Frame " << pts << "had unexpected height"; -+ } -+} -+ -+INSTANTIATE_TEST_CASE_P(OnePass, ResizeTest, ONE_PASS_TEST_MODES); -+} // namespace -diff --git a/test/sad_test.cc b/test/sad_test.cc -new file mode 100644 -index 0000000..5a0653b ---- /dev/null -+++ b/test/sad_test.cc -@@ -0,0 +1,253 @@ -+/* -+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. -+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. 
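FramePktHook in the resize test above reads the coded dimensions straight out of the VP8 keyframe payload: after the 3-byte frame tag and the 9d 01 2a start code, bytes 6-9 carry 14-bit little-endian width and height (the top two bits hold scaling codes, hence the & 0x3fff). The same decode on a fabricated header:

#include <cstdint>
#include <cstdio>

int main() {
  // Made-up start of a VP8 keyframe payload: frame tag, start code, dimensions.
  const uint8_t buf[10] = { 0x00, 0x00, 0x00, 0x9d, 0x01, 0x2a,
                            0x40, 0x01,    // width  = 0x0140 = 320
                            0xf0, 0x00 };  // height = 0x00f0 = 240
  const unsigned int w = (buf[6] | (buf[7] << 8)) & 0x3fff;
  const unsigned int h = (buf[8] | (buf[9] << 8)) & 0x3fff;
  printf("%ux%u\n", w, h);                 // prints 320x240
  return 0;
}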
-+ */ -+ -+ -+#include -+#include -+#include -+ -+extern "C" { -+#include "./vpx_config.h" -+#include "./vpx_rtcd.h" -+#include "vp8/common/blockd.h" -+#include "vpx_mem/vpx_mem.h" -+} -+ -+#include "test/acm_random.h" -+#include "test/register_state_check.h" -+#include "test/util.h" -+#include "third_party/googletest/src/include/gtest/gtest.h" -+ -+ -+typedef unsigned int (*sad_m_by_n_fn_t)(const unsigned char *source_ptr, -+ int source_stride, -+ const unsigned char *reference_ptr, -+ int reference_stride, -+ unsigned int max_sad); -+ -+using libvpx_test::ACMRandom; -+ -+namespace { -+class SADTest : public PARAMS(int, int, sad_m_by_n_fn_t) { -+ public: -+ static void SetUpTestCase() { -+ source_data_ = reinterpret_cast( -+ vpx_memalign(kDataAlignment, kDataBufferSize)); -+ reference_data_ = reinterpret_cast( -+ vpx_memalign(kDataAlignment, kDataBufferSize)); -+ } -+ -+ static void TearDownTestCase() { -+ vpx_free(source_data_); -+ source_data_ = NULL; -+ vpx_free(reference_data_); -+ reference_data_ = NULL; -+ } -+ -+ protected: -+ static const int kDataAlignment = 16; -+ static const int kDataBufferSize = 16 * 32; -+ -+ virtual void SetUp() { -+ sad_fn_ = GET_PARAM(2); -+ height_ = GET_PARAM(1); -+ width_ = GET_PARAM(0); -+ source_stride_ = width_ * 2; -+ reference_stride_ = width_ * 2; -+ rnd_.Reset(ACMRandom::DeterministicSeed()); -+ } -+ -+ sad_m_by_n_fn_t sad_fn_; -+ virtual unsigned int SAD(unsigned int max_sad) { -+ unsigned int ret; -+ REGISTER_STATE_CHECK(ret = sad_fn_(source_data_, source_stride_, -+ reference_data_, reference_stride_, -+ max_sad)); -+ return ret; -+ } -+ -+ // Sum of Absolute Differences. Given two blocks, calculate the absolute -+ // difference between two pixels in the same relative location; accumulate. -+ unsigned int ReferenceSAD(unsigned int max_sad) { -+ unsigned int sad = 0; -+ -+ for (int h = 0; h < height_; ++h) { -+ for (int w = 0; w < width_; ++w) { -+ sad += abs(source_data_[h * source_stride_ + w] -+ - reference_data_[h * reference_stride_ + w]); -+ } -+ if (sad > max_sad) { -+ break; -+ } -+ } -+ return sad; -+ } -+ -+ void FillConstant(uint8_t *data, int stride, uint8_t fill_constant) { -+ for (int h = 0; h < height_; ++h) { -+ for (int w = 0; w < width_; ++w) { -+ data[h * stride + w] = fill_constant; -+ } -+ } -+ } -+ -+ void FillRandom(uint8_t *data, int stride) { -+ for (int h = 0; h < height_; ++h) { -+ for (int w = 0; w < width_; ++w) { -+ data[h * stride + w] = rnd_.Rand8(); -+ } -+ } -+ } -+ -+ void CheckSad(unsigned int max_sad) { -+ unsigned int reference_sad, exp_sad; -+ -+ reference_sad = ReferenceSAD(max_sad); -+ exp_sad = SAD(max_sad); -+ -+ if (reference_sad <= max_sad) { -+ ASSERT_EQ(exp_sad, reference_sad); -+ } else { -+ // Alternative implementations are not required to check max_sad -+ ASSERT_GE(exp_sad, reference_sad); -+ } -+ } -+ -+ // Handle blocks up to 16x16 with stride up to 32 -+ int height_, width_; -+ static uint8_t* source_data_; -+ int source_stride_; -+ static uint8_t* reference_data_; -+ int reference_stride_; -+ -+ ACMRandom rnd_; -+}; -+ -+uint8_t* SADTest::source_data_ = NULL; -+uint8_t* SADTest::reference_data_ = NULL; -+ -+TEST_P(SADTest, MaxRef) { -+ FillConstant(source_data_, source_stride_, 0); -+ FillConstant(reference_data_, reference_stride_, 255); -+ CheckSad(UINT_MAX); -+} -+ -+TEST_P(SADTest, MaxSrc) { -+ FillConstant(source_data_, source_stride_, 255); -+ FillConstant(reference_data_, reference_stride_, 0); -+ CheckSad(UINT_MAX); -+} -+ -+TEST_P(SADTest, ShortRef) { -+ int tmp_stride = 
reference_stride_; -+ reference_stride_ >>= 1; -+ FillRandom(source_data_, source_stride_); -+ FillRandom(reference_data_, reference_stride_); -+ CheckSad(UINT_MAX); -+ reference_stride_ = tmp_stride; -+} -+ -+TEST_P(SADTest, UnalignedRef) { -+ // The reference frame, but not the source frame, may be unaligned for -+ // certain types of searches. -+ int tmp_stride = reference_stride_; -+ reference_stride_ -= 1; -+ FillRandom(source_data_, source_stride_); -+ FillRandom(reference_data_, reference_stride_); -+ CheckSad(UINT_MAX); -+ reference_stride_ = tmp_stride; -+} -+ -+TEST_P(SADTest, ShortSrc) { -+ int tmp_stride = source_stride_; -+ source_stride_ >>= 1; -+ FillRandom(source_data_, source_stride_); -+ FillRandom(reference_data_, reference_stride_); -+ CheckSad(UINT_MAX); -+ source_stride_ = tmp_stride; -+} -+ -+TEST_P(SADTest, MaxSAD) { -+ // Verify that, when max_sad is set, the implementation does not return a -+ // value lower than the reference. -+ FillConstant(source_data_, source_stride_, 255); -+ FillConstant(reference_data_, reference_stride_, 0); -+ CheckSad(128); -+} -+ -+using std::tr1::make_tuple; -+ -+const sad_m_by_n_fn_t sad_16x16_c = vp8_sad16x16_c; -+const sad_m_by_n_fn_t sad_8x16_c = vp8_sad8x16_c; -+const sad_m_by_n_fn_t sad_16x8_c = vp8_sad16x8_c; -+const sad_m_by_n_fn_t sad_8x8_c = vp8_sad8x8_c; -+const sad_m_by_n_fn_t sad_4x4_c = vp8_sad4x4_c; -+INSTANTIATE_TEST_CASE_P(C, SADTest, ::testing::Values( -+ make_tuple(16, 16, sad_16x16_c), -+ make_tuple(8, 16, sad_8x16_c), -+ make_tuple(16, 8, sad_16x8_c), -+ make_tuple(8, 8, sad_8x8_c), -+ make_tuple(4, 4, sad_4x4_c))); -+ -+// ARM tests -+#if HAVE_MEDIA -+const sad_m_by_n_fn_t sad_16x16_armv6 = vp8_sad16x16_armv6; -+INSTANTIATE_TEST_CASE_P(MEDIA, SADTest, ::testing::Values( -+ make_tuple(16, 16, sad_16x16_armv6))); -+ -+#endif -+#if HAVE_NEON -+const sad_m_by_n_fn_t sad_16x16_neon = vp8_sad16x16_neon; -+const sad_m_by_n_fn_t sad_8x16_neon = vp8_sad8x16_neon; -+const sad_m_by_n_fn_t sad_16x8_neon = vp8_sad16x8_neon; -+const sad_m_by_n_fn_t sad_8x8_neon = vp8_sad8x8_neon; -+const sad_m_by_n_fn_t sad_4x4_neon = vp8_sad4x4_neon; -+INSTANTIATE_TEST_CASE_P(NEON, SADTest, ::testing::Values( -+ make_tuple(16, 16, sad_16x16_neon), -+ make_tuple(8, 16, sad_8x16_neon), -+ make_tuple(16, 8, sad_16x8_neon), -+ make_tuple(8, 8, sad_8x8_neon), -+ make_tuple(4, 4, sad_4x4_neon))); -+#endif -+ -+// X86 tests -+#if HAVE_MMX -+const sad_m_by_n_fn_t sad_16x16_mmx = vp8_sad16x16_mmx; -+const sad_m_by_n_fn_t sad_8x16_mmx = vp8_sad8x16_mmx; -+const sad_m_by_n_fn_t sad_16x8_mmx = vp8_sad16x8_mmx; -+const sad_m_by_n_fn_t sad_8x8_mmx = vp8_sad8x8_mmx; -+const sad_m_by_n_fn_t sad_4x4_mmx = vp8_sad4x4_mmx; -+INSTANTIATE_TEST_CASE_P(MMX, SADTest, ::testing::Values( -+ make_tuple(16, 16, sad_16x16_mmx), -+ make_tuple(8, 16, sad_8x16_mmx), -+ make_tuple(16, 8, sad_16x8_mmx), -+ make_tuple(8, 8, sad_8x8_mmx), -+ make_tuple(4, 4, sad_4x4_mmx))); -+#endif -+#if HAVE_SSE2 -+const sad_m_by_n_fn_t sad_16x16_wmt = vp8_sad16x16_wmt; -+const sad_m_by_n_fn_t sad_8x16_wmt = vp8_sad8x16_wmt; -+const sad_m_by_n_fn_t sad_16x8_wmt = vp8_sad16x8_wmt; -+const sad_m_by_n_fn_t sad_8x8_wmt = vp8_sad8x8_wmt; -+const sad_m_by_n_fn_t sad_4x4_wmt = vp8_sad4x4_wmt; -+INSTANTIATE_TEST_CASE_P(SSE2, SADTest, ::testing::Values( -+ make_tuple(16, 16, sad_16x16_wmt), -+ make_tuple(8, 16, sad_8x16_wmt), -+ make_tuple(16, 8, sad_16x8_wmt), -+ make_tuple(8, 8, sad_8x8_wmt), -+ make_tuple(4, 4, sad_4x4_wmt))); -+#endif -+#if HAVE_SSSE3 -+const sad_m_by_n_fn_t sad_16x16_sse3 = 
vp8_sad16x16_sse3; -+INSTANTIATE_TEST_CASE_P(SSE3, SADTest, ::testing::Values( -+ make_tuple(16, 16, sad_16x16_sse3))); -+#endif -+ -+} // namespace -diff --git a/test/set_roi.cc b/test/set_roi.cc -new file mode 100644 -index 0000000..3b6112e ---- /dev/null -+++ b/test/set_roi.cc -@@ -0,0 +1,182 @@ -+/* -+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. -+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. -+ */ -+ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "third_party/googletest/src/include/gtest/gtest.h" -+#include "vpx/vpx_integer.h" -+#include "vpx_mem/vpx_mem.h" -+extern "C" { -+#include "vp8/encoder/onyx_int.h" -+} -+ -+namespace { -+ -+TEST(Vp8RoiMapTest, ParameterCheck) { -+ int delta_q[MAX_MB_SEGMENTS] = { -2, -25, 0, 31 }; -+ int delta_lf[MAX_MB_SEGMENTS] = { -2, -25, 0, 31 }; -+ unsigned int threshold[MAX_MB_SEGMENTS] = { 0, 100, 200, 300 }; -+ -+ const int internalq_trans[] = { -+ 0, 1, 2, 3, 4, 5, 7, 8, -+ 9, 10, 12, 13, 15, 17, 18, 19, -+ 20, 21, 23, 24, 25, 26, 27, 28, -+ 29, 30, 31, 33, 35, 37, 39, 41, -+ 43, 45, 47, 49, 51, 53, 55, 57, -+ 59, 61, 64, 67, 70, 73, 76, 79, -+ 82, 85, 88, 91, 94, 97, 100, 103, -+ 106, 109, 112, 115, 118, 121, 124, 127, -+ }; -+ -+ // Initialize elements of cpi with valid defaults. -+ VP8_COMP cpi; -+ cpi.mb.e_mbd.mb_segement_abs_delta = SEGMENT_DELTADATA; -+ cpi.cyclic_refresh_mode_enabled = 0; -+ cpi.mb.e_mbd.segmentation_enabled = 0; -+ cpi.mb.e_mbd.update_mb_segmentation_map = 0; -+ cpi.mb.e_mbd.update_mb_segmentation_data = 0; -+ cpi.common.mb_rows = 240 >> 4; -+ cpi.common.mb_cols = 320 >> 4; -+ const int mbs = (cpi.common.mb_rows * cpi.common.mb_cols); -+ vpx_memset(cpi.segment_feature_data, 0, sizeof(cpi.segment_feature_data)); -+ -+ // Segment map -+ cpi.segmentation_map = reinterpret_cast(vpx_calloc(mbs, 1)); -+ -+ // Allocate memory for the source memory map. -+ unsigned char *roi_map = -+ reinterpret_cast(vpx_calloc(mbs, 1)); -+ vpx_memset(&roi_map[mbs >> 2], 1, (mbs >> 2)); -+ vpx_memset(&roi_map[mbs >> 1], 2, (mbs >> 2)); -+ vpx_memset(&roi_map[mbs -(mbs >> 2)], 3, (mbs >> 2)); -+ -+ // Do a test call with valid parameters. -+ int roi_retval = vp8_set_roimap(&cpi, roi_map, cpi.common.mb_rows, -+ cpi.common.mb_cols, delta_q, delta_lf, -+ threshold); -+ EXPECT_EQ(0, roi_retval) -+ << "vp8_set_roimap roi failed with default test parameters"; -+ -+ // Check that the values in the cpi structure get set as expected. -+ if (roi_retval == 0) { -+ // Check that the segment map got set. -+ const int mapcompare = memcmp(roi_map, cpi.segmentation_map, mbs); -+ EXPECT_EQ(0, mapcompare) << "segment map error"; -+ -+ // Check the q deltas (note the need to translate into -+ // the interanl range of 0-127. 
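The SAD tests a little further up validate every optimized kernel against ReferenceSAD(): a plain double loop over absolute pixel differences that may stop at a row boundary once the running total passes max_sad. Exact agreement is therefore only demanded when the true SAD is within max_sad; otherwise any value at least as large as the reference is accepted. A stand-alone copy of that reference loop:

#include <cstdint>
#include <cstdio>
#include <cstdlib>

static unsigned int ReferenceSad(const uint8_t *src, int src_stride,
                                 const uint8_t *ref, int ref_stride,
                                 int width, int height, unsigned int max_sad) {
  unsigned int sad = 0;
  for (int h = 0; h < height; ++h) {
    for (int w = 0; w < width; ++w)
      sad += abs(src[h * src_stride + w] - ref[h * ref_stride + w]);
    if (sad > max_sad) break;   // early per-row bail-out
  }
  return sad;
}

int main() {
  uint8_t src[16 * 16], ref[16 * 16];
  for (int i = 0; i < 16 * 16; ++i) { src[i] = 0; ref[i] = 255; }
  // Worst-case 16x16 SAD: 256 * 255 = 65280.
  printf("%u\n", ReferenceSad(src, 16, ref, 16, 16, 16, ~0u));
  return 0;
}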
-+ for (int i = 0; i < MAX_MB_SEGMENTS; ++i) { -+ const int transq = internalq_trans[abs(delta_q[i])]; -+ if (abs(cpi.segment_feature_data[MB_LVL_ALT_Q][i]) != transq) { -+ EXPECT_EQ(transq, cpi.segment_feature_data[MB_LVL_ALT_Q][i]) -+ << "segment delta_q error"; -+ break; -+ } -+ } -+ -+ // Check the loop filter deltas -+ for (int i = 0; i < MAX_MB_SEGMENTS; ++i) { -+ if (cpi.segment_feature_data[MB_LVL_ALT_LF][i] != delta_lf[i]) { -+ EXPECT_EQ(delta_lf[i], cpi.segment_feature_data[MB_LVL_ALT_LF][i]) -+ << "segment delta_lf error"; -+ break; -+ } -+ } -+ -+ // Check the breakout thresholds -+ for (int i = 0; i < MAX_MB_SEGMENTS; ++i) { -+ unsigned int breakout = -+ static_cast(cpi.segment_encode_breakout[i]); -+ -+ if (threshold[i] != breakout) { -+ EXPECT_EQ(threshold[i], breakout) -+ << "breakout threshold error"; -+ break; -+ } -+ } -+ -+ // Segmentation, and segmentation update flages should be set. -+ EXPECT_EQ(1, cpi.mb.e_mbd.segmentation_enabled) -+ << "segmentation_enabled error"; -+ EXPECT_EQ(1, cpi.mb.e_mbd.update_mb_segmentation_map) -+ << "update_mb_segmentation_map error"; -+ EXPECT_EQ(1, cpi.mb.e_mbd.update_mb_segmentation_data) -+ << "update_mb_segmentation_data error"; -+ -+ -+ // Try a range of delta q and lf parameters (some legal, some not) -+ for (int i = 0; i < 1000; ++i) { -+ int rand_deltas[4]; -+ int deltas_valid; -+ rand_deltas[0] = (rand() % 160) - 80; -+ rand_deltas[1] = (rand() % 160) - 80; -+ rand_deltas[2] = (rand() % 160) - 80; -+ rand_deltas[3] = (rand() % 160) - 80; -+ -+ deltas_valid = ((abs(rand_deltas[0]) <= 63) && -+ (abs(rand_deltas[1]) <= 63) && -+ (abs(rand_deltas[2]) <= 63) && -+ (abs(rand_deltas[3]) <= 63)) ? 0 : -1; -+ -+ // Test with random delta q values. -+ roi_retval = vp8_set_roimap(&cpi, roi_map, cpi.common.mb_rows, -+ cpi.common.mb_cols, rand_deltas, -+ delta_lf, threshold); -+ EXPECT_EQ(deltas_valid, roi_retval) << "dq range check error"; -+ -+ // One delta_q error shown at a time -+ if (deltas_valid != roi_retval) -+ break; -+ -+ // Test with random loop filter values. -+ roi_retval = vp8_set_roimap(&cpi, roi_map, cpi.common.mb_rows, -+ cpi.common.mb_cols, delta_q, -+ rand_deltas, threshold); -+ EXPECT_EQ(deltas_valid, roi_retval) << "dlf range check error"; -+ -+ // One delta loop filter error shown at a time -+ if (deltas_valid != roi_retval) -+ break; -+ } -+ -+ // Test that we report and error if cyclic refresh is enabled. -+ cpi.cyclic_refresh_mode_enabled = 1; -+ roi_retval = vp8_set_roimap(&cpi, roi_map, cpi.common.mb_rows, -+ cpi.common.mb_cols, delta_q, -+ delta_lf, threshold); -+ EXPECT_EQ(-1, roi_retval) << "cyclic refresh check error"; -+ cpi.cyclic_refresh_mode_enabled = 0; -+ -+ // Test invalid number of rows or colums. -+ roi_retval = vp8_set_roimap(&cpi, roi_map, cpi.common.mb_rows + 1, -+ cpi.common.mb_cols, delta_q, -+ delta_lf, threshold); -+ EXPECT_EQ(-1, roi_retval) << "MB rows bounds check error"; -+ -+ roi_retval = vp8_set_roimap(&cpi, roi_map, cpi.common.mb_rows, -+ cpi.common.mb_cols - 1, delta_q, -+ delta_lf, threshold); -+ EXPECT_EQ(-1, roi_retval) << "MB cols bounds check error"; -+ } -+ -+ // Free allocated memory -+ if (cpi.segmentation_map) -+ vpx_free(cpi.segmentation_map); -+ if (roi_map) -+ vpx_free(roi_map); -+}; -+ -+} // namespace -diff --git a/test/sixtap_predict_test.cc b/test/sixtap_predict_test.cc -new file mode 100644 -index 0000000..c9dcceb ---- /dev/null -+++ b/test/sixtap_predict_test.cc -@@ -0,0 +1,224 @@ -+/* -+* Copyright (c) 2012 The WebM project authors. All Rights Reserved. 
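The ROI-map test above hands vp8_set_roimap() randomly generated segment deltas and expects the call to fail whenever any |delta| exceeds 63, the range the test treats as legal for quantizer and loop-filter adjustments. The validity predicate that expectation is built from is just (hypothetical helper name):

#include <cstdio>
#include <cstdlib>

// A segment delta set is acceptable only if every entry fits in [-63, 63],
// matching the range check exercised by the random-delta loop above.
static bool DeltasValid(const int deltas[4]) {
  for (int i = 0; i < 4; ++i)
    if (abs(deltas[i]) > 63) return false;
  return true;
}

int main() {
  const int legal[4] = { -2, -25, 0, 31 };
  const int illegal[4] = { -2, -25, 0, 80 };
  printf("legal:   %s\n", DeltasValid(legal) ? "accepted" : "rejected");
  printf("illegal: %s\n", DeltasValid(illegal) ? "accepted" : "rejected");
  return 0;
}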
-+* -+* Use of this source code is governed by a BSD-style license -+* that can be found in the LICENSE file in the root of the source -+* tree. An additional intellectual property rights grant can be found -+* in the file PATENTS. All contributing project authors may -+* be found in the AUTHORS file in the root of the source tree. -+*/ -+ -+#include -+#include -+#include -+#include "test/acm_random.h" -+#include "test/register_state_check.h" -+#include "test/util.h" -+#include "third_party/googletest/src/include/gtest/gtest.h" -+extern "C" { -+#include "./vpx_config.h" -+#include "./vpx_rtcd.h" -+#include "vpx/vpx_integer.h" -+#include "vpx_mem/vpx_mem.h" -+} -+ -+namespace { -+ -+typedef void (*sixtap_predict_fn_t)(uint8_t *src_ptr, -+ int src_pixels_per_line, -+ int xoffset, -+ int yoffset, -+ uint8_t *dst_ptr, -+ int dst_pitch); -+ -+class SixtapPredictTest : public PARAMS(int, int, sixtap_predict_fn_t) { -+ public: -+ static void SetUpTestCase() { -+ src_ = reinterpret_cast(vpx_memalign(kDataAlignment, kSrcSize)); -+ dst_ = reinterpret_cast(vpx_memalign(kDataAlignment, kDstSize)); -+ dst_c_ = reinterpret_cast(vpx_memalign(kDataAlignment, kDstSize)); -+ } -+ -+ static void TearDownTestCase() { -+ vpx_free(src_); -+ src_ = NULL; -+ vpx_free(dst_); -+ dst_ = NULL; -+ vpx_free(dst_c_); -+ dst_c_ = NULL; -+ } -+ -+ protected: -+ // Make test arrays big enough for 16x16 functions. Six-tap filters -+ // need 5 extra pixels outside of the macroblock. -+ static const int kSrcStride = 21; -+ static const int kDstStride = 16; -+ static const int kDataAlignment = 16; -+ static const int kSrcSize = kSrcStride * kSrcStride + 1; -+ static const int kDstSize = kDstStride * kDstStride; -+ -+ virtual void SetUp() { -+ width_ = GET_PARAM(0); -+ height_ = GET_PARAM(1); -+ sixtap_predict_ = GET_PARAM(2); -+ memset(src_, 0, sizeof(src_)); -+ memset(dst_, 0, sizeof(dst_)); -+ memset(dst_c_, 0, sizeof(dst_c_)); -+ } -+ -+ int width_; -+ int height_; -+ sixtap_predict_fn_t sixtap_predict_; -+ // The src stores the macroblock we will filter on, and makes it 1 byte larger -+ // in order to test unaligned access. The result is stored in dst and dst_c(c -+ // reference code result). 
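The 21-pixel source stride above exists because a six-tap interpolation filter reads two pixels before and three after each output position, the "5 extra pixels outside of the macroblock" mentioned in the comment. Below is a rough single-pass horizontal sketch; the kernel is illustrative only (the real VP8 coefficient tables are not reproduced here), and the (sum + 64) >> 7 rounding assumes taps that sum to 128:

#include <algorithm>
#include <cstdint>
#include <cstdio>

// One horizontal 6-tap interpolation at position x: taps cover x-2 .. x+3.
static uint8_t SixTapAt(const uint8_t *row, int x) {
  static const int kKernel[6] = { 0, -6, 123, 12, -1, 0 };  // sums to 128
  int sum = 0;
  for (int k = 0; k < 6; ++k)
    sum += kKernel[k] * row[x - 2 + k];
  const int rounded = (sum + 64) >> 7;                      // /128 with rounding
  return static_cast<uint8_t>(std::min(255, std::max(0, rounded)));
}

int main() {
  uint8_t row[21];
  for (int i = 0; i < 21; ++i) row[i] = static_cast<uint8_t>(10 * i);
  // Interpolate at x = 5; guard pixels at 3, 4 and 6, 7, 8 are available.
  printf("%d\n", SixTapAt(row, 5));
  return 0;
}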
-+ static uint8_t* src_; -+ static uint8_t* dst_; -+ static uint8_t* dst_c_; -+}; -+ -+uint8_t* SixtapPredictTest::src_ = NULL; -+uint8_t* SixtapPredictTest::dst_ = NULL; -+uint8_t* SixtapPredictTest::dst_c_ = NULL; -+ -+TEST_P(SixtapPredictTest, TestWithPresetData) { -+ // Test input -+ static const uint8_t test_data[kSrcSize] = { -+ 216, 184, 4, 191, 82, 92, 41, 0, 1, 226, 236, 172, 20, 182, 42, 226, 177, -+ 79, 94, 77, 179, 203, 206, 198, 22, 192, 19, 75, 17, 192, 44, 233, 120, -+ 48, 168, 203, 141, 210, 203, 143, 180, 184, 59, 201, 110, 102, 171, 32, -+ 182, 10, 109, 105, 213, 60, 47, 236, 253, 67, 55, 14, 3, 99, 247, 124, -+ 148, 159, 71, 34, 114, 19, 177, 38, 203, 237, 239, 58, 83, 155, 91, 10, -+ 166, 201, 115, 124, 5, 163, 104, 2, 231, 160, 16, 234, 4, 8, 103, 153, -+ 167, 174, 187, 26, 193, 109, 64, 141, 90, 48, 200, 174, 204, 36, 184, -+ 114, 237, 43, 238, 242, 207, 86, 245, 182, 247, 6, 161, 251, 14, 8, 148, -+ 182, 182, 79, 208, 120, 188, 17, 6, 23, 65, 206, 197, 13, 242, 126, 128, -+ 224, 170, 110, 211, 121, 197, 200, 47, 188, 207, 208, 184, 221, 216, 76, -+ 148, 143, 156, 100, 8, 89, 117, 14, 112, 183, 221, 54, 197, 208, 180, 69, -+ 176, 94, 180, 131, 215, 121, 76, 7, 54, 28, 216, 238, 249, 176, 58, 142, -+ 64, 215, 242, 72, 49, 104, 87, 161, 32, 52, 216, 230, 4, 141, 44, 181, -+ 235, 224, 57, 195, 89, 134, 203, 144, 162, 163, 126, 156, 84, 185, 42, -+ 148, 145, 29, 221, 194, 134, 52, 100, 166, 105, 60, 140, 110, 201, 184, -+ 35, 181, 153, 93, 121, 243, 227, 68, 131, 134, 232, 2, 35, 60, 187, 77, -+ 209, 76, 106, 174, 15, 241, 227, 115, 151, 77, 175, 36, 187, 121, 221, -+ 223, 47, 118, 61, 168, 105, 32, 237, 236, 167, 213, 238, 202, 17, 170, -+ 24, 226, 247, 131, 145, 6, 116, 117, 121, 11, 194, 41, 48, 126, 162, 13, -+ 93, 209, 131, 154, 122, 237, 187, 103, 217, 99, 60, 200, 45, 78, 115, 69, -+ 49, 106, 200, 194, 112, 60, 56, 234, 72, 251, 19, 120, 121, 182, 134, 215, -+ 135, 10, 114, 2, 247, 46, 105, 209, 145, 165, 153, 191, 243, 12, 5, 36, -+ 119, 206, 231, 231, 11, 32, 209, 83, 27, 229, 204, 149, 155, 83, 109, 35, -+ 93, 223, 37, 84, 14, 142, 37, 160, 52, 191, 96, 40, 204, 101, 77, 67, 52, -+ 53, 43, 63, 85, 253, 147, 113, 226, 96, 6, 125, 179, 115, 161, 17, 83, -+ 198, 101, 98, 85, 139, 3, 137, 75, 99, 178, 23, 201, 255, 91, 253, 52, -+ 134, 60, 138, 131, 208, 251, 101, 48, 2, 227, 228, 118, 132, 245, 202, -+ 75, 91, 44, 160, 231, 47, 41, 50, 147, 220, 74, 92, 219, 165, 89, 16 -+ }; -+ -+ // Expected result -+ static const uint8_t expected_dst[kDstSize] = { -+ 117, 102, 74, 135, 42, 98, 175, 206, 70, 73, 222, 197, 50, 24, 39, 49, 38, -+ 105, 90, 47, 169, 40, 171, 215, 200, 73, 109, 141, 53, 85, 177, 164, 79, -+ 208, 124, 89, 212, 18, 81, 145, 151, 164, 217, 153, 91, 154, 102, 102, -+ 159, 75, 164, 152, 136, 51, 213, 219, 186, 116, 193, 224, 186, 36, 231, -+ 208, 84, 211, 155, 167, 35, 59, 42, 76, 216, 149, 73, 201, 78, 149, 184, -+ 100, 96, 196, 189, 198, 188, 235, 195, 117, 129, 120, 129, 49, 25, 133, -+ 113, 69, 221, 114, 70, 143, 99, 157, 108, 189, 140, 78, 6, 55, 65, 240, -+ 255, 245, 184, 72, 90, 100, 116, 131, 39, 60, 234, 167, 33, 160, 88, 185, -+ 200, 157, 159, 176, 127, 151, 138, 102, 168, 106, 170, 86, 82, 219, 189, -+ 76, 33, 115, 197, 106, 96, 198, 136, 97, 141, 237, 151, 98, 137, 191, -+ 185, 2, 57, 95, 142, 91, 255, 185, 97, 137, 76, 162, 94, 173, 131, 193, -+ 161, 81, 106, 72, 135, 222, 234, 137, 66, 137, 106, 243, 210, 147, 95, -+ 15, 137, 110, 85, 66, 16, 96, 167, 147, 150, 173, 203, 140, 118, 196, -+ 84, 147, 160, 19, 95, 101, 123, 74, 132, 202, 82, 166, 
12, 131, 166, -+ 189, 170, 159, 85, 79, 66, 57, 152, 132, 203, 194, 0, 1, 56, 146, 180, -+ 224, 156, 28, 83, 181, 79, 76, 80, 46, 160, 175, 59, 106, 43, 87, 75, -+ 136, 85, 189, 46, 71, 200, 90 -+ }; -+ -+ uint8_t *src = const_cast(test_data); -+ -+ REGISTER_STATE_CHECK(sixtap_predict_(&src[kSrcStride * 2 + 2 + 1], kSrcStride, -+ 2, 2, dst_, kDstStride)); -+ -+ for (int i = 0; i < height_; ++i) -+ for (int j = 0; j < width_; ++j) -+ ASSERT_EQ(expected_dst[i * kDstStride + j], dst_[i * kDstStride + j]) -+ << "i==" << (i * width_ + j); -+} -+ -+using libvpx_test::ACMRandom; -+ -+TEST_P(SixtapPredictTest, TestWithRandomData) { -+ ACMRandom rnd(ACMRandom::DeterministicSeed()); -+ for (int i = 0; i < kSrcSize; ++i) -+ src_[i] = rnd.Rand8(); -+ -+ // Run tests for all possible offsets. -+ for (int xoffset = 0; xoffset < 8; ++xoffset) { -+ for (int yoffset = 0; yoffset < 8; ++yoffset) { -+ // Call c reference function. -+ // Move start point to next pixel to test if the function reads -+ // unaligned data correctly. -+ vp8_sixtap_predict16x16_c(&src_[kSrcStride * 2 + 2 + 1], kSrcStride, -+ xoffset, yoffset, dst_c_, kDstStride); -+ -+ // Run test. -+ REGISTER_STATE_CHECK( -+ sixtap_predict_(&src_[kSrcStride * 2 + 2 + 1], kSrcStride, -+ xoffset, yoffset, dst_, kDstStride)); -+ -+ for (int i = 0; i < height_; ++i) -+ for (int j = 0; j < width_; ++j) -+ ASSERT_EQ(dst_c_[i * kDstStride + j], dst_[i * kDstStride + j]) -+ << "i==" << (i * width_ + j); -+ } -+ } -+} -+ -+using std::tr1::make_tuple; -+ -+const sixtap_predict_fn_t sixtap_16x16_c = vp8_sixtap_predict16x16_c; -+const sixtap_predict_fn_t sixtap_8x8_c = vp8_sixtap_predict8x8_c; -+const sixtap_predict_fn_t sixtap_8x4_c = vp8_sixtap_predict8x4_c; -+const sixtap_predict_fn_t sixtap_4x4_c = vp8_sixtap_predict4x4_c; -+INSTANTIATE_TEST_CASE_P( -+ C, SixtapPredictTest, ::testing::Values( -+ make_tuple(16, 16, sixtap_16x16_c), -+ make_tuple(8, 8, sixtap_8x8_c), -+ make_tuple(8, 4, sixtap_8x4_c), -+ make_tuple(4, 4, sixtap_4x4_c))); -+#if HAVE_MMX -+const sixtap_predict_fn_t sixtap_16x16_mmx = vp8_sixtap_predict16x16_mmx; -+const sixtap_predict_fn_t sixtap_8x8_mmx = vp8_sixtap_predict8x8_mmx; -+const sixtap_predict_fn_t sixtap_8x4_mmx = vp8_sixtap_predict8x4_mmx; -+const sixtap_predict_fn_t sixtap_4x4_mmx = vp8_sixtap_predict4x4_mmx; -+INSTANTIATE_TEST_CASE_P( -+ MMX, SixtapPredictTest, ::testing::Values( -+ make_tuple(16, 16, sixtap_16x16_mmx), -+ make_tuple(8, 8, sixtap_8x8_mmx), -+ make_tuple(8, 4, sixtap_8x4_mmx), -+ make_tuple(4, 4, sixtap_4x4_mmx))); -+#endif -+#if HAVE_SSE2 -+const sixtap_predict_fn_t sixtap_16x16_sse2 = vp8_sixtap_predict16x16_sse2; -+const sixtap_predict_fn_t sixtap_8x8_sse2 = vp8_sixtap_predict8x8_sse2; -+const sixtap_predict_fn_t sixtap_8x4_sse2 = vp8_sixtap_predict8x4_sse2; -+INSTANTIATE_TEST_CASE_P( -+ SSE2, SixtapPredictTest, ::testing::Values( -+ make_tuple(16, 16, sixtap_16x16_sse2), -+ make_tuple(8, 8, sixtap_8x8_sse2), -+ make_tuple(8, 4, sixtap_8x4_sse2))); -+#endif -+#if HAVE_SSSE3 -+const sixtap_predict_fn_t sixtap_16x16_ssse3 = vp8_sixtap_predict16x16_ssse3; -+const sixtap_predict_fn_t sixtap_8x8_ssse3 = vp8_sixtap_predict8x8_ssse3; -+const sixtap_predict_fn_t sixtap_8x4_ssse3 = vp8_sixtap_predict8x4_ssse3; -+const sixtap_predict_fn_t sixtap_4x4_ssse3 = vp8_sixtap_predict4x4_ssse3; -+INSTANTIATE_TEST_CASE_P( -+ SSSE3, SixtapPredictTest, ::testing::Values( -+ make_tuple(16, 16, sixtap_16x16_ssse3), -+ make_tuple(8, 8, sixtap_8x8_ssse3), -+ make_tuple(8, 4, sixtap_8x4_ssse3), -+ make_tuple(4, 4, sixtap_4x4_ssse3))); 
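The subtract test that follows a little further down checks a simple round trip: vp8_subtract_b stores source minus predictor into src_diff, and adding the predictor back must reproduce every source pixel exactly. A toy version of that round trip on a 4x4 block (plain loops, no libvpx types):

#include <cstdint>
#include <cstdio>

int main() {
  const int kSize = 4, kStride = 4;
  uint8_t source[16], predictor[16];
  int16_t diff[16];
  for (int i = 0; i < 16; ++i) { source[i] = 100 + i; predictor[i] = 90; }

  // Residual: what the encoder would go on to transform and code.
  for (int r = 0; r < kSize; ++r)
    for (int c = 0; c < kSize; ++c)
      diff[r * kStride + c] =
          source[r * kStride + c] - predictor[r * kStride + c];

  // Round trip: predictor + residual must give the source back exactly.
  bool ok = true;
  for (int i = 0; i < 16; ++i)
    if (source[i] != predictor[i] + diff[i]) ok = false;
  printf("%s\n", ok ? "round trip ok" : "round trip FAILED");
  return 0;
}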
-+#endif -+} // namespace -diff --git a/test/subtract_test.cc b/test/subtract_test.cc -new file mode 100644 -index 0000000..60acf81 ---- /dev/null -+++ b/test/subtract_test.cc -@@ -0,0 +1,114 @@ -+/* -+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. -+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. -+ */ -+ -+#include "third_party/googletest/src/include/gtest/gtest.h" -+#include "test/acm_random.h" -+#include "test/register_state_check.h" -+extern "C" { -+#include "vpx_config.h" -+#include "vpx_rtcd.h" -+#include "vp8/common/blockd.h" -+#include "vp8/encoder/block.h" -+#include "vpx_mem/vpx_mem.h" -+} -+ -+typedef void (*subtract_b_fn_t)(BLOCK *be, BLOCKD *bd, int pitch); -+ -+namespace { -+ -+class SubtractBlockTest : public ::testing::TestWithParam {}; -+ -+using libvpx_test::ACMRandom; -+ -+TEST_P(SubtractBlockTest, SimpleSubtract) { -+ ACMRandom rnd(ACMRandom::DeterministicSeed()); -+ BLOCK be; -+ BLOCKD bd; -+ // in libvpx, this stride is always 16 -+ const int kDiffPredStride = 16; -+ const int kSrcStride[] = {32, 16, 8, 4, 0}; -+ const int kBlockWidth = 4; -+ const int kBlockHeight = 4; -+ -+ // Allocate... align to 16 for mmx/sse tests -+ uint8_t *source = reinterpret_cast( -+ vpx_memalign(16, kBlockHeight * kSrcStride[0] * sizeof(*source))); -+ be.src_diff = reinterpret_cast( -+ vpx_memalign(16, kBlockHeight * kDiffPredStride * sizeof(*be.src_diff))); -+ bd.predictor = reinterpret_cast( -+ vpx_memalign(16, kBlockHeight * kDiffPredStride * sizeof(*bd.predictor))); -+ -+ for(int i = 0; kSrcStride[i] > 0; ++i) { -+ // start at block0 -+ be.src = 0; -+ be.base_src = &source; -+ be.src_stride = kSrcStride[i]; -+ -+ // set difference -+ int16_t *src_diff = be.src_diff; -+ for (int r = 0; r < kBlockHeight; ++r) { -+ for (int c = 0; c < kBlockWidth; ++c) { -+ src_diff[c] = 0xa5a5; -+ } -+ src_diff += kDiffPredStride; -+ } -+ -+ // set destination -+ uint8_t *base_src = *be.base_src; -+ for (int r = 0; r < kBlockHeight; ++r) { -+ for (int c = 0; c < kBlockWidth; ++c) { -+ base_src[c] = rnd.Rand8(); -+ } -+ base_src += be.src_stride; -+ } -+ -+ // set predictor -+ uint8_t *predictor = bd.predictor; -+ for (int r = 0; r < kBlockHeight; ++r) { -+ for (int c = 0; c < kBlockWidth; ++c) { -+ predictor[c] = rnd.Rand8(); -+ } -+ predictor += kDiffPredStride; -+ } -+ -+ REGISTER_STATE_CHECK(GetParam()(&be, &bd, kDiffPredStride)); -+ -+ base_src = *be.base_src; -+ src_diff = be.src_diff; -+ predictor = bd.predictor; -+ for (int r = 0; r < kBlockHeight; ++r) { -+ for (int c = 0; c < kBlockWidth; ++c) { -+ EXPECT_EQ(base_src[c], (src_diff[c] + predictor[c])) << "r = " << r -+ << ", c = " << c; -+ } -+ src_diff += kDiffPredStride; -+ predictor += kDiffPredStride; -+ base_src += be.src_stride; -+ } -+ } -+ vpx_free(be.src_diff); -+ vpx_free(source); -+ vpx_free(bd.predictor); -+} -+ -+INSTANTIATE_TEST_CASE_P(C, SubtractBlockTest, -+ ::testing::Values(vp8_subtract_b_c)); -+ -+#if HAVE_MMX -+INSTANTIATE_TEST_CASE_P(MMX, SubtractBlockTest, -+ ::testing::Values(vp8_subtract_b_mmx)); -+#endif -+ -+#if HAVE_SSE2 -+INSTANTIATE_TEST_CASE_P(SSE2, SubtractBlockTest, -+ ::testing::Values(vp8_subtract_b_sse2)); -+#endif -+ -+} // namespace -diff --git a/test/test-data.sha1 b/test/test-data.sha1 -new file mode 
100644 -index 0000000..c1b6a83 ---- /dev/null -+++ b/test/test-data.sha1 -@@ -0,0 +1,123 @@ -+d5dfb0151c9051f8c85999255645d7a23916d3c0 hantro_collage_w352h288.yuv -+5184c46ddca8b1fadd16742e8500115bc8f749da vp80-00-comprehensive-001.ivf -+65bf1bbbced81b97bd030f376d1b7f61a224793f vp80-00-comprehensive-002.ivf -+906b4c1e99eb734504c504b3f1ad8052137ce672 vp80-00-comprehensive-003.ivf -+ec144b1af53af895db78355785650b96dd3f0ade vp80-00-comprehensive-004.ivf -+afc7091785c62f1c121c4554a2830c30704587d9 vp80-00-comprehensive-005.ivf -+42ea9d55c818145d06a9b633b8e85c6a6164fd3e vp80-00-comprehensive-006.ivf -+e5b3a73ab79fe024c14309d653d6bed92902ee3b vp80-00-comprehensive-007.ivf -+f3c50a58875930adfb84525c0ef59d7e4c08540c vp80-00-comprehensive-008.ivf -+4b2841fdb83db51ae322096ae468bbb9dc2c8362 vp80-00-comprehensive-009.ivf -+efbff736e3a91ab6a98c5bc2dce65d645944c7b1 vp80-00-comprehensive-010.ivf -+6b315102cae008d22a3d2c231be92cb704a222f8 vp80-00-comprehensive-011.ivf -+f3214a4fea14c2d5ec689936c1613f274c859ee8 vp80-00-comprehensive-012.ivf -+e4094e96d308c8a35b74c480a43d853c5294cd34 vp80-00-comprehensive-013.ivf -+5b0adfaf60a69e0aaf3ec021a39d0a68fc0e1b5a vp80-00-comprehensive-014.ivf -+e8467688ddf26b5000664f904faf0d70506aa653 vp80-00-comprehensive-015.ivf -+aab55582337dfd2a39ff54fb2576a91910d49337 vp80-00-comprehensive-016.ivf -+1ba24724f80203c9bae4f1d0f99d534721980016 vp80-00-comprehensive-017.ivf -+143a15512b46f436280ddb4d0e6411eb4af434f2 vp80-00-comprehensive-018.ivf -+c5baeaf5714fdfb3a8bc960a8e33ac438e83b16b vp80-01-intra-1400.ivf -+f383955229afe3408453e316d11553d923ca60d5 vp80-01-intra-1411.ivf -+84e1f4343f174c9f3c83f834bac3196fb325bf2c vp80-01-intra-1416.ivf -+fb6e712a47dd57a28a3727d2ae2c97a8b7c7ca51 vp80-01-intra-1417.ivf -+71ea772d3e9d315b8cbecf41207b8a237c34853b vp80-02-inter-1402.ivf -+d85dbc4271525dcd128c503f936fe69091d1f8d0 vp80-02-inter-1412.ivf -+d4e5d3ad56511867d025f93724d090f92ba6ec3d vp80-02-inter-1418.ivf -+91791cbcc37c60f35dbd8090bacb54e5ec6dd4fa vp80-02-inter-1424.ivf -+17fbfe2fea70f6e2f3fa6ca4efaae6c0b03b5f02 vp80-03-segmentation-01.ivf -+3c3600dbbcde08e20d54c66fe3b7eadd4f09bdbb vp80-03-segmentation-02.ivf -+c156778d5340967d4b369c490848076e92f1f875 vp80-03-segmentation-03.ivf -+d25dcff6c60e87a1af70945b8911b6b4998533b0 vp80-03-segmentation-04.ivf -+362baba2ce454c9db21218f35e81c27a5ed0b730 vp80-03-segmentation-1401.ivf -+d223ae7ee748ce07e74c4679bfd219e84aa9f4b0 vp80-03-segmentation-1403.ivf -+033adf7f3a13836a3f1cffcb87c1972900f2b5c6 vp80-03-segmentation-1407.ivf -+4d51dfbf9f3e2c590ec99d1d6f59dd731d04375f vp80-03-segmentation-1408.ivf -+f37a62b197c2600d75e0ccfbb31b60efdedac251 vp80-03-segmentation-1409.ivf -+eb25bd7bfba5b2f6935018a930f42d123b1e7fcd vp80-03-segmentation-1410.ivf -+b9d5c436663a30c27cfff84b53a002e501258843 vp80-03-segmentation-1413.ivf -+6da92b9d1a180cc3a8afe348ab12258f5a37be1a vp80-03-segmentation-1414.ivf -+a4f5842602886bd669f115f93d8a35c035cb0948 vp80-03-segmentation-1415.ivf -+f295dceb8ef278b77251b3f9df8aee22e161d547 vp80-03-segmentation-1425.ivf -+198dbf9f36f733200e432664cc8c5752d59779de vp80-03-segmentation-1426.ivf -+7704804e32f5de976803929934a7fafe101ac7b0 vp80-03-segmentation-1427.ivf -+831ccd862ea95ca025d2f3bd8b88678752f5416d vp80-03-segmentation-1432.ivf -+b3c11978529289f9109f2766fcaba3ebc40e11ef vp80-03-segmentation-1435.ivf -+a835a731f5520ebfc1002c40121264d0020559ac vp80-03-segmentation-1436.ivf -+1d1732942f773bb2a5775fcb9689b1579ce28eab vp80-03-segmentation-1437.ivf -+db04799adfe089dfdf74dbd43cc05ede7161f99e vp80-03-segmentation-1441.ivf 
-+7caf39b3f20cfd52b998210878062e52a5edf1e6 vp80-03-segmentation-1442.ivf -+3607f6bb4ee106c38fa1ea370dc4ff8b8cde2261 vp80-04-partitions-1404.ivf -+93cc323b6b6867f1b12dd48773424549c6960a6b vp80-04-partitions-1405.ivf -+047eedb14b865bdac8a3538e63801054e0295e9c vp80-04-partitions-1406.ivf -+0f1233bd2bc33f56ce5e495dbd455d122339f384 vp80-05-sharpness-1428.ivf -+51767fc136488a9535c2a4c38067c542ee2048df vp80-05-sharpness-1429.ivf -+9805aa107672de25d6fb8c35e20d06deca5efe18 vp80-05-sharpness-1430.ivf -+61db6b965f9c27aebe71b85bf2d5877e58e4bbdf vp80-05-sharpness-1431.ivf -+10420d266290d2923555f84af38eeb96edbd3ae8 vp80-05-sharpness-1433.ivf -+3ed24f9a80cddfdf75824ba95cdb4ff9286cb443 vp80-05-sharpness-1434.ivf -+c87599cbecd72d4cd4f7ace3313b7a6bc6eb8163 vp80-05-sharpness-1438.ivf -+aff51d865c2621b60510459244ea83e958e4baed vp80-05-sharpness-1439.ivf -+da386e72b19b5485a6af199c5eb60ef25e510dd1 vp80-05-sharpness-1440.ivf -+6759a095203d96ccd267ce09b1b050b8cc4c2f1f vp80-05-sharpness-1443.ivf -+db55ec7fd02c864ba996ff060b25b1e08611330b vp80-00-comprehensive-001.ivf.md5 -+29db0ad011cba1e45f856d5623cd38dac3e3bf19 vp80-00-comprehensive-002.ivf.md5 -+e84f258f69e173e7d68f8f8c037a0a3766902182 vp80-00-comprehensive-003.ivf.md5 -+eb7912eaf69559a16fd82bc3f5fb1524cf4a4466 vp80-00-comprehensive-004.ivf.md5 -+4206f71c94894bd5b5b376f6c09b3817dbc65206 vp80-00-comprehensive-005.ivf.md5 -+4f89b356f6f2fecb928f330a10f804f00f5325f5 vp80-00-comprehensive-006.ivf.md5 -+2813236a32964dd8007e17648bcf035a20fcda6c vp80-00-comprehensive-007.ivf.md5 -+10746c72098f872803c900e17c5680e451f5f498 vp80-00-comprehensive-008.ivf.md5 -+39a23d0692ce64421a7bb7cdf6ccec5928d37fff vp80-00-comprehensive-009.ivf.md5 -+f6e3de8931a0cc659bda8fbc14050346955e72d4 vp80-00-comprehensive-010.ivf.md5 -+101683ec195b6e944f7cd1e468fc8921439363e6 vp80-00-comprehensive-011.ivf.md5 -+1f592751ce46d8688998fa0fa4fbdcda0fd4058c vp80-00-comprehensive-012.ivf.md5 -+6066176f90ca790251e795fca1a5797d59999841 vp80-00-comprehensive-013.ivf.md5 -+2656da94ba93691f23edc4d60b3a09e2be46c217 vp80-00-comprehensive-014.ivf.md5 -+c6e0d5f5d61460c8ac8edfa4e701f10312c03133 vp80-00-comprehensive-015.ivf.md5 -+ee60fee501d8493e34e8d6a1fe315b51ed09b24a vp80-00-comprehensive-016.ivf.md5 -+9f1914ceffcad4546c0a29de3ef591d8bea304dc vp80-00-comprehensive-017.ivf.md5 -+e0305178fe288a9fd8082b39e2d03181edb19054 vp80-00-comprehensive-018.ivf.md5 -+612494da2fa799cc9d76dcdd835ae6c7cb2e5c05 vp80-01-intra-1400.ivf.md5 -+48ea06097ac8269c5e8c2131d3d0639f431fcf0e vp80-01-intra-1411.ivf.md5 -+6e2ab4e7677ad0ba868083ca6bc387ee922b400c vp80-01-intra-1416.ivf.md5 -+eca0a90348959ce3854142f8d8641b13050e8349 vp80-01-intra-1417.ivf.md5 -+920feea203145d5c2258a91c4e6991934a79a99e vp80-02-inter-1402.ivf.md5 -+f71d97909fe2b3dd65be7e1f56c72237f0cef200 vp80-02-inter-1412.ivf.md5 -+e911254569a30bbb2a237ff8b79f69ed9da0672d vp80-02-inter-1418.ivf.md5 -+58c789c50c9bb9cc90580bed291164a0939d28ba vp80-02-inter-1424.ivf.md5 -+ff3e2f441327b9c20a0b37c524e0f5a48a36de7b vp80-03-segmentation-01.ivf.md5 -+0791f417f076a542ae66fbc3426ab4d94cbd6c75 vp80-03-segmentation-02.ivf.md5 -+722e50f1a6a91c34302d68681faffc1c26d1cc57 vp80-03-segmentation-03.ivf.md5 -+c701f1885bcfb27fb8e70cc65606b289172ef889 vp80-03-segmentation-04.ivf.md5 -+f79bc9ec189a2b4807632a3d0c5bf04a178b5300 vp80-03-segmentation-1401.ivf.md5 -+b9aa4c74c0219b639811c44760d0b24cd8bb436a vp80-03-segmentation-1403.ivf.md5 -+70d5a2207ca1891bcaebd5cf6dd88ce8d57b4334 vp80-03-segmentation-1407.ivf.md5 -+265f962ee781531f9a93b9309461316fd32b2a1d vp80-03-segmentation-1408.ivf.md5 
-+0c4ecbbd6dc042d30e626d951b65f460dd6cd563 vp80-03-segmentation-1409.ivf.md5 -+cf779af36a937f06570a0fca9db64ba133451dee vp80-03-segmentation-1410.ivf.md5 -+0e6c5036d51ab078842f133934926c598a9cff02 vp80-03-segmentation-1413.ivf.md5 -+eb3930aaf229116c80d507516c34759c3f6cdf69 vp80-03-segmentation-1414.ivf.md5 -+123d6c0f72ee87911c4ae7538e87b7d163b22d6c vp80-03-segmentation-1415.ivf.md5 -+e70551d1a38920e097a5d8782390b79ecaeb7505 vp80-03-segmentation-1425.ivf.md5 -+44e8f4117e46dbb302b2cfd81171cc1a1846e431 vp80-03-segmentation-1426.ivf.md5 -+52636e54aee5f95bbace37021bd67de5db767e9a vp80-03-segmentation-1427.ivf.md5 -+b1ad3eff20215c28e295b15ef3636ed926d59cba vp80-03-segmentation-1432.ivf.md5 -+24c22a552fa28a90e5978f67f57181cc2d7546d7 vp80-03-segmentation-1435.ivf.md5 -+96c49c390abfced18a7a8c9b9ea10af778e10edb vp80-03-segmentation-1436.ivf.md5 -+f95eb6214571434f1f73ab7833b9ccdf47588020 vp80-03-segmentation-1437.ivf.md5 -+1c0700ca27c9b0090a7747a4b0b4dc21d1843181 vp80-03-segmentation-1441.ivf.md5 -+81d4f23ca32667ee958bae579c8f5e97ba72eb97 vp80-03-segmentation-1442.ivf.md5 -+272efcef07a3a30fbca51bfd566063d8258ec0be vp80-04-partitions-1404.ivf.md5 -+66ed219ab812ac801b256d35cf495d193d4cf478 vp80-04-partitions-1405.ivf.md5 -+36083f37f56f502bd60ec5e07502ee9e6b8699b0 vp80-04-partitions-1406.ivf.md5 -+6ca909bf168a64c09415626294665dc1be3d1973 vp80-05-sharpness-1428.ivf.md5 -+1667d2ee2334e5fdea8a8a866f4ccf3cf76f033a vp80-05-sharpness-1429.ivf.md5 -+71bcbe5357d36a19df5b07fbe3e27bffa8893f0a vp80-05-sharpness-1430.ivf.md5 -+89a09b1dffce2d55770a89e58d9925c70ef79bf8 vp80-05-sharpness-1431.ivf.md5 -+08444a18b4e6ba3450c0796dd728d48c399a2dc9 vp80-05-sharpness-1433.ivf.md5 -+6d6223719a90c13e848aa2a8a6642098cdb5977a vp80-05-sharpness-1434.ivf.md5 -+41d70bb5fa45bc88da1604a0af466930b8dd77b5 vp80-05-sharpness-1438.ivf.md5 -+086c56378df81b6cee264d7540a7b8f2b405c7a4 vp80-05-sharpness-1439.ivf.md5 -+d32dc2c4165eb266ea4c23c14a45459b363def32 vp80-05-sharpness-1440.ivf.md5 -+8c69dc3d8e563f56ffab5ad1e400d9e689dd23df vp80-05-sharpness-1443.ivf.md5 -\ No newline at end of file -diff --git a/test/test.mk b/test/test.mk -new file mode 100644 -index 0000000..982be5b ---- /dev/null -+++ b/test/test.mk -@@ -0,0 +1,179 @@ -+LIBVPX_TEST_SRCS-yes += acm_random.h -+LIBVPX_TEST_SRCS-yes += register_state_check.h -+LIBVPX_TEST_SRCS-yes += test.mk -+LIBVPX_TEST_SRCS-yes += test_libvpx.cc -+LIBVPX_TEST_SRCS-yes += util.h -+LIBVPX_TEST_SRCS-yes += video_source.h -+ -+## -+## BLACK BOX TESTS -+## -+## Black box tests only use the public API. 
-+## -+LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += altref_test.cc -+LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += config_test.cc -+LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += cq_test.cc -+LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += datarate_test.cc -+LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += encode_test_driver.cc -+LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += encode_test_driver.h -+LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += error_resilience_test.cc -+LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += i420_video_source.h -+LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += keyframe_test.cc -+LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += resize_test.cc -+ -+LIBVPX_TEST_SRCS-$(CONFIG_VP8_DECODER) += ../md5_utils.h ../md5_utils.c -+LIBVPX_TEST_SRCS-$(CONFIG_VP8_DECODER) += decode_test_driver.cc -+LIBVPX_TEST_SRCS-$(CONFIG_VP8_DECODER) += decode_test_driver.h -+LIBVPX_TEST_SRCS-$(CONFIG_VP8_DECODER) += ivf_video_source.h -+LIBVPX_TEST_SRCS-$(CONFIG_VP8_DECODER) += test_vector_test.cc -+## -+## WHITE BOX TESTS -+## -+## Whitebox tests invoke functions not exposed via the public API. Certain -+## shared library builds don't make these functions accessible. -+## -+ifeq ($(CONFIG_SHARED),) -+ -+# These tests require both the encoder and decoder to be built. -+ifeq ($(CONFIG_VP8_ENCODER)$(CONFIG_VP8_DECODER),yesyes) -+LIBVPX_TEST_SRCS-yes += boolcoder_test.cc -+endif -+ -+LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += fdct4x4_test.cc -+LIBVPX_TEST_SRCS-yes += idctllm_test.cc -+LIBVPX_TEST_SRCS-yes += intrapred_test.cc -+LIBVPX_TEST_SRCS-$(CONFIG_POSTPROC) += pp_filter_test.cc -+LIBVPX_TEST_SRCS-yes += sad_test.cc -+LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += set_roi.cc -+LIBVPX_TEST_SRCS-yes += sixtap_predict_test.cc -+LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += subtract_test.cc -+ -+endif -+ -+ -+## -+## TEST DATA -+## -+LIBVPX_TEST_DATA-$(CONFIG_VP8_ENCODER) += hantro_collage_w352h288.yuv -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-001.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-002.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-003.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-004.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-005.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-006.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-007.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-008.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-009.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-010.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-011.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-012.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-013.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-014.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-015.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-016.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-017.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-018.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1400.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1411.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1416.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1417.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1402.ivf 
-+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1412.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1418.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1424.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-01.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-02.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-03.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-04.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1401.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1403.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1407.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1408.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1409.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1410.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1413.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1414.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1415.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1425.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1426.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1427.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1432.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1435.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1436.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1437.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1441.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1442.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-04-partitions-1404.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-04-partitions-1405.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-04-partitions-1406.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1428.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1429.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1430.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1431.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1433.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1434.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1438.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1439.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1440.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1443.ivf -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-001.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-002.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-003.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-004.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-005.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-006.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-007.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-008.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-009.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-010.ivf.md5 
-+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-011.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-012.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-013.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-014.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-015.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-016.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-017.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-018.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1400.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1411.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1416.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1417.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1402.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1412.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1418.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1424.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1401.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1403.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1407.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1408.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1409.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1410.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1413.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1414.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1415.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1425.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1426.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1427.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1432.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1435.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1436.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1437.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1441.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1442.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-01.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-02.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-03.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-04.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-04-partitions-1404.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-04-partitions-1405.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-04-partitions-1406.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1428.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1429.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1430.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1431.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1433.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += 
vp80-05-sharpness-1434.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1438.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1439.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1440.ivf.md5 -+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1443.ivf.md5 -diff --git a/test/test_libvpx.cc b/test/test_libvpx.cc -new file mode 100644 -index 0000000..cfd5d28 ---- /dev/null -+++ b/test/test_libvpx.cc -@@ -0,0 +1,45 @@ -+/* -+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. -+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. -+ */ -+#include -+#include "vpx_config.h" -+#if ARCH_X86 || ARCH_X86_64 -+extern "C" { -+#include "vpx_ports/x86.h" -+} -+#endif -+#include "third_party/googletest/src/include/gtest/gtest.h" -+ -+static void append_gtest_filter(const char *str) { -+ std::string filter = ::testing::FLAGS_gtest_filter; -+ filter += str; -+ ::testing::FLAGS_gtest_filter = filter; -+} -+ -+int main(int argc, char **argv) { -+ ::testing::InitGoogleTest(&argc, argv); -+ -+#if ARCH_X86 || ARCH_X86_64 -+ const int simd_caps = x86_simd_caps(); -+ if(!(simd_caps & HAS_MMX)) -+ append_gtest_filter(":-MMX/*"); -+ if(!(simd_caps & HAS_SSE)) -+ append_gtest_filter(":-SSE/*"); -+ if(!(simd_caps & HAS_SSE2)) -+ append_gtest_filter(":-SSE2/*"); -+ if(!(simd_caps & HAS_SSE3)) -+ append_gtest_filter(":-SSE3/*"); -+ if(!(simd_caps & HAS_SSSE3)) -+ append_gtest_filter(":-SSSE3/*"); -+ if(!(simd_caps & HAS_SSE4_1)) -+ append_gtest_filter(":-SSE4_1/*"); -+#endif -+ -+ return RUN_ALL_TESTS(); -+} -diff --git a/test/test_vector_test.cc b/test/test_vector_test.cc -new file mode 100644 -index 0000000..938457b ---- /dev/null -+++ b/test/test_vector_test.cc -@@ -0,0 +1,144 @@ -+/* -+ Copyright (c) 2012 The WebM project authors. All Rights Reserved. -+ -+ Use of this source code is governed by a BSD-style license -+ that can be found in the LICENSE file in the root of the source -+ tree. An additional intellectual property rights grant can be found -+ in the file PATENTS. All contributing project authors may -+ be found in the AUTHORS file in the root of the source tree. -+ */ -+ -+#include -+#include -+#include -+#include "third_party/googletest/src/include/gtest/gtest.h" -+#include "test/decode_test_driver.h" -+#include "test/ivf_video_source.h" -+extern "C" { -+#include "./md5_utils.h" -+#include "vpx_mem/vpx_mem.h" -+} -+ -+#if defined(_MSC_VER) -+#define snprintf sprintf_s -+#endif -+ -+namespace { -+// There are 61 test vectors in total. 
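The SIMD gating in test_libvpx.cc above works through GoogleTest's filter string: tests are instantiated under a prefix naming the instruction set, and appending an exclusion such as ":-SSE2/*" hides every test in that instantiation when the CPU lacks the feature. A minimal, self-contained sketch of the same mechanism follows; the ExampleTest name and the parameter values are made up for illustration, and only the filter handling mirrors the harness above.

    #include <string>
    #include "gtest/gtest.h"

    // Stands in for a SIMD-specific, value-parameterized libvpx test.
    class ExampleTest : public ::testing::TestWithParam<int> {};

    TEST_P(ExampleTest, IsNonNegative) { EXPECT_GE(GetParam(), 0); }

    // Instantiated under the prefix "SSE2", the way libvpx names
    // instruction-set specific instantiations.
    INSTANTIATE_TEST_CASE_P(SSE2, ExampleTest, ::testing::Values(1, 2, 3));

    int main(int argc, char **argv) {
      ::testing::InitGoogleTest(&argc, argv);
      // Pretend SSE2 is unavailable: exclude everything named "SSE2/...".
      std::string filter = ::testing::FLAGS_gtest_filter;
      filter += ":-SSE2/*";
      ::testing::FLAGS_gtest_filter = filter;
      return RUN_ALL_TESTS();
    }

With the exclusion in place the SSE2 instantiation never runs; remove the three filter lines and all three parameterized tests execute normally.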
-+const char *kTestVectors[] = { -+ "vp80-00-comprehensive-001.ivf", -+ "vp80-00-comprehensive-002.ivf", "vp80-00-comprehensive-003.ivf", -+ "vp80-00-comprehensive-004.ivf", "vp80-00-comprehensive-005.ivf", -+ "vp80-00-comprehensive-006.ivf", "vp80-00-comprehensive-007.ivf", -+ "vp80-00-comprehensive-008.ivf", "vp80-00-comprehensive-009.ivf", -+ "vp80-00-comprehensive-010.ivf", "vp80-00-comprehensive-011.ivf", -+ "vp80-00-comprehensive-012.ivf", "vp80-00-comprehensive-013.ivf", -+ "vp80-00-comprehensive-014.ivf", "vp80-00-comprehensive-015.ivf", -+ "vp80-00-comprehensive-016.ivf", "vp80-00-comprehensive-017.ivf", -+ "vp80-00-comprehensive-018.ivf", "vp80-01-intra-1400.ivf", -+ "vp80-01-intra-1411.ivf", "vp80-01-intra-1416.ivf", -+ "vp80-01-intra-1417.ivf", "vp80-02-inter-1402.ivf", -+ "vp80-02-inter-1412.ivf", "vp80-02-inter-1418.ivf", -+ "vp80-02-inter-1424.ivf", "vp80-03-segmentation-01.ivf", -+ "vp80-03-segmentation-02.ivf", "vp80-03-segmentation-03.ivf", -+ "vp80-03-segmentation-04.ivf", "vp80-03-segmentation-1401.ivf", -+ "vp80-03-segmentation-1403.ivf", "vp80-03-segmentation-1407.ivf", -+ "vp80-03-segmentation-1408.ivf", "vp80-03-segmentation-1409.ivf", -+ "vp80-03-segmentation-1410.ivf", "vp80-03-segmentation-1413.ivf", -+ "vp80-03-segmentation-1414.ivf", "vp80-03-segmentation-1415.ivf", -+ "vp80-03-segmentation-1425.ivf", "vp80-03-segmentation-1426.ivf", -+ "vp80-03-segmentation-1427.ivf", "vp80-03-segmentation-1432.ivf", -+ "vp80-03-segmentation-1435.ivf", "vp80-03-segmentation-1436.ivf", -+ "vp80-03-segmentation-1437.ivf", "vp80-03-segmentation-1441.ivf", -+ "vp80-03-segmentation-1442.ivf", "vp80-04-partitions-1404.ivf", -+ "vp80-04-partitions-1405.ivf", "vp80-04-partitions-1406.ivf", -+ "vp80-05-sharpness-1428.ivf", "vp80-05-sharpness-1429.ivf", -+ "vp80-05-sharpness-1430.ivf", "vp80-05-sharpness-1431.ivf", -+ "vp80-05-sharpness-1433.ivf", "vp80-05-sharpness-1434.ivf", -+ "vp80-05-sharpness-1438.ivf", "vp80-05-sharpness-1439.ivf", -+ "vp80-05-sharpness-1440.ivf", "vp80-05-sharpness-1443.ivf" -+}; -+ -+class TestVectorTest : public libvpx_test::DecoderTest, -+ public ::testing::TestWithParam { -+ protected: -+ TestVectorTest() : md5_file_(NULL) {} -+ -+ virtual ~TestVectorTest() { -+ if (md5_file_) -+ fclose(md5_file_); -+ } -+ -+ void OpenMD5File(const std::string& md5_file_name_) { -+ md5_file_ = libvpx_test::OpenTestDataFile(md5_file_name_); -+ ASSERT_TRUE(md5_file_) << "Md5 file open failed. Filename: " -+ << md5_file_name_; -+ } -+ -+ virtual void DecompressedFrameHook(const vpx_image_t& img, -+ const unsigned int frame_number) { -+ char expected_md5[33]; -+ char junk[128]; -+ -+ // Read correct md5 checksums. -+ const int res = fscanf(md5_file_, "%s %s", expected_md5, junk); -+ ASSERT_NE(res, EOF) << "Read md5 data failed"; -+ expected_md5[32] = '\0'; -+ -+ MD5Context md5; -+ MD5Init(&md5); -+ -+ // Compute and update md5 for each raw in decompressed data. -+ for (int plane = 0; plane < 3; ++plane) { -+ uint8_t *buf = img.planes[plane]; -+ -+ for (unsigned int y = 0; y < (plane ? (img.d_h + 1) >> 1 : img.d_h); -+ ++y) { -+ MD5Update(&md5, buf, (plane ? (img.d_w + 1) >> 1 : img.d_w)); -+ buf += img.stride[plane]; -+ } -+ } -+ -+ uint8_t md5_sum[16]; -+ MD5Final(md5_sum, &md5); -+ -+ char actual_md5[33]; -+ // Convert to get the actual md5. -+ for (int i = 0; i < 16; i++) { -+ snprintf(&actual_md5[i * 2], sizeof(actual_md5) - i * 2, "%02x", -+ md5_sum[i]); -+ } -+ actual_md5[32] = '\0'; -+ -+ // Check md5 match. 
-+ ASSERT_STREQ(expected_md5, actual_md5) -+ << "Md5 checksums don't match: frame number = " << frame_number; -+ } -+ -+ private: -+ FILE *md5_file_; -+}; -+ -+// This test runs through the whole set of test vectors, and decodes them. -+// The md5 checksums are computed for each frame in the video file. If md5 -+// checksums match the correct md5 data, then the test is passed. Otherwise, -+// the test failed. -+TEST_P(TestVectorTest, MD5Match) { -+ const std::string filename = GetParam(); -+ // Open compressed video file. -+ libvpx_test::IVFVideoSource video(filename); -+ -+ video.Init(); -+ -+ // Construct md5 file name. -+ const std::string md5_filename = filename + ".md5"; -+ OpenMD5File(md5_filename); -+ -+ // Decode frame, and check the md5 matching. -+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); -+} -+ -+INSTANTIATE_TEST_CASE_P(TestVectorSequence, TestVectorTest, -+ ::testing::ValuesIn(kTestVectors)); -+ -+} // namespace -diff --git a/test/util.h b/test/util.h -new file mode 100644 -index 0000000..06a70cc ---- /dev/null -+++ b/test/util.h -@@ -0,0 +1,18 @@ -+/* -+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. -+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. -+ */ -+ -+#ifndef TEST_UTIL_H_ -+#define TEST_UTIL_H_ -+ -+// Macros -+#define PARAMS(...) ::testing::TestWithParam< std::tr1::tuple< __VA_ARGS__ > > -+#define GET_PARAM(k) std::tr1::get< k >(GetParam()) -+ -+#endif // TEST_UTIL_H_ -diff --git a/test/video_source.h b/test/video_source.h -new file mode 100644 -index 0000000..9772657 ---- /dev/null -+++ b/test/video_source.h -@@ -0,0 +1,175 @@ -+/* -+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. -+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. -+ */ -+#ifndef TEST_VIDEO_SOURCE_H_ -+#define TEST_VIDEO_SOURCE_H_ -+ -+#include -+#include -+#include -+#include "test/acm_random.h" -+#include "vpx/vpx_encoder.h" -+ -+namespace libvpx_test { -+ -+static FILE *OpenTestDataFile(const std::string& file_name) { -+ std::string path_to_source = file_name; -+ const char *kDataPath = getenv("LIBVPX_TEST_DATA_PATH"); -+ -+ if (kDataPath) { -+ path_to_source = kDataPath; -+ path_to_source += "/"; -+ path_to_source += file_name; -+ } -+ -+ return fopen(path_to_source.c_str(), "rb"); -+} -+ -+// Abstract base class for test video sources, which provide a stream of -+// vpx_image_t images with associated timestamps and duration. -+class VideoSource { -+ public: -+ virtual ~VideoSource() {} -+ -+ // Prepare the stream for reading, rewind/open as necessary. -+ virtual void Begin() = 0; -+ -+ // Advance the cursor to the next frame -+ virtual void Next() = 0; -+ -+ // Get the current video frame, or NULL on End-Of-Stream. -+ virtual vpx_image_t *img() const = 0; -+ -+ // Get the presentation timestamp of the current frame. 
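The MD5Match test above recomputes each decoded frame's checksum from the decoder's vpx_image_t, hashing every plane row by row and halving both dimensions for the chroma planes, then compares the hex digest against the matching .ivf.md5 file. Pulled out of the test fixture, the per-frame hash might look like the sketch below; it assumes the md5_utils.h interface quoted in the hunk above and an I420 image, and is only an illustration of the loop, not code from the tree.

    #include <cstdio>
    #include <string>
    #include "vpx/vpx_image.h"
    extern "C" {
    #include "./md5_utils.h"  // MD5Context, MD5Init, MD5Update, MD5Final
    }

    // Hex MD5 of the visible Y, U and V samples of an I420 frame.
    static std::string FrameMD5(const vpx_image_t &img) {
      MD5Context md5;
      MD5Init(&md5);
      for (int plane = 0; plane < 3; ++plane) {
        unsigned char *buf = img.planes[plane];
        // Chroma planes are subsampled by two in each direction.
        const unsigned int h = plane ? (img.d_h + 1) >> 1 : img.d_h;
        const unsigned int w = plane ? (img.d_w + 1) >> 1 : img.d_w;
        for (unsigned int y = 0; y < h; ++y) {
          MD5Update(&md5, buf, w);
          buf += img.stride[plane];
        }
      }
      unsigned char sum[16];
      MD5Final(sum, &md5);
      char hex[33];
      for (int i = 0; i < 16; ++i)
        snprintf(&hex[i * 2], sizeof(hex) - i * 2, "%02x", sum[i]);
      return std::string(hex, 32);
    }

The stride can exceed the visible width, which is why the row loop advances by img.stride[plane] but hashes only w bytes per row.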
-+ virtual vpx_codec_pts_t pts() const = 0; -+ -+ // Get the current frame's duration -+ virtual unsigned long duration() const = 0; -+ -+ // Get the timebase for the stream -+ virtual vpx_rational_t timebase() const = 0; -+ -+ // Get the current frame counter, starting at 0. -+ virtual unsigned int frame() const = 0; -+ -+ // Get the current file limit. -+ virtual unsigned int limit() const = 0; -+}; -+ -+ -+class DummyVideoSource : public VideoSource { -+ public: -+ DummyVideoSource() : img_(NULL), limit_(100), width_(0), height_(0) { -+ SetSize(80, 64); -+ } -+ -+ virtual ~DummyVideoSource() { vpx_img_free(img_); } -+ -+ virtual void Begin() { -+ frame_ = 0; -+ FillFrame(); -+ } -+ -+ virtual void Next() { -+ ++frame_; -+ FillFrame(); -+ } -+ -+ virtual vpx_image_t *img() const { -+ return (frame_ < limit_) ? img_ : NULL; -+ } -+ -+ // Models a stream where Timebase = 1/FPS, so pts == frame. -+ virtual vpx_codec_pts_t pts() const { return frame_; } -+ -+ virtual unsigned long duration() const { return 1; } -+ -+ virtual vpx_rational_t timebase() const { -+ const vpx_rational_t t = {1, 30}; -+ return t; -+ } -+ -+ virtual unsigned int frame() const { return frame_; } -+ -+ virtual unsigned int limit() const { return limit_; } -+ -+ void SetSize(unsigned int width, unsigned int height) { -+ if (width != width_ || height != height_) { -+ vpx_img_free(img_); -+ raw_sz_ = ((width + 31)&~31) * height * 3 / 2; -+ img_ = vpx_img_alloc(NULL, VPX_IMG_FMT_VPXI420, width, height, 32); -+ width_ = width; -+ height_ = height; -+ } -+ } -+ -+ protected: -+ virtual void FillFrame() { memset(img_->img_data, 0, raw_sz_); } -+ -+ vpx_image_t *img_; -+ size_t raw_sz_; -+ unsigned int limit_; -+ unsigned int frame_; -+ unsigned int width_; -+ unsigned int height_; -+}; -+ -+ -+class RandomVideoSource : public DummyVideoSource { -+ public: -+ RandomVideoSource(int seed = ACMRandom::DeterministicSeed()) -+ : rnd_(seed), -+ seed_(seed) { } -+ -+ protected: -+ // Reset the RNG to get a matching stream for the second pass -+ virtual void Begin() { -+ frame_ = 0; -+ rnd_.Reset(seed_); -+ FillFrame(); -+ } -+ -+ // 15 frames of noise, followed by 15 static frames. Reset to 0 rather -+ // than holding previous frames to encourage keyframes to be thrown. -+ virtual void FillFrame() { -+ if (frame_ % 30 < 15) -+ for (size_t i = 0; i < raw_sz_; ++i) -+ img_->img_data[i] = rnd_.Rand8(); -+ else -+ memset(img_->img_data, 0, raw_sz_); -+ } -+ -+ ACMRandom rnd_; -+ int seed_; -+}; -+ -+// Abstract base class for test video sources, which provide a stream of -+// decompressed images to the decoder. -+class CompressedVideoSource { -+ public: -+ virtual ~CompressedVideoSource() {} -+ -+ virtual void Init() = 0; -+ -+ // Prepare the stream for reading, rewind/open as necessary. 
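VideoSource above is a pull-style iterator: Begin() rewinds the stream, img() returns the current frame or NULL at end of stream, and Next() advances, with pts(), duration() and timebase() describing the timing. A consumer loop over one of the concrete sources might look like the sketch below; EncodeFrame is a placeholder for whatever the caller does with each frame (it is not a libvpx or test-harness function), so treat this as an assumed usage pattern rather than code from the tree.

    #include "test/video_source.h"

    // Placeholder consumer; a real test would feed these frames to an encoder.
    static void EncodeFrame(const vpx_image_t *img, vpx_codec_pts_t pts,
                            unsigned long duration) {
      (void)img;
      (void)pts;
      (void)duration;
    }

    // Walks a source from its first frame until img() returns NULL.
    static void DriveSource(libvpx_test::VideoSource *video) {
      for (video->Begin(); video->img() != NULL; video->Next())
        EncodeFrame(video->img(), video->pts(), video->duration());
    }

    // Example: 100 frames of 80x64 synthetic content, alternating noise
    // and static runs to provoke keyframes.
    //   libvpx_test::RandomVideoSource source;
    //   DriveSource(&source);

RandomVideoSource reseeds its RNG in Begin(), so driving the same source twice produces an identical frame sequence, which is what a matching second encode pass relies on.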
-+ virtual void Begin() = 0; -+ -+ // Advance the cursor to the next frame -+ virtual void Next() = 0; -+ -+ virtual const uint8_t *cxdata() const = 0; -+ -+ virtual const unsigned int frame_size() const = 0; -+ -+ virtual const unsigned int frame_number() const = 0; -+}; -+ -+} // namespace libvpx_test -+ -+#endif // TEST_VIDEO_SOURCE_H_ -diff --git a/third_party/libyuv/source/scale.c b/third_party/libyuv/source/scale.c -index 930a7ae..c142a17 100644 ---- a/third_party/libyuv/source/scale.c -+++ b/third_party/libyuv/source/scale.c -@@ -60,7 +60,7 @@ void SetUseReferenceImpl(int use) { - - #if defined(__ARM_NEON__) && !defined(YUV_DISABLE_ASM) - #define HAS_SCALEROWDOWN2_NEON --void ScaleRowDown2_NEON(const uint8* src_ptr, int /* src_stride */, -+void ScaleRowDown2_NEON(const uint8* src_ptr, int src_stride, - uint8* dst, int dst_width) { - asm volatile ( - "1: \n" -@@ -102,7 +102,7 @@ void ScaleRowDown2Int_NEON(const uint8* src_ptr, int src_stride, - } - - #define HAS_SCALEROWDOWN4_NEON --static void ScaleRowDown4_NEON(const uint8* src_ptr, int /* src_stride */, -+static void ScaleRowDown4_NEON(const uint8* src_ptr, int src_stride, - uint8* dst_ptr, int dst_width) { - asm volatile ( - "1: \n" -@@ -160,7 +160,7 @@ static void ScaleRowDown4Int_NEON(const uint8* src_ptr, int src_stride, - // Down scale from 4 to 3 pixels. Use the neon multilane read/write - // to load up the every 4th pixel into a 4 different registers. - // Point samples 32 pixels to 24 pixels. --static void ScaleRowDown34_NEON(const uint8* src_ptr, int /* src_stride */, -+static void ScaleRowDown34_NEON(const uint8* src_ptr, int src_stride, - uint8* dst_ptr, int dst_width) { - asm volatile ( - "1: \n" -@@ -284,7 +284,7 @@ const unsigned short mult38_div9[8] __attribute__ ((aligned(16))) = - 65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18 }; - - // 32 -> 12 --static void ScaleRowDown38_NEON(const uint8* src_ptr, int, -+static void ScaleRowDown38_NEON(const uint8* src_ptr, int src_stride, - uint8* dst_ptr, int dst_width) { - asm volatile ( - "vld1.u8 {q3}, [%3] \n" -diff --git a/tools/ftfy.sh b/tools/ftfy.sh -index 95fd397..c5cfdea 100755 ---- a/tools/ftfy.sh -+++ b/tools/ftfy.sh -@@ -34,7 +34,7 @@ vpx_style() { - --align-pointer=name \ - --indent-preprocessor --convert-tabs --indent-labels \ - --suffix=none --quiet "$@" -- sed -i 's/[[:space:]]\{1,\},/,/g' "$@" -+ sed -i "" 's/[[:space:]]\{1,\},/,/g' "$@" - } - - -diff --git a/vp8/common/alloccommon.c b/vp8/common/alloccommon.c -index d58e49c..8af9e90 100644 ---- a/vp8/common/alloccommon.c -+++ b/vp8/common/alloccommon.c -@@ -17,23 +17,6 @@ - #include "entropymode.h" - #include "systemdependent.h" - -- --extern void vp8_init_scan_order_mask(); -- --static void update_mode_info_border(MODE_INFO *mi, int rows, int cols) --{ -- int i; -- vpx_memset(mi - cols - 2, 0, sizeof(MODE_INFO) * (cols + 1)); -- -- for (i = 0; i < rows; i++) -- { -- /* TODO(holmer): Bug? This updates the last element of each row -- * rather than the border element! 
-- */ -- vpx_memset(&mi[i*cols-1], 0, sizeof(MODE_INFO)); -- } --} -- - void vp8_de_alloc_frame_buffers(VP8_COMMON *oci) - { - int i; -@@ -45,16 +28,20 @@ void vp8_de_alloc_frame_buffers(VP8_COMMON *oci) - vp8_yv12_de_alloc_frame_buffer(&oci->post_proc_buffer); - if (oci->post_proc_buffer_int_used) - vp8_yv12_de_alloc_frame_buffer(&oci->post_proc_buffer_int); -+ -+ vpx_free(oci->pp_limits_buffer); -+ oci->pp_limits_buffer = NULL; - #endif - - vpx_free(oci->above_context); - vpx_free(oci->mip); -+#if CONFIG_ERROR_CONCEALMENT - vpx_free(oci->prev_mip); -+ oci->prev_mip = NULL; -+#endif - -- oci->above_context = 0; -- oci->mip = 0; -- oci->prev_mip = 0; -- -+ oci->above_context = NULL; -+ oci->mip = NULL; - } - - int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height) -@@ -76,10 +63,7 @@ int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height) - oci->fb_idx_ref_cnt[i] = 0; - oci->yv12_fb[i].flags = 0; - if (vp8_yv12_alloc_frame_buffer(&oci->yv12_fb[i], width, height, VP8BORDERINPIXELS) < 0) -- { -- vp8_de_alloc_frame_buffers(oci); -- return 1; -- } -+ goto allocation_fail; - } - - oci->new_fb_idx = 0; -@@ -93,22 +77,7 @@ int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height) - oci->fb_idx_ref_cnt[3] = 1; - - if (vp8_yv12_alloc_frame_buffer(&oci->temp_scale_frame, width, 16, VP8BORDERINPIXELS) < 0) -- { -- vp8_de_alloc_frame_buffers(oci); -- return 1; -- } -- --#if CONFIG_POSTPROC -- if (vp8_yv12_alloc_frame_buffer(&oci->post_proc_buffer, width, height, VP8BORDERINPIXELS) < 0) -- { -- vp8_de_alloc_frame_buffers(oci); -- return 1; -- } -- -- oci->post_proc_buffer_int_used = 0; -- vpx_memset(&oci->postproc_state, 0, sizeof(oci->postproc_state)); -- vpx_memset((&oci->post_proc_buffer)->buffer_alloc,128,(&oci->post_proc_buffer)->frame_size); --#endif -+ goto allocation_fail; - - oci->mb_rows = height >> 4; - oci->mb_cols = width >> 4; -@@ -117,44 +86,43 @@ int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height) - oci->mip = vpx_calloc((oci->mb_cols + 1) * (oci->mb_rows + 1), sizeof(MODE_INFO)); - - if (!oci->mip) -- { -- vp8_de_alloc_frame_buffers(oci); -- return 1; -- } -+ goto allocation_fail; - - oci->mi = oci->mip + oci->mode_info_stride + 1; - -- /* allocate memory for last frame MODE_INFO array */ --#if CONFIG_ERROR_CONCEALMENT -- oci->prev_mip = vpx_calloc((oci->mb_cols + 1) * (oci->mb_rows + 1), sizeof(MODE_INFO)); -- -- if (!oci->prev_mip) -- { -- vp8_de_alloc_frame_buffers(oci); -- return 1; -- } -- -- oci->prev_mi = oci->prev_mip + oci->mode_info_stride + 1; --#else -- oci->prev_mip = NULL; -- oci->prev_mi = NULL; --#endif -+ /* Allocation of previous mode info will be done in vp8_decode_frame() -+ * as it is a decoder only data */ - - oci->above_context = vpx_calloc(sizeof(ENTROPY_CONTEXT_PLANES) * oci->mb_cols, 1); - - if (!oci->above_context) -- { -- vp8_de_alloc_frame_buffers(oci); -- return 1; -- } -+ goto allocation_fail; - -- update_mode_info_border(oci->mi, oci->mb_rows, oci->mb_cols); --#if CONFIG_ERROR_CONCEALMENT -- update_mode_info_border(oci->prev_mi, oci->mb_rows, oci->mb_cols); -+#if CONFIG_POSTPROC -+ if (vp8_yv12_alloc_frame_buffer(&oci->post_proc_buffer, width, height, VP8BORDERINPIXELS) < 0) -+ goto allocation_fail; -+ -+ oci->post_proc_buffer_int_used = 0; -+ vpx_memset(&oci->postproc_state, 0, sizeof(oci->postproc_state)); -+ vpx_memset(oci->post_proc_buffer.buffer_alloc, 128, -+ oci->post_proc_buffer.frame_size); -+ -+ /* Allocate buffer to store post-processing filter coefficients. 
-+ * -+ * Note: Round up mb_cols to support SIMD reads -+ */ -+ oci->pp_limits_buffer = vpx_memalign(16, 24 * ((oci->mb_cols + 1) & ~1)); -+ if (!oci->pp_limits_buffer) -+ goto allocation_fail; - #endif - - return 0; -+ -+allocation_fail: -+ vp8_de_alloc_frame_buffers(oci); -+ return 1; - } -+ - void vp8_setup_version(VP8_COMMON *cm) - { - switch (cm->version) -diff --git a/vp8/common/arm/armv6/intra4x4_predict_v6.asm b/vp8/common/arm/armv6/intra4x4_predict_v6.asm -index a974cd1..c5ec824 100644 ---- a/vp8/common/arm/armv6/intra4x4_predict_v6.asm -+++ b/vp8/common/arm/armv6/intra4x4_predict_v6.asm -@@ -18,15 +18,23 @@ - AREA ||.text||, CODE, READONLY, ALIGN=2 - - --;void vp8_intra4x4_predict(unsigned char *src, int src_stride, int b_mode, --; unsigned char *dst, int dst_stride) -- -+;void vp8_intra4x4_predict_armv6(unsigned char *Above, unsigned char *yleft, -+; B_PREDICTION_MODE left_stride, int b_mode, -+; unsigned char *dst, int dst_stride, -+; unsigned char top_left) -+ -+; r0: *Above -+; r1: *yleft -+; r2: left_stride -+; r3: b_mode -+; sp + #40: dst -+; sp + #44: dst_stride -+; sp + #48: top_left - |vp8_intra4x4_predict_armv6| PROC - push {r4-r12, lr} - -- -- cmp r2, #10 -- addlt pc, pc, r2, lsl #2 ; position independent switch -+ cmp r3, #10 -+ addlt pc, pc, r3, lsl #2 ; position independent switch - pop {r4-r12, pc} ; default - b b_dc_pred - b b_tm_pred -@@ -41,13 +49,13 @@ - - b_dc_pred - ; load values -- ldr r8, [r0, -r1] ; Above -- ldrb r4, [r0, #-1]! ; Left[0] -+ ldr r8, [r0] ; Above -+ ldrb r4, [r1], r2 ; Left[0] - mov r9, #0 -- ldrb r5, [r0, r1] ; Left[1] -- ldrb r6, [r0, r1, lsl #1]! ; Left[2] -+ ldrb r5, [r1], r2 ; Left[1] -+ ldrb r6, [r1], r2 ; Left[2] - usad8 r12, r8, r9 -- ldrb r7, [r0, r1] ; Left[3] -+ ldrb r7, [r1] ; Left[3] - - ; calculate dc - add r4, r4, r5 -@@ -55,31 +63,30 @@ b_dc_pred - add r4, r4, r7 - add r4, r4, r12 - add r4, r4, #4 -- ldr r0, [sp, #40] ; load stride -+ ldr r0, [sp, #44] ; dst_stride - mov r12, r4, asr #3 ; (expected_dc + 4) >> 3 - - add r12, r12, r12, lsl #8 -- add r3, r3, r0 -+ ldr r3, [sp, #40] ; dst - add r12, r12, r12, lsl #16 - - ; store values -- str r12, [r3, -r0] -+ str r12, [r3], r0 -+ str r12, [r3], r0 -+ str r12, [r3], r0 - str r12, [r3] -- str r12, [r3, r0] -- str r12, [r3, r0, lsl #1] - - pop {r4-r12, pc} - - b_tm_pred -- sub r10, r0, #1 ; Left -- ldr r8, [r0, -r1] ; Above -- ldrb r9, [r10, -r1] ; top_left -- ldrb r4, [r0, #-1]! ; Left[0] -- ldrb r5, [r10, r1]! ; Left[1] -- ldrb r6, [r0, r1, lsl #1] ; Left[2] -- ldrb r7, [r10, r1, lsl #1] ; Left[3] -- ldr r0, [sp, #40] ; load stride -- -+ ldr r8, [r0] ; Above -+ ldrb r9, [sp, #48] ; top_left -+ ldrb r4, [r1], r2 ; Left[0] -+ ldrb r5, [r1], r2 ; Left[1] -+ ldrb r6, [r1], r2 ; Left[2] -+ ldrb r7, [r1] ; Left[3] -+ ldr r0, [sp, #44] ; dst_stride -+ ldr r3, [sp, #40] ; dst - - add r9, r9, r9, lsl #16 ; [tl|tl] - uxtb16 r10, r8 ; a[2|0] -@@ -126,25 +133,26 @@ b_tm_pred - str r12, [r3], r0 - - add r12, r4, r5, lsl #8 ; [3|2|1|0] -- str r12, [r3], r0 -+ str r12, [r3] - - pop {r4-r12, pc} - - b_ve_pred -- ldr r8, [r0, -r1]! 
; a[3|2|1|0] -+ ldr r8, [r0] ; a[3|2|1|0] - ldr r11, c00FF00FF -- ldrb r9, [r0, #-1] ; top_left -+ ldrb r9, [sp, #48] ; top_left - ldrb r10, [r0, #4] ; a[4] - - ldr r0, c00020002 - - uxtb16 r4, r8 ; a[2|0] - uxtb16 r5, r8, ror #8 ; a[3|1] -- ldr r2, [sp, #40] ; stride -+ ldr r2, [sp, #44] ; dst_stride - pkhbt r9, r9, r5, lsl #16 ; a[1|-1] - - add r9, r9, r4, lsl #1 ;[a[1]+2*a[2] | tl+2*a[0] ] - uxtab16 r9, r9, r5 ;[a[1]+2*a[2]+a[3] | tl+2*a[0]+a[1] ] -+ ldr r3, [sp, #40] ; dst - uxtab16 r9, r9, r0 ;[a[1]+2*a[2]+a[3]+2| tl+2*a[0]+a[1]+2] - - add r0, r0, r10, lsl #16 ;[a[4]+2 | 2] -@@ -154,25 +162,23 @@ b_ve_pred - - and r9, r11, r9, asr #2 - and r4, r11, r4, asr #2 -- add r3, r3, r2 ; dst + dst_stride - add r9, r9, r4, lsl #8 - - ; store values -- str r9, [r3, -r2] -+ str r9, [r3], r2 -+ str r9, [r3], r2 -+ str r9, [r3], r2 - str r9, [r3] -- str r9, [r3, r2] -- str r9, [r3, r2, lsl #1] - - pop {r4-r12, pc} - - - b_he_pred -- sub r10, r0, #1 ; Left -- ldrb r4, [r0, #-1]! ; Left[0] -- ldrb r8, [r10, -r1] ; top_left -- ldrb r5, [r10, r1]! ; Left[1] -- ldrb r6, [r0, r1, lsl #1] ; Left[2] -- ldrb r7, [r10, r1, lsl #1] ; Left[3] -+ ldrb r4, [r1], r2 ; Left[0] -+ ldrb r8, [sp, #48] ; top_left -+ ldrb r5, [r1], r2 ; Left[1] -+ ldrb r6, [r1], r2 ; Left[2] -+ ldrb r7, [r1] ; Left[3] - - add r8, r8, r4 ; tl + l[0] - add r9, r4, r5 ; l[0] + l[1] -@@ -197,7 +203,8 @@ b_he_pred - pkhtb r10, r10, r10, asr #16 ; l[-|2|-|2] - pkhtb r11, r11, r11, asr #16 ; l[-|3|-|3] - -- ldr r0, [sp, #40] ; stride -+ ldr r0, [sp, #44] ; dst_stride -+ ldr r3, [sp, #40] ; dst - - add r8, r8, r8, lsl #8 ; l[0|0|0|0] - add r9, r9, r9, lsl #8 ; l[1|1|1|1] -@@ -206,16 +213,16 @@ b_he_pred - - ; store values - str r8, [r3], r0 -- str r9, [r3] -- str r10, [r3, r0] -- str r11, [r3, r0, lsl #1] -+ str r9, [r3], r0 -+ str r10, [r3], r0 -+ str r11, [r3] - - pop {r4-r12, pc} - - b_ld_pred -- ldr r4, [r0, -r1]! ; Above -+ ldr r4, [r0] ; Above[0-3] - ldr r12, c00020002 -- ldr r5, [r0, #4] -+ ldr r5, [r0, #4] ; Above[4-7] - ldr lr, c00FF00FF - - uxtb16 r6, r4 ; a[2|0] -@@ -225,7 +232,6 @@ b_ld_pred - pkhtb r10, r6, r8 ; a[2|4] - pkhtb r11, r7, r9 ; a[3|5] - -- - add r4, r6, r7, lsl #1 ; [a2+2*a3 | a0+2*a1] - add r4, r4, r10, ror #16 ; [a2+2*a3+a4 | a0+2*a1+a2] - uxtab16 r4, r4, r12 ; [a2+2*a3+a4+2 | a0+2*a1+a2+2] -@@ -244,7 +250,8 @@ b_ld_pred - add r7, r7, r9, asr #16 ; [ a5+2*a6+a7] - uxtah r7, r7, r12 ; [ a5+2*a6+a7+2] - -- ldr r0, [sp, #40] ; stride -+ ldr r0, [sp, #44] ; dst_stride -+ ldr r3, [sp, #40] ; dst - - ; scale down - and r4, lr, r4, asr #2 -@@ -266,18 +273,17 @@ b_ld_pred - mov r6, r6, lsr #16 - mov r11, r10, lsr #8 - add r11, r11, r6, lsl #24 ; [6|5|4|3] -- str r11, [r3], r0 -+ str r11, [r3] - - pop {r4-r12, pc} - - b_rd_pred -- sub r12, r0, r1 ; Above = src - src_stride -- ldrb r7, [r0, #-1]! ; l[0] = pp[3] -- ldr lr, [r12] ; Above = pp[8|7|6|5] -- ldrb r8, [r12, #-1]! 
; tl = pp[4] -- ldrb r6, [r12, r1, lsl #1] ; l[1] = pp[2] -- ldrb r5, [r0, r1, lsl #1] ; l[2] = pp[1] -- ldrb r4, [r12, r1, lsl #2] ; l[3] = pp[0] -+ ldrb r7, [r1], r2 ; l[0] = pp[3] -+ ldr lr, [r0] ; Above = pp[8|7|6|5] -+ ldrb r8, [sp, #48] ; tl = pp[4] -+ ldrb r6, [r1], r2 ; l[1] = pp[2] -+ ldrb r5, [r1], r2 ; l[2] = pp[1] -+ ldrb r4, [r1], r2 ; l[3] = pp[0] - - - uxtb16 r9, lr ; p[7|5] -@@ -307,7 +313,8 @@ b_rd_pred - add r7, r7, r10 ; [p6+2*p7+p8 | p4+2*p5+p6] - uxtab16 r7, r7, r12 ; [p6+2*p7+p8+2 | p4+2*p5+p6+2] - -- ldr r0, [sp, #40] ; stride -+ ldr r0, [sp, #44] ; dst_stride -+ ldr r3, [sp, #40] ; dst - - ; scale down - and r7, lr, r7, asr #2 -@@ -328,18 +335,17 @@ b_rd_pred - - mov r11, r10, lsl #8 ; [3|2|1|-] - uxtab r11, r11, r4 ; [3|2|1|0] -- str r11, [r3], r0 -+ str r11, [r3] - - pop {r4-r12, pc} - - b_vr_pred -- sub r12, r0, r1 ; Above = src - src_stride -- ldrb r7, [r0, #-1]! ; l[0] = pp[3] -- ldr lr, [r12] ; Above = pp[8|7|6|5] -- ldrb r8, [r12, #-1]! ; tl = pp[4] -- ldrb r6, [r12, r1, lsl #1] ; l[1] = pp[2] -- ldrb r5, [r0, r1, lsl #1] ; l[2] = pp[1] -- ldrb r4, [r12, r1, lsl #2] ; l[3] = pp[0] -+ ldrb r7, [r1], r2 ; l[0] = pp[3] -+ ldr lr, [r0] ; Above = pp[8|7|6|5] -+ ldrb r8, [sp, #48] ; tl = pp[4] -+ ldrb r6, [r1], r2 ; l[1] = pp[2] -+ ldrb r5, [r1], r2 ; l[2] = pp[1] -+ ldrb r4, [r1] ; l[3] = pp[0] - - add r5, r5, r7, lsl #16 ; p[3|1] - add r6, r6, r8, lsl #16 ; p[4|2] -@@ -376,7 +382,8 @@ b_vr_pred - add r8, r8, r10 ; [p6+2*p7+p8 | p4+2*p5+p6] - uxtab16 r8, r8, r12 ; [p6+2*p7+p8+2 | p4+2*p5+p6+2] - -- ldr r0, [sp, #40] ; stride -+ ldr r0, [sp, #44] ; dst_stride -+ ldr r3, [sp, #40] ; dst - - ; scale down - and r5, lr, r5, asr #2 ; [B|A] -@@ -397,14 +404,14 @@ b_vr_pred - pkhtb r10, r7, r5, asr #16 ; [-|H|-|B] - str r2, [r3], r0 - add r12, r12, r10, lsl #8 ; [H|D|B|A] -- str r12, [r3], r0 -+ str r12, [r3] - - pop {r4-r12, pc} - - b_vl_pred -- ldr r4, [r0, -r1]! ; [3|2|1|0] -+ ldr r4, [r0] ; [3|2|1|0] = Above[0-3] - ldr r12, c00020002 -- ldr r5, [r0, #4] ; [7|6|5|4] -+ ldr r5, [r0, #4] ; [7|6|5|4] = Above[4-7] - ldr lr, c00FF00FF - ldr r2, c00010001 - -@@ -441,7 +448,8 @@ b_vl_pred - add r9, r9, r11 ; [p5+2*p6+p7 | p3+2*p4+p5] - uxtab16 r9, r9, r12 ; [p5+2*p6+p7+2 | p3+2*p4+p5+2] - -- ldr r0, [sp, #40] ; stride -+ ldr r0, [sp, #44] ; dst_stride -+ ldr r3, [sp, #40] ; dst - - ; scale down - and r5, lr, r5, asr #2 ; [D|C] -@@ -449,7 +457,6 @@ b_vl_pred - and r8, lr, r8, asr #2 ; [I|D] - and r9, lr, r9, asr #2 ; [J|H] - -- - add r10, r4, r6, lsl #8 ; [F|B|E|A] - str r10, [r3], r0 - -@@ -463,18 +470,17 @@ b_vl_pred - str r12, [r3], r0 - - add r10, r7, r10, lsl #8 ; [J|H|D|G] -- str r10, [r3], r0 -+ str r10, [r3] - - pop {r4-r12, pc} - - b_hd_pred -- sub r12, r0, r1 ; Above = src - src_stride -- ldrb r7, [r0, #-1]! ; l[0] = pp[3] -- ldr lr, [r12] ; Above = pp[8|7|6|5] -- ldrb r8, [r12, #-1]! 
; tl = pp[4] -- ldrb r6, [r0, r1] ; l[1] = pp[2] -- ldrb r5, [r0, r1, lsl #1] ; l[2] = pp[1] -- ldrb r4, [r12, r1, lsl #2] ; l[3] = pp[0] -+ ldrb r7, [r1], r2 ; l[0] = pp[3] -+ ldr lr, [r0] ; Above = pp[8|7|6|5] -+ ldrb r8, [sp, #48] ; tl = pp[4] -+ ldrb r6, [r1], r2 ; l[1] = pp[2] -+ ldrb r5, [r1], r2 ; l[2] = pp[1] -+ ldrb r4, [r1] ; l[3] = pp[0] - - uxtb16 r9, lr ; p[7|5] - uxtb16 r10, lr, ror #8 ; p[8|6] -@@ -492,7 +498,6 @@ b_hd_pred - pkhtb r1, r9, r10 ; p[7|6] - pkhbt r10, r8, r10, lsl #16 ; p[6|5] - -- - uadd16 r11, r4, r5 ; [p1+p2 | p0+p1] - uhadd16 r11, r11, r2 ; [(p1+p2+1)>>1 | (p0+p1+1)>>1] - ; [B|A] -@@ -518,7 +523,8 @@ b_hd_pred - and r5, lr, r5, asr #2 ; [H|G] - and r6, lr, r6, asr #2 ; [J|I] - -- ldr lr, [sp, #40] ; stride -+ ldr lr, [sp, #44] ; dst_stride -+ ldr r3, [sp, #40] ; dst - - pkhtb r2, r0, r6 ; [-|F|-|I] - pkhtb r12, r6, r5, asr #16 ; [-|J|-|H] -@@ -527,7 +533,6 @@ b_hd_pred - mov r12, r12, ror #24 ; [J|I|H|F] - str r12, [r3], lr - -- - mov r7, r11, asr #16 ; [-|-|-|B] - str r2, [r3], lr - add r7, r7, r0, lsl #16 ; [-|E|-|B] -@@ -536,21 +541,20 @@ b_hd_pred - str r7, [r3], lr - - add r5, r11, r4, lsl #8 ; [D|B|C|A] -- str r5, [r3], lr -+ str r5, [r3] - - pop {r4-r12, pc} - - - - b_hu_pred -- ldrb r4, [r0, #-1]! ; Left[0] -+ ldrb r4, [r1], r2 ; Left[0] - ldr r12, c00020002 -- ldrb r5, [r0, r1]! ; Left[1] -+ ldrb r5, [r1], r2 ; Left[1] - ldr lr, c00FF00FF -- ldrb r6, [r0, r1]! ; Left[2] -+ ldrb r6, [r1], r2 ; Left[2] - ldr r2, c00010001 -- ldrb r7, [r0, r1] ; Left[3] -- -+ ldrb r7, [r1] ; Left[3] - - add r4, r4, r5, lsl #16 ; [1|0] - add r5, r5, r6, lsl #16 ; [2|1] -@@ -563,7 +567,8 @@ b_hu_pred - add r4, r4, r5, lsl #1 ; [p1+2*p2 | p0+2*p1] - add r4, r4, r9 ; [p1+2*p2+p3 | p0+2*p1+p2] - uxtab16 r4, r4, r12 ; [p1+2*p2+p3+2 | p0+2*p1+p2+2] -- ldr r2, [sp, #40] ; stride -+ ldr r2, [sp, #44] ; dst_stride -+ ldr r3, [sp, #40] ; dst - and r4, lr, r4, asr #2 ; [D|C] - - add r10, r6, r7 ; [p2+p3] -@@ -587,9 +592,9 @@ b_hu_pred - - add r10, r11, lsl #8 ; [-|-|F|E] - add r10, r10, r9, lsl #16 ; [G|G|F|E] -- str r10, [r3] -+ str r10, [r3], r2 - -- str r7, [r3, r2] -+ str r7, [r3] - - pop {r4-r12, pc} - -diff --git a/vp8/common/arm/neon/dc_only_idct_add_neon.asm b/vp8/common/arm/neon/dc_only_idct_add_neon.asm -index 65a4680..79ff02c 100644 ---- a/vp8/common/arm/neon/dc_only_idct_add_neon.asm -+++ b/vp8/common/arm/neon/dc_only_idct_add_neon.asm -@@ -46,7 +46,7 @@ - vst1.32 {d2[1]}, [r3], r12 - vst1.32 {d4[0]}, [r3], r12 - vst1.32 {d4[1]}, [r3] -- -+ - bx lr - - ENDP -diff --git a/vp8/common/blockd.h b/vp8/common/blockd.h -index a4c1d92..f7ff577 100644 ---- a/vp8/common/blockd.h -+++ b/vp8/common/blockd.h -@@ -161,22 +161,32 @@ typedef struct - uint8_t segment_id; /* Which set of segmentation parameters should be used for this MB */ - } MB_MODE_INFO; - --typedef struct -+typedef struct modeinfo - { - MB_MODE_INFO mbmi; - union b_mode_info bmi[16]; - } MODE_INFO; - - #if CONFIG_MULTI_RES_ENCODING --/* The information needed to be stored for higher-resolution encoder */ -+/* The mb-level information needed to be stored for higher-resolution encoder */ - typedef struct - { - MB_PREDICTION_MODE mode; - MV_REFERENCE_FRAME ref_frame; - int_mv mv; -- //union b_mode_info bmi[16]; -- int dissim; // dissimilarity level of the macroblock --} LOWER_RES_INFO; -+ int dissim; /* dissimilarity level of the macroblock */ -+} LOWER_RES_MB_INFO; -+ -+/* The frame-level information needed to be stored for higher-resolution -+ * encoder */ -+typedef struct -+{ -+ FRAME_TYPE frame_type; -+ int 
is_frame_dropped; -+ /* The frame number of each reference frames */ -+ unsigned int low_res_ref_frames[MAX_REF_FRAMES]; -+ LOWER_RES_MB_INFO *mb_info; -+} LOWER_RES_FRAME_INFO; - #endif - - typedef struct blockd -@@ -216,12 +226,6 @@ typedef struct macroblockd - MODE_INFO *mode_info_context; - int mode_info_stride; - --#if CONFIG_TEMPORAL_DENOISING -- MB_PREDICTION_MODE best_sse_inter_mode; -- int_mv best_sse_mv; -- unsigned char need_to_clamp_best_mvs; --#endif -- - FRAME_TYPE frame_type; - - int up_available; -diff --git a/vp8/common/entropy.c b/vp8/common/entropy.c -index a95a923..8c046a4 100644 ---- a/vp8/common/entropy.c -+++ b/vp8/common/entropy.c -@@ -101,7 +101,7 @@ const vp8_tree_index vp8_coef_tree[ 22] = /* corresponding _CONTEXT_NODEs */ - /* vp8_coef_encodings generated with: - vp8_tokens_from_tree(vp8_coef_encodings, vp8_coef_tree); - */ --const vp8_token vp8_coef_encodings[MAX_ENTROPY_TOKENS] = -+vp8_token vp8_coef_encodings[MAX_ENTROPY_TOKENS] = - { - {2, 2}, - {6, 3}, -diff --git a/vp8/common/entropymode.c b/vp8/common/entropymode.c -index de7e828..091e4c7 100644 ---- a/vp8/common/entropymode.c -+++ b/vp8/common/entropymode.c -@@ -160,9 +160,7 @@ const vp8_tree_index vp8_small_mvtree [14] = - void vp8_init_mbmode_probs(VP8_COMMON *x) - { - vpx_memcpy(x->fc.ymode_prob, vp8_ymode_prob, sizeof(vp8_ymode_prob)); -- vpx_memcpy(x->kf_ymode_prob, vp8_kf_ymode_prob, sizeof(vp8_kf_ymode_prob)); - vpx_memcpy(x->fc.uv_mode_prob, vp8_uv_mode_prob, sizeof(vp8_uv_mode_prob)); -- vpx_memcpy(x->kf_uv_mode_prob, vp8_kf_uv_mode_prob, sizeof(vp8_kf_uv_mode_prob)); - vpx_memcpy(x->fc.sub_mv_ref_prob, sub_mv_ref_prob, sizeof(sub_mv_ref_prob)); - } - -@@ -171,7 +169,3 @@ void vp8_default_bmode_probs(vp8_prob p [VP8_BINTRAMODES-1]) - vpx_memcpy(p, vp8_bmode_prob, sizeof(vp8_bmode_prob)); - } - --void vp8_kf_default_bmode_probs(vp8_prob p [VP8_BINTRAMODES] [VP8_BINTRAMODES] [VP8_BINTRAMODES-1]) --{ -- vpx_memcpy(p, vp8_kf_bmode_prob, sizeof(vp8_kf_bmode_prob)); --} -diff --git a/vp8/common/entropymode.h b/vp8/common/entropymode.h -index 70200cb..1df0f64 100644 ---- a/vp8/common/entropymode.h -+++ b/vp8/common/entropymode.h -@@ -24,11 +24,11 @@ typedef enum - SUBMVREF_LEFT_ABOVE_ZED - } sumvfref_t; - --typedef const int vp8_mbsplit[16]; -+typedef int vp8_mbsplit[16]; - - #define VP8_NUMMBSPLITS 4 - --extern vp8_mbsplit vp8_mbsplits [VP8_NUMMBSPLITS]; -+extern const vp8_mbsplit vp8_mbsplits [VP8_NUMMBSPLITS]; - - extern const int vp8_mbsplit_count [VP8_NUMMBSPLITS]; /* # of subsets */ - -@@ -67,9 +67,14 @@ extern const vp8_tree_index vp8_small_mvtree[]; - - extern const struct vp8_token_struct vp8_small_mvencodings[8]; - --void vp8_init_mbmode_probs(VP8_COMMON *x); -+/* Key frame default mode probs */ -+extern const vp8_prob vp8_kf_bmode_prob[VP8_BINTRAMODES][VP8_BINTRAMODES] -+[VP8_BINTRAMODES-1]; -+extern const vp8_prob vp8_kf_uv_mode_prob[VP8_UV_MODES-1]; -+extern const vp8_prob vp8_kf_ymode_prob[VP8_YMODES-1]; - --void vp8_default_bmode_probs(vp8_prob dest [VP8_BINTRAMODES-1]); -+void vp8_init_mbmode_probs(VP8_COMMON *x); -+void vp8_default_bmode_probs(vp8_prob dest [VP8_BINTRAMODES-1]); - void vp8_kf_default_bmode_probs(vp8_prob dest [VP8_BINTRAMODES] [VP8_BINTRAMODES] [VP8_BINTRAMODES-1]); - - #endif -diff --git a/vp8/common/extend.c b/vp8/common/extend.c -index 9089e16..c9bdd21 100644 ---- a/vp8/common/extend.c -+++ b/vp8/common/extend.c -@@ -116,7 +116,7 @@ void vp8_copy_and_extend_frame_with_rect(YV12_BUFFER_CONFIG *src, - int src_uv_offset = ((srcy * src->uv_stride) >> 1) + (srcx >> 1); 
- int dst_uv_offset = ((srcy * dst->uv_stride) >> 1) + (srcx >> 1); - -- // If the side is not touching the bounder then don't extend. -+ /* If the side is not touching the bounder then don't extend. */ - if (srcy) - et = 0; - if (srcx) -@@ -157,7 +157,10 @@ void vp8_copy_and_extend_frame_with_rect(YV12_BUFFER_CONFIG *src, - - - /* note the extension is only for the last row, for intra prediction purpose */ --void vp8_extend_mb_row(YV12_BUFFER_CONFIG *ybf, unsigned char *YPtr, unsigned char *UPtr, unsigned char *VPtr) -+void vp8_extend_mb_row(YV12_BUFFER_CONFIG *ybf, -+ unsigned char *YPtr, -+ unsigned char *UPtr, -+ unsigned char *VPtr) - { - int i; - -diff --git a/vp8/common/filter.h b/vp8/common/filter.h -index 0f225c2..b7591f2 100644 ---- a/vp8/common/filter.h -+++ b/vp8/common/filter.h -@@ -19,4 +19,4 @@ - extern const short vp8_bilinear_filters[8][2]; - extern const short vp8_sub_pel_filters[8][6]; - --#endif //FILTER_H -+#endif -diff --git a/vp8/common/generic/systemdependent.c b/vp8/common/generic/systemdependent.c -index 2a30166..5a6ac7b 100644 ---- a/vp8/common/generic/systemdependent.c -+++ b/vp8/common/generic/systemdependent.c -@@ -83,57 +83,6 @@ static int get_cpu_count() - #endif - - --#if HAVE_PTHREAD_H --#include --static void once(void (*func)(void)) --{ -- static pthread_once_t lock = PTHREAD_ONCE_INIT; -- pthread_once(&lock, func); --} -- -- --#elif defined(_WIN32) --static void once(void (*func)(void)) --{ -- /* Using a static initializer here rather than InitializeCriticalSection() -- * since there's no race-free context in which to execute it. Protecting -- * it with an atomic op like InterlockedCompareExchangePointer introduces -- * an x86 dependency, and InitOnceExecuteOnce requires Vista. -- */ -- static CRITICAL_SECTION lock = {(void *)-1, -1, 0, 0, 0, 0}; -- static int done; -- -- EnterCriticalSection(&lock); -- -- if (!done) -- { -- func(); -- done = 1; -- } -- -- LeaveCriticalSection(&lock); --} -- -- --#else --/* No-op version that performs no synchronization. vpx_rtcd() is idempotent, -- * so as long as your platform provides atomic loads/stores of pointers -- * no synchronization is strictly necessary. -- */ -- --static void once(void (*func)(void)) --{ -- static int done; -- -- if(!done) -- { -- func(); -- done = 1; -- } --} --#endif -- -- - void vp8_machine_specific_config(VP8_COMMON *ctx) - { - #if CONFIG_MULTITHREAD -@@ -145,6 +94,4 @@ void vp8_machine_specific_config(VP8_COMMON *ctx) - #elif ARCH_X86 || ARCH_X86_64 - ctx->cpu_caps = x86_simd_caps(); - #endif -- -- once(vpx_rtcd); - } -diff --git a/vp8/common/idctllm_test.cc b/vp8/common/idctllm_test.cc -deleted file mode 100755 -index 0f6ebe7..0000000 ---- a/vp8/common/idctllm_test.cc -+++ /dev/null -@@ -1,31 +0,0 @@ --/* -- * Copyright (c) 2010 The WebM project authors. All Rights Reserved. -- * -- * Use of this source code is governed by a BSD-style license -- * that can be found in the LICENSE file in the root of the source -- * tree. An additional intellectual property rights grant can be found -- * in the file PATENTS. All contributing project authors may -- * be found in the AUTHORS file in the root of the source tree. 
-- */ -- -- -- extern "C" { -- void vp8_short_idct4x4llm_c(short *input, unsigned char *pred_ptr, -- int pred_stride, unsigned char *dst_ptr, -- int dst_stride); --} -- --#include "vpx_config.h" --#include "idctllm_test.h" --namespace --{ -- --INSTANTIATE_TEST_CASE_P(C, IDCTTest, -- ::testing::Values(vp8_short_idct4x4llm_c)); -- --} // namespace -- --int main(int argc, char **argv) { -- ::testing::InitGoogleTest(&argc, argv); -- return RUN_ALL_TESTS(); --} -diff --git a/vp8/common/idctllm_test.h b/vp8/common/idctllm_test.h -deleted file mode 100755 -index a6a694b..0000000 ---- a/vp8/common/idctllm_test.h -+++ /dev/null -@@ -1,113 +0,0 @@ --/* -- * Copyright (c) 2010 The WebM project authors. All Rights Reserved. -- * -- * Use of this source code is governed by a BSD-style license -- * that can be found in the LICENSE file in the root of the source -- * tree. An additional intellectual property rights grant can be found -- * in the file PATENTS. All contributing project authors may -- * be found in the AUTHORS file in the root of the source tree. -- */ -- -- -- #include "third_party/googletest/src/include/gtest/gtest.h" --typedef void (*idct_fn_t)(short *input, unsigned char *pred_ptr, -- int pred_stride, unsigned char *dst_ptr, -- int dst_stride); --namespace { --class IDCTTest : public ::testing::TestWithParam --{ -- protected: -- virtual void SetUp() -- { -- int i; -- -- UUT = GetParam(); -- memset(input, 0, sizeof(input)); -- /* Set up guard blocks */ -- for(i=0; i<256; i++) -- output[i] = ((i&0xF)<4&&(i<64))?0:-1; -- } -- -- idct_fn_t UUT; -- short input[16]; -- unsigned char output[256]; -- unsigned char predict[256]; --}; -- --TEST_P(IDCTTest, TestGuardBlocks) --{ -- int i; -- -- for(i=0; i<256; i++) -- if((i&0xF) < 4 && i<64) -- EXPECT_EQ(0, output[i]) << i; -- else -- EXPECT_EQ(255, output[i]); --} -- --TEST_P(IDCTTest, TestAllZeros) --{ -- int i; -- -- UUT(input, output, 16, output, 16); -- -- for(i=0; i<256; i++) -- if((i&0xF) < 4 && i<64) -- EXPECT_EQ(0, output[i]) << "i==" << i; -- else -- EXPECT_EQ(255, output[i]) << "i==" << i; --} -- --TEST_P(IDCTTest, TestAllOnes) --{ -- int i; -- -- input[0] = 4; -- UUT(input, output, 16, output, 16); -- -- for(i=0; i<256; i++) -- if((i&0xF) < 4 && i<64) -- EXPECT_EQ(1, output[i]) << "i==" << i; -- else -- EXPECT_EQ(255, output[i]) << "i==" << i; --} -- --TEST_P(IDCTTest, TestAddOne) --{ -- int i; -- -- for(i=0; i<256; i++) -- predict[i] = i; -- -- input[0] = 4; -- UUT(input, predict, 16, output, 16); -- -- for(i=0; i<256; i++) -- if((i&0xF) < 4 && i<64) -- EXPECT_EQ(i+1, output[i]) << "i==" << i; -- else -- EXPECT_EQ(255, output[i]) << "i==" << i; --} -- --TEST_P(IDCTTest, TestWithData) --{ -- int i; -- -- for(i=0; i<16; i++) -- input[i] = i; -- -- UUT(input, output, 16, output, 16); -- -- for(i=0; i<256; i++) -- if((i&0xF) > 3 || i>63) -- EXPECT_EQ(255, output[i]) << "i==" << i; -- else if(i == 0) -- EXPECT_EQ(11, output[i]) << "i==" << i; -- else if(i == 34) -- EXPECT_EQ(1, output[i]) << "i==" << i; -- else if(i == 2 || i == 17 || i == 32) -- EXPECT_EQ(3, output[i]) << "i==" << i; -- else -- EXPECT_EQ(0, output[i]) << "i==" << i; --} --} -diff --git a/vp8/common/loopfilter.c b/vp8/common/loopfilter.c -index 3f05efe..41b4f12 100644 ---- a/vp8/common/loopfilter.c -+++ b/vp8/common/loopfilter.c -@@ -196,18 +196,122 @@ void vp8_loop_filter_frame_init(VP8_COMMON *cm, - } - } - --void vp8_loop_filter_frame --( -- VP8_COMMON *cm, -- MACROBLOCKD *mbd --) -+ -+void vp8_loop_filter_row_normal(VP8_COMMON *cm, MODE_INFO *mode_info_context, -+ int 
mb_row, int post_ystride, int post_uvstride, -+ unsigned char *y_ptr, unsigned char *u_ptr, -+ unsigned char *v_ptr) - { -- YV12_BUFFER_CONFIG *post = cm->frame_to_show; -+ int mb_col; -+ int filter_level; - loop_filter_info_n *lfi_n = &cm->lf_info; - loop_filter_info lfi; -- - FRAME_TYPE frame_type = cm->frame_type; - -+ for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) -+ { -+ int skip_lf = (mode_info_context->mbmi.mode != B_PRED && -+ mode_info_context->mbmi.mode != SPLITMV && -+ mode_info_context->mbmi.mb_skip_coeff); -+ -+ const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode]; -+ const int seg = mode_info_context->mbmi.segment_id; -+ const int ref_frame = mode_info_context->mbmi.ref_frame; -+ -+ filter_level = lfi_n->lvl[seg][ref_frame][mode_index]; -+ -+ if (filter_level) -+ { -+ const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level]; -+ lfi.mblim = lfi_n->mblim[filter_level]; -+ lfi.blim = lfi_n->blim[filter_level]; -+ lfi.lim = lfi_n->lim[filter_level]; -+ lfi.hev_thr = lfi_n->hev_thr[hev_index]; -+ -+ if (mb_col > 0) -+ vp8_loop_filter_mbv -+ (y_ptr, u_ptr, v_ptr, post_ystride, post_uvstride, &lfi); -+ -+ if (!skip_lf) -+ vp8_loop_filter_bv -+ (y_ptr, u_ptr, v_ptr, post_ystride, post_uvstride, &lfi); -+ -+ /* don't apply across umv border */ -+ if (mb_row > 0) -+ vp8_loop_filter_mbh -+ (y_ptr, u_ptr, v_ptr, post_ystride, post_uvstride, &lfi); -+ -+ if (!skip_lf) -+ vp8_loop_filter_bh -+ (y_ptr, u_ptr, v_ptr, post_ystride, post_uvstride, &lfi); -+ } -+ -+ y_ptr += 16; -+ u_ptr += 8; -+ v_ptr += 8; -+ -+ mode_info_context++; /* step to next MB */ -+ } -+ -+} -+ -+void vp8_loop_filter_row_simple(VP8_COMMON *cm, MODE_INFO *mode_info_context, -+ int mb_row, int post_ystride, int post_uvstride, -+ unsigned char *y_ptr, unsigned char *u_ptr, -+ unsigned char *v_ptr) -+{ -+ int mb_col; -+ int filter_level; -+ loop_filter_info_n *lfi_n = &cm->lf_info; -+ -+ for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) -+ { -+ int skip_lf = (mode_info_context->mbmi.mode != B_PRED && -+ mode_info_context->mbmi.mode != SPLITMV && -+ mode_info_context->mbmi.mb_skip_coeff); -+ -+ const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode]; -+ const int seg = mode_info_context->mbmi.segment_id; -+ const int ref_frame = mode_info_context->mbmi.ref_frame; -+ -+ filter_level = lfi_n->lvl[seg][ref_frame][mode_index]; -+ -+ if (filter_level) -+ { -+ if (mb_col > 0) -+ vp8_loop_filter_simple_mbv -+ (y_ptr, post_ystride, lfi_n->mblim[filter_level]); -+ -+ if (!skip_lf) -+ vp8_loop_filter_simple_bv -+ (y_ptr, post_ystride, lfi_n->blim[filter_level]); -+ -+ /* don't apply across umv border */ -+ if (mb_row > 0) -+ vp8_loop_filter_simple_mbh -+ (y_ptr, post_ystride, lfi_n->mblim[filter_level]); -+ -+ if (!skip_lf) -+ vp8_loop_filter_simple_bh -+ (y_ptr, post_ystride, lfi_n->blim[filter_level]); -+ } -+ -+ y_ptr += 16; -+ u_ptr += 8; -+ v_ptr += 8; -+ -+ mode_info_context++; /* step to next MB */ -+ } -+ -+} -+void vp8_loop_filter_frame(VP8_COMMON *cm, -+ MACROBLOCKD *mbd, -+ int frame_type) -+{ -+ YV12_BUFFER_CONFIG *post = cm->frame_to_show; -+ loop_filter_info_n *lfi_n = &cm->lf_info; -+ loop_filter_info lfi; -+ - int mb_row; - int mb_col; - int mb_rows = cm->mb_rows; -diff --git a/vp8/common/loopfilter.h b/vp8/common/loopfilter.h -index 0fa8375..b3af2d6 100644 ---- a/vp8/common/loopfilter.h -+++ b/vp8/common/loopfilter.h -@@ -69,6 +69,7 @@ typedef void loop_filter_uvfunction - /* assorted loopfilter functions which get used elsewhere */ - struct VP8Common; - struct 
macroblockd; -+struct modeinfo; - - void vp8_loop_filter_init(struct VP8Common *cm); - -@@ -76,7 +77,8 @@ void vp8_loop_filter_frame_init(struct VP8Common *cm, - struct macroblockd *mbd, - int default_filt_lvl); - --void vp8_loop_filter_frame(struct VP8Common *cm, struct macroblockd *mbd); -+void vp8_loop_filter_frame(struct VP8Common *cm, struct macroblockd *mbd, -+ int frame_type); - - void vp8_loop_filter_partial_frame(struct VP8Common *cm, - struct macroblockd *mbd, -@@ -89,4 +91,15 @@ void vp8_loop_filter_frame_yonly(struct VP8Common *cm, - void vp8_loop_filter_update_sharpness(loop_filter_info_n *lfi, - int sharpness_lvl); - -+void vp8_loop_filter_row_normal(struct VP8Common *cm, -+ struct modeinfo *mode_info_context, -+ int mb_row, int post_ystride, int post_uvstride, -+ unsigned char *y_ptr, unsigned char *u_ptr, -+ unsigned char *v_ptr); -+ -+void vp8_loop_filter_row_simple(struct VP8Common *cm, -+ struct modeinfo *mode_info_context, -+ int mb_row, int post_ystride, int post_uvstride, -+ unsigned char *y_ptr, unsigned char *u_ptr, -+ unsigned char *v_ptr); - #endif -diff --git a/vp8/common/mfqe.c b/vp8/common/mfqe.c -index ca67e91..3dff150 100644 ---- a/vp8/common/mfqe.c -+++ b/vp8/common/mfqe.c -@@ -160,9 +160,9 @@ static void multiframe_quality_enhance_block - vsad = (vp8_variance8x8(v, uv_stride, vd, uvd_stride, &sse)); - vsad = (sse + 32)>>6; - #else -- sad = (vp8_sad16x16(y, y_stride, yd, yd_stride, INT_MAX)+128)>>8; -- usad = (vp8_sad8x8(u, uv_stride, ud, uvd_stride, INT_MAX)+32)>>6; -- vsad = (vp8_sad8x8(v, uv_stride, vd, uvd_stride, INT_MAX)+32)>>6; -+ sad = (vp8_sad16x16(y, y_stride, yd, yd_stride, UINT_MAX) + 128) >> 8; -+ usad = (vp8_sad8x8(u, uv_stride, ud, uvd_stride, UINT_MAX) + 32) >> 6; -+ vsad = (vp8_sad8x8(v, uv_stride, vd, uvd_stride, UINT_MAX)+ 32) >> 6; - #endif - } - else /* if (blksize == 8) */ -@@ -177,16 +177,16 @@ static void multiframe_quality_enhance_block - vsad = (vp8_variance4x4(v, uv_stride, vd, uvd_stride, &sse)); - vsad = (sse + 8)>>4; - #else -- sad = (vp8_sad8x8(y, y_stride, yd, yd_stride, INT_MAX)+32)>>6; -- usad = (vp8_sad4x4(u, uv_stride, ud, uvd_stride, INT_MAX)+8)>>4; -- vsad = (vp8_sad4x4(v, uv_stride, vd, uvd_stride, INT_MAX)+8)>>4; -+ sad = (vp8_sad8x8(y, y_stride, yd, yd_stride, UINT_MAX) + 32) >> 6; -+ usad = (vp8_sad4x4(u, uv_stride, ud, uvd_stride, UINT_MAX) + 8) >> 4; -+ vsad = (vp8_sad4x4(v, uv_stride, vd, uvd_stride, UINT_MAX) + 8) >> 4; - #endif - } - - actrisk = (actd > act * 5); - -- /* thr = qdiff/8 + log2(act) + log4(qprev) */ -- thr = (qdiff >> 3); -+ /* thr = qdiff/16 + log2(act) + log4(qprev) */ -+ thr = (qdiff >> 4); - while (actd >>= 1) thr++; - while (qprev >>= 2) thr++; - -diff --git a/vp8/common/mips/dspr2/dequantize_dspr2.c b/vp8/common/mips/dspr2/dequantize_dspr2.c -new file mode 100644 -index 0000000..6823325 ---- /dev/null -+++ b/vp8/common/mips/dspr2/dequantize_dspr2.c -@@ -0,0 +1,33 @@ -+/* -+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. -+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. 
-+ */ -+ -+ -+#include "vpx_config.h" -+#include "vpx_rtcd.h" -+#include "vpx_mem/vpx_mem.h" -+ -+#if HAVE_DSPR2 -+void vp8_dequant_idct_add_dspr2(short *input, short *dq, -+ unsigned char *dest, int stride) -+{ -+ int i; -+ -+ for (i = 0; i < 16; i++) -+ { -+ input[i] = dq[i] * input[i]; -+ } -+ -+ vp8_short_idct4x4llm_dspr2(input, dest, stride, dest, stride); -+ -+ vpx_memset(input, 0, 32); -+ -+} -+ -+#endif -diff --git a/vp8/common/mips/dspr2/filter_dspr2.c b/vp8/common/mips/dspr2/filter_dspr2.c -new file mode 100644 -index 0000000..71fdcd7 ---- /dev/null -+++ b/vp8/common/mips/dspr2/filter_dspr2.c -@@ -0,0 +1,2823 @@ -+/* -+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. -+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. -+ */ -+ -+ -+#include -+#include "vpx_rtcd.h" -+#include "vpx_ports/mem.h" -+ -+#if HAVE_DSPR2 -+#define CROP_WIDTH 256 -+unsigned char ff_cropTbl[256 + 2 * CROP_WIDTH]; -+ -+static const unsigned short sub_pel_filterss[8][3] = -+{ -+ { 0, 0, 0}, -+ { 0, 0x0601, 0x7b0c}, -+ { 0x0201, 0x0b08, 0x6c24}, -+ { 0, 0x0906, 0x5d32}, -+ { 0x0303, 0x1010, 0x4d4d}, -+ { 0, 0x0609, 0x325d}, -+ { 0x0102, 0x080b, 0x246c}, -+ { 0, 0x0106, 0x0c7b}, -+}; -+ -+ -+static const int sub_pel_filters_int[8][3] = -+{ -+ { 0, 0, 0}, -+ { 0x0000fffa, 0x007b000c, 0xffff0000}, -+ { 0x0002fff5, 0x006c0024, 0xfff80001}, -+ { 0x0000fff7, 0x005d0032, 0xfffa0000}, -+ { 0x0003fff0, 0x004d004d, 0xfff00003}, -+ { 0x0000fffa, 0x0032005d, 0xfff70000}, -+ { 0x0001fff8, 0x0024006c, 0xfff50002}, -+ { 0x0000ffff, 0x000c007b, 0xfffa0000}, -+}; -+ -+ -+static const int sub_pel_filters_inv[8][3] = -+{ -+ { 0, 0, 0}, -+ { 0xfffa0000, 0x000c007b, 0x0000ffff}, -+ { 0xfff50002, 0x0024006c, 0x0001fff8}, -+ { 0xfff70000, 0x0032005d, 0x0000fffa}, -+ { 0xfff00003, 0x004d004d, 0x0003fff0}, -+ { 0xfffa0000, 0x005d0032, 0x0000fff7}, -+ { 0xfff80001, 0x006c0024, 0x0002fff5}, -+ { 0xffff0000, 0x007b000c, 0x0000fffa}, -+}; -+ -+ -+static const int sub_pel_filters_int_tap_4[8][2] = -+{ -+ { 0, 0}, -+ { 0xfffa007b, 0x000cffff}, -+ { 0, 0}, -+ { 0xfff7005d, 0x0032fffa}, -+ { 0, 0}, -+ { 0xfffa0032, 0x005dfff7}, -+ { 0, 0}, -+ { 0xffff000c, 0x007bfffa}, -+}; -+ -+ -+static const int sub_pel_filters_inv_tap_4[8][2] = -+{ -+ { 0, 0}, -+ { 0x007bfffa, 0xffff000c}, -+ { 0, 0}, -+ { 0x005dfff7, 0xfffa0032}, -+ { 0, 0}, -+ { 0x0032fffa, 0xfff7005d}, -+ { 0, 0}, -+ { 0x000cffff, 0xfffa007b}, -+}; -+ -+inline void prefetch_load(unsigned char *src) -+{ -+ __asm__ __volatile__ ( -+ "pref 0, 0(%[src]) \n\t" -+ : -+ : [src] "r" (src) -+ ); -+} -+ -+ -+inline void prefetch_store(unsigned char *dst) -+{ -+ __asm__ __volatile__ ( -+ "pref 1, 0(%[dst]) \n\t" -+ : -+ : [dst] "r" (dst) -+ ); -+} -+ -+void dsputil_static_init(void) -+{ -+ int i; -+ -+ for (i = 0; i < 256; i++) ff_cropTbl[i + CROP_WIDTH] = i; -+ -+ for (i = 0; i < CROP_WIDTH; i++) -+ { -+ ff_cropTbl[i] = 0; -+ ff_cropTbl[i + CROP_WIDTH + 256] = 255; -+ } -+} -+ -+void vp8_filter_block2d_first_pass_4 -+( -+ unsigned char *RESTRICT src_ptr, -+ unsigned char *RESTRICT dst_ptr, -+ unsigned int src_pixels_per_line, -+ unsigned int output_height, -+ int xoffset, -+ int pitch -+) -+{ -+ unsigned int i; -+ int Temp1, Temp2, Temp3, Temp4; -+ -+ unsigned int vector4a = 64; -+ int 
vector1b, vector2b, vector3b; -+ unsigned int tp1, tp2, tn1, tn2; -+ unsigned int p1, p2, p3; -+ unsigned int n1, n2, n3; -+ unsigned char *cm = ff_cropTbl + CROP_WIDTH; -+ -+ vector3b = sub_pel_filters_inv[xoffset][2]; -+ -+ /* if (xoffset == 0) we don't need any filtering */ -+ if (vector3b == 0) -+ { -+ for (i = 0; i < output_height; i++) -+ { -+ /* prefetch src_ptr data to cache memory */ -+ prefetch_load(src_ptr + src_pixels_per_line); -+ dst_ptr[0] = src_ptr[0]; -+ dst_ptr[1] = src_ptr[1]; -+ dst_ptr[2] = src_ptr[2]; -+ dst_ptr[3] = src_ptr[3]; -+ -+ /* next row... */ -+ src_ptr += src_pixels_per_line; -+ dst_ptr += 4; -+ } -+ } -+ else -+ { -+ if (vector3b > 65536) -+ { -+ /* 6 tap filter */ -+ -+ vector1b = sub_pel_filters_inv[xoffset][0]; -+ vector2b = sub_pel_filters_inv[xoffset][1]; -+ -+ /* prefetch src_ptr data to cache memory */ -+ prefetch_load(src_ptr + src_pixels_per_line); -+ -+ for (i = output_height; i--;) -+ { -+ /* apply filter with vectors pairs */ -+ __asm__ __volatile__ ( -+ "ulw %[tp1], -2(%[src_ptr]) \n\t" -+ "ulw %[tp2], 2(%[src_ptr]) \n\t" -+ -+ /* even 1. pixel */ -+ "mtlo %[vector4a], $ac3 \n\t" -+ "preceu.ph.qbr %[p1], %[tp1] \n\t" -+ "preceu.ph.qbl %[p2], %[tp1] \n\t" -+ "preceu.ph.qbr %[p3], %[tp2] \n\t" -+ "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t" -+ "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t" -+ "dpa.w.ph $ac3, %[p3], %[vector3b] \n\t" -+ -+ /* even 2. pixel */ -+ "mtlo %[vector4a], $ac2 \n\t" -+ "preceu.ph.qbl %[p1], %[tp2] \n\t" -+ "balign %[tp2], %[tp1], 3 \n\t" -+ "extp %[Temp1], $ac3, 9 \n\t" -+ "dpa.w.ph $ac2, %[p2], %[vector1b] \n\t" -+ "dpa.w.ph $ac2, %[p3], %[vector2b] \n\t" -+ "dpa.w.ph $ac2, %[p1], %[vector3b] \n\t" -+ -+ /* odd 1. pixel */ -+ "ulw %[tn2], 3(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac3 \n\t" -+ "preceu.ph.qbr %[n1], %[tp2] \n\t" -+ "preceu.ph.qbl %[n2], %[tp2] \n\t" -+ "preceu.ph.qbr %[n3], %[tn2] \n\t" -+ "extp %[Temp3], $ac2, 9 \n\t" -+ "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t" -+ "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t" -+ "dpa.w.ph $ac3, %[n3], %[vector3b] \n\t" -+ -+ /* even 2. pixel */ -+ "mtlo %[vector4a], $ac2 \n\t" -+ "preceu.ph.qbl %[n1], %[tn2] \n\t" -+ "extp %[Temp2], $ac3, 9 \n\t" -+ "dpa.w.ph $ac2, %[n2], %[vector1b] \n\t" -+ "dpa.w.ph $ac2, %[n3], %[vector2b] \n\t" -+ "dpa.w.ph $ac2, %[n1], %[vector3b] \n\t" -+ "extp %[Temp4], $ac2, 9 \n\t" -+ -+ /* clamp */ -+ "lbux %[tp1], %[Temp1](%[cm]) \n\t" -+ "lbux %[tn1], %[Temp2](%[cm]) \n\t" -+ "lbux %[tp2], %[Temp3](%[cm]) \n\t" -+ "lbux %[n2], %[Temp4](%[cm]) \n\t" -+ -+ /* store bytes */ -+ "sb %[tp1], 0(%[dst_ptr]) \n\t" -+ "sb %[tn1], 1(%[dst_ptr]) \n\t" -+ "sb %[tp2], 2(%[dst_ptr]) \n\t" -+ "sb %[n2], 3(%[dst_ptr]) \n\t" -+ -+ : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2), [tn1] "=&r" (tn1), -+ [tn2] "=&r" (tn2), [p1] "=&r" (p1), [p2] "=&r" (p2), -+ [p3] "=&r" (p3), [n1] "=&r" (n1), [n2] "=&r" (n2), -+ [n3] "=&r" (n3), [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), -+ [Temp3] "=&r" (Temp3), [Temp4] "=&r" (Temp4) -+ : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), -+ [vector4a] "r" (vector4a), [cm] "r" (cm), [dst_ptr] "r" (dst_ptr), -+ [vector3b] "r" (vector3b), [src_ptr] "r" (src_ptr) -+ ); -+ -+ /* Next row... 
*/ -+ src_ptr += src_pixels_per_line; -+ dst_ptr += pitch; -+ } -+ } -+ else -+ { -+ /* 4 tap filter */ -+ -+ vector1b = sub_pel_filters_inv_tap_4[xoffset][0]; -+ vector2b = sub_pel_filters_inv_tap_4[xoffset][1]; -+ -+ for (i = output_height; i--;) -+ { -+ /* apply filter with vectors pairs */ -+ __asm__ __volatile__ ( -+ "ulw %[tp1], -1(%[src_ptr]) \n\t" -+ "ulw %[tp2], 3(%[src_ptr]) \n\t" -+ -+ /* even 1. pixel */ -+ "mtlo %[vector4a], $ac3 \n\t" -+ "preceu.ph.qbr %[p1], %[tp1] \n\t" -+ "preceu.ph.qbl %[p2], %[tp1] \n\t" -+ "preceu.ph.qbr %[p3], %[tp2] \n\t" -+ "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t" -+ "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t" -+ -+ /* even 2. pixel */ -+ "mtlo %[vector4a], $ac2 \n\t" -+ "dpa.w.ph $ac2, %[p2], %[vector1b] \n\t" -+ "dpa.w.ph $ac2, %[p3], %[vector2b] \n\t" -+ "extp %[Temp1], $ac3, 9 \n\t" -+ -+ /* odd 1. pixel */ -+ "srl %[tn1], %[tp2], 8 \n\t" -+ "balign %[tp2], %[tp1], 3 \n\t" -+ "mtlo %[vector4a], $ac3 \n\t" -+ "preceu.ph.qbr %[n1], %[tp2] \n\t" -+ "preceu.ph.qbl %[n2], %[tp2] \n\t" -+ "preceu.ph.qbr %[n3], %[tn1] \n\t" -+ "extp %[Temp3], $ac2, 9 \n\t" -+ "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t" -+ "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t" -+ -+ /* odd 2. pixel */ -+ "mtlo %[vector4a], $ac2 \n\t" -+ "extp %[Temp2], $ac3, 9 \n\t" -+ "dpa.w.ph $ac2, %[n2], %[vector1b] \n\t" -+ "dpa.w.ph $ac2, %[n3], %[vector2b] \n\t" -+ "extp %[Temp4], $ac2, 9 \n\t" -+ -+ /* clamp and store results */ -+ "lbux %[tp1], %[Temp1](%[cm]) \n\t" -+ "lbux %[tn1], %[Temp2](%[cm]) \n\t" -+ "lbux %[tp2], %[Temp3](%[cm]) \n\t" -+ "sb %[tp1], 0(%[dst_ptr]) \n\t" -+ "sb %[tn1], 1(%[dst_ptr]) \n\t" -+ "lbux %[n2], %[Temp4](%[cm]) \n\t" -+ "sb %[tp2], 2(%[dst_ptr]) \n\t" -+ "sb %[n2], 3(%[dst_ptr]) \n\t" -+ -+ : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2), [tn1] "=&r" (tn1), -+ [p1] "=&r" (p1), [p2] "=&r" (p2), [p3] "=&r" (p3), -+ [n1] "=&r" (n1), [n2] "=&r" (n2), [n3] "=&r" (n3), -+ [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), -+ [Temp3] "=&r" (Temp3), [Temp4] "=&r" (Temp4) -+ : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), -+ [vector4a] "r" (vector4a), [cm] "r" (cm), [dst_ptr] "r" (dst_ptr), -+ [src_ptr] "r" (src_ptr) -+ ); -+ /* Next row... */ -+ src_ptr += src_pixels_per_line; -+ dst_ptr += pitch; -+ } -+ } -+ } -+} -+ -+void vp8_filter_block2d_first_pass_8_all -+( -+ unsigned char *RESTRICT src_ptr, -+ unsigned char *RESTRICT dst_ptr, -+ unsigned int src_pixels_per_line, -+ unsigned int output_height, -+ int xoffset, -+ int pitch -+) -+{ -+ unsigned int i; -+ int Temp1, Temp2, Temp3, Temp4; -+ -+ unsigned int vector4a = 64; -+ unsigned int vector1b, vector2b, vector3b; -+ unsigned int tp1, tp2, tn1, tn2; -+ unsigned int p1, p2, p3, p4; -+ unsigned int n1, n2, n3, n4; -+ -+ unsigned char *cm = ff_cropTbl + CROP_WIDTH; -+ -+ /* if (xoffset == 0) we don't need any filtering */ -+ if (xoffset == 0) -+ { -+ for (i = 0; i < output_height; i++) -+ { -+ /* prefetch src_ptr data to cache memory */ -+ prefetch_load(src_ptr + src_pixels_per_line); -+ -+ dst_ptr[0] = src_ptr[0]; -+ dst_ptr[1] = src_ptr[1]; -+ dst_ptr[2] = src_ptr[2]; -+ dst_ptr[3] = src_ptr[3]; -+ dst_ptr[4] = src_ptr[4]; -+ dst_ptr[5] = src_ptr[5]; -+ dst_ptr[6] = src_ptr[6]; -+ dst_ptr[7] = src_ptr[7]; -+ -+ /* next row... 
*/ -+ src_ptr += src_pixels_per_line; -+ dst_ptr += 8; -+ } -+ } -+ else -+ { -+ vector3b = sub_pel_filters_inv[xoffset][2]; -+ -+ if (vector3b > 65536) -+ { -+ /* 6 tap filter */ -+ -+ vector1b = sub_pel_filters_inv[xoffset][0]; -+ vector2b = sub_pel_filters_inv[xoffset][1]; -+ -+ for (i = output_height; i--;) -+ { -+ /* prefetch src_ptr data to cache memory */ -+ prefetch_load(src_ptr + src_pixels_per_line); -+ -+ /* apply filter with vectors pairs */ -+ __asm__ __volatile__ ( -+ "ulw %[tp1], -2(%[src_ptr]) \n\t" -+ "ulw %[tp2], 2(%[src_ptr]) \n\t" -+ -+ /* even 1. pixel */ -+ "mtlo %[vector4a], $ac3 \n\t" -+ "preceu.ph.qbr %[p1], %[tp1] \n\t" -+ "preceu.ph.qbl %[p2], %[tp1] \n\t" -+ "preceu.ph.qbr %[p3], %[tp2] \n\t" -+ "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t" -+ "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t" -+ "dpa.w.ph $ac3, %[p3], %[vector3b] \n\t" -+ -+ /* even 2. pixel */ -+ "mtlo %[vector4a], $ac2 \n\t" -+ "preceu.ph.qbl %[p1], %[tp2] \n\t" -+ "dpa.w.ph $ac2, %[p2], %[vector1b] \n\t" -+ "dpa.w.ph $ac2, %[p3], %[vector2b] \n\t" -+ "dpa.w.ph $ac2, %[p1], %[vector3b] \n\t" -+ -+ "balign %[tp2], %[tp1], 3 \n\t" -+ "extp %[Temp1], $ac3, 9 \n\t" -+ "ulw %[tn2], 3(%[src_ptr]) \n\t" -+ -+ /* odd 1. pixel */ -+ "mtlo %[vector4a], $ac3 \n\t" -+ "preceu.ph.qbr %[n1], %[tp2] \n\t" -+ "preceu.ph.qbl %[n2], %[tp2] \n\t" -+ "preceu.ph.qbr %[n3], %[tn2] \n\t" -+ "extp %[Temp3], $ac2, 9 \n\t" -+ "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t" -+ "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t" -+ "dpa.w.ph $ac3, %[n3], %[vector3b] \n\t" -+ -+ /* odd 2. pixel */ -+ "mtlo %[vector4a], $ac2 \n\t" -+ "preceu.ph.qbl %[n1], %[tn2] \n\t" -+ "dpa.w.ph $ac2, %[n2], %[vector1b] \n\t" -+ "dpa.w.ph $ac2, %[n3], %[vector2b] \n\t" -+ "dpa.w.ph $ac2, %[n1], %[vector3b] \n\t" -+ "ulw %[tp1], 6(%[src_ptr]) \n\t" -+ "extp %[Temp2], $ac3, 9 \n\t" -+ "mtlo %[vector4a], $ac3 \n\t" -+ "preceu.ph.qbr %[p2], %[tp1] \n\t" -+ "extp %[Temp4], $ac2, 9 \n\t" -+ -+ : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2), [tn2] "=&r" (tn2), -+ [p1] "=&r" (p1), [p2] "=&r" (p2), [p3] "=&r" (p3), -+ [n1] "=&r" (n1), [n2] "=&r" (n2), [n3] "=&r" (n3), -+ [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), -+ [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4) -+ : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), -+ [vector4a] "r" (vector4a), [vector3b] "r" (vector3b), -+ [src_ptr] "r" (src_ptr) -+ ); -+ -+ /* clamp and store results */ -+ dst_ptr[0] = cm[Temp1]; -+ dst_ptr[1] = cm[Temp2]; -+ dst_ptr[2] = cm[Temp3]; -+ dst_ptr[3] = cm[Temp4]; -+ -+ /* next 4 pixels */ -+ __asm__ __volatile__ ( -+ /* even 3. pixel */ -+ "dpa.w.ph $ac3, %[p3], %[vector1b] \n\t" -+ "dpa.w.ph $ac3, %[p1], %[vector2b] \n\t" -+ "dpa.w.ph $ac3, %[p2], %[vector3b] \n\t" -+ -+ /* even 4. pixel */ -+ "mtlo %[vector4a], $ac2 \n\t" -+ "preceu.ph.qbl %[p4], %[tp1] \n\t" -+ "dpa.w.ph $ac2, %[p1], %[vector1b] \n\t" -+ "dpa.w.ph $ac2, %[p2], %[vector2b] \n\t" -+ "dpa.w.ph $ac2, %[p4], %[vector3b] \n\t" -+ -+ "ulw %[tn1], 7(%[src_ptr]) \n\t" -+ "extp %[Temp1], $ac3, 9 \n\t" -+ -+ /* odd 3. pixel */ -+ "mtlo %[vector4a], $ac3 \n\t" -+ "preceu.ph.qbr %[n2], %[tn1] \n\t" -+ "dpa.w.ph $ac3, %[n3], %[vector1b] \n\t" -+ "dpa.w.ph $ac3, %[n1], %[vector2b] \n\t" -+ "dpa.w.ph $ac3, %[n2], %[vector3b] \n\t" -+ "extp %[Temp3], $ac2, 9 \n\t" -+ -+ /* odd 4. 
pixel */ -+ "mtlo %[vector4a], $ac2 \n\t" -+ "preceu.ph.qbl %[n4], %[tn1] \n\t" -+ "dpa.w.ph $ac2, %[n1], %[vector1b] \n\t" -+ "dpa.w.ph $ac2, %[n2], %[vector2b] \n\t" -+ "dpa.w.ph $ac2, %[n4], %[vector3b] \n\t" -+ "extp %[Temp2], $ac3, 9 \n\t" -+ "extp %[Temp4], $ac2, 9 \n\t" -+ -+ : [tn1] "=&r" (tn1), [n2] "=&r" (n2), -+ [p4] "=&r" (p4), [n4] "=&r" (n4), -+ [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), -+ [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4) -+ : [tp1] "r" (tp1), [vector1b] "r" (vector1b), [p2] "r" (p2), -+ [vector2b] "r" (vector2b), [n1] "r" (n1), [p1] "r" (p1), -+ [vector4a] "r" (vector4a), [vector3b] "r" (vector3b), -+ [p3] "r" (p3), [n3] "r" (n3), [src_ptr] "r" (src_ptr) -+ ); -+ -+ /* clamp and store results */ -+ dst_ptr[4] = cm[Temp1]; -+ dst_ptr[5] = cm[Temp2]; -+ dst_ptr[6] = cm[Temp3]; -+ dst_ptr[7] = cm[Temp4]; -+ -+ src_ptr += src_pixels_per_line; -+ dst_ptr += pitch; -+ } -+ } -+ else -+ { -+ /* 4 tap filter */ -+ -+ vector1b = sub_pel_filters_inv_tap_4[xoffset][0]; -+ vector2b = sub_pel_filters_inv_tap_4[xoffset][1]; -+ -+ for (i = output_height; i--;) -+ { -+ /* prefetch src_ptr data to cache memory */ -+ prefetch_load(src_ptr + src_pixels_per_line); -+ -+ /* apply filter with vectors pairs */ -+ __asm__ __volatile__ ( -+ "ulw %[tp1], -1(%[src_ptr]) \n\t" -+ -+ /* even 1. pixel */ -+ "mtlo %[vector4a], $ac3 \n\t" -+ "preceu.ph.qbr %[p1], %[tp1] \n\t" -+ "preceu.ph.qbl %[p2], %[tp1] \n\t" -+ "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t" -+ "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t" -+ -+ "ulw %[tp2], 3(%[src_ptr]) \n\t" -+ -+ /* even 2. pixel */ -+ "mtlo %[vector4a], $ac2 \n\t" -+ "preceu.ph.qbr %[p3], %[tp2] \n\t" -+ "preceu.ph.qbl %[p4], %[tp2] \n\t" -+ "dpa.w.ph $ac2, %[p2], %[vector1b] \n\t" -+ "dpa.w.ph $ac2, %[p3], %[vector2b] \n\t" -+ "extp %[Temp1], $ac3, 9 \n\t" -+ -+ "balign %[tp2], %[tp1], 3 \n\t" -+ -+ /* odd 1. pixel */ -+ "mtlo %[vector4a], $ac3 \n\t" -+ "preceu.ph.qbr %[n1], %[tp2] \n\t" -+ "preceu.ph.qbl %[n2], %[tp2] \n\t" -+ "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t" -+ "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t" -+ "extp %[Temp3], $ac2, 9 \n\t" -+ -+ "ulw %[tn2], 4(%[src_ptr]) \n\t" -+ -+ /* odd 2. pixel */ -+ "mtlo %[vector4a], $ac2 \n\t" -+ "preceu.ph.qbr %[n3], %[tn2] \n\t" -+ "preceu.ph.qbl %[n4], %[tn2] \n\t" -+ "dpa.w.ph $ac2, %[n2], %[vector1b] \n\t" -+ "dpa.w.ph $ac2, %[n3], %[vector2b] \n\t" -+ "ulw %[tp1], 7(%[src_ptr]) \n\t" -+ "extp %[Temp2], $ac3, 9 \n\t" -+ "mtlo %[vector4a], $ac3 \n\t" -+ "extp %[Temp4], $ac2, 9 \n\t" -+ -+ : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2), -+ [tn2] "=&r" (tn2), [p1] "=&r" (p1), [p2] "=&r" (p2), -+ [p3] "=&r" (p3), [p4] "=&r" (p4), [n1] "=&r" (n1), -+ [n2] "=&r" (n2), [n3] "=&r" (n3), [n4] "=&r" (n4), -+ [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), -+ [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4) -+ : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), -+ [vector4a] "r" (vector4a), [src_ptr] "r" (src_ptr) -+ ); -+ -+ /* clamp and store results */ -+ dst_ptr[0] = cm[Temp1]; -+ dst_ptr[1] = cm[Temp2]; -+ dst_ptr[2] = cm[Temp3]; -+ dst_ptr[3] = cm[Temp4]; -+ -+ /* next 4 pixels */ -+ __asm__ __volatile__ ( -+ /* even 3. pixel */ -+ "dpa.w.ph $ac3, %[p3], %[vector1b] \n\t" -+ "dpa.w.ph $ac3, %[p4], %[vector2b] \n\t" -+ -+ /* even 4. pixel */ -+ "mtlo %[vector4a], $ac2 \n\t" -+ "preceu.ph.qbr %[p2], %[tp1] \n\t" -+ "dpa.w.ph $ac2, %[p4], %[vector1b] \n\t" -+ "dpa.w.ph $ac2, %[p2], %[vector2b] \n\t" -+ "extp %[Temp1], $ac3, 9 \n\t" -+ -+ /* odd 3. 
pixel */ -+ "mtlo %[vector4a], $ac3 \n\t" -+ "dpa.w.ph $ac3, %[n3], %[vector1b] \n\t" -+ "dpa.w.ph $ac3, %[n4], %[vector2b] \n\t" -+ "ulw %[tn1], 8(%[src_ptr]) \n\t" -+ "extp %[Temp3], $ac2, 9 \n\t" -+ -+ /* odd 4. pixel */ -+ "mtlo %[vector4a], $ac2 \n\t" -+ "preceu.ph.qbr %[n2], %[tn1] \n\t" -+ "dpa.w.ph $ac2, %[n4], %[vector1b] \n\t" -+ "dpa.w.ph $ac2, %[n2], %[vector2b] \n\t" -+ "extp %[Temp2], $ac3, 9 \n\t" -+ "extp %[Temp4], $ac2, 9 \n\t" -+ -+ : [tn1] "=&r" (tn1), [p2] "=&r" (p2), [n2] "=&r" (n2), -+ [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), -+ [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4) -+ : [tp1] "r" (tp1), [p3] "r" (p3), [p4] "r" (p4), -+ [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), -+ [vector4a] "r" (vector4a), [src_ptr] "r" (src_ptr), -+ [n3] "r" (n3), [n4] "r" (n4) -+ ); -+ -+ /* clamp and store results */ -+ dst_ptr[4] = cm[Temp1]; -+ dst_ptr[5] = cm[Temp2]; -+ dst_ptr[6] = cm[Temp3]; -+ dst_ptr[7] = cm[Temp4]; -+ -+ /* next row... */ -+ src_ptr += src_pixels_per_line; -+ dst_ptr += pitch; -+ } -+ } -+ } -+} -+ -+ -+void vp8_filter_block2d_first_pass16_6tap -+( -+ unsigned char *RESTRICT src_ptr, -+ unsigned char *RESTRICT dst_ptr, -+ unsigned int src_pixels_per_line, -+ unsigned int output_height, -+ int xoffset, -+ int pitch -+) -+{ -+ unsigned int i; -+ int Temp1, Temp2, Temp3, Temp4; -+ -+ unsigned int vector4a; -+ unsigned int vector1b, vector2b, vector3b; -+ unsigned int tp1, tp2, tn1, tn2; -+ unsigned int p1, p2, p3, p4; -+ unsigned int n1, n2, n3, n4; -+ unsigned char *cm = ff_cropTbl + CROP_WIDTH; -+ -+ vector1b = sub_pel_filters_inv[xoffset][0]; -+ vector2b = sub_pel_filters_inv[xoffset][1]; -+ vector3b = sub_pel_filters_inv[xoffset][2]; -+ vector4a = 64; -+ -+ for (i = output_height; i--;) -+ { -+ /* prefetch src_ptr data to cache memory */ -+ prefetch_load(src_ptr + src_pixels_per_line); -+ -+ /* apply filter with vectors pairs */ -+ __asm__ __volatile__ ( -+ "ulw %[tp1], -2(%[src_ptr]) \n\t" -+ "ulw %[tp2], 2(%[src_ptr]) \n\t" -+ -+ /* even 1. pixel */ -+ "mtlo %[vector4a], $ac3 \n\t" -+ "preceu.ph.qbr %[p1], %[tp1] \n\t" -+ "preceu.ph.qbl %[p2], %[tp1] \n\t" -+ "preceu.ph.qbr %[p3], %[tp2] \n\t" -+ "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t" -+ "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t" -+ "dpa.w.ph $ac3, %[p3], %[vector3b] \n\t" -+ -+ /* even 2. pixel */ -+ "mtlo %[vector4a], $ac2 \n\t" -+ "preceu.ph.qbl %[p1], %[tp2] \n\t" -+ "dpa.w.ph $ac2, %[p2], %[vector1b] \n\t" -+ "dpa.w.ph $ac2, %[p3], %[vector2b] \n\t" -+ "dpa.w.ph $ac2, %[p1], %[vector3b] \n\t" -+ -+ "balign %[tp2], %[tp1], 3 \n\t" -+ "ulw %[tn2], 3(%[src_ptr]) \n\t" -+ "extp %[Temp1], $ac3, 9 \n\t" -+ -+ /* odd 1. pixel */ -+ "mtlo %[vector4a], $ac3 \n\t" -+ "preceu.ph.qbr %[n1], %[tp2] \n\t" -+ "preceu.ph.qbl %[n2], %[tp2] \n\t" -+ "preceu.ph.qbr %[n3], %[tn2] \n\t" -+ "extp %[Temp3], $ac2, 9 \n\t" -+ "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t" -+ "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t" -+ "dpa.w.ph $ac3, %[n3], %[vector3b] \n\t" -+ -+ /* odd 2. 
pixel */ -+ "mtlo %[vector4a], $ac2 \n\t" -+ "preceu.ph.qbl %[n1], %[tn2] \n\t" -+ "dpa.w.ph $ac2, %[n2], %[vector1b] \n\t" -+ "dpa.w.ph $ac2, %[n3], %[vector2b] \n\t" -+ "dpa.w.ph $ac2, %[n1], %[vector3b] \n\t" -+ "ulw %[tp1], 6(%[src_ptr]) \n\t" -+ "extp %[Temp2], $ac3, 9 \n\t" -+ "mtlo %[vector4a], $ac3 \n\t" -+ "preceu.ph.qbr %[p2], %[tp1] \n\t" -+ "extp %[Temp4], $ac2, 9 \n\t" -+ -+ : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2), [tn2] "=&r" (tn2), -+ [p1] "=&r" (p1), [p2] "=&r" (p2), [p3] "=&r" (p3), -+ [n1] "=&r" (n1), [n2] "=&r" (n2), [n3] "=&r" (n3), -+ [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), -+ [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4) -+ : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), -+ [vector4a] "r" (vector4a), [vector3b] "r" (vector3b), -+ [src_ptr] "r" (src_ptr) -+ ); -+ -+ /* clamp and store results */ -+ dst_ptr[0] = cm[Temp1]; -+ dst_ptr[1] = cm[Temp2]; -+ dst_ptr[2] = cm[Temp3]; -+ dst_ptr[3] = cm[Temp4]; -+ -+ /* next 4 pixels */ -+ __asm__ __volatile__ ( -+ /* even 3. pixel */ -+ "dpa.w.ph $ac3, %[p3], %[vector1b] \n\t" -+ "dpa.w.ph $ac3, %[p1], %[vector2b] \n\t" -+ "dpa.w.ph $ac3, %[p2], %[vector3b] \n\t" -+ -+ /* even 4. pixel */ -+ "mtlo %[vector4a], $ac2 \n\t" -+ "preceu.ph.qbl %[p4], %[tp1] \n\t" -+ "dpa.w.ph $ac2, %[p1], %[vector1b] \n\t" -+ "dpa.w.ph $ac2, %[p2], %[vector2b] \n\t" -+ "dpa.w.ph $ac2, %[p4], %[vector3b] \n\t" -+ "ulw %[tn1], 7(%[src_ptr]) \n\t" -+ "extp %[Temp1], $ac3, 9 \n\t" -+ -+ /* odd 3. pixel */ -+ "mtlo %[vector4a], $ac3 \n\t" -+ "preceu.ph.qbr %[n2], %[tn1] \n\t" -+ "dpa.w.ph $ac3, %[n3], %[vector1b] \n\t" -+ "dpa.w.ph $ac3, %[n1], %[vector2b] \n\t" -+ "dpa.w.ph $ac3, %[n2], %[vector3b] \n\t" -+ "extp %[Temp3], $ac2, 9 \n\t" -+ -+ /* odd 4. pixel */ -+ "mtlo %[vector4a], $ac2 \n\t" -+ "preceu.ph.qbl %[n4], %[tn1] \n\t" -+ "dpa.w.ph $ac2, %[n1], %[vector1b] \n\t" -+ "dpa.w.ph $ac2, %[n2], %[vector2b] \n\t" -+ "dpa.w.ph $ac2, %[n4], %[vector3b] \n\t" -+ "ulw %[tp2], 10(%[src_ptr]) \n\t" -+ "extp %[Temp2], $ac3, 9 \n\t" -+ "mtlo %[vector4a], $ac3 \n\t" -+ "preceu.ph.qbr %[p1], %[tp2] \n\t" -+ "extp %[Temp4], $ac2, 9 \n\t" -+ -+ : [tn1] "=&r" (tn1), [tp2] "=&r" (tp2), [n2] "=&r" (n2), -+ [p4] "=&r" (p4), [n4] "=&r" (n4), -+ [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), -+ [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4) -+ : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), -+ [tp1] "r" (tp1), [n1] "r" (n1), [p1] "r" (p1), -+ [vector4a] "r" (vector4a), [p2] "r" (p2), [vector3b] "r" (vector3b), -+ [p3] "r" (p3), [n3] "r" (n3), [src_ptr] "r" (src_ptr) -+ ); -+ -+ /* clamp and store results */ -+ dst_ptr[4] = cm[Temp1]; -+ dst_ptr[5] = cm[Temp2]; -+ dst_ptr[6] = cm[Temp3]; -+ dst_ptr[7] = cm[Temp4]; -+ -+ /* next 4 pixels */ -+ __asm__ __volatile__ ( -+ /* even 5. pixel */ -+ "dpa.w.ph $ac3, %[p2], %[vector1b] \n\t" -+ "dpa.w.ph $ac3, %[p4], %[vector2b] \n\t" -+ "dpa.w.ph $ac3, %[p1], %[vector3b] \n\t" -+ -+ /* even 6. pixel */ -+ "mtlo %[vector4a], $ac2 \n\t" -+ "preceu.ph.qbl %[p3], %[tp2] \n\t" -+ "dpa.w.ph $ac2, %[p4], %[vector1b] \n\t" -+ "dpa.w.ph $ac2, %[p1], %[vector2b] \n\t" -+ "dpa.w.ph $ac2, %[p3], %[vector3b] \n\t" -+ -+ "ulw %[tn1], 11(%[src_ptr]) \n\t" -+ "extp %[Temp1], $ac3, 9 \n\t" -+ -+ /* odd 5. pixel */ -+ "mtlo %[vector4a], $ac3 \n\t" -+ "preceu.ph.qbr %[n1], %[tn1] \n\t" -+ "dpa.w.ph $ac3, %[n2], %[vector1b] \n\t" -+ "dpa.w.ph $ac3, %[n4], %[vector2b] \n\t" -+ "dpa.w.ph $ac3, %[n1], %[vector3b] \n\t" -+ "extp %[Temp3], $ac2, 9 \n\t" -+ -+ /* odd 6. 
pixel */ -+ "mtlo %[vector4a], $ac2 \n\t" -+ "preceu.ph.qbl %[n3], %[tn1] \n\t" -+ "dpa.w.ph $ac2, %[n4], %[vector1b] \n\t" -+ "dpa.w.ph $ac2, %[n1], %[vector2b] \n\t" -+ "dpa.w.ph $ac2, %[n3], %[vector3b] \n\t" -+ "ulw %[tp1], 14(%[src_ptr]) \n\t" -+ "extp %[Temp2], $ac3, 9 \n\t" -+ "mtlo %[vector4a], $ac3 \n\t" -+ "preceu.ph.qbr %[p4], %[tp1] \n\t" -+ "extp %[Temp4], $ac2, 9 \n\t" -+ -+ : [tn1] "=&r" (tn1), [tp1] "=&r" (tp1), -+ [n1] "=&r" (n1), [p3] "=&r" (p3), [n3] "=&r" (n3), -+ [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), -+ [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4) -+ : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), -+ [tp2] "r" (tp2), [p2] "r" (p2), [n2] "r" (n2), -+ [p4] "r" (p4), [n4] "r" (n4), [p1] "r" (p1), [src_ptr] "r" (src_ptr), -+ [vector4a] "r" (vector4a), [vector3b] "r" (vector3b) -+ ); -+ -+ /* clamp and store results */ -+ dst_ptr[8] = cm[Temp1]; -+ dst_ptr[9] = cm[Temp2]; -+ dst_ptr[10] = cm[Temp3]; -+ dst_ptr[11] = cm[Temp4]; -+ -+ /* next 4 pixels */ -+ __asm__ __volatile__ ( -+ /* even 7. pixel */ -+ "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t" -+ "dpa.w.ph $ac3, %[p3], %[vector2b] \n\t" -+ "dpa.w.ph $ac3, %[p4], %[vector3b] \n\t" -+ -+ /* even 8. pixel */ -+ "mtlo %[vector4a], $ac2 \n\t" -+ "preceu.ph.qbl %[p2], %[tp1] \n\t" -+ "dpa.w.ph $ac2, %[p3], %[vector1b] \n\t" -+ "dpa.w.ph $ac2, %[p4], %[vector2b] \n\t" -+ "dpa.w.ph $ac2, %[p2], %[vector3b] \n\t" -+ "ulw %[tn1], 15(%[src_ptr]) \n\t" -+ "extp %[Temp1], $ac3, 9 \n\t" -+ -+ /* odd 7. pixel */ -+ "mtlo %[vector4a], $ac3 \n\t" -+ "preceu.ph.qbr %[n4], %[tn1] \n\t" -+ "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t" -+ "dpa.w.ph $ac3, %[n3], %[vector2b] \n\t" -+ "dpa.w.ph $ac3, %[n4], %[vector3b] \n\t" -+ "extp %[Temp3], $ac2, 9 \n\t" -+ -+ /* odd 8. pixel */ -+ "mtlo %[vector4a], $ac2 \n\t" -+ "preceu.ph.qbl %[n2], %[tn1] \n\t" -+ "dpa.w.ph $ac2, %[n3], %[vector1b] \n\t" -+ "dpa.w.ph $ac2, %[n4], %[vector2b] \n\t" -+ "dpa.w.ph $ac2, %[n2], %[vector3b] \n\t" -+ "extp %[Temp2], $ac3, 9 \n\t" -+ "extp %[Temp4], $ac2, 9 \n\t" -+ -+ /* clamp and store results */ -+ "lbux %[tp1], %[Temp1](%[cm]) \n\t" -+ "lbux %[tn1], %[Temp2](%[cm]) \n\t" -+ "lbux %[p2], %[Temp3](%[cm]) \n\t" -+ "sb %[tp1], 12(%[dst_ptr]) \n\t" -+ "sb %[tn1], 13(%[dst_ptr]) \n\t" -+ "lbux %[n2], %[Temp4](%[cm]) \n\t" -+ "sb %[p2], 14(%[dst_ptr]) \n\t" -+ "sb %[n2], 15(%[dst_ptr]) \n\t" -+ -+ : [tn1] "=&r" (tn1), [p2] "=&r" (p2), [n2] "=&r" (n2), [n4] "=&r" (n4), -+ [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), -+ [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4) -+ : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), -+ [tp1] "r" (tp1), [p4] "r" (p4), [n1] "r" (n1), [p1] "r" (p1), -+ [vector4a] "r" (vector4a), [vector3b] "r" (vector3b), [p3] "r" (p3), -+ [n3] "r" (n3), [src_ptr] "r" (src_ptr), -+ [cm] "r" (cm), [dst_ptr] "r" (dst_ptr) -+ ); -+ -+ src_ptr += src_pixels_per_line; -+ dst_ptr += pitch; -+ } -+} -+ -+ -+void vp8_filter_block2d_first_pass16_0 -+( -+ unsigned char *RESTRICT src_ptr, -+ unsigned char *RESTRICT output_ptr, -+ unsigned int src_pixels_per_line -+) -+{ -+ int Temp1, Temp2, Temp3, Temp4; -+ int i; -+ -+ /* prefetch src_ptr data to cache memory */ -+ prefetch_store(output_ptr + 32); -+ -+ /* copy memory from src buffer to dst buffer */ -+ for (i = 0; i < 7; i++) -+ { -+ __asm__ __volatile__ ( -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "ulw %[Temp3], 8(%[src_ptr]) \n\t" -+ "ulw %[Temp4], 12(%[src_ptr]) \n\t" -+ "sw %[Temp1], 0(%[output_ptr]) \n\t" -+ "sw %[Temp2], 4(%[output_ptr]) \n\t" -+ "sw 
%[Temp3], 8(%[output_ptr]) \n\t" -+ "sw %[Temp4], 12(%[output_ptr]) \n\t" -+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" -+ -+ : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), [Temp3] "=&r" (Temp3), -+ [Temp4] "=&r" (Temp4), [src_ptr] "+r" (src_ptr) -+ : [src_pixels_per_line] "r" (src_pixels_per_line), -+ [output_ptr] "r" (output_ptr) -+ ); -+ -+ __asm__ __volatile__ ( -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "ulw %[Temp3], 8(%[src_ptr]) \n\t" -+ "ulw %[Temp4], 12(%[src_ptr]) \n\t" -+ "sw %[Temp1], 16(%[output_ptr]) \n\t" -+ "sw %[Temp2], 20(%[output_ptr]) \n\t" -+ "sw %[Temp3], 24(%[output_ptr]) \n\t" -+ "sw %[Temp4], 28(%[output_ptr]) \n\t" -+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" -+ -+ : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), [Temp3] "=&r" (Temp3), -+ [Temp4] "=&r" (Temp4), [src_ptr] "+r" (src_ptr) -+ : [src_pixels_per_line] "r" (src_pixels_per_line), -+ [output_ptr] "r" (output_ptr) -+ ); -+ -+ __asm__ __volatile__ ( -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "ulw %[Temp3], 8(%[src_ptr]) \n\t" -+ "ulw %[Temp4], 12(%[src_ptr]) \n\t" -+ "sw %[Temp1], 32(%[output_ptr]) \n\t" -+ "sw %[Temp2], 36(%[output_ptr]) \n\t" -+ "sw %[Temp3], 40(%[output_ptr]) \n\t" -+ "sw %[Temp4], 44(%[output_ptr]) \n\t" -+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" -+ -+ : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), [Temp3] "=&r" (Temp3), -+ [Temp4] "=&r" (Temp4), [src_ptr] "+r" (src_ptr) -+ : [src_pixels_per_line] "r" (src_pixels_per_line), -+ [output_ptr] "r" (output_ptr) -+ ); -+ -+ output_ptr += 48; -+ } -+} -+ -+ -+void vp8_filter_block2d_first_pass16_4tap -+( -+ unsigned char *RESTRICT src_ptr, -+ unsigned char *RESTRICT output_ptr, -+ unsigned int src_pixels_per_line, -+ unsigned int output_width, -+ unsigned int output_height, -+ int xoffset, -+ int yoffset, -+ unsigned char *RESTRICT dst_ptr, -+ int pitch -+) -+{ -+ unsigned int i, j; -+ int Temp1, Temp2, Temp3, Temp4; -+ -+ unsigned int vector4a; -+ int vector1b, vector2b; -+ unsigned int tp1, tp2, tp3, tn1; -+ unsigned int p1, p2, p3; -+ unsigned int n1, n2, n3; -+ unsigned char *cm = ff_cropTbl + CROP_WIDTH; -+ -+ vector4a = 64; -+ -+ vector1b = sub_pel_filters_inv_tap_4[xoffset][0]; -+ vector2b = sub_pel_filters_inv_tap_4[xoffset][1]; -+ -+ /* if (yoffset == 0) don't need temp buffer, data will be stored in dst_ptr */ -+ if (yoffset == 0) -+ { -+ output_height -= 5; -+ src_ptr += (src_pixels_per_line + src_pixels_per_line); -+ -+ for (i = output_height; i--;) -+ { -+ __asm__ __volatile__ ( -+ "ulw %[tp3], -1(%[src_ptr]) \n\t" -+ : [tp3] "=&r" (tp3) -+ : [src_ptr] "r" (src_ptr) -+ ); -+ -+ /* processing 4 adjacent pixels */ -+ for (j = 0; j < 16; j += 4) -+ { -+ /* apply filter with vectors pairs */ -+ __asm__ __volatile__ ( -+ "ulw %[tp2], 3(%[src_ptr]) \n\t" -+ "move %[tp1], %[tp3] \n\t" -+ -+ /* even 1. pixel */ -+ "mtlo %[vector4a], $ac3 \n\t" -+ "mthi $0, $ac3 \n\t" -+ "move %[tp3], %[tp2] \n\t" -+ "preceu.ph.qbr %[p1], %[tp1] \n\t" -+ "preceu.ph.qbl %[p2], %[tp1] \n\t" -+ "preceu.ph.qbr %[p3], %[tp2] \n\t" -+ "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t" -+ "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t" -+ -+ /* even 2. pixel */ -+ "mtlo %[vector4a], $ac2 \n\t" -+ "mthi $0, $ac2 \n\t" -+ "dpa.w.ph $ac2, %[p2], %[vector1b] \n\t" -+ "dpa.w.ph $ac2, %[p3], %[vector2b] \n\t" -+ "extr.w %[Temp1], $ac3, 7 \n\t" -+ -+ /* odd 1. 
pixel */ -+ "ulw %[tn1], 4(%[src_ptr]) \n\t" -+ "balign %[tp2], %[tp1], 3 \n\t" -+ "mtlo %[vector4a], $ac3 \n\t" -+ "mthi $0, $ac3 \n\t" -+ "preceu.ph.qbr %[n1], %[tp2] \n\t" -+ "preceu.ph.qbl %[n2], %[tp2] \n\t" -+ "preceu.ph.qbr %[n3], %[tn1] \n\t" -+ "extr.w %[Temp3], $ac2, 7 \n\t" -+ "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t" -+ "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t" -+ -+ /* odd 2. pixel */ -+ "mtlo %[vector4a], $ac2 \n\t" -+ "mthi $0, $ac2 \n\t" -+ "extr.w %[Temp2], $ac3, 7 \n\t" -+ "dpa.w.ph $ac2, %[n2], %[vector1b] \n\t" -+ "dpa.w.ph $ac2, %[n3], %[vector2b] \n\t" -+ "extr.w %[Temp4], $ac2, 7 \n\t" -+ -+ /* clamp and store results */ -+ "lbux %[tp1], %[Temp1](%[cm]) \n\t" -+ "lbux %[tn1], %[Temp2](%[cm]) \n\t" -+ "lbux %[tp2], %[Temp3](%[cm]) \n\t" -+ "sb %[tp1], 0(%[dst_ptr]) \n\t" -+ "sb %[tn1], 1(%[dst_ptr]) \n\t" -+ "lbux %[n2], %[Temp4](%[cm]) \n\t" -+ "sb %[tp2], 2(%[dst_ptr]) \n\t" -+ "sb %[n2], 3(%[dst_ptr]) \n\t" -+ -+ : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2), [tp3] "=&r" (tp3), -+ [tn1] "=&r" (tn1), [p1] "=&r" (p1), [p2] "=&r" (p2), -+ [n1] "=&r" (n1), [n2] "=&r" (n2), [n3] "=&r" (n3), -+ [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), [p3] "=&r" (p3), -+ [Temp3] "=&r" (Temp3), [Temp4] "=&r" (Temp4) -+ : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), -+ [vector4a] "r" (vector4a), [cm] "r" (cm), [dst_ptr] "r" (dst_ptr), -+ [src_ptr] "r" (src_ptr) -+ ); -+ -+ src_ptr += 4; -+ } -+ -+ /* Next row... */ -+ src_ptr += src_pixels_per_line - 16; -+ dst_ptr += pitch; -+ } -+ } -+ else -+ { -+ for (i = output_height; i--;) -+ { -+ /* processing 4 adjacent pixels */ -+ for (j = 0; j < 16; j += 4) -+ { -+ /* apply filter with vectors pairs */ -+ __asm__ __volatile__ ( -+ "ulw %[tp1], -1(%[src_ptr]) \n\t" -+ "ulw %[tp2], 3(%[src_ptr]) \n\t" -+ -+ /* even 1. pixel */ -+ "mtlo %[vector4a], $ac3 \n\t" -+ "mthi $0, $ac3 \n\t" -+ "preceu.ph.qbr %[p1], %[tp1] \n\t" -+ "preceu.ph.qbl %[p2], %[tp1] \n\t" -+ "preceu.ph.qbr %[p3], %[tp2] \n\t" -+ "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t" -+ "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t" -+ -+ /* even 2. pixel */ -+ "mtlo %[vector4a], $ac2 \n\t" -+ "mthi $0, $ac2 \n\t" -+ "dpa.w.ph $ac2, %[p2], %[vector1b] \n\t" -+ "dpa.w.ph $ac2, %[p3], %[vector2b] \n\t" -+ "extr.w %[Temp1], $ac3, 7 \n\t" -+ -+ /* odd 1. pixel */ -+ "ulw %[tn1], 4(%[src_ptr]) \n\t" -+ "balign %[tp2], %[tp1], 3 \n\t" -+ "mtlo %[vector4a], $ac3 \n\t" -+ "mthi $0, $ac3 \n\t" -+ "preceu.ph.qbr %[n1], %[tp2] \n\t" -+ "preceu.ph.qbl %[n2], %[tp2] \n\t" -+ "preceu.ph.qbr %[n3], %[tn1] \n\t" -+ "extr.w %[Temp3], $ac2, 7 \n\t" -+ "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t" -+ "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t" -+ -+ /* odd 2. 
pixel */ -+ "mtlo %[vector4a], $ac2 \n\t" -+ "mthi $0, $ac2 \n\t" -+ "extr.w %[Temp2], $ac3, 7 \n\t" -+ "dpa.w.ph $ac2, %[n2], %[vector1b] \n\t" -+ "dpa.w.ph $ac2, %[n3], %[vector2b] \n\t" -+ "extr.w %[Temp4], $ac2, 7 \n\t" -+ -+ /* clamp and store results */ -+ "lbux %[tp1], %[Temp1](%[cm]) \n\t" -+ "lbux %[tn1], %[Temp2](%[cm]) \n\t" -+ "lbux %[tp2], %[Temp3](%[cm]) \n\t" -+ "sb %[tp1], 0(%[output_ptr]) \n\t" -+ "sb %[tn1], 1(%[output_ptr]) \n\t" -+ "lbux %[n2], %[Temp4](%[cm]) \n\t" -+ "sb %[tp2], 2(%[output_ptr]) \n\t" -+ "sb %[n2], 3(%[output_ptr]) \n\t" -+ -+ : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2), [tn1] "=&r" (tn1), -+ [p1] "=&r" (p1), [p2] "=&r" (p2), [p3] "=&r" (p3), -+ [n1] "=&r" (n1), [n2] "=&r" (n2), [n3] "=&r" (n3), -+ [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), -+ [Temp3] "=&r" (Temp3), [Temp4] "=&r" (Temp4) -+ : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), -+ [vector4a] "r" (vector4a), [cm] "r" (cm), -+ [output_ptr] "r" (output_ptr), [src_ptr] "r" (src_ptr) -+ ); -+ -+ src_ptr += 4; -+ } -+ -+ /* next row... */ -+ src_ptr += src_pixels_per_line; -+ output_ptr += output_width; -+ } -+ } -+} -+ -+ -+void vp8_filter_block2d_second_pass4 -+( -+ unsigned char *RESTRICT src_ptr, -+ unsigned char *RESTRICT output_ptr, -+ int output_pitch, -+ int yoffset -+) -+{ -+ unsigned int i; -+ -+ int Temp1, Temp2, Temp3, Temp4; -+ unsigned int vector1b, vector2b, vector3b, vector4a; -+ -+ unsigned char src_ptr_l2; -+ unsigned char src_ptr_l1; -+ unsigned char src_ptr_0; -+ unsigned char src_ptr_r1; -+ unsigned char src_ptr_r2; -+ unsigned char src_ptr_r3; -+ -+ unsigned char *cm = ff_cropTbl + CROP_WIDTH; -+ -+ vector4a = 64; -+ -+ /* load filter coefficients */ -+ vector1b = sub_pel_filterss[yoffset][0]; -+ vector2b = sub_pel_filterss[yoffset][2]; -+ vector3b = sub_pel_filterss[yoffset][1]; -+ -+ if (vector1b) -+ { -+ /* 6 tap filter */ -+ -+ for (i = 2; i--;) -+ { -+ /* prefetch src_ptr data to cache memory */ -+ prefetch_load(src_ptr); -+ -+ /* do not allow compiler to reorder instructions */ -+ __asm__ __volatile__ ( -+ ".set noreorder \n\t" -+ : -+ : -+ ); -+ -+ /* apply filter with vectors pairs */ -+ __asm__ __volatile__ ( -+ "lbu %[src_ptr_l2], -8(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_l1], -4(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 0(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 4(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 8(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r3], 12(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac2 \n\t" -+ -+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac2, %[src_ptr_l2], %[vector1b] \n\t" -+ "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l2], -7(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_l1], -3(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 1(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 5(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 9(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r3], 13(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac3 \n\t" -+ "extp %[Temp1], $ac2, 9 \n\t" -+ -+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac3, %[src_ptr_l2], %[vector1b] \n\t" -+ "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l2], -6(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_l1], -2(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 2(%[src_ptr]) \n\t" -+ 
"lbu %[src_ptr_r1], 6(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 10(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r3], 14(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac0 \n\t" -+ "extp %[Temp2], $ac3, 9 \n\t" -+ -+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac0, %[src_ptr_l2], %[vector1b] \n\t" -+ "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l2], -5(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_l1], -1(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 3(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 7(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 11(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r3], 15(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac1 \n\t" -+ "extp %[Temp3], $ac0, 9 \n\t" -+ -+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac1, %[src_ptr_l2], %[vector1b] \n\t" -+ "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" -+ "extp %[Temp4], $ac1, 9 \n\t" -+ -+ : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), -+ [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4), -+ [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0), -+ [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2), -+ [src_ptr_l2] "=&r" (src_ptr_l2), [src_ptr_r3] "=&r" (src_ptr_r3) -+ : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), -+ [vector3b] "r" (vector3b), [vector4a] "r" (vector4a), -+ [src_ptr] "r" (src_ptr) -+ ); -+ -+ /* clamp and store results */ -+ output_ptr[0] = cm[Temp1]; -+ output_ptr[1] = cm[Temp2]; -+ output_ptr[2] = cm[Temp3]; -+ output_ptr[3] = cm[Temp4]; -+ -+ output_ptr += output_pitch; -+ -+ /* apply filter with vectors pairs */ -+ __asm__ __volatile__ ( -+ "lbu %[src_ptr_l2], -4(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_l1], 0(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 4(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 8(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 12(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r3], 16(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac2 \n\t" -+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac2, %[src_ptr_l2], %[vector1b] \n\t" -+ "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l2], -3(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_l1], 1(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 5(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 9(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 13(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r3], 17(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac3 \n\t" -+ "extp %[Temp1], $ac2, 9 \n\t" -+ -+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac3, %[src_ptr_l2], %[vector1b] \n\t" -+ "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l2], -2(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_l1], 2(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 6(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 10(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 14(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r3], 18(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac0 \n\t" -+ "extp %[Temp2], $ac3, 9 \n\t" -+ -+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 
\n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac0, %[src_ptr_l2], %[vector1b] \n\t" -+ "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l2], -1(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_l1], 3(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 7(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 11(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 15(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r3], 19(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac1 \n\t" -+ "extp %[Temp3], $ac0, 9 \n\t" -+ -+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac1, %[src_ptr_l2], %[vector1b] \n\t" -+ "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" -+ "extp %[Temp4], $ac1, 9 \n\t" -+ -+ : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), -+ [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4), -+ [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0), -+ [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2), -+ [src_ptr_l2] "=&r" (src_ptr_l2), [src_ptr_r3] "=&r" (src_ptr_r3) -+ : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), -+ [vector3b] "r" (vector3b), [vector4a] "r" (vector4a), -+ [src_ptr] "r" (src_ptr) -+ ); -+ -+ /* clamp and store results */ -+ output_ptr[0] = cm[Temp1]; -+ output_ptr[1] = cm[Temp2]; -+ output_ptr[2] = cm[Temp3]; -+ output_ptr[3] = cm[Temp4]; -+ -+ src_ptr += 8; -+ output_ptr += output_pitch; -+ } -+ } -+ else -+ { -+ /* 4 tap filter */ -+ -+ /* prefetch src_ptr data to cache memory */ -+ prefetch_load(src_ptr); -+ -+ for (i = 2; i--;) -+ { -+ /* do not allow compiler to reorder instructions */ -+ __asm__ __volatile__ ( -+ ".set noreorder \n\t" -+ : -+ : -+ ); -+ -+ /* apply filter with vectors pairs */ -+ __asm__ __volatile__ ( -+ "lbu %[src_ptr_l1], -4(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 0(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 4(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 8(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac2 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l1], -3(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 1(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 5(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 9(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac3 \n\t" -+ "extp %[Temp1], $ac2, 9 \n\t" -+ -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l1], -2(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 2(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 6(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 10(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac0 \n\t" -+ "extp %[Temp2], $ac3, 9 \n\t" -+ -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l1], -1(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 3(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 7(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 11(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac1 \n\t" -+ "extp %[Temp3], $ac0, 9 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr 
$ac1, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" -+ "extp %[Temp4], $ac1, 9 \n\t" -+ -+ : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), -+ [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4), -+ [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0), -+ [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2) -+ : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b), -+ [vector4a] "r" (vector4a), [src_ptr] "r" (src_ptr) -+ ); -+ -+ /* clamp and store results */ -+ output_ptr[0] = cm[Temp1]; -+ output_ptr[1] = cm[Temp2]; -+ output_ptr[2] = cm[Temp3]; -+ output_ptr[3] = cm[Temp4]; -+ -+ output_ptr += output_pitch; -+ -+ /* apply filter with vectors pairs */ -+ __asm__ __volatile__ ( -+ "lbu %[src_ptr_l1], 0(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 4(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 8(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 12(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac2 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l1], 1(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 5(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 9(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 13(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac3 \n\t" -+ "extp %[Temp1], $ac2, 9 \n\t" -+ -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l1], 2(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 6(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 10(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 14(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac0 \n\t" -+ "extp %[Temp2], $ac3, 9 \n\t" -+ -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l1], 3(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 7(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 11(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 15(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac1 \n\t" -+ "extp %[Temp3], $ac0, 9 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" -+ "extp %[Temp4], $ac1, 9 \n\t" -+ -+ : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), -+ [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4), -+ [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0), -+ [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2) -+ : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b), -+ [vector4a] "r" (vector4a), [src_ptr] "r" (src_ptr) -+ ); -+ -+ /* clamp and store results */ -+ output_ptr[0] = cm[Temp1]; -+ output_ptr[1] = cm[Temp2]; -+ output_ptr[2] = cm[Temp3]; -+ output_ptr[3] = cm[Temp4]; -+ -+ src_ptr += 8; -+ output_ptr += output_pitch; -+ } -+ } -+} -+ -+ -+void vp8_filter_block2d_second_pass_8 -+( -+ unsigned char *RESTRICT src_ptr, -+ unsigned char *RESTRICT output_ptr, -+ int output_pitch, -+ unsigned int output_height, -+ unsigned int output_width, -+ unsigned int yoffset -+) -+{ -+ unsigned int i; -+ -+ int Temp1, Temp2, Temp3, Temp4, Temp5, Temp6, Temp7, Temp8; -+ unsigned int vector1b, vector2b, vector3b, vector4a; -+ -+ unsigned char src_ptr_l2; -+ unsigned char src_ptr_l1; -+ 
unsigned char src_ptr_0; -+ unsigned char src_ptr_r1; -+ unsigned char src_ptr_r2; -+ unsigned char src_ptr_r3; -+ unsigned char *cm = ff_cropTbl + CROP_WIDTH; -+ -+ vector4a = 64; -+ -+ vector1b = sub_pel_filterss[yoffset][0]; -+ vector2b = sub_pel_filterss[yoffset][2]; -+ vector3b = sub_pel_filterss[yoffset][1]; -+ -+ if (vector1b) -+ { -+ /* 6 tap filter */ -+ -+ /* prefetch src_ptr data to cache memory */ -+ prefetch_load(src_ptr); -+ -+ for (i = output_height; i--;) -+ { -+ /* apply filter with vectors pairs */ -+ __asm__ __volatile__ ( -+ "lbu %[src_ptr_l2], -16(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_l1], -8(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 0(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 8(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 16(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r3], 24(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac2 \n\t" -+ -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" -+ "dpau.h.qbr $ac2, %[src_ptr_l2], %[vector1b] \n\t" -+ "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l2], -15(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_l1], -7(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 1(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 9(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 17(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r3], 25(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac3 \n\t" -+ "extp %[Temp1], $ac2, 9 \n\t" -+ -+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac3, %[src_ptr_l2], %[vector1b] \n\t" -+ "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l2], -14(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_l1], -6(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 2(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 10(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 18(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r3], 26(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac0 \n\t" -+ "extp %[Temp2], $ac3, 9 \n\t" -+ -+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac0, %[src_ptr_l2], %[vector1b] \n\t" -+ "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l2], -13(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_l1], -5(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 3(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 11(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 19(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r3], 27(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac1 \n\t" -+ "extp %[Temp3], $ac0, 9 \n\t" -+ -+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac1, %[src_ptr_l2], %[vector1b] \n\t" -+ "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), [Temp3] "=&r" (Temp3), -+ [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0), -+ [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2), -+ [src_ptr_l2] "=&r" (src_ptr_l2), [src_ptr_r3] "=&r" (src_ptr_r3) -+ : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), -+ [vector3b] "r" (vector3b), [vector4a] "r" (vector4a), -+ [src_ptr] "r" (src_ptr) -+ ); -+ -+ /* apply filter with vectors pairs */ -+ 
__asm__ __volatile__ ( -+ "lbu %[src_ptr_l2], -12(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_l1], -4(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 4(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 12(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 20(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r3], 28(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac2 \n\t" -+ -+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac2, %[src_ptr_l2], %[vector1b] \n\t" -+ "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" -+ "extp %[Temp4], $ac1, 9 \n\t" -+ -+ "lbu %[src_ptr_l2], -11(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_l1], -3(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 5(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 13(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 21(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r3], 29(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac3 \n\t" -+ "extp %[Temp5], $ac2, 9 \n\t" -+ -+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac3, %[src_ptr_l2], %[vector1b] \n\t" -+ "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l2], -10(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_l1], -2(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 6(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 14(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 22(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r3], 30(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac0 \n\t" -+ "extp %[Temp6], $ac3, 9 \n\t" -+ -+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac0, %[src_ptr_l2], %[vector1b] \n\t" -+ "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l2], -9(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_l1], -1(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 7(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 15(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 23(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r3], 31(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac1 \n\t" -+ "extp %[Temp7], $ac0, 9 \n\t" -+ -+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac1, %[src_ptr_l2], %[vector1b] \n\t" -+ "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" -+ "extp %[Temp8], $ac1, 9 \n\t" -+ -+ : [Temp4] "=&r" (Temp4), [Temp5] "=&r" (Temp5), -+ [Temp6] "=&r" (Temp6), [Temp7] "=&r" (Temp7), [Temp8] "=r" (Temp8), -+ [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0), -+ [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2), -+ [src_ptr_l2] "=&r" (src_ptr_l2),[src_ptr_r3] "=&r" (src_ptr_r3) -+ : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), -+ [vector3b] "r" (vector3b), [vector4a] "r" (vector4a), -+ [src_ptr] "r" (src_ptr) -+ ); -+ -+ /* clamp and store results */ -+ output_ptr[0] = cm[Temp1]; -+ output_ptr[1] = cm[Temp2]; -+ output_ptr[2] = cm[Temp3]; -+ output_ptr[3] = cm[Temp4]; -+ output_ptr[4] = cm[Temp5]; -+ output_ptr[5] = cm[Temp6]; -+ output_ptr[6] = cm[Temp7]; -+ output_ptr[7] = cm[Temp8]; -+ -+ src_ptr += 8; -+ output_ptr += output_pitch; -+ } -+ } -+ else -+ { -+ /* 4 tap filter */ -+ -+ /* prefetch src_ptr data to cache memory */ -+ prefetch_load(src_ptr); -+ -+ 
for (i = output_height; i--;) -+ { -+ __asm__ __volatile__ ( -+ "lbu %[src_ptr_l1], -8(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 0(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 8(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 16(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac2 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ : [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0), -+ [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2) -+ : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b), -+ [vector4a] "r" (vector4a), [src_ptr] "r" (src_ptr) -+ ); -+ -+ __asm__ __volatile__ ( -+ "lbu %[src_ptr_l1], -7(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 1(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 9(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 17(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac3 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" -+ "extp %[Temp1], $ac2, 9 \n\t" -+ -+ : [Temp1] "=r" (Temp1), -+ [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0), -+ [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2) -+ : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b), -+ [vector4a] "r" (vector4a), [src_ptr] "r" (src_ptr) -+ ); -+ -+ src_ptr_l1 = src_ptr[-6]; -+ src_ptr_0 = src_ptr[2]; -+ src_ptr_r1 = src_ptr[10]; -+ src_ptr_r2 = src_ptr[18]; -+ -+ __asm__ __volatile__ ( -+ "mtlo %[vector4a], $ac0 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" -+ "extp %[Temp2], $ac3, 9 \n\t" -+ -+ : [Temp2] "=r" (Temp2) -+ : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b), -+ [src_ptr_l1] "r" (src_ptr_l1), [src_ptr_0] "r" (src_ptr_0), -+ [src_ptr_r1] "r" (src_ptr_r1), [src_ptr_r2] "r" (src_ptr_r2), -+ [vector4a] "r" (vector4a) -+ ); -+ -+ src_ptr_l1 = src_ptr[-5]; -+ src_ptr_0 = src_ptr[3]; -+ src_ptr_r1 = src_ptr[11]; -+ src_ptr_r2 = src_ptr[19]; -+ -+ __asm__ __volatile__ ( -+ "mtlo %[vector4a], $ac1 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" -+ "extp %[Temp3], $ac0, 9 \n\t" -+ -+ : [Temp3] "=r" (Temp3) -+ : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b), -+ [src_ptr_l1] "r" (src_ptr_l1), [src_ptr_0] "r" (src_ptr_0), -+ [src_ptr_r1] "r" (src_ptr_r1), [src_ptr_r2] "r" (src_ptr_r2), -+ [vector4a] "r" (vector4a) -+ ); -+ -+ src_ptr_l1 = src_ptr[-4]; -+ src_ptr_0 = src_ptr[4]; -+ src_ptr_r1 = src_ptr[12]; -+ src_ptr_r2 = src_ptr[20]; -+ -+ __asm__ __volatile__ ( -+ "mtlo %[vector4a], $ac2 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" -+ "extp %[Temp4], $ac1, 9 \n\t" -+ -+ : [Temp4] "=r" (Temp4) -+ : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b), -+ [src_ptr_l1] "r" (src_ptr_l1), [src_ptr_0] "r" (src_ptr_0), -+ [src_ptr_r1] "r" (src_ptr_r1), [src_ptr_r2] "r" (src_ptr_r2), -+ [vector4a] "r" (vector4a) -+ ); -+ -+ src_ptr_l1 = src_ptr[-3]; -+ src_ptr_0 = src_ptr[5]; -+ 
src_ptr_r1 = src_ptr[13]; -+ src_ptr_r2 = src_ptr[21]; -+ -+ __asm__ __volatile__ ( -+ "mtlo %[vector4a], $ac3 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" -+ "extp %[Temp5], $ac2, 9 \n\t" -+ -+ : [Temp5] "=&r" (Temp5) -+ : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b), -+ [src_ptr_l1] "r" (src_ptr_l1), [src_ptr_0] "r" (src_ptr_0), -+ [src_ptr_r1] "r" (src_ptr_r1), [src_ptr_r2] "r" (src_ptr_r2), -+ [vector4a] "r" (vector4a) -+ ); -+ -+ src_ptr_l1 = src_ptr[-2]; -+ src_ptr_0 = src_ptr[6]; -+ src_ptr_r1 = src_ptr[14]; -+ src_ptr_r2 = src_ptr[22]; -+ -+ __asm__ __volatile__ ( -+ "mtlo %[vector4a], $ac0 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" -+ "extp %[Temp6], $ac3, 9 \n\t" -+ -+ : [Temp6] "=r" (Temp6) -+ : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b), -+ [src_ptr_l1] "r" (src_ptr_l1), [src_ptr_0] "r" (src_ptr_0), -+ [src_ptr_r1] "r" (src_ptr_r1), [src_ptr_r2] "r" (src_ptr_r2), -+ [vector4a] "r" (vector4a) -+ ); -+ -+ src_ptr_l1 = src_ptr[-1]; -+ src_ptr_0 = src_ptr[7]; -+ src_ptr_r1 = src_ptr[15]; -+ src_ptr_r2 = src_ptr[23]; -+ -+ __asm__ __volatile__ ( -+ "mtlo %[vector4a], $ac1 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" -+ "extp %[Temp7], $ac0, 9 \n\t" -+ "extp %[Temp8], $ac1, 9 \n\t" -+ -+ : [Temp7] "=&r" (Temp7), [Temp8] "=r" (Temp8) -+ : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b), -+ [src_ptr_l1] "r" (src_ptr_l1), [src_ptr_0] "r" (src_ptr_0), -+ [src_ptr_r1] "r" (src_ptr_r1), [src_ptr_r2] "r" (src_ptr_r2), -+ [vector4a] "r" (vector4a) -+ ); -+ -+ /* clamp and store results */ -+ output_ptr[0] = cm[Temp1]; -+ output_ptr[1] = cm[Temp2]; -+ output_ptr[2] = cm[Temp3]; -+ output_ptr[3] = cm[Temp4]; -+ output_ptr[4] = cm[Temp5]; -+ output_ptr[5] = cm[Temp6]; -+ output_ptr[6] = cm[Temp7]; -+ output_ptr[7] = cm[Temp8]; -+ -+ src_ptr += 8; -+ output_ptr += output_pitch; -+ } -+ } -+} -+ -+ -+void vp8_filter_block2d_second_pass161 -+( -+ unsigned char *RESTRICT src_ptr, -+ unsigned char *RESTRICT output_ptr, -+ int output_pitch, -+ const unsigned short *vp8_filter -+) -+{ -+ unsigned int i, j; -+ -+ int Temp1, Temp2, Temp3, Temp4, Temp5, Temp6, Temp7, Temp8; -+ unsigned int vector4a; -+ unsigned int vector1b, vector2b, vector3b; -+ -+ unsigned char src_ptr_l2; -+ unsigned char src_ptr_l1; -+ unsigned char src_ptr_0; -+ unsigned char src_ptr_r1; -+ unsigned char src_ptr_r2; -+ unsigned char src_ptr_r3; -+ unsigned char *cm = ff_cropTbl + CROP_WIDTH; -+ -+ vector4a = 64; -+ -+ vector1b = vp8_filter[0]; -+ vector2b = vp8_filter[2]; -+ vector3b = vp8_filter[1]; -+ -+ if (vector1b == 0) -+ { -+ /* 4 tap filter */ -+ -+ /* prefetch src_ptr data to cache memory */ -+ prefetch_load(src_ptr + 16); -+ -+ for (i = 16; i--;) -+ { -+ /* unrolling for loop */ -+ for (j = 0; j < 16; j += 8) -+ { -+ /* apply filter with vectors pairs */ -+ __asm__ __volatile__ ( -+ "lbu %[src_ptr_l1], -16(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 0(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 16(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 32(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac2 \n\t" -+ "append %[src_ptr_0], 
%[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l1], -15(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 1(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 17(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 33(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac3 \n\t" -+ "extp %[Temp1], $ac2, 9 \n\t" -+ -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l1], -14(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 2(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 18(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 34(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac1 \n\t" -+ "extp %[Temp2], $ac3, 9 \n\t" -+ -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l1], -13(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 3(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 19(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 35(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac3 \n\t" -+ "extp %[Temp3], $ac1, 9 \n\t" -+ -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l1], -12(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 4(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 20(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 36(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac2 \n\t" -+ "extp %[Temp4], $ac3, 9 \n\t" -+ -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l1], -11(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 5(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 21(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 37(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac3 \n\t" -+ "extp %[Temp5], $ac2, 9 \n\t" -+ -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l1], -10(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 6(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 22(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 38(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac1 \n\t" -+ "extp %[Temp6], $ac3, 9 \n\t" -+ -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l1], -9(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 7(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 23(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 39(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac3 \n\t" -+ "extp %[Temp7], $ac1, 9 \n\t" -+ -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" -+ "extp %[Temp8], $ac3, 9 \n\t" -+ -+ : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), -+ [Temp3] "=&r" (Temp3), [Temp4] "=&r" (Temp4), -+ [Temp5] "=&r" (Temp5), [Temp6] "=&r" (Temp6), -+ [Temp7] "=&r" (Temp7), [Temp8] 
"=r" (Temp8), -+ [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0), -+ [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2) -+ : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b), -+ [vector4a] "r" (vector4a), [src_ptr] "r" (src_ptr) -+ ); -+ -+ /* clamp and store results */ -+ output_ptr[j] = cm[Temp1]; -+ output_ptr[j + 1] = cm[Temp2]; -+ output_ptr[j + 2] = cm[Temp3]; -+ output_ptr[j + 3] = cm[Temp4]; -+ output_ptr[j + 4] = cm[Temp5]; -+ output_ptr[j + 5] = cm[Temp6]; -+ output_ptr[j + 6] = cm[Temp7]; -+ output_ptr[j + 7] = cm[Temp8]; -+ -+ src_ptr += 8; -+ } -+ -+ output_ptr += output_pitch; -+ } -+ } -+ else -+ { -+ /* 4 tap filter */ -+ -+ /* prefetch src_ptr data to cache memory */ -+ prefetch_load(src_ptr + 16); -+ -+ /* unroll for loop */ -+ for (i = 16; i--;) -+ { -+ /* apply filter with vectors pairs */ -+ __asm__ __volatile__ ( -+ "lbu %[src_ptr_l2], -32(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_l1], -16(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 0(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 16(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 32(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r3], 48(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac2 \n\t" -+ -+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac2, %[src_ptr_l2], %[vector1b] \n\t" -+ "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l2], -31(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_l1], -15(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 1(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 17(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 33(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r3], 49(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac0 \n\t" -+ "extp %[Temp1], $ac2, 9 \n\t" -+ -+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac0, %[src_ptr_l2], %[vector1b] \n\t" -+ "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l2], -30(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_l1], -14(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 2(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 18(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 34(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r3], 50(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac1 \n\t" -+ "extp %[Temp2], $ac0, 9 \n\t" -+ -+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac1, %[src_ptr_l2], %[vector1b] \n\t" -+ "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l2], -29(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_l1], -13(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 3(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 19(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 35(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r3], 51(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac3 \n\t" -+ "extp %[Temp3], $ac1, 9 \n\t" -+ -+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac3, %[src_ptr_l2], %[vector1b] \n\t" -+ "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l2], -28(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_l1], -12(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 4(%[src_ptr]) \n\t" -+ 
"lbu %[src_ptr_r1], 20(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 36(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r3], 52(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac2 \n\t" -+ "extp %[Temp4], $ac3, 9 \n\t" -+ -+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac2, %[src_ptr_l2], %[vector1b] \n\t" -+ "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l2], -27(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_l1], -11(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 5(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 21(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 37(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r3], 53(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac0 \n\t" -+ "extp %[Temp5], $ac2, 9 \n\t" -+ -+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac0, %[src_ptr_l2], %[vector1b] \n\t" -+ "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l2], -26(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_l1], -10(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 6(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 22(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 38(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r3], 54(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac1 \n\t" -+ "extp %[Temp6], $ac0, 9 \n\t" -+ -+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac1, %[src_ptr_l2], %[vector1b] \n\t" -+ "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l2], -25(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_l1], -9(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 7(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 23(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 39(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r3], 55(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac3 \n\t" -+ "extp %[Temp7], $ac1, 9 \n\t" -+ -+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac3, %[src_ptr_l2], %[vector1b] \n\t" -+ "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" -+ "extp %[Temp8], $ac3, 9 \n\t" -+ -+ : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), -+ [Temp3] "=&r" (Temp3), [Temp4] "=&r" (Temp4), -+ [Temp5] "=&r" (Temp5), [Temp6] "=&r" (Temp6), -+ [Temp7] "=&r" (Temp7), [Temp8] "=r" (Temp8), -+ [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0), -+ [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2), -+ [src_ptr_l2] "=&r" (src_ptr_l2),[src_ptr_r3] "=&r" (src_ptr_r3) -+ : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), -+ [vector3b] "r" (vector3b), [vector4a] "r" (vector4a), -+ [src_ptr] "r" (src_ptr) -+ ); -+ -+ /* clamp and store results */ -+ output_ptr[0] = cm[Temp1]; -+ output_ptr[1] = cm[Temp2]; -+ output_ptr[2] = cm[Temp3]; -+ output_ptr[3] = cm[Temp4]; -+ output_ptr[4] = cm[Temp5]; -+ output_ptr[5] = cm[Temp6]; -+ output_ptr[6] = cm[Temp7]; -+ output_ptr[7] = cm[Temp8]; -+ -+ /* apply filter with vectors pairs */ -+ __asm__ __volatile__ ( -+ "lbu %[src_ptr_l2], -24(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_l1], -8(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 8(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 24(%[src_ptr]) \n\t" -+ "lbu 
%[src_ptr_r2], 40(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r3], 56(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac2 \n\t" -+ -+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac2, %[src_ptr_l2], %[vector1b] \n\t" -+ "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l2], -23(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_l1], -7(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 9(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 25(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 41(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r3], 57(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac0 \n\t" -+ "extp %[Temp1], $ac2, 9 \n\t" -+ -+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac0, %[src_ptr_l2], %[vector1b] \n\t" -+ "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l2], -22(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_l1], -6(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 10(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 26(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 42(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r3], 58(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac1 \n\t" -+ "extp %[Temp2], $ac0, 9 \n\t" -+ -+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac1, %[src_ptr_l2], %[vector1b] \n\t" -+ "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l2], -21(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_l1], -5(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 11(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 27(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 43(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r3], 59(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac3 \n\t" -+ "extp %[Temp3], $ac1, 9 \n\t" -+ -+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac3, %[src_ptr_l2], %[vector1b] \n\t" -+ "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l2], -20(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_l1], -4(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 12(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 28(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 44(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r3], 60(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac2 \n\t" -+ "extp %[Temp4], $ac3, 9 \n\t" -+ -+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac2, %[src_ptr_l2], %[vector1b] \n\t" -+ "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l2], -19(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_l1], -3(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 13(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 29(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 45(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r3], 61(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac0 \n\t" -+ "extp %[Temp5], $ac2, 9 \n\t" -+ -+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac0, %[src_ptr_l2], %[vector1b] \n\t" -+ "dpau.h.qbr $ac0, 
%[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l2], -18(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_l1], -2(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 14(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 30(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 46(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r3], 62(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac1 \n\t" -+ "extp %[Temp6], $ac0, 9 \n\t" -+ -+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac1, %[src_ptr_l2], %[vector1b] \n\t" -+ "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" -+ -+ "lbu %[src_ptr_l2], -17(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_l1], -1(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_0], 15(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r1], 31(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r2], 47(%[src_ptr]) \n\t" -+ "lbu %[src_ptr_r3], 63(%[src_ptr]) \n\t" -+ "mtlo %[vector4a], $ac3 \n\t" -+ "extp %[Temp7], $ac1, 9 \n\t" -+ -+ "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" -+ "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" -+ "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" -+ "dpau.h.qbr $ac3, %[src_ptr_l2], %[vector1b] \n\t" -+ "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" -+ "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" -+ "extp %[Temp8], $ac3, 9 \n\t" -+ -+ : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), -+ [Temp3] "=&r" (Temp3), [Temp4] "=&r" (Temp4), -+ [Temp5] "=&r" (Temp5), [Temp6] "=&r" (Temp6), -+ [Temp7] "=&r" (Temp7), [Temp8] "=r" (Temp8), -+ [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0), -+ [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2), -+ [src_ptr_l2] "=&r" (src_ptr_l2), [src_ptr_r3] "=&r" (src_ptr_r3) -+ : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), -+ [vector3b] "r" (vector3b), [vector4a] "r" (vector4a), -+ [src_ptr] "r" (src_ptr) -+ ); -+ -+ src_ptr += 16; -+ output_ptr[8] = cm[Temp1]; -+ output_ptr[9] = cm[Temp2]; -+ output_ptr[10] = cm[Temp3]; -+ output_ptr[11] = cm[Temp4]; -+ output_ptr[12] = cm[Temp5]; -+ output_ptr[13] = cm[Temp6]; -+ output_ptr[14] = cm[Temp7]; -+ output_ptr[15] = cm[Temp8]; -+ -+ output_ptr += output_pitch; -+ } -+ } -+} -+ -+ -+void vp8_sixtap_predict4x4_dspr2 -+( -+ unsigned char *RESTRICT src_ptr, -+ int src_pixels_per_line, -+ int xoffset, -+ int yoffset, -+ unsigned char *RESTRICT dst_ptr, -+ int dst_pitch -+) -+{ -+ unsigned char FData[9 * 4]; /* Temp data bufffer used in filtering */ -+ unsigned int pos = 16; -+ -+ /* bit positon for extract from acc */ -+ __asm__ __volatile__ ( -+ "wrdsp %[pos], 1 \n\t" -+ : -+ : [pos] "r" (pos) -+ ); -+ -+ if (yoffset) -+ { -+ /* First filter 1-D horizontally... */ -+ vp8_filter_block2d_first_pass_4(src_ptr - (2 * src_pixels_per_line), FData, -+ src_pixels_per_line, 9, xoffset, 4); -+ /* then filter verticaly... 
*/ -+ vp8_filter_block2d_second_pass4(FData + 8, dst_ptr, dst_pitch, yoffset); -+ } -+ else -+ /* if (yoffsset == 0) vp8_filter_block2d_first_pass save data to dst_ptr */ -+ vp8_filter_block2d_first_pass_4(src_ptr, dst_ptr, src_pixels_per_line, -+ 4, xoffset, dst_pitch); -+} -+ -+ -+void vp8_sixtap_predict8x8_dspr2 -+( -+ unsigned char *RESTRICT src_ptr, -+ int src_pixels_per_line, -+ int xoffset, -+ int yoffset, -+ unsigned char *RESTRICT dst_ptr, -+ int dst_pitch -+) -+{ -+ -+ unsigned char FData[13 * 8]; /* Temp data bufffer used in filtering */ -+ unsigned int pos, Temp1, Temp2; -+ -+ pos = 16; -+ -+ /* bit positon for extract from acc */ -+ __asm__ __volatile__ ( -+ "wrdsp %[pos], 1 \n\t" -+ : -+ : [pos] "r" (pos) -+ ); -+ -+ if (yoffset) -+ { -+ -+ src_ptr = src_ptr - (2 * src_pixels_per_line); -+ -+ if (xoffset) -+ /* filter 1-D horizontally... */ -+ vp8_filter_block2d_first_pass_8_all(src_ptr, FData, src_pixels_per_line, -+ 13, xoffset, 8); -+ -+ else -+ { -+ /* prefetch src_ptr data to cache memory */ -+ prefetch_load(src_ptr + 2 * src_pixels_per_line); -+ -+ __asm__ __volatile__ ( -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 0(%[FData]) \n\t" -+ "sw %[Temp2], 4(%[FData]) \n\t" -+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" -+ -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 8(%[FData]) \n\t" -+ "sw %[Temp2], 12(%[FData]) \n\t" -+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" -+ -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 16(%[FData]) \n\t" -+ "sw %[Temp2], 20(%[FData]) \n\t" -+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" -+ -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 24(%[FData]) \n\t" -+ "sw %[Temp2], 28(%[FData]) \n\t" -+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" -+ -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 32(%[FData]) \n\t" -+ "sw %[Temp2], 36(%[FData]) \n\t" -+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" -+ -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 40(%[FData]) \n\t" -+ "sw %[Temp2], 44(%[FData]) \n\t" -+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" -+ -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 48(%[FData]) \n\t" -+ "sw %[Temp2], 52(%[FData]) \n\t" -+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" -+ -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 56(%[FData]) \n\t" -+ "sw %[Temp2], 60(%[FData]) \n\t" -+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" -+ -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 64(%[FData]) \n\t" -+ "sw %[Temp2], 68(%[FData]) \n\t" -+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" -+ -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 72(%[FData]) \n\t" -+ "sw %[Temp2], 76(%[FData]) \n\t" -+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" -+ -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 80(%[FData]) \n\t" -+ "sw %[Temp2], 84(%[FData]) \n\t" -+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" -+ -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 88(%[FData]) 
\n\t" -+ "sw %[Temp2], 92(%[FData]) \n\t" -+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" -+ -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 96(%[FData]) \n\t" -+ "sw %[Temp2], 100(%[FData]) \n\t" -+ -+ : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2) -+ : [FData] "r" (FData), [src_ptr] "r" (src_ptr), -+ [src_pixels_per_line] "r" (src_pixels_per_line) -+ ); -+ } -+ -+ /* filter verticaly... */ -+ vp8_filter_block2d_second_pass_8(FData + 16, dst_ptr, dst_pitch, 8, 8, yoffset); -+ } -+ -+ /* if (yoffsset == 0) vp8_filter_block2d_first_pass save data to dst_ptr */ -+ else -+ { -+ if (xoffset) -+ vp8_filter_block2d_first_pass_8_all(src_ptr, dst_ptr, src_pixels_per_line, -+ 8, xoffset, dst_pitch); -+ -+ else -+ { -+ /* copy from src buffer to dst buffer */ -+ __asm__ __volatile__ ( -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 0(%[dst_ptr]) \n\t" -+ "sw %[Temp2], 4(%[dst_ptr]) \n\t" -+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" -+ -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 8(%[dst_ptr]) \n\t" -+ "sw %[Temp2], 12(%[dst_ptr]) \n\t" -+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" -+ -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 16(%[dst_ptr]) \n\t" -+ "sw %[Temp2], 20(%[dst_ptr]) \n\t" -+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" -+ -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 24(%[dst_ptr]) \n\t" -+ "sw %[Temp2], 28(%[dst_ptr]) \n\t" -+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" -+ -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 32(%[dst_ptr]) \n\t" -+ "sw %[Temp2], 36(%[dst_ptr]) \n\t" -+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" -+ -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 40(%[dst_ptr]) \n\t" -+ "sw %[Temp2], 44(%[dst_ptr]) \n\t" -+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" -+ -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 48(%[dst_ptr]) \n\t" -+ "sw %[Temp2], 52(%[dst_ptr]) \n\t" -+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" -+ -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 56(%[dst_ptr]) \n\t" -+ "sw %[Temp2], 60(%[dst_ptr]) \n\t" -+ -+ : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2) -+ : [dst_ptr] "r" (dst_ptr), [src_ptr] "r" (src_ptr), -+ [src_pixels_per_line] "r" (src_pixels_per_line) -+ ); -+ } -+ } -+} -+ -+ -+void vp8_sixtap_predict8x4_dspr2 -+( -+ unsigned char *RESTRICT src_ptr, -+ int src_pixels_per_line, -+ int xoffset, -+ int yoffset, -+ unsigned char *RESTRICT dst_ptr, -+ int dst_pitch -+) -+{ -+ unsigned char FData[9 * 8]; /* Temp data bufffer used in filtering */ -+ unsigned int pos, Temp1, Temp2; -+ -+ pos = 16; -+ -+ /* bit positon for extract from acc */ -+ __asm__ __volatile__ ( -+ "wrdsp %[pos], 1 \n\t" -+ : -+ : [pos] "r" (pos) -+ ); -+ -+ if (yoffset) -+ { -+ -+ src_ptr = src_ptr - (2 * src_pixels_per_line); -+ -+ if (xoffset) -+ /* filter 1-D horizontally... 
*/ -+ vp8_filter_block2d_first_pass_8_all(src_ptr, FData, src_pixels_per_line, -+ 9, xoffset, 8); -+ -+ else -+ { -+ /* prefetch src_ptr data to cache memory */ -+ prefetch_load(src_ptr + 2 * src_pixels_per_line); -+ -+ __asm__ __volatile__ ( -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 0(%[FData]) \n\t" -+ "sw %[Temp2], 4(%[FData]) \n\t" -+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" -+ -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 8(%[FData]) \n\t" -+ "sw %[Temp2], 12(%[FData]) \n\t" -+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" -+ -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 16(%[FData]) \n\t" -+ "sw %[Temp2], 20(%[FData]) \n\t" -+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" -+ -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 24(%[FData]) \n\t" -+ "sw %[Temp2], 28(%[FData]) \n\t" -+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" -+ -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 32(%[FData]) \n\t" -+ "sw %[Temp2], 36(%[FData]) \n\t" -+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" -+ -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 40(%[FData]) \n\t" -+ "sw %[Temp2], 44(%[FData]) \n\t" -+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" -+ -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 48(%[FData]) \n\t" -+ "sw %[Temp2], 52(%[FData]) \n\t" -+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" -+ -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 56(%[FData]) \n\t" -+ "sw %[Temp2], 60(%[FData]) \n\t" -+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" -+ -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 64(%[FData]) \n\t" -+ "sw %[Temp2], 68(%[FData]) \n\t" -+ -+ : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2) -+ : [FData] "r" (FData), [src_ptr] "r" (src_ptr), -+ [src_pixels_per_line] "r" (src_pixels_per_line) -+ ); -+ } -+ -+ /* filter verticaly... 
*/ -+ vp8_filter_block2d_second_pass_8(FData + 16, dst_ptr, dst_pitch, 4, 8, yoffset); -+ } -+ -+ /* if (yoffsset == 0) vp8_filter_block2d_first_pass save data to dst_ptr */ -+ else -+ { -+ if (xoffset) -+ vp8_filter_block2d_first_pass_8_all(src_ptr, dst_ptr, src_pixels_per_line, -+ 4, xoffset, dst_pitch); -+ -+ else -+ { -+ /* copy from src buffer to dst buffer */ -+ __asm__ __volatile__ ( -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 0(%[dst_ptr]) \n\t" -+ "sw %[Temp2], 4(%[dst_ptr]) \n\t" -+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" -+ -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 8(%[dst_ptr]) \n\t" -+ "sw %[Temp2], 12(%[dst_ptr]) \n\t" -+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" -+ -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 16(%[dst_ptr]) \n\t" -+ "sw %[Temp2], 20(%[dst_ptr]) \n\t" -+ "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" -+ -+ "ulw %[Temp1], 0(%[src_ptr]) \n\t" -+ "ulw %[Temp2], 4(%[src_ptr]) \n\t" -+ "sw %[Temp1], 24(%[dst_ptr]) \n\t" -+ "sw %[Temp2], 28(%[dst_ptr]) \n\t" -+ -+ : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2) -+ : [dst_ptr] "r" (dst_ptr), [src_ptr] "r" (src_ptr), -+ [src_pixels_per_line] "r" (src_pixels_per_line) -+ ); -+ } -+ } -+} -+ -+ -+void vp8_sixtap_predict16x16_dspr2 -+( -+ unsigned char *RESTRICT src_ptr, -+ int src_pixels_per_line, -+ int xoffset, -+ int yoffset, -+ unsigned char *RESTRICT dst_ptr, -+ int dst_pitch -+) -+{ -+ const unsigned short *VFilter; -+ unsigned char FData[21 * 16]; /* Temp data bufffer used in filtering */ -+ unsigned int pos; -+ -+ VFilter = sub_pel_filterss[yoffset]; -+ -+ pos = 16; -+ -+ /* bit positon for extract from acc */ -+ __asm__ __volatile__ ( -+ "wrdsp %[pos], 1 \n\t" -+ : -+ : [pos] "r" (pos) -+ ); -+ -+ if (yoffset) -+ { -+ -+ src_ptr = src_ptr - (2 * src_pixels_per_line); -+ -+ switch (xoffset) -+ { -+ /* filter 1-D horizontally... */ -+ case 2: -+ case 4: -+ case 6: -+ /* 6 tap filter */ -+ vp8_filter_block2d_first_pass16_6tap(src_ptr, FData, src_pixels_per_line, -+ 21, xoffset, 16); -+ break; -+ -+ case 0: -+ /* only copy buffer */ -+ vp8_filter_block2d_first_pass16_0(src_ptr, FData, src_pixels_per_line); -+ break; -+ -+ case 1: -+ case 3: -+ case 5: -+ case 7: -+ /* 4 tap filter */ -+ vp8_filter_block2d_first_pass16_4tap(src_ptr, FData, src_pixels_per_line, 16, -+ 21, xoffset, yoffset, dst_ptr, dst_pitch); -+ break; -+ } -+ -+ /* filter verticaly... */ -+ vp8_filter_block2d_second_pass161(FData + 32, dst_ptr, dst_pitch, VFilter); -+ } -+ else -+ { -+ /* if (yoffsset == 0) vp8_filter_block2d_first_pass save data to dst_ptr */ -+ switch (xoffset) -+ { -+ case 2: -+ case 4: -+ case 6: -+ /* 6 tap filter */ -+ vp8_filter_block2d_first_pass16_6tap(src_ptr, dst_ptr, src_pixels_per_line, -+ 16, xoffset, dst_pitch); -+ break; -+ -+ case 1: -+ case 3: -+ case 5: -+ case 7: -+ /* 4 tap filter */ -+ vp8_filter_block2d_first_pass16_4tap(src_ptr, dst_ptr, src_pixels_per_line, 16, -+ 21, xoffset, yoffset, dst_ptr, dst_pitch); -+ break; -+ } -+ } -+} -+ -+#endif -diff --git a/vp8/common/mips/dspr2/idct_blk_dspr2.c b/vp8/common/mips/dspr2/idct_blk_dspr2.c -new file mode 100644 -index 0000000..1e0ebd1 ---- /dev/null -+++ b/vp8/common/mips/dspr2/idct_blk_dspr2.c -@@ -0,0 +1,88 @@ -+/* -+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. 
-+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. -+ */ -+ -+#include "vpx_config.h" -+#include "vpx_rtcd.h" -+ -+#if HAVE_DSPR2 -+ -+void vp8_dequant_idct_add_y_block_dspr2 -+(short *q, short *dq, -+ unsigned char *dst, int stride, char *eobs) -+{ -+ int i, j; -+ -+ for (i = 0; i < 4; i++) -+ { -+ for (j = 0; j < 4; j++) -+ { -+ if (*eobs++ > 1) -+ vp8_dequant_idct_add_dspr2(q, dq, dst, stride); -+ else -+ { -+ vp8_dc_only_idct_add_dspr2(q[0]*dq[0], dst, stride, dst, stride); -+ ((int *)q)[0] = 0; -+ } -+ -+ q += 16; -+ dst += 4; -+ } -+ -+ dst += 4 * stride - 16; -+ } -+} -+ -+void vp8_dequant_idct_add_uv_block_dspr2 -+(short *q, short *dq, -+ unsigned char *dstu, unsigned char *dstv, int stride, char *eobs) -+{ -+ int i, j; -+ -+ for (i = 0; i < 2; i++) -+ { -+ for (j = 0; j < 2; j++) -+ { -+ if (*eobs++ > 1) -+ vp8_dequant_idct_add_dspr2(q, dq, dstu, stride); -+ else -+ { -+ vp8_dc_only_idct_add_dspr2(q[0]*dq[0], dstu, stride, dstu, stride); -+ ((int *)q)[0] = 0; -+ } -+ -+ q += 16; -+ dstu += 4; -+ } -+ -+ dstu += 4 * stride - 8; -+ } -+ -+ for (i = 0; i < 2; i++) -+ { -+ for (j = 0; j < 2; j++) -+ { -+ if (*eobs++ > 1) -+ vp8_dequant_idct_add_dspr2(q, dq, dstv, stride); -+ else -+ { -+ vp8_dc_only_idct_add_dspr2(q[0]*dq[0], dstv, stride, dstv, stride); -+ ((int *)q)[0] = 0; -+ } -+ -+ q += 16; -+ dstv += 4; -+ } -+ -+ dstv += 4 * stride - 8; -+ } -+} -+ -+#endif -+ -diff --git a/vp8/common/mips/dspr2/idctllm_dspr2.c b/vp8/common/mips/dspr2/idctllm_dspr2.c -new file mode 100644 -index 0000000..25b7936 ---- /dev/null -+++ b/vp8/common/mips/dspr2/idctllm_dspr2.c -@@ -0,0 +1,369 @@ -+/* -+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. -+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. -+ */ -+ -+#include "vpx_rtcd.h" -+ -+#if HAVE_DSPR2 -+#define CROP_WIDTH 256 -+ -+/****************************************************************************** -+ * Notes: -+ * -+ * This implementation makes use of 16 bit fixed point version of two multiply -+ * constants: -+ * 1. sqrt(2) * cos (pi/8) -+ * 2. sqrt(2) * sin (pi/8) -+ * Since the first constant is bigger than 1, to maintain the same 16 bit -+ * fixed point precision as the second one, we use a trick of -+ * x * a = x + x*(a-1) -+ * so -+ * x * sqrt(2) * cos (pi/8) = x + x * (sqrt(2) *cos(pi/8)-1). 
-+ ****************************************************************************/ -+extern unsigned char ff_cropTbl[256 + 2 * CROP_WIDTH]; -+static const int cospi8sqrt2minus1 = 20091; -+static const int sinpi8sqrt2 = 35468; -+ -+inline void prefetch_load_short(short *src) -+{ -+ __asm__ __volatile__ ( -+ "pref 0, 0(%[src]) \n\t" -+ : -+ : [src] "r" (src) -+ ); -+} -+ -+void vp8_short_idct4x4llm_dspr2(short *input, unsigned char *pred_ptr, -+ int pred_stride, unsigned char *dst_ptr, -+ int dst_stride) -+{ -+ int r, c; -+ int a1, b1, c1, d1; -+ short output[16]; -+ short *ip = input; -+ short *op = output; -+ int temp1, temp2; -+ int shortpitch = 4; -+ -+ int c2, d2; -+ int temp3, temp4; -+ unsigned char *cm = ff_cropTbl + CROP_WIDTH; -+ -+ /* prepare data for load */ -+ prefetch_load_short(ip + 8); -+ -+ /* first loop is unrolled */ -+ a1 = ip[0] + ip[8]; -+ b1 = ip[0] - ip[8]; -+ -+ temp1 = (ip[4] * sinpi8sqrt2) >> 16; -+ temp2 = ip[12] + ((ip[12] * cospi8sqrt2minus1) >> 16); -+ c1 = temp1 - temp2; -+ -+ temp1 = ip[4] + ((ip[4] * cospi8sqrt2minus1) >> 16); -+ temp2 = (ip[12] * sinpi8sqrt2) >> 16; -+ d1 = temp1 + temp2; -+ -+ temp3 = (ip[5] * sinpi8sqrt2) >> 16; -+ temp4 = ip[13] + ((ip[13] * cospi8sqrt2minus1) >> 16); -+ c2 = temp3 - temp4; -+ -+ temp3 = ip[5] + ((ip[5] * cospi8sqrt2minus1) >> 16); -+ temp4 = (ip[13] * sinpi8sqrt2) >> 16; -+ d2 = temp3 + temp4; -+ -+ op[0] = a1 + d1; -+ op[12] = a1 - d1; -+ op[4] = b1 + c1; -+ op[8] = b1 - c1; -+ -+ a1 = ip[1] + ip[9]; -+ b1 = ip[1] - ip[9]; -+ -+ op[1] = a1 + d2; -+ op[13] = a1 - d2; -+ op[5] = b1 + c2; -+ op[9] = b1 - c2; -+ -+ a1 = ip[2] + ip[10]; -+ b1 = ip[2] - ip[10]; -+ -+ temp1 = (ip[6] * sinpi8sqrt2) >> 16; -+ temp2 = ip[14] + ((ip[14] * cospi8sqrt2minus1) >> 16); -+ c1 = temp1 - temp2; -+ -+ temp1 = ip[6] + ((ip[6] * cospi8sqrt2minus1) >> 16); -+ temp2 = (ip[14] * sinpi8sqrt2) >> 16; -+ d1 = temp1 + temp2; -+ -+ temp3 = (ip[7] * sinpi8sqrt2) >> 16; -+ temp4 = ip[15] + ((ip[15] * cospi8sqrt2minus1) >> 16); -+ c2 = temp3 - temp4; -+ -+ temp3 = ip[7] + ((ip[7] * cospi8sqrt2minus1) >> 16); -+ temp4 = (ip[15] * sinpi8sqrt2) >> 16; -+ d2 = temp3 + temp4; -+ -+ op[2] = a1 + d1; -+ op[14] = a1 - d1; -+ op[6] = b1 + c1; -+ op[10] = b1 - c1; -+ -+ a1 = ip[3] + ip[11]; -+ b1 = ip[3] - ip[11]; -+ -+ op[3] = a1 + d2; -+ op[15] = a1 - d2; -+ op[7] = b1 + c2; -+ op[11] = b1 - c2; -+ -+ ip = output; -+ -+ /* prepare data for load */ -+ prefetch_load_short(ip + shortpitch); -+ -+ /* second loop is unrolled */ -+ a1 = ip[0] + ip[2]; -+ b1 = ip[0] - ip[2]; -+ -+ temp1 = (ip[1] * sinpi8sqrt2) >> 16; -+ temp2 = ip[3] + ((ip[3] * cospi8sqrt2minus1) >> 16); -+ c1 = temp1 - temp2; -+ -+ temp1 = ip[1] + ((ip[1] * cospi8sqrt2minus1) >> 16); -+ temp2 = (ip[3] * sinpi8sqrt2) >> 16; -+ d1 = temp1 + temp2; -+ -+ temp3 = (ip[5] * sinpi8sqrt2) >> 16; -+ temp4 = ip[7] + ((ip[7] * cospi8sqrt2minus1) >> 16); -+ c2 = temp3 - temp4; -+ -+ temp3 = ip[5] + ((ip[5] * cospi8sqrt2minus1) >> 16); -+ temp4 = (ip[7] * sinpi8sqrt2) >> 16; -+ d2 = temp3 + temp4; -+ -+ op[0] = (a1 + d1 + 4) >> 3; -+ op[3] = (a1 - d1 + 4) >> 3; -+ op[1] = (b1 + c1 + 4) >> 3; -+ op[2] = (b1 - c1 + 4) >> 3; -+ -+ a1 = ip[4] + ip[6]; -+ b1 = ip[4] - ip[6]; -+ -+ op[4] = (a1 + d2 + 4) >> 3; -+ op[7] = (a1 - d2 + 4) >> 3; -+ op[5] = (b1 + c2 + 4) >> 3; -+ op[6] = (b1 - c2 + 4) >> 3; -+ -+ a1 = ip[8] + ip[10]; -+ b1 = ip[8] - ip[10]; -+ -+ temp1 = (ip[9] * sinpi8sqrt2) >> 16; -+ temp2 = ip[11] + ((ip[11] * cospi8sqrt2minus1) >> 16); -+ c1 = temp1 - temp2; -+ -+ temp1 = ip[9] + ((ip[9] * 
cospi8sqrt2minus1) >> 16); -+ temp2 = (ip[11] * sinpi8sqrt2) >> 16; -+ d1 = temp1 + temp2; -+ -+ temp3 = (ip[13] * sinpi8sqrt2) >> 16; -+ temp4 = ip[15] + ((ip[15] * cospi8sqrt2minus1) >> 16); -+ c2 = temp3 - temp4; -+ -+ temp3 = ip[13] + ((ip[13] * cospi8sqrt2minus1) >> 16); -+ temp4 = (ip[15] * sinpi8sqrt2) >> 16; -+ d2 = temp3 + temp4; -+ -+ op[8] = (a1 + d1 + 4) >> 3; -+ op[11] = (a1 - d1 + 4) >> 3; -+ op[9] = (b1 + c1 + 4) >> 3; -+ op[10] = (b1 - c1 + 4) >> 3; -+ -+ a1 = ip[12] + ip[14]; -+ b1 = ip[12] - ip[14]; -+ -+ op[12] = (a1 + d2 + 4) >> 3; -+ op[15] = (a1 - d2 + 4) >> 3; -+ op[13] = (b1 + c2 + 4) >> 3; -+ op[14] = (b1 - c2 + 4) >> 3; -+ -+ ip = output; -+ -+ for (r = 0; r < 4; r++) -+ { -+ for (c = 0; c < 4; c++) -+ { -+ short a = ip[c] + pred_ptr[c] ; -+ dst_ptr[c] = cm[a] ; -+ } -+ -+ ip += 4; -+ dst_ptr += dst_stride; -+ pred_ptr += pred_stride; -+ } -+} -+ -+void vp8_dc_only_idct_add_dspr2(short input_dc, unsigned char *pred_ptr, int pred_stride, unsigned char *dst_ptr, int dst_stride) -+{ -+ int a1; -+ int i, absa1; -+ int t2, vector_a1, vector_a; -+ -+ /* a1 = ((input_dc + 4) >> 3); */ -+ __asm__ __volatile__ ( -+ "addi %[a1], %[input_dc], 4 \n\t" -+ "sra %[a1], %[a1], 3 \n\t" -+ : [a1] "=r" (a1) -+ : [input_dc] "r" (input_dc) -+ ); -+ -+ if (a1 < 0) -+ { -+ /* use quad-byte -+ * input and output memory are four byte aligned -+ */ -+ __asm__ __volatile__ ( -+ "abs %[absa1], %[a1] \n\t" -+ "replv.qb %[vector_a1], %[absa1] \n\t" -+ : [absa1] "=r" (absa1), [vector_a1] "=r" (vector_a1) -+ : [a1] "r" (a1) -+ ); -+ -+ /* use (a1 - predptr[c]) instead a1 + predptr[c] */ -+ for (i = 4; i--;) -+ { -+ __asm__ __volatile__ ( -+ "lw %[t2], 0(%[pred_ptr]) \n\t" -+ "add %[pred_ptr], %[pred_ptr], %[pred_stride] \n\t" -+ "subu_s.qb %[vector_a], %[t2], %[vector_a1] \n\t" -+ "sw %[vector_a], 0(%[dst_ptr]) \n\t" -+ "add %[dst_ptr], %[dst_ptr], %[dst_stride] \n\t" -+ : [t2] "=&r" (t2), [vector_a] "=&r" (vector_a), -+ [dst_ptr] "+&r" (dst_ptr), [pred_ptr] "+&r" (pred_ptr) -+ : [dst_stride] "r" (dst_stride), [pred_stride] "r" (pred_stride), [vector_a1] "r" (vector_a1) -+ ); -+ } -+ } -+ else -+ { -+ /* use quad-byte -+ * input and output memory are four byte aligned -+ */ -+ __asm__ __volatile__ ( -+ "replv.qb %[vector_a1], %[a1] \n\t" -+ : [vector_a1] "=r" (vector_a1) -+ : [a1] "r" (a1) -+ ); -+ -+ for (i = 4; i--;) -+ { -+ __asm__ __volatile__ ( -+ "lw %[t2], 0(%[pred_ptr]) \n\t" -+ "add %[pred_ptr], %[pred_ptr], %[pred_stride] \n\t" -+ "addu_s.qb %[vector_a], %[vector_a1], %[t2] \n\t" -+ "sw %[vector_a], 0(%[dst_ptr]) \n\t" -+ "add %[dst_ptr], %[dst_ptr], %[dst_stride] \n\t" -+ : [t2] "=&r" (t2), [vector_a] "=&r" (vector_a), -+ [dst_ptr] "+&r" (dst_ptr), [pred_ptr] "+&r" (pred_ptr) -+ : [dst_stride] "r" (dst_stride), [pred_stride] "r" (pred_stride), [vector_a1] "r" (vector_a1) -+ ); -+ } -+ } -+ -+} -+ -+void vp8_short_inv_walsh4x4_dspr2(short *input, short *mb_dqcoeff) -+{ -+ short output[16]; -+ int i; -+ int a1, b1, c1, d1; -+ int a2, b2, c2, d2; -+ short *ip = input; -+ short *op = output; -+ -+ prefetch_load_short(ip); -+ -+ for (i = 4; i--;) -+ { -+ a1 = ip[0] + ip[12]; -+ b1 = ip[4] + ip[8]; -+ c1 = ip[4] - ip[8]; -+ d1 = ip[0] - ip[12]; -+ -+ op[0] = a1 + b1; -+ op[4] = c1 + d1; -+ op[8] = a1 - b1; -+ op[12] = d1 - c1; -+ -+ ip++; -+ op++; -+ } -+ -+ ip = output; -+ op = output; -+ -+ prefetch_load_short(ip); -+ -+ for (i = 4; i--;) -+ { -+ a1 = ip[0] + ip[3] + 3; -+ b1 = ip[1] + ip[2]; -+ c1 = ip[1] - ip[2]; -+ d1 = ip[0] - ip[3] + 3; -+ -+ a2 = a1 + b1; -+ b2 = d1 + c1; -+ c2 = a1 
- b1; -+ d2 = d1 - c1; -+ -+ op[0] = a2 >> 3; -+ op[1] = b2 >> 3; -+ op[2] = c2 >> 3; -+ op[3] = d2 >> 3; -+ -+ ip += 4; -+ op += 4; -+ } -+ -+ for (i = 0; i < 16; i++) -+ { -+ mb_dqcoeff[i * 16] = output[i]; -+ } -+} -+ -+void vp8_short_inv_walsh4x4_1_dspr2(short *input, short *mb_dqcoeff) -+{ -+ int a1; -+ -+ a1 = ((input[0] + 3) >> 3); -+ -+ __asm__ __volatile__ ( -+ "sh %[a1], 0(%[mb_dqcoeff]) \n\t" -+ "sh %[a1], 32(%[mb_dqcoeff]) \n\t" -+ "sh %[a1], 64(%[mb_dqcoeff]) \n\t" -+ "sh %[a1], 96(%[mb_dqcoeff]) \n\t" -+ "sh %[a1], 128(%[mb_dqcoeff]) \n\t" -+ "sh %[a1], 160(%[mb_dqcoeff]) \n\t" -+ "sh %[a1], 192(%[mb_dqcoeff]) \n\t" -+ "sh %[a1], 224(%[mb_dqcoeff]) \n\t" -+ "sh %[a1], 256(%[mb_dqcoeff]) \n\t" -+ "sh %[a1], 288(%[mb_dqcoeff]) \n\t" -+ "sh %[a1], 320(%[mb_dqcoeff]) \n\t" -+ "sh %[a1], 352(%[mb_dqcoeff]) \n\t" -+ "sh %[a1], 384(%[mb_dqcoeff]) \n\t" -+ "sh %[a1], 416(%[mb_dqcoeff]) \n\t" -+ "sh %[a1], 448(%[mb_dqcoeff]) \n\t" -+ "sh %[a1], 480(%[mb_dqcoeff]) \n\t" -+ -+ : -+ : [a1] "r" (a1), [mb_dqcoeff] "r" (mb_dqcoeff) -+ ); -+} -+ -+#endif -diff --git a/vp8/common/mips/dspr2/loopfilter_filters_dspr2.c b/vp8/common/mips/dspr2/loopfilter_filters_dspr2.c -new file mode 100644 -index 0000000..b8e5e4d ---- /dev/null -+++ b/vp8/common/mips/dspr2/loopfilter_filters_dspr2.c -@@ -0,0 +1,2622 @@ -+/* -+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. -+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. -+ */ -+ -+ -+#include -+#include "vpx_rtcd.h" -+#include "vp8/common/onyxc_int.h" -+ -+#if HAVE_DSPR2 -+typedef unsigned char uc; -+ -+/* prefetch data for load */ -+inline void prefetch_load_lf(unsigned char *src) -+{ -+ __asm__ __volatile__ ( -+ "pref 0, 0(%[src]) \n\t" -+ : -+ : [src] "r" (src) -+ ); -+} -+ -+ -+/* prefetch data for store */ -+inline void prefetch_store_lf(unsigned char *dst) -+{ -+ __asm__ __volatile__ ( -+ "pref 1, 0(%[dst]) \n\t" -+ : -+ : [dst] "r" (dst) -+ ); -+} -+ -+/* processing 4 pixels at the same time -+ * compute hev and mask in the same function -+ */ -+static __inline void vp8_filter_mask_vec_mips -+( -+ uint32_t limit, -+ uint32_t flimit, -+ uint32_t p1, -+ uint32_t p0, -+ uint32_t p3, -+ uint32_t p2, -+ uint32_t q0, -+ uint32_t q1, -+ uint32_t q2, -+ uint32_t q3, -+ uint32_t thresh, -+ uint32_t *hev, -+ uint32_t *mask -+) -+{ -+ uint32_t c, r, r3, r_k; -+ uint32_t s1, s2, s3; -+ uint32_t ones = 0xFFFFFFFF; -+ uint32_t hev1; -+ -+ __asm__ __volatile__ ( -+ /* mask |= (abs(p3 - p2) > limit) */ -+ "subu_s.qb %[c], %[p3], %[p2] \n\t" -+ "subu_s.qb %[r_k], %[p2], %[p3] \n\t" -+ "or %[r_k], %[r_k], %[c] \n\t" -+ "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" -+ "or %[r], $0, %[c] \n\t" -+ -+ /* mask |= (abs(p2 - p1) > limit) */ -+ "subu_s.qb %[c], %[p2], %[p1] \n\t" -+ "subu_s.qb %[r_k], %[p1], %[p2] \n\t" -+ "or %[r_k], %[r_k], %[c] \n\t" -+ "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" -+ "or %[r], %[r], %[c] \n\t" -+ -+ /* mask |= (abs(p1 - p0) > limit) -+ * hev |= (abs(p1 - p0) > thresh) -+ */ -+ "subu_s.qb %[c], %[p1], %[p0] \n\t" -+ "subu_s.qb %[r_k], %[p0], %[p1] \n\t" -+ "or %[r_k], %[r_k], %[c] \n\t" -+ "cmpgu.lt.qb %[c], %[thresh], %[r_k] \n\t" -+ "or %[r3], $0, %[c] \n\t" -+ "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" -+ "or %[r], %[r], %[c] \n\t" -+ -+ /* 
mask |= (abs(q1 - q0) > limit) -+ * hev |= (abs(q1 - q0) > thresh) -+ */ -+ "subu_s.qb %[c], %[q1], %[q0] \n\t" -+ "subu_s.qb %[r_k], %[q0], %[q1] \n\t" -+ "or %[r_k], %[r_k], %[c] \n\t" -+ "cmpgu.lt.qb %[c], %[thresh], %[r_k] \n\t" -+ "or %[r3], %[r3], %[c] \n\t" -+ "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" -+ "or %[r], %[r], %[c] \n\t" -+ -+ /* mask |= (abs(q2 - q1) > limit) */ -+ "subu_s.qb %[c], %[q2], %[q1] \n\t" -+ "subu_s.qb %[r_k], %[q1], %[q2] \n\t" -+ "or %[r_k], %[r_k], %[c] \n\t" -+ "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" -+ "or %[r], %[r], %[c] \n\t" -+ "sll %[r3], %[r3], 24 \n\t" -+ -+ /* mask |= (abs(q3 - q2) > limit) */ -+ "subu_s.qb %[c], %[q3], %[q2] \n\t" -+ "subu_s.qb %[r_k], %[q2], %[q3] \n\t" -+ "or %[r_k], %[r_k], %[c] \n\t" -+ "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" -+ "or %[r], %[r], %[c] \n\t" -+ -+ : [c] "=&r" (c), [r_k] "=&r" (r_k), -+ [r] "=&r" (r), [r3] "=&r" (r3) -+ : [limit] "r" (limit), [p3] "r" (p3), [p2] "r" (p2), -+ [p1] "r" (p1), [p0] "r" (p0), [q1] "r" (q1), [q0] "r" (q0), -+ [q2] "r" (q2), [q3] "r" (q3), [thresh] "r" (thresh) -+ ); -+ -+ __asm__ __volatile__ ( -+ /* abs(p0 - q0) */ -+ "subu_s.qb %[c], %[p0], %[q0] \n\t" -+ "subu_s.qb %[r_k], %[q0], %[p0] \n\t" -+ "wrdsp %[r3] \n\t" -+ "or %[s1], %[r_k], %[c] \n\t" -+ -+ /* abs(p1 - q1) */ -+ "subu_s.qb %[c], %[p1], %[q1] \n\t" -+ "addu_s.qb %[s3], %[s1], %[s1] \n\t" -+ "pick.qb %[hev1], %[ones], $0 \n\t" -+ "subu_s.qb %[r_k], %[q1], %[p1] \n\t" -+ "or %[s2], %[r_k], %[c] \n\t" -+ -+ /* abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > flimit * 2 + limit */ -+ "shrl.qb %[s2], %[s2], 1 \n\t" -+ "addu_s.qb %[s1], %[s2], %[s3] \n\t" -+ "cmpgu.lt.qb %[c], %[flimit], %[s1] \n\t" -+ "or %[r], %[r], %[c] \n\t" -+ "sll %[r], %[r], 24 \n\t" -+ -+ "wrdsp %[r] \n\t" -+ "pick.qb %[s2], $0, %[ones] \n\t" -+ -+ : [c] "=&r" (c), [r_k] "=&r" (r_k), [s1] "=&r" (s1), [hev1] "=&r" (hev1), -+ [s2] "=&r" (s2), [r] "+r" (r), [s3] "=&r" (s3) -+ : [p0] "r" (p0), [q0] "r" (q0), [p1] "r" (p1), [r3] "r" (r3), -+ [q1] "r" (q1), [ones] "r" (ones), [flimit] "r" (flimit) -+ ); -+ -+ *hev = hev1; -+ *mask = s2; -+} -+ -+ -+/* inputs & outputs are quad-byte vectors */ -+static __inline void vp8_filter_mips -+( -+ uint32_t mask, -+ uint32_t hev, -+ uint32_t *ps1, -+ uint32_t *ps0, -+ uint32_t *qs0, -+ uint32_t *qs1 -+) -+{ -+ int32_t vp8_filter_l, vp8_filter_r; -+ int32_t Filter1_l, Filter1_r, Filter2_l, Filter2_r; -+ int32_t subr_r, subr_l; -+ uint32_t t1, t2, HWM, t3; -+ uint32_t hev_l, hev_r, mask_l, mask_r, invhev_l, invhev_r; -+ -+ int32_t vps1, vps0, vqs0, vqs1; -+ int32_t vps1_l, vps1_r, vps0_l, vps0_r, vqs0_l, vqs0_r, vqs1_l, vqs1_r; -+ uint32_t N128; -+ -+ N128 = 0x80808080; -+ t1 = 0x03000300; -+ t2 = 0x04000400; -+ t3 = 0x01000100; -+ HWM = 0xFF00FF00; -+ -+ vps0 = (*ps0) ^ N128; -+ vps1 = (*ps1) ^ N128; -+ vqs0 = (*qs0) ^ N128; -+ vqs1 = (*qs1) ^ N128; -+ -+ /* use halfword pairs instead quad-bytes because of accuracy */ -+ vps0_l = vps0 & HWM; -+ vps0_r = vps0 << 8; -+ vps0_r = vps0_r & HWM; -+ -+ vps1_l = vps1 & HWM; -+ vps1_r = vps1 << 8; -+ vps1_r = vps1_r & HWM; -+ -+ vqs0_l = vqs0 & HWM; -+ vqs0_r = vqs0 << 8; -+ vqs0_r = vqs0_r & HWM; -+ -+ vqs1_l = vqs1 & HWM; -+ vqs1_r = vqs1 << 8; -+ vqs1_r = vqs1_r & HWM; -+ -+ mask_l = mask & HWM; -+ mask_r = mask << 8; -+ mask_r = mask_r & HWM; -+ -+ hev_l = hev & HWM; -+ hev_r = hev << 8; -+ hev_r = hev_r & HWM; -+ -+ __asm__ __volatile__ ( -+ /* vp8_filter = vp8_signed_char_clamp(ps1 - qs1); */ -+ "subq_s.ph %[vp8_filter_l], %[vps1_l], %[vqs1_l] \n\t" -+ "subq_s.ph %[vp8_filter_r], 
%[vps1_r], %[vqs1_r] \n\t" -+ -+ /* qs0 - ps0 */ -+ "subq_s.ph %[subr_l], %[vqs0_l], %[vps0_l] \n\t" -+ "subq_s.ph %[subr_r], %[vqs0_r], %[vps0_r] \n\t" -+ -+ /* vp8_filter &= hev; */ -+ "and %[vp8_filter_l], %[vp8_filter_l], %[hev_l] \n\t" -+ "and %[vp8_filter_r], %[vp8_filter_r], %[hev_r] \n\t" -+ -+ /* vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * (qs0 - ps0)); */ -+ "addq_s.ph %[vp8_filter_l], %[vp8_filter_l], %[subr_l] \n\t" -+ "addq_s.ph %[vp8_filter_r], %[vp8_filter_r], %[subr_r] \n\t" -+ "xor %[invhev_l], %[hev_l], %[HWM] \n\t" -+ "addq_s.ph %[vp8_filter_l], %[vp8_filter_l], %[subr_l] \n\t" -+ "addq_s.ph %[vp8_filter_r], %[vp8_filter_r], %[subr_r] \n\t" -+ "xor %[invhev_r], %[hev_r], %[HWM] \n\t" -+ "addq_s.ph %[vp8_filter_l], %[vp8_filter_l], %[subr_l] \n\t" -+ "addq_s.ph %[vp8_filter_r], %[vp8_filter_r], %[subr_r] \n\t" -+ -+ /* vp8_filter &= mask; */ -+ "and %[vp8_filter_l], %[vp8_filter_l], %[mask_l] \n\t" -+ "and %[vp8_filter_r], %[vp8_filter_r], %[mask_r] \n\t" -+ -+ : [vp8_filter_l] "=&r" (vp8_filter_l), [vp8_filter_r] "=&r" (vp8_filter_r), -+ [subr_l] "=&r" (subr_l), [subr_r] "=&r" (subr_r), -+ [invhev_l] "=&r" (invhev_l), [invhev_r] "=&r" (invhev_r) -+ -+ : [vps0_l] "r" (vps0_l), [vps0_r] "r" (vps0_r), [vps1_l] "r" (vps1_l), -+ [vps1_r] "r" (vps1_r), [vqs0_l] "r" (vqs0_l), [vqs0_r] "r" (vqs0_r), -+ [vqs1_l] "r" (vqs1_l), [vqs1_r] "r" (vqs1_r), -+ [mask_l] "r" (mask_l), [mask_r] "r" (mask_r), -+ [hev_l] "r" (hev_l), [hev_r] "r" (hev_r), -+ [HWM] "r" (HWM) -+ ); -+ -+ /* save bottom 3 bits so that we round one side +4 and the other +3 */ -+ __asm__ __volatile__ ( -+ /* Filter2 = vp8_signed_char_clamp(vp8_filter + 3) >>= 3; */ -+ "addq_s.ph %[Filter1_l], %[vp8_filter_l], %[t2] \n\t" -+ "addq_s.ph %[Filter1_r], %[vp8_filter_r], %[t2] \n\t" -+ -+ /* Filter1 = vp8_signed_char_clamp(vp8_filter + 4) >>= 3; */ -+ "addq_s.ph %[Filter2_l], %[vp8_filter_l], %[t1] \n\t" -+ "addq_s.ph %[Filter2_r], %[vp8_filter_r], %[t1] \n\t" -+ "shra.ph %[Filter1_r], %[Filter1_r], 3 \n\t" -+ "shra.ph %[Filter1_l], %[Filter1_l], 3 \n\t" -+ -+ "shra.ph %[Filter2_l], %[Filter2_l], 3 \n\t" -+ "shra.ph %[Filter2_r], %[Filter2_r], 3 \n\t" -+ -+ "and %[Filter1_l], %[Filter1_l], %[HWM] \n\t" -+ "and %[Filter1_r], %[Filter1_r], %[HWM] \n\t" -+ -+ /* vps0 = vp8_signed_char_clamp(ps0 + Filter2); */ -+ "addq_s.ph %[vps0_l], %[vps0_l], %[Filter2_l] \n\t" -+ "addq_s.ph %[vps0_r], %[vps0_r], %[Filter2_r] \n\t" -+ -+ /* vqs0 = vp8_signed_char_clamp(qs0 - Filter1); */ -+ "subq_s.ph %[vqs0_l], %[vqs0_l], %[Filter1_l] \n\t" -+ "subq_s.ph %[vqs0_r], %[vqs0_r], %[Filter1_r] \n\t" -+ -+ : [Filter1_l] "=&r" (Filter1_l), [Filter1_r] "=&r" (Filter1_r), -+ [Filter2_l] "=&r" (Filter2_l), [Filter2_r] "=&r" (Filter2_r), -+ [vps0_l] "+r" (vps0_l), [vps0_r] "+r" (vps0_r), -+ [vqs0_l] "+r" (vqs0_l), [vqs0_r] "+r" (vqs0_r) -+ -+ : [t1] "r" (t1), [t2] "r" (t2), -+ [vp8_filter_l] "r" (vp8_filter_l), [vp8_filter_r] "r" (vp8_filter_r), -+ [HWM] "r" (HWM) -+ ); -+ -+ __asm__ __volatile__ ( -+ /* (vp8_filter += 1) >>= 1 */ -+ "addqh.ph %[Filter1_l], %[Filter1_l], %[t3] \n\t" -+ "addqh.ph %[Filter1_r], %[Filter1_r], %[t3] \n\t" -+ -+ /* vp8_filter &= ~hev; */ -+ "and %[Filter1_l], %[Filter1_l], %[invhev_l] \n\t" -+ "and %[Filter1_r], %[Filter1_r], %[invhev_r] \n\t" -+ -+ /* vps1 = vp8_signed_char_clamp(ps1 + vp8_filter); */ -+ "addq_s.ph %[vps1_l], %[vps1_l], %[Filter1_l] \n\t" -+ "addq_s.ph %[vps1_r], %[vps1_r], %[Filter1_r] \n\t" -+ -+ /* vqs1 = vp8_signed_char_clamp(qs1 - vp8_filter); */ -+ "subq_s.ph %[vqs1_l], %[vqs1_l], 
%[Filter1_l] \n\t" -+ "subq_s.ph %[vqs1_r], %[vqs1_r], %[Filter1_r] \n\t" -+ -+ : [Filter1_l] "+r" (Filter1_l), [Filter1_r] "+r" (Filter1_r), -+ [vps1_l] "+r" (vps1_l), [vps1_r] "+r" (vps1_r), -+ [vqs1_l] "+r" (vqs1_l), [vqs1_r] "+r" (vqs1_r) -+ -+ : [t3] "r" (t3), [invhev_l] "r" (invhev_l), [invhev_r] "r" (invhev_r) -+ ); -+ -+ /* Create quad-bytes from halfword pairs */ -+ vqs0_l = vqs0_l & HWM; -+ vqs1_l = vqs1_l & HWM; -+ vps0_l = vps0_l & HWM; -+ vps1_l = vps1_l & HWM; -+ -+ __asm__ __volatile__ ( -+ "shrl.ph %[vqs0_r], %[vqs0_r], 8 \n\t" -+ "shrl.ph %[vps0_r], %[vps0_r], 8 \n\t" -+ "shrl.ph %[vqs1_r], %[vqs1_r], 8 \n\t" -+ "shrl.ph %[vps1_r], %[vps1_r], 8 \n\t" -+ -+ : [vps1_r] "+r" (vps1_r), [vqs1_r] "+r" (vqs1_r), -+ [vps0_r] "+r" (vps0_r), [vqs0_r] "+r" (vqs0_r) -+ : -+ ); -+ -+ vqs0 = vqs0_l | vqs0_r; -+ vqs1 = vqs1_l | vqs1_r; -+ vps0 = vps0_l | vps0_r; -+ vps1 = vps1_l | vps1_r; -+ -+ *ps0 = vps0 ^ N128; -+ *ps1 = vps1 ^ N128; -+ *qs0 = vqs0 ^ N128; -+ *qs1 = vqs1 ^ N128; -+} -+ -+void vp8_loop_filter_horizontal_edge_mips -+( -+ unsigned char *s, -+ int p, -+ unsigned int flimit, -+ unsigned int limit, -+ unsigned int thresh, -+ int count -+) -+{ -+ uint32_t mask; -+ uint32_t hev; -+ uint32_t pm1, p0, p1, p2, p3, p4, p5, p6; -+ unsigned char *sm1, *s0, *s1, *s2, *s3, *s4, *s5, *s6; -+ -+ mask = 0; -+ hev = 0; -+ p1 = 0; -+ p2 = 0; -+ p3 = 0; -+ p4 = 0; -+ -+ /* prefetch data for store */ -+ prefetch_store_lf(s); -+ -+ /* loop filter designed to work using chars so that we can make maximum use -+ * of 8 bit simd instructions. -+ */ -+ -+ sm1 = s - (p << 2); -+ s0 = s - p - p - p; -+ s1 = s - p - p ; -+ s2 = s - p; -+ s3 = s; -+ s4 = s + p; -+ s5 = s + p + p; -+ s6 = s + p + p + p; -+ -+ /* load quad-byte vectors -+ * memory is 4 byte aligned -+ */ -+ p1 = *((uint32_t *)(s1)); -+ p2 = *((uint32_t *)(s2)); -+ p3 = *((uint32_t *)(s3)); -+ p4 = *((uint32_t *)(s4)); -+ -+ /* if (p1 - p4 == 0) and (p2 - p3 == 0) -+ * mask will be zero and filtering is not needed -+ */ -+ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) -+ { -+ -+ pm1 = *((uint32_t *)(sm1)); -+ p0 = *((uint32_t *)(s0)); -+ p5 = *((uint32_t *)(s5)); -+ p6 = *((uint32_t *)(s6)); -+ -+ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, -+ thresh, &hev, &mask); -+ -+ /* if mask == 0 do filtering is not needed */ -+ if (mask) -+ { -+ /* filtering */ -+ vp8_filter_mips(mask, hev, &p1, &p2, &p3, &p4); -+ -+ /* unpack processed 4x4 neighborhood */ -+ *((uint32_t *)s1) = p1; -+ *((uint32_t *)s2) = p2; -+ *((uint32_t *)s3) = p3; -+ *((uint32_t *)s4) = p4; -+ } -+ } -+ -+ sm1 += 4; -+ s0 += 4; -+ s1 += 4; -+ s2 += 4; -+ s3 += 4; -+ s4 += 4; -+ s5 += 4; -+ s6 += 4; -+ -+ /* load quad-byte vectors -+ * memory is 4 byte aligned -+ */ -+ p1 = *((uint32_t *)(s1)); -+ p2 = *((uint32_t *)(s2)); -+ p3 = *((uint32_t *)(s3)); -+ p4 = *((uint32_t *)(s4)); -+ -+ /* if (p1 - p4 == 0) and (p2 - p3 == 0) -+ * mask will be zero and filtering is not needed -+ */ -+ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) -+ { -+ -+ pm1 = *((uint32_t *)(sm1)); -+ p0 = *((uint32_t *)(s0)); -+ p5 = *((uint32_t *)(s5)); -+ p6 = *((uint32_t *)(s6)); -+ -+ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, -+ thresh, &hev, &mask); -+ -+ /* if mask == 0 do filtering is not needed */ -+ if (mask) -+ { -+ /* filtering */ -+ vp8_filter_mips(mask, hev, &p1, &p2, &p3, &p4); -+ -+ /* unpack processed 4x4 neighborhood */ -+ *((uint32_t *)s1) = p1; -+ *((uint32_t *)s2) = p2; -+ *((uint32_t *)s3) = p3; -+ *((uint32_t *)s4) = p4; -+ } 
-+ } -+ -+ sm1 += 4; -+ s0 += 4; -+ s1 += 4; -+ s2 += 4; -+ s3 += 4; -+ s4 += 4; -+ s5 += 4; -+ s6 += 4; -+ -+ /* load quad-byte vectors -+ * memory is 4 byte aligned -+ */ -+ p1 = *((uint32_t *)(s1)); -+ p2 = *((uint32_t *)(s2)); -+ p3 = *((uint32_t *)(s3)); -+ p4 = *((uint32_t *)(s4)); -+ -+ /* if (p1 - p4 == 0) and (p2 - p3 == 0) -+ * mask will be zero and filtering is not needed -+ */ -+ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) -+ { -+ -+ pm1 = *((uint32_t *)(sm1)); -+ p0 = *((uint32_t *)(s0)); -+ p5 = *((uint32_t *)(s5)); -+ p6 = *((uint32_t *)(s6)); -+ -+ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, -+ thresh, &hev, &mask); -+ -+ /* if mask == 0 do filtering is not needed */ -+ if (mask) -+ { -+ /* filtering */ -+ vp8_filter_mips(mask, hev, &p1, &p2, &p3, &p4); -+ -+ /* unpack processed 4x4 neighborhood */ -+ *((uint32_t *)s1) = p1; -+ *((uint32_t *)s2) = p2; -+ *((uint32_t *)s3) = p3; -+ *((uint32_t *)s4) = p4; -+ } -+ } -+ -+ sm1 += 4; -+ s0 += 4; -+ s1 += 4; -+ s2 += 4; -+ s3 += 4; -+ s4 += 4; -+ s5 += 4; -+ s6 += 4; -+ -+ /* load quad-byte vectors -+ * memory is 4 byte aligned -+ */ -+ p1 = *((uint32_t *)(s1)); -+ p2 = *((uint32_t *)(s2)); -+ p3 = *((uint32_t *)(s3)); -+ p4 = *((uint32_t *)(s4)); -+ -+ /* if (p1 - p4 == 0) and (p2 - p3 == 0) -+ * mask will be zero and filtering is not needed -+ */ -+ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) -+ { -+ -+ pm1 = *((uint32_t *)(sm1)); -+ p0 = *((uint32_t *)(s0)); -+ p5 = *((uint32_t *)(s5)); -+ p6 = *((uint32_t *)(s6)); -+ -+ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, -+ thresh, &hev, &mask); -+ -+ /* if mask == 0 do filtering is not needed */ -+ if (mask) -+ { -+ /* filtering */ -+ vp8_filter_mips(mask, hev, &p1, &p2, &p3, &p4); -+ -+ /* unpack processed 4x4 neighborhood */ -+ *((uint32_t *)s1) = p1; -+ *((uint32_t *)s2) = p2; -+ *((uint32_t *)s3) = p3; -+ *((uint32_t *)s4) = p4; -+ } -+ } -+} -+ -+void vp8_loop_filter_uvhorizontal_edge_mips -+( -+ unsigned char *s, -+ int p, -+ unsigned int flimit, -+ unsigned int limit, -+ unsigned int thresh, -+ int count -+) -+{ -+ uint32_t mask; -+ uint32_t hev; -+ uint32_t pm1, p0, p1, p2, p3, p4, p5, p6; -+ unsigned char *sm1, *s0, *s1, *s2, *s3, *s4, *s5, *s6; -+ -+ mask = 0; -+ hev = 0; -+ p1 = 0; -+ p2 = 0; -+ p3 = 0; -+ p4 = 0; -+ -+ /* loop filter designed to work using chars so that we can make maximum use -+ * of 8 bit simd instructions. 
-+ */ -+ -+ sm1 = s - (p << 2); -+ s0 = s - p - p - p; -+ s1 = s - p - p ; -+ s2 = s - p; -+ s3 = s; -+ s4 = s + p; -+ s5 = s + p + p; -+ s6 = s + p + p + p; -+ -+ /* load quad-byte vectors -+ * memory is 4 byte aligned -+ */ -+ p1 = *((uint32_t *)(s1)); -+ p2 = *((uint32_t *)(s2)); -+ p3 = *((uint32_t *)(s3)); -+ p4 = *((uint32_t *)(s4)); -+ -+ /* if (p1 - p4 == 0) and (p2 - p3 == 0) -+ * mask will be zero and filtering is not needed -+ */ -+ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) -+ { -+ -+ pm1 = *((uint32_t *)(sm1)); -+ p0 = *((uint32_t *)(s0)); -+ p5 = *((uint32_t *)(s5)); -+ p6 = *((uint32_t *)(s6)); -+ -+ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, -+ thresh, &hev, &mask); -+ -+ /* if mask == 0 do filtering is not needed */ -+ if (mask) -+ { -+ /* filtering */ -+ vp8_filter_mips(mask, hev, &p1, &p2, &p3, &p4); -+ -+ /* unpack processed 4x4 neighborhood */ -+ *((uint32_t *)s1) = p1; -+ *((uint32_t *)s2) = p2; -+ *((uint32_t *)s3) = p3; -+ *((uint32_t *)s4) = p4; -+ } -+ } -+ -+ sm1 += 4; -+ s0 += 4; -+ s1 += 4; -+ s2 += 4; -+ s3 += 4; -+ s4 += 4; -+ s5 += 4; -+ s6 += 4; -+ -+ /* load quad-byte vectors -+ * memory is 4 byte aligned -+ */ -+ p1 = *((uint32_t *)(s1)); -+ p2 = *((uint32_t *)(s2)); -+ p3 = *((uint32_t *)(s3)); -+ p4 = *((uint32_t *)(s4)); -+ -+ /* if (p1 - p4 == 0) and (p2 - p3 == 0) -+ * mask will be zero and filtering is not needed -+ */ -+ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) -+ { -+ -+ pm1 = *((uint32_t *)(sm1)); -+ p0 = *((uint32_t *)(s0)); -+ p5 = *((uint32_t *)(s5)); -+ p6 = *((uint32_t *)(s6)); -+ -+ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, -+ thresh, &hev, &mask); -+ -+ /* if mask == 0 do filtering is not needed */ -+ if (mask) -+ { -+ /* filtering */ -+ vp8_filter_mips(mask, hev, &p1, &p2, &p3, &p4); -+ -+ /* unpack processed 4x4 neighborhood */ -+ *((uint32_t *)s1) = p1; -+ *((uint32_t *)s2) = p2; -+ *((uint32_t *)s3) = p3; -+ *((uint32_t *)s4) = p4; -+ } -+ } -+} -+ -+void vp8_loop_filter_vertical_edge_mips -+( -+ unsigned char *s, -+ int p, -+ const unsigned int flimit, -+ const unsigned int limit, -+ const unsigned int thresh, -+ int count -+) -+{ -+ int i; -+ uint32_t mask, hev; -+ uint32_t pm1, p0, p1, p2, p3, p4, p5, p6; -+ unsigned char *s1, *s2, *s3, *s4; -+ uint32_t prim1, prim2, sec3, sec4, prim3, prim4; -+ -+ hev = 0; -+ mask = 0; -+ i = 0; -+ pm1 = 0; -+ p0 = 0; -+ p1 = 0; -+ p2 = 0; -+ p3 = 0; -+ p4 = 0; -+ p5 = 0; -+ p6 = 0; -+ -+ /* loop filter designed to work using chars so that we can make maximum use -+ * of 8 bit simd instructions. 
-+ */ -+ -+ /* apply filter on 4 pixesl at the same time */ -+ do -+ { -+ -+ /* prefetch data for store */ -+ prefetch_store_lf(s + p); -+ -+ s1 = s; -+ s2 = s + p; -+ s3 = s2 + p; -+ s4 = s3 + p; -+ s = s4 + p; -+ -+ /* load quad-byte vectors -+ * memory is 4 byte aligned -+ */ -+ p2 = *((uint32_t *)(s1 - 4)); -+ p6 = *((uint32_t *)(s1)); -+ p1 = *((uint32_t *)(s2 - 4)); -+ p5 = *((uint32_t *)(s2)); -+ p0 = *((uint32_t *)(s3 - 4)); -+ p4 = *((uint32_t *)(s3)); -+ pm1 = *((uint32_t *)(s4 - 4)); -+ p3 = *((uint32_t *)(s4)); -+ -+ /* transpose pm1, p0, p1, p2 */ -+ __asm__ __volatile__ ( -+ "precrq.qb.ph %[prim1], %[p2], %[p1] \n\t" -+ "precr.qb.ph %[prim2], %[p2], %[p1] \n\t" -+ "precrq.qb.ph %[prim3], %[p0], %[pm1] \n\t" -+ "precr.qb.ph %[prim4], %[p0], %[pm1] \n\t" -+ -+ "precrq.qb.ph %[p1], %[prim1], %[prim2] \n\t" -+ "precr.qb.ph %[pm1], %[prim1], %[prim2] \n\t" -+ "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" -+ "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" -+ -+ "precrq.ph.w %[p2], %[p1], %[sec3] \n\t" -+ "precrq.ph.w %[p0], %[pm1], %[sec4] \n\t" -+ "append %[p1], %[sec3], 16 \n\t" -+ "append %[pm1], %[sec4], 16 \n\t" -+ -+ : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2), -+ [prim3] "=&r" (prim3), [prim4] "=&r" (prim4), -+ [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0), [pm1] "+r" (pm1), -+ [sec3] "=&r" (sec3), [sec4] "=&r" (sec4) -+ : -+ ); -+ -+ /* transpose p3, p4, p5, p6 */ -+ __asm__ __volatile__ ( -+ "precrq.qb.ph %[prim1], %[p6], %[p5] \n\t" -+ "precr.qb.ph %[prim2], %[p6], %[p5] \n\t" -+ "precrq.qb.ph %[prim3], %[p4], %[p3] \n\t" -+ "precr.qb.ph %[prim4], %[p4], %[p3] \n\t" -+ -+ "precrq.qb.ph %[p5], %[prim1], %[prim2] \n\t" -+ "precr.qb.ph %[p3], %[prim1], %[prim2] \n\t" -+ "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" -+ "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" -+ -+ "precrq.ph.w %[p6], %[p5], %[sec3] \n\t" -+ "precrq.ph.w %[p4], %[p3], %[sec4] \n\t" -+ "append %[p5], %[sec3], 16 \n\t" -+ "append %[p3], %[sec4], 16 \n\t" -+ -+ : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2), -+ [prim3] "=&r" (prim3), [prim4] "=&r" (prim4), -+ [p6] "+r" (p6), [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3), -+ [sec3] "=&r" (sec3), [sec4] "=&r" (sec4) -+ : -+ ); -+ -+ /* if (p1 - p4 == 0) and (p2 - p3 == 0) -+ * mask will be zero and filtering is not needed -+ */ -+ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) -+ { -+ -+ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, -+ thresh, &hev, &mask); -+ -+ /* if mask == 0 do filtering is not needed */ -+ if (mask) -+ { -+ /* filtering */ -+ vp8_filter_mips(mask, hev, &p1, &p2, &p3, &p4); -+ -+ /* unpack processed 4x4 neighborhood -+ * don't use transpose on output data -+ * because memory isn't aligned -+ */ -+ __asm__ __volatile__ ( -+ "sb %[p4], 1(%[s4]) \n\t" -+ "sb %[p3], 0(%[s4]) \n\t" -+ "sb %[p2], -1(%[s4]) \n\t" -+ "sb %[p1], -2(%[s4]) \n\t" -+ : -+ : [p4] "r" (p4), [p3] "r" (p3), [s4] "r" (s4), -+ [p2] "r" (p2), [p1] "r" (p1) -+ ); -+ -+ __asm__ __volatile__ ( -+ "srl %[p4], %[p4], 8 \n\t" -+ "srl %[p3], %[p3], 8 \n\t" -+ "srl %[p2], %[p2], 8 \n\t" -+ "srl %[p1], %[p1], 8 \n\t" -+ : [p4] "+r" (p4), [p3] "+r" (p3), [p2] "+r" (p2), [p1] "+r" (p1) -+ : -+ ); -+ -+ __asm__ __volatile__ ( -+ "sb %[p4], 1(%[s3]) \n\t" -+ "sb %[p3], 0(%[s3]) \n\t" -+ "sb %[p2], -1(%[s3]) \n\t" -+ "sb %[p1], -2(%[s3]) \n\t" -+ : [p1] "+r" (p1) -+ : [p4] "r" (p4), [p3] "r" (p3), [s3] "r" (s3), [p2] "r" (p2) -+ ); -+ -+ __asm__ __volatile__ ( -+ "srl %[p4], %[p4], 8 \n\t" -+ "srl %[p3], %[p3], 8 \n\t" -+ "srl %[p2], %[p2], 8 \n\t" -+ 
"srl %[p1], %[p1], 8 \n\t" -+ : [p4] "+r" (p4), [p3] "+r" (p3), [p2] "+r" (p2), [p1] "+r" (p1) -+ : -+ ); -+ -+ __asm__ __volatile__ ( -+ "sb %[p4], 1(%[s2]) \n\t" -+ "sb %[p3], 0(%[s2]) \n\t" -+ "sb %[p2], -1(%[s2]) \n\t" -+ "sb %[p1], -2(%[s2]) \n\t" -+ : -+ : [p4] "r" (p4), [p3] "r" (p3), [s2] "r" (s2), -+ [p2] "r" (p2), [p1] "r" (p1) -+ ); -+ -+ __asm__ __volatile__ ( -+ "srl %[p4], %[p4], 8 \n\t" -+ "srl %[p3], %[p3], 8 \n\t" -+ "srl %[p2], %[p2], 8 \n\t" -+ "srl %[p1], %[p1], 8 \n\t" -+ : [p4] "+r" (p4), [p3] "+r" (p3), [p2] "+r" (p2), [p1] "+r" (p1) -+ : -+ ); -+ -+ __asm__ __volatile__ ( -+ "sb %[p4], 1(%[s1]) \n\t" -+ "sb %[p3], 0(%[s1]) \n\t" -+ "sb %[p2], -1(%[s1]) \n\t" -+ "sb %[p1], -2(%[s1]) \n\t" -+ : -+ : [p4] "r" (p4), [p3] "r" (p3), [s1] "r" (s1), -+ [p2] "r" (p2), [p1] "r" (p1) -+ ); -+ } -+ } -+ -+ s1 = s; -+ s2 = s + p; -+ s3 = s2 + p; -+ s4 = s3 + p; -+ s = s4 + p; -+ -+ /* load quad-byte vectors -+ * memory is 4 byte aligned -+ */ -+ p2 = *((uint32_t *)(s1 - 4)); -+ p6 = *((uint32_t *)(s1)); -+ p1 = *((uint32_t *)(s2 - 4)); -+ p5 = *((uint32_t *)(s2)); -+ p0 = *((uint32_t *)(s3 - 4)); -+ p4 = *((uint32_t *)(s3)); -+ pm1 = *((uint32_t *)(s4 - 4)); -+ p3 = *((uint32_t *)(s4)); -+ -+ /* transpose pm1, p0, p1, p2 */ -+ __asm__ __volatile__ ( -+ "precrq.qb.ph %[prim1], %[p2], %[p1] \n\t" -+ "precr.qb.ph %[prim2], %[p2], %[p1] \n\t" -+ "precrq.qb.ph %[prim3], %[p0], %[pm1] \n\t" -+ "precr.qb.ph %[prim4], %[p0], %[pm1] \n\t" -+ -+ "precrq.qb.ph %[p1], %[prim1], %[prim2] \n\t" -+ "precr.qb.ph %[pm1], %[prim1], %[prim2] \n\t" -+ "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" -+ "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" -+ -+ "precrq.ph.w %[p2], %[p1], %[sec3] \n\t" -+ "precrq.ph.w %[p0], %[pm1], %[sec4] \n\t" -+ "append %[p1], %[sec3], 16 \n\t" -+ "append %[pm1], %[sec4], 16 \n\t" -+ -+ : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2), -+ [prim3] "=&r" (prim3), [prim4] "=&r" (prim4), -+ [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0), [pm1] "+r" (pm1), -+ [sec3] "=&r" (sec3), [sec4] "=&r" (sec4) -+ : -+ ); -+ -+ /* transpose p3, p4, p5, p6 */ -+ __asm__ __volatile__ ( -+ "precrq.qb.ph %[prim1], %[p6], %[p5] \n\t" -+ "precr.qb.ph %[prim2], %[p6], %[p5] \n\t" -+ "precrq.qb.ph %[prim3], %[p4], %[p3] \n\t" -+ "precr.qb.ph %[prim4], %[p4], %[p3] \n\t" -+ -+ "precrq.qb.ph %[p5], %[prim1], %[prim2] \n\t" -+ "precr.qb.ph %[p3], %[prim1], %[prim2] \n\t" -+ "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" -+ "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" -+ -+ "precrq.ph.w %[p6], %[p5], %[sec3] \n\t" -+ "precrq.ph.w %[p4], %[p3], %[sec4] \n\t" -+ "append %[p5], %[sec3], 16 \n\t" -+ "append %[p3], %[sec4], 16 \n\t" -+ -+ : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2), -+ [prim3] "=&r" (prim3), [prim4] "=&r" (prim4), -+ [p6] "+r" (p6), [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3), -+ [sec3] "=&r" (sec3), [sec4] "=&r" (sec4) -+ : -+ ); -+ -+ /* if (p1 - p4 == 0) and (p2 - p3 == 0) -+ * mask will be zero and filtering is not needed -+ */ -+ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) -+ { -+ -+ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, -+ thresh, &hev, &mask); -+ -+ /* if mask == 0 do filtering is not needed */ -+ if (mask) -+ { -+ /* filtering */ -+ vp8_filter_mips(mask, hev, &p1, &p2, &p3, &p4); -+ -+ /* unpack processed 4x4 neighborhood -+ * don't use transpose on output data -+ * because memory isn't aligned -+ */ -+ __asm__ __volatile__ ( -+ "sb %[p4], 1(%[s4]) \n\t" -+ "sb %[p3], 0(%[s4]) \n\t" -+ "sb %[p2], -1(%[s4]) \n\t" -+ "sb %[p1], -2(%[s4]) 
\n\t" -+ : -+ : [p4] "r" (p4), [p3] "r" (p3), [s4] "r" (s4), -+ [p2] "r" (p2), [p1] "r" (p1) -+ ); -+ -+ __asm__ __volatile__ ( -+ "srl %[p4], %[p4], 8 \n\t" -+ "srl %[p3], %[p3], 8 \n\t" -+ "srl %[p2], %[p2], 8 \n\t" -+ "srl %[p1], %[p1], 8 \n\t" -+ : [p4] "+r" (p4), [p3] "+r" (p3), [p2] "+r" (p2), [p1] "+r" (p1) -+ : -+ ); -+ -+ __asm__ __volatile__ ( -+ "sb %[p4], 1(%[s3]) \n\t" -+ "sb %[p3], 0(%[s3]) \n\t" -+ "sb %[p2], -1(%[s3]) \n\t" -+ "sb %[p1], -2(%[s3]) \n\t" -+ : [p1] "+r" (p1) -+ : [p4] "r" (p4), [p3] "r" (p3), [s3] "r" (s3), [p2] "r" (p2) -+ ); -+ -+ __asm__ __volatile__ ( -+ "srl %[p4], %[p4], 8 \n\t" -+ "srl %[p3], %[p3], 8 \n\t" -+ "srl %[p2], %[p2], 8 \n\t" -+ "srl %[p1], %[p1], 8 \n\t" -+ : [p4] "+r" (p4), [p3] "+r" (p3), [p2] "+r" (p2), [p1] "+r" (p1) -+ : -+ ); -+ -+ __asm__ __volatile__ ( -+ "sb %[p4], 1(%[s2]) \n\t" -+ "sb %[p3], 0(%[s2]) \n\t" -+ "sb %[p2], -1(%[s2]) \n\t" -+ "sb %[p1], -2(%[s2]) \n\t" -+ : -+ : [p4] "r" (p4), [p3] "r" (p3), [s2] "r" (s2), -+ [p2] "r" (p2), [p1] "r" (p1) -+ ); -+ -+ __asm__ __volatile__ ( -+ "srl %[p4], %[p4], 8 \n\t" -+ "srl %[p3], %[p3], 8 \n\t" -+ "srl %[p2], %[p2], 8 \n\t" -+ "srl %[p1], %[p1], 8 \n\t" -+ : [p4] "+r" (p4), [p3] "+r" (p3), [p2] "+r" (p2), [p1] "+r" (p1) -+ : -+ ); -+ -+ __asm__ __volatile__ ( -+ "sb %[p4], 1(%[s1]) \n\t" -+ "sb %[p3], 0(%[s1]) \n\t" -+ "sb %[p2], -1(%[s1]) \n\t" -+ "sb %[p1], -2(%[s1]) \n\t" -+ : -+ : [p4] "r" (p4), [p3] "r" (p3), [s1] "r" (s1), -+ [p2] "r" (p2), [p1] "r" (p1) -+ ); -+ } -+ } -+ -+ i += 8; -+ } -+ -+ while (i < count); -+} -+ -+void vp8_loop_filter_uvvertical_edge_mips -+( -+ unsigned char *s, -+ int p, -+ unsigned int flimit, -+ unsigned int limit, -+ unsigned int thresh, -+ int count -+) -+{ -+ uint32_t mask, hev; -+ uint32_t pm1, p0, p1, p2, p3, p4, p5, p6; -+ unsigned char *s1, *s2, *s3, *s4; -+ uint32_t prim1, prim2, sec3, sec4, prim3, prim4; -+ -+ /* loop filter designed to work using chars so that we can make maximum use -+ * of 8 bit simd instructions. 
-+ */ -+ -+ /* apply filter on 4 pixesl at the same time */ -+ -+ s1 = s; -+ s2 = s + p; -+ s3 = s2 + p; -+ s4 = s3 + p; -+ -+ /* load quad-byte vectors -+ * memory is 4 byte aligned -+ */ -+ p2 = *((uint32_t *)(s1 - 4)); -+ p6 = *((uint32_t *)(s1)); -+ p1 = *((uint32_t *)(s2 - 4)); -+ p5 = *((uint32_t *)(s2)); -+ p0 = *((uint32_t *)(s3 - 4)); -+ p4 = *((uint32_t *)(s3)); -+ pm1 = *((uint32_t *)(s4 - 4)); -+ p3 = *((uint32_t *)(s4)); -+ -+ /* transpose pm1, p0, p1, p2 */ -+ __asm__ __volatile__ ( -+ "precrq.qb.ph %[prim1], %[p2], %[p1] \n\t" -+ "precr.qb.ph %[prim2], %[p2], %[p1] \n\t" -+ "precrq.qb.ph %[prim3], %[p0], %[pm1] \n\t" -+ "precr.qb.ph %[prim4], %[p0], %[pm1] \n\t" -+ -+ "precrq.qb.ph %[p1], %[prim1], %[prim2] \n\t" -+ "precr.qb.ph %[pm1], %[prim1], %[prim2] \n\t" -+ "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" -+ "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" -+ -+ "precrq.ph.w %[p2], %[p1], %[sec3] \n\t" -+ "precrq.ph.w %[p0], %[pm1], %[sec4] \n\t" -+ "append %[p1], %[sec3], 16 \n\t" -+ "append %[pm1], %[sec4], 16 \n\t" -+ -+ : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2), -+ [prim3] "=&r" (prim3), [prim4] "=&r" (prim4), -+ [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0), [pm1] "+r" (pm1), -+ [sec3] "=&r" (sec3), [sec4] "=&r" (sec4) -+ : -+ ); -+ -+ /* transpose p3, p4, p5, p6 */ -+ __asm__ __volatile__ ( -+ "precrq.qb.ph %[prim1], %[p6], %[p5] \n\t" -+ "precr.qb.ph %[prim2], %[p6], %[p5] \n\t" -+ "precrq.qb.ph %[prim3], %[p4], %[p3] \n\t" -+ "precr.qb.ph %[prim4], %[p4], %[p3] \n\t" -+ -+ "precrq.qb.ph %[p5], %[prim1], %[prim2] \n\t" -+ "precr.qb.ph %[p3], %[prim1], %[prim2] \n\t" -+ "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" -+ "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" -+ -+ "precrq.ph.w %[p6], %[p5], %[sec3] \n\t" -+ "precrq.ph.w %[p4], %[p3], %[sec4] \n\t" -+ "append %[p5], %[sec3], 16 \n\t" -+ "append %[p3], %[sec4], 16 \n\t" -+ -+ : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2), -+ [prim3] "=&r" (prim3), [prim4] "=&r" (prim4), -+ [p6] "+r" (p6), [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3), -+ [sec3] "=&r" (sec3), [sec4] "=&r" (sec4) -+ : -+ ); -+ -+ /* if (p1 - p4 == 0) and (p2 - p3 == 0) -+ * mask will be zero and filtering is not needed -+ */ -+ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) -+ { -+ -+ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, -+ thresh, &hev, &mask); -+ -+ /* if mask == 0 do filtering is not needed */ -+ if (mask) -+ { -+ /* filtering */ -+ vp8_filter_mips(mask, hev, &p1, &p2, &p3, &p4); -+ -+ /* unpack processed 4x4 neighborhood -+ * don't use transpose on output data -+ * because memory isn't aligned -+ */ -+ __asm__ __volatile__ ( -+ "sb %[p4], 1(%[s4]) \n\t" -+ "sb %[p3], 0(%[s4]) \n\t" -+ "sb %[p2], -1(%[s4]) \n\t" -+ "sb %[p1], -2(%[s4]) \n\t" -+ : -+ : [p4] "r" (p4), [p3] "r" (p3), [s4] "r" (s4), -+ [p2] "r" (p2), [p1] "r" (p1) -+ ); -+ -+ __asm__ __volatile__ ( -+ "srl %[p4], %[p4], 8 \n\t" -+ "srl %[p3], %[p3], 8 \n\t" -+ "srl %[p2], %[p2], 8 \n\t" -+ "srl %[p1], %[p1], 8 \n\t" -+ : [p4] "+r" (p4), [p3] "+r" (p3), [p2] "+r" (p2), [p1] "+r" (p1) -+ : -+ ); -+ -+ __asm__ __volatile__ ( -+ "sb %[p4], 1(%[s3]) \n\t" -+ "sb %[p3], 0(%[s3]) \n\t" -+ "sb %[p2], -1(%[s3]) \n\t" -+ "sb %[p1], -2(%[s3]) \n\t" -+ : [p1] "+r" (p1) -+ : [p4] "r" (p4), [p3] "r" (p3), [s3] "r" (s3), [p2] "r" (p2) -+ ); -+ -+ __asm__ __volatile__ ( -+ "srl %[p4], %[p4], 8 \n\t" -+ "srl %[p3], %[p3], 8 \n\t" -+ "srl %[p2], %[p2], 8 \n\t" -+ "srl %[p1], %[p1], 8 \n\t" -+ : [p4] "+r" (p4), [p3] "+r" (p3), [p2] "+r" (p2), [p1] "+r" 
(p1) -+ : -+ ); -+ -+ __asm__ __volatile__ ( -+ "sb %[p4], 1(%[s2]) \n\t" -+ "sb %[p3], 0(%[s2]) \n\t" -+ "sb %[p2], -1(%[s2]) \n\t" -+ "sb %[p1], -2(%[s2]) \n\t" -+ : -+ : [p4] "r" (p4), [p3] "r" (p3), [s2] "r" (s2), -+ [p2] "r" (p2), [p1] "r" (p1) -+ ); -+ -+ __asm__ __volatile__ ( -+ "srl %[p4], %[p4], 8 \n\t" -+ "srl %[p3], %[p3], 8 \n\t" -+ "srl %[p2], %[p2], 8 \n\t" -+ "srl %[p1], %[p1], 8 \n\t" -+ : [p4] "+r" (p4), [p3] "+r" (p3), [p2] "+r" (p2), [p1] "+r" (p1) -+ : -+ ); -+ -+ __asm__ __volatile__ ( -+ "sb %[p4], 1(%[s1]) \n\t" -+ "sb %[p3], 0(%[s1]) \n\t" -+ "sb %[p2], -1(%[s1]) \n\t" -+ "sb %[p1], -2(%[s1]) \n\t" -+ : -+ : [p4] "r" (p4), [p3] "r" (p3), [s1] "r" (s1), [p2] "r" (p2), [p1] "r" (p1) -+ ); -+ } -+ } -+ -+ s1 = s4 + p; -+ s2 = s1 + p; -+ s3 = s2 + p; -+ s4 = s3 + p; -+ -+ /* load quad-byte vectors -+ * memory is 4 byte aligned -+ */ -+ p2 = *((uint32_t *)(s1 - 4)); -+ p6 = *((uint32_t *)(s1)); -+ p1 = *((uint32_t *)(s2 - 4)); -+ p5 = *((uint32_t *)(s2)); -+ p0 = *((uint32_t *)(s3 - 4)); -+ p4 = *((uint32_t *)(s3)); -+ pm1 = *((uint32_t *)(s4 - 4)); -+ p3 = *((uint32_t *)(s4)); -+ -+ /* transpose pm1, p0, p1, p2 */ -+ __asm__ __volatile__ ( -+ "precrq.qb.ph %[prim1], %[p2], %[p1] \n\t" -+ "precr.qb.ph %[prim2], %[p2], %[p1] \n\t" -+ "precrq.qb.ph %[prim3], %[p0], %[pm1] \n\t" -+ "precr.qb.ph %[prim4], %[p0], %[pm1] \n\t" -+ -+ "precrq.qb.ph %[p1], %[prim1], %[prim2] \n\t" -+ "precr.qb.ph %[pm1], %[prim1], %[prim2] \n\t" -+ "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" -+ "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" -+ -+ "precrq.ph.w %[p2], %[p1], %[sec3] \n\t" -+ "precrq.ph.w %[p0], %[pm1], %[sec4] \n\t" -+ "append %[p1], %[sec3], 16 \n\t" -+ "append %[pm1], %[sec4], 16 \n\t" -+ -+ : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2), -+ [prim3] "=&r" (prim3), [prim4] "=&r" (prim4), -+ [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0), [pm1] "+r" (pm1), -+ [sec3] "=&r" (sec3), [sec4] "=&r" (sec4) -+ : -+ ); -+ -+ /* transpose p3, p4, p5, p6 */ -+ __asm__ __volatile__ ( -+ "precrq.qb.ph %[prim1], %[p6], %[p5] \n\t" -+ "precr.qb.ph %[prim2], %[p6], %[p5] \n\t" -+ "precrq.qb.ph %[prim3], %[p4], %[p3] \n\t" -+ "precr.qb.ph %[prim4], %[p4], %[p3] \n\t" -+ -+ "precrq.qb.ph %[p5], %[prim1], %[prim2] \n\t" -+ "precr.qb.ph %[p3], %[prim1], %[prim2] \n\t" -+ "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" -+ "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" -+ -+ "precrq.ph.w %[p6], %[p5], %[sec3] \n\t" -+ "precrq.ph.w %[p4], %[p3], %[sec4] \n\t" -+ "append %[p5], %[sec3], 16 \n\t" -+ "append %[p3], %[sec4], 16 \n\t" -+ -+ : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2), -+ [prim3] "=&r" (prim3), [prim4] "=&r" (prim4), -+ [p6] "+r" (p6), [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3), -+ [sec3] "=&r" (sec3), [sec4] "=&r" (sec4) -+ : -+ ); -+ -+ /* if (p1 - p4 == 0) and (p2 - p3 == 0) -+ * mask will be zero and filtering is not needed -+ */ -+ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) -+ { -+ -+ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, -+ thresh, &hev, &mask); -+ -+ /* if mask == 0 do filtering is not needed */ -+ if (mask) -+ { -+ /* filtering */ -+ vp8_filter_mips(mask, hev, &p1, &p2, &p3, &p4); -+ -+ /* unpack processed 4x4 neighborhood -+ * don't use transpose on output data -+ * because memory isn't aligned -+ */ -+ __asm__ __volatile__ ( -+ "sb %[p4], 1(%[s4]) \n\t" -+ "sb %[p3], 0(%[s4]) \n\t" -+ "sb %[p2], -1(%[s4]) \n\t" -+ "sb %[p1], -2(%[s4]) \n\t" -+ : -+ : [p4] "r" (p4), [p3] "r" (p3), [s4] "r" (s4), -+ [p2] "r" (p2), [p1] "r" (p1) -+ ); -+ 
-+ __asm__ __volatile__ ( -+ "srl %[p4], %[p4], 8 \n\t" -+ "srl %[p3], %[p3], 8 \n\t" -+ "srl %[p2], %[p2], 8 \n\t" -+ "srl %[p1], %[p1], 8 \n\t" -+ : [p4] "+r" (p4), [p3] "+r" (p3), [p2] "+r" (p2), [p1] "+r" (p1) -+ : -+ ); -+ -+ __asm__ __volatile__ ( -+ "sb %[p4], 1(%[s3]) \n\t" -+ "sb %[p3], 0(%[s3]) \n\t" -+ "sb %[p2], -1(%[s3]) \n\t" -+ "sb %[p1], -2(%[s3]) \n\t" -+ : [p1] "+r" (p1) -+ : [p4] "r" (p4), [p3] "r" (p3), [s3] "r" (s3), [p2] "r" (p2) -+ ); -+ -+ __asm__ __volatile__ ( -+ "srl %[p4], %[p4], 8 \n\t" -+ "srl %[p3], %[p3], 8 \n\t" -+ "srl %[p2], %[p2], 8 \n\t" -+ "srl %[p1], %[p1], 8 \n\t" -+ : [p4] "+r" (p4), [p3] "+r" (p3), [p2] "+r" (p2), [p1] "+r" (p1) -+ : -+ ); -+ -+ __asm__ __volatile__ ( -+ "sb %[p4], 1(%[s2]) \n\t" -+ "sb %[p3], 0(%[s2]) \n\t" -+ "sb %[p2], -1(%[s2]) \n\t" -+ "sb %[p1], -2(%[s2]) \n\t" -+ : -+ : [p4] "r" (p4), [p3] "r" (p3), [s2] "r" (s2), -+ [p2] "r" (p2), [p1] "r" (p1) -+ ); -+ -+ __asm__ __volatile__ ( -+ "srl %[p4], %[p4], 8 \n\t" -+ "srl %[p3], %[p3], 8 \n\t" -+ "srl %[p2], %[p2], 8 \n\t" -+ "srl %[p1], %[p1], 8 \n\t" -+ : [p4] "+r" (p4), [p3] "+r" (p3), [p2] "+r" (p2), [p1] "+r" (p1) -+ : -+ ); -+ -+ __asm__ __volatile__ ( -+ "sb %[p4], 1(%[s1]) \n\t" -+ "sb %[p3], 0(%[s1]) \n\t" -+ "sb %[p2], -1(%[s1]) \n\t" -+ "sb %[p1], -2(%[s1]) \n\t" -+ : -+ : [p4] "r" (p4), [p3] "r" (p3), [s1] "r" (s1), -+ [p2] "r" (p2), [p1] "r" (p1) -+ ); -+ } -+ } -+} -+ -+/* inputs & outputs are quad-byte vectors */ -+static __inline void vp8_mbfilter_mips -+( -+ uint32_t mask, -+ uint32_t hev, -+ uint32_t *ps2, -+ uint32_t *ps1, -+ uint32_t *ps0, -+ uint32_t *qs0, -+ uint32_t *qs1, -+ uint32_t *qs2 -+) -+{ -+ int32_t vps2, vps1, vps0, vqs0, vqs1, vqs2; -+ int32_t vps2_l, vps1_l, vps0_l, vqs0_l, vqs1_l, vqs2_l; -+ int32_t vps2_r, vps1_r, vps0_r, vqs0_r, vqs1_r, vqs2_r; -+ uint32_t HWM, vp8_filter_l, vp8_filter_r, mask_l, mask_r, hev_l, hev_r, subr_r, subr_l; -+ uint32_t Filter2_l, Filter2_r, t1, t2, Filter1_l, Filter1_r, invhev_l, invhev_r; -+ uint32_t N128, R63; -+ uint32_t u1_l, u1_r, u2_l, u2_r, u3_l, u3_r; -+ -+ R63 = 0x003F003F; -+ HWM = 0xFF00FF00; -+ N128 = 0x80808080; -+ t1 = 0x03000300; -+ t2 = 0x04000400; -+ -+ vps0 = (*ps0) ^ N128; -+ vps1 = (*ps1) ^ N128; -+ vps2 = (*ps2) ^ N128; -+ vqs0 = (*qs0) ^ N128; -+ vqs1 = (*qs1) ^ N128; -+ vqs2 = (*qs2) ^ N128; -+ -+ /* use halfword pairs instead quad-bytes because of accuracy */ -+ vps0_l = vps0 & HWM; -+ vps0_r = vps0 << 8; -+ vps0_r = vps0_r & HWM; -+ -+ vqs0_l = vqs0 & HWM; -+ vqs0_r = vqs0 << 8; -+ vqs0_r = vqs0_r & HWM; -+ -+ vps1_l = vps1 & HWM; -+ vps1_r = vps1 << 8; -+ vps1_r = vps1_r & HWM; -+ -+ vqs1_l = vqs1 & HWM; -+ vqs1_r = vqs1 << 8; -+ vqs1_r = vqs1_r & HWM; -+ -+ vqs2_l = vqs2 & HWM; -+ vqs2_r = vqs2 << 8; -+ vqs2_r = vqs2_r & HWM; -+ -+ __asm__ __volatile__ ( -+ /* qs0 - ps0 */ -+ "subq_s.ph %[subr_l], %[vqs0_l], %[vps0_l] \n\t" -+ "subq_s.ph %[subr_r], %[vqs0_r], %[vps0_r] \n\t" -+ -+ /* vp8_filter = vp8_signed_char_clamp(ps1 - qs1); */ -+ "subq_s.ph %[vp8_filter_l], %[vps1_l], %[vqs1_l] \n\t" -+ "subq_s.ph %[vp8_filter_r], %[vps1_r], %[vqs1_r] \n\t" -+ -+ : [vp8_filter_l] "=&r" (vp8_filter_l), [vp8_filter_r] "=r" (vp8_filter_r), -+ [subr_l] "=&r" (subr_l), [subr_r] "=&r" (subr_r) -+ : [vps0_l] "r" (vps0_l), [vps0_r] "r" (vps0_r), [vps1_l] "r" (vps1_l), -+ [vps1_r] "r" (vps1_r), [vqs0_l] "r" (vqs0_l), [vqs0_r] "r" (vqs0_r), -+ [vqs1_l] "r" (vqs1_l), [vqs1_r] "r" (vqs1_r) -+ ); -+ -+ vps2_l = vps2 & HWM; -+ vps2_r = vps2 << 8; -+ vps2_r = vps2_r & HWM; -+ -+ /* add outer taps if we have high 
edge variance */ -+ __asm__ __volatile__ ( -+ /* vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * (qs0 - ps0)); */ -+ "addq_s.ph %[vp8_filter_l], %[vp8_filter_l], %[subr_l] \n\t" -+ "addq_s.ph %[vp8_filter_r], %[vp8_filter_r], %[subr_r] \n\t" -+ "and %[mask_l], %[HWM], %[mask] \n\t" -+ "sll %[mask_r], %[mask], 8 \n\t" -+ "and %[mask_r], %[HWM], %[mask_r] \n\t" -+ "addq_s.ph %[vp8_filter_l], %[vp8_filter_l], %[subr_l] \n\t" -+ "addq_s.ph %[vp8_filter_r], %[vp8_filter_r], %[subr_r] \n\t" -+ "and %[hev_l], %[HWM], %[hev] \n\t" -+ "sll %[hev_r], %[hev], 8 \n\t" -+ "and %[hev_r], %[HWM], %[hev_r] \n\t" -+ "addq_s.ph %[vp8_filter_l], %[vp8_filter_l], %[subr_l] \n\t" -+ "addq_s.ph %[vp8_filter_r], %[vp8_filter_r], %[subr_r] \n\t" -+ -+ /* vp8_filter &= mask; */ -+ "and %[vp8_filter_l], %[vp8_filter_l], %[mask_l] \n\t" -+ "and %[vp8_filter_r], %[vp8_filter_r], %[mask_r] \n\t" -+ -+ /* Filter2 = vp8_filter & hev; */ -+ "and %[Filter2_l], %[vp8_filter_l], %[hev_l] \n\t" -+ "and %[Filter2_r], %[vp8_filter_r], %[hev_r] \n\t" -+ -+ : [vp8_filter_l] "+r" (vp8_filter_l), [vp8_filter_r] "+r" (vp8_filter_r), -+ [hev_l] "=&r" (hev_l), [hev_r] "=&r" (hev_r), -+ [mask_l] "=&r" (mask_l), [mask_r] "=&r" (mask_r), -+ [Filter2_l] "=&r" (Filter2_l), [Filter2_r] "=&r" (Filter2_r) -+ : [subr_l] "r" (subr_l), [subr_r] "r" (subr_r), -+ [HWM] "r" (HWM), [hev] "r" (hev), [mask] "r" (mask) -+ ); -+ -+ /* save bottom 3 bits so that we round one side +4 and the other +3 */ -+ __asm__ __volatile__ ( -+ /* Filter1 = vp8_signed_char_clamp(Filter2 + 4) >>= 3; */ -+ "addq_s.ph %[Filter1_l], %[Filter2_l], %[t2] \n\t" -+ "xor %[invhev_l], %[hev_l], %[HWM] \n\t" -+ "addq_s.ph %[Filter1_r], %[Filter2_r], %[t2] \n\t" -+ -+ /* Filter2 = vp8_signed_char_clamp(Filter2 + 3) >>= 3; */ -+ "addq_s.ph %[Filter2_l], %[Filter2_l], %[t1] \n\t" -+ "addq_s.ph %[Filter2_r], %[Filter2_r], %[t1] \n\t" -+ -+ "shra.ph %[Filter1_l], %[Filter1_l], 3 \n\t" -+ "shra.ph %[Filter1_r], %[Filter1_r], 3 \n\t" -+ -+ "shra.ph %[Filter2_l], %[Filter2_l], 3 \n\t" -+ "shra.ph %[Filter2_r], %[Filter2_r], 3 \n\t" -+ "and %[Filter1_l], %[Filter1_l], %[HWM] \n\t" -+ "and %[Filter1_r], %[Filter1_r], %[HWM] \n\t" -+ "xor %[invhev_r], %[hev_r], %[HWM] \n\t" -+ -+ /* qs0 = vp8_signed_char_clamp(qs0 - Filter1); */ -+ "subq_s.ph %[vqs0_l], %[vqs0_l], %[Filter1_l] \n\t" -+ "subq_s.ph %[vqs0_r], %[vqs0_r], %[Filter1_r] \n\t" -+ -+ /* ps0 = vp8_signed_char_clamp(ps0 + Filter2); */ -+ "addq_s.ph %[vps0_l], %[vps0_l], %[Filter2_l] \n\t" -+ "addq_s.ph %[vps0_r], %[vps0_r], %[Filter2_r] \n\t" -+ -+ : [invhev_l] "=&r" (invhev_l), [invhev_r] "=&r" (invhev_r), -+ [Filter1_l] "=&r" (Filter1_l), [Filter1_r] "=&r" (Filter1_r), -+ [Filter2_l] "+r" (Filter2_l), [Filter2_r] "+r" (Filter2_r), -+ [vps0_l] "+r" (vps0_l), [vps0_r] "+r" (vps0_r), -+ [vqs0_l] "+r" (vqs0_l), [vqs0_r] "+r" (vqs0_r) -+ : [t1] "r" (t1), [t2] "r" (t2), [HWM] "r" (HWM), -+ [hev_l] "r" (hev_l), [hev_r] "r" (hev_r) -+ ); -+ -+ /* only apply wider filter if not high edge variance */ -+ __asm__ __volatile__ ( -+ /* vp8_filter &= ~hev; */ -+ "and %[Filter2_l], %[vp8_filter_l], %[invhev_l] \n\t" -+ "and %[Filter2_r], %[vp8_filter_r], %[invhev_r] \n\t" -+ -+ "shra.ph %[Filter2_l], %[Filter2_l], 8 \n\t" -+ "shra.ph %[Filter2_r], %[Filter2_r], 8 \n\t" -+ -+ : [Filter2_l] "=&r" (Filter2_l), [Filter2_r] "=&r" (Filter2_r) -+ : [vp8_filter_l] "r" (vp8_filter_l), [vp8_filter_r] "r" (vp8_filter_r), -+ [invhev_l] "r" (invhev_l), [invhev_r] "r" (invhev_r) -+ ); -+ -+ /* roughly 3/7th difference across boundary */ -+ __asm__ 
__volatile__ ( -+ "shll.ph %[u3_l], %[Filter2_l], 3 \n\t" -+ "shll.ph %[u3_r], %[Filter2_r], 3 \n\t" -+ -+ "addq.ph %[u3_l], %[u3_l], %[Filter2_l] \n\t" -+ "addq.ph %[u3_r], %[u3_r], %[Filter2_r] \n\t" -+ -+ "shll.ph %[u2_l], %[u3_l], 1 \n\t" -+ "shll.ph %[u2_r], %[u3_r], 1 \n\t" -+ -+ "addq.ph %[u1_l], %[u3_l], %[u2_l] \n\t" -+ "addq.ph %[u1_r], %[u3_r], %[u2_r] \n\t" -+ -+ "addq.ph %[u2_l], %[u2_l], %[R63] \n\t" -+ "addq.ph %[u2_r], %[u2_r], %[R63] \n\t" -+ -+ "addq.ph %[u3_l], %[u3_l], %[R63] \n\t" -+ "addq.ph %[u3_r], %[u3_r], %[R63] \n\t" -+ -+ /* vp8_signed_char_clamp((63 + Filter2 * 27) >> 7) -+ * vp8_signed_char_clamp((63 + Filter2 * 18) >> 7) -+ */ -+ "addq.ph %[u1_l], %[u1_l], %[R63] \n\t" -+ "addq.ph %[u1_r], %[u1_r], %[R63] \n\t" -+ "shra.ph %[u1_l], %[u1_l], 7 \n\t" -+ "shra.ph %[u1_r], %[u1_r], 7 \n\t" -+ "shra.ph %[u2_l], %[u2_l], 7 \n\t" -+ "shra.ph %[u2_r], %[u2_r], 7 \n\t" -+ "shll.ph %[u1_l], %[u1_l], 8 \n\t" -+ "shll.ph %[u1_r], %[u1_r], 8 \n\t" -+ "shll.ph %[u2_l], %[u2_l], 8 \n\t" -+ "shll.ph %[u2_r], %[u2_r], 8 \n\t" -+ -+ /* vqs0 = vp8_signed_char_clamp(qs0 - u); */ -+ "subq_s.ph %[vqs0_l], %[vqs0_l], %[u1_l] \n\t" -+ "subq_s.ph %[vqs0_r], %[vqs0_r], %[u1_r] \n\t" -+ -+ /* vps0 = vp8_signed_char_clamp(ps0 + u); */ -+ "addq_s.ph %[vps0_l], %[vps0_l], %[u1_l] \n\t" -+ "addq_s.ph %[vps0_r], %[vps0_r], %[u1_r] \n\t" -+ -+ : [u1_l] "=&r" (u1_l), [u1_r] "=&r" (u1_r), [u2_l] "=&r" (u2_l), -+ [u2_r] "=&r" (u2_r), [u3_l] "=&r" (u3_l), [u3_r] "=&r" (u3_r), -+ [vps0_l] "+r" (vps0_l), [vps0_r] "+r" (vps0_r), -+ [vqs0_l] "+r" (vqs0_l), [vqs0_r] "+r" (vqs0_r) -+ : [R63] "r" (R63), -+ [Filter2_l] "r" (Filter2_l), [Filter2_r] "r" (Filter2_r) -+ ); -+ -+ __asm__ __volatile__ ( -+ /* vqs1 = vp8_signed_char_clamp(qs1 - u); */ -+ "subq_s.ph %[vqs1_l], %[vqs1_l], %[u2_l] \n\t" -+ "addq_s.ph %[vps1_l], %[vps1_l], %[u2_l] \n\t" -+ -+ /* vps1 = vp8_signed_char_clamp(ps1 + u); */ -+ "addq_s.ph %[vps1_r], %[vps1_r], %[u2_r] \n\t" -+ "subq_s.ph %[vqs1_r], %[vqs1_r], %[u2_r] \n\t" -+ -+ : [vps1_l] "+r" (vps1_l), [vps1_r] "+r" (vps1_r), -+ [vqs1_l] "+r" (vqs1_l), [vqs1_r] "+r" (vqs1_r) -+ : [u2_l] "r" (u2_l), [u2_r] "r" (u2_r) -+ ); -+ -+ /* roughly 1/7th difference across boundary */ -+ __asm__ __volatile__ ( -+ /* u = vp8_signed_char_clamp((63 + Filter2 * 9) >> 7); */ -+ "shra.ph %[u3_l], %[u3_l], 7 \n\t" -+ "shra.ph %[u3_r], %[u3_r], 7 \n\t" -+ "shll.ph %[u3_l], %[u3_l], 8 \n\t" -+ "shll.ph %[u3_r], %[u3_r], 8 \n\t" -+ -+ /* vqs2 = vp8_signed_char_clamp(qs2 - u); */ -+ "subq_s.ph %[vqs2_l], %[vqs2_l], %[u3_l] \n\t" -+ "subq_s.ph %[vqs2_r], %[vqs2_r], %[u3_r] \n\t" -+ -+ /* vps2 = vp8_signed_char_clamp(ps2 + u); */ -+ "addq_s.ph %[vps2_l], %[vps2_l], %[u3_l] \n\t" -+ "addq_s.ph %[vps2_r], %[vps2_r], %[u3_r] \n\t" -+ -+ : [u3_l] "+r" (u3_l), [u3_r] "+r" (u3_r), [vps2_l] "+r" (vps2_l), -+ [vps2_r] "+r" (vps2_r), [vqs2_l] "+r" (vqs2_l), [vqs2_r] "+r" (vqs2_r) -+ : -+ ); -+ -+ /* Create quad-bytes from halfword pairs */ -+ __asm__ __volatile__ ( -+ "and %[vqs0_l], %[vqs0_l], %[HWM] \n\t" -+ "shrl.ph %[vqs0_r], %[vqs0_r], 8 \n\t" -+ -+ "and %[vps0_l], %[vps0_l], %[HWM] \n\t" -+ "shrl.ph %[vps0_r], %[vps0_r], 8 \n\t" -+ -+ "and %[vqs1_l], %[vqs1_l], %[HWM] \n\t" -+ "shrl.ph %[vqs1_r], %[vqs1_r], 8 \n\t" -+ -+ "and %[vps1_l], %[vps1_l], %[HWM] \n\t" -+ "shrl.ph %[vps1_r], %[vps1_r], 8 \n\t" -+ -+ "and %[vqs2_l], %[vqs2_l], %[HWM] \n\t" -+ "shrl.ph %[vqs2_r], %[vqs2_r], 8 \n\t" -+ -+ "and %[vps2_l], %[vps2_l], %[HWM] \n\t" -+ "shrl.ph %[vps2_r], %[vps2_r], 8 \n\t" -+ -+ "or %[vqs0_r], %[vqs0_l], 
%[vqs0_r] \n\t" -+ "or %[vps0_r], %[vps0_l], %[vps0_r] \n\t" -+ "or %[vqs1_r], %[vqs1_l], %[vqs1_r] \n\t" -+ "or %[vps1_r], %[vps1_l], %[vps1_r] \n\t" -+ "or %[vqs2_r], %[vqs2_l], %[vqs2_r] \n\t" -+ "or %[vps2_r], %[vps2_l], %[vps2_r] \n\t" -+ -+ : [vps1_l] "+r" (vps1_l), [vps1_r] "+r" (vps1_r), [vqs1_l] "+r" (vqs1_l), -+ [vqs1_r] "+r" (vqs1_r), [vps0_l] "+r" (vps0_l), [vps0_r] "+r" (vps0_r), -+ [vqs0_l] "+r" (vqs0_l), [vqs0_r] "+r" (vqs0_r), [vqs2_l] "+r" (vqs2_l), -+ [vqs2_r] "+r" (vqs2_r), [vps2_r] "+r" (vps2_r), [vps2_l] "+r" (vps2_l) -+ : [HWM] "r" (HWM) -+ ); -+ -+ *ps0 = vps0_r ^ N128; -+ *ps1 = vps1_r ^ N128; -+ *ps2 = vps2_r ^ N128; -+ *qs0 = vqs0_r ^ N128; -+ *qs1 = vqs1_r ^ N128; -+ *qs2 = vqs2_r ^ N128; -+} -+ -+void vp8_mbloop_filter_horizontal_edge_mips -+( -+ unsigned char *s, -+ int p, -+ unsigned int flimit, -+ unsigned int limit, -+ unsigned int thresh, -+ int count -+) -+{ -+ int i; -+ uint32_t mask, hev; -+ uint32_t pm1, p0, p1, p2, p3, p4, p5, p6; -+ unsigned char *sm1, *s0, *s1, *s2, *s3, *s4, *s5, *s6; -+ -+ mask = 0; -+ hev = 0; -+ i = 0; -+ p1 = 0; -+ p2 = 0; -+ p3 = 0; -+ p4 = 0; -+ -+ /* loop filter designed to work using chars so that we can make maximum use -+ * of 8 bit simd instructions. -+ */ -+ -+ sm1 = s - (p << 2); -+ s0 = s - p - p - p; -+ s1 = s - p - p; -+ s2 = s - p; -+ s3 = s; -+ s4 = s + p; -+ s5 = s + p + p; -+ s6 = s + p + p + p; -+ -+ /* prefetch data for load */ -+ prefetch_load_lf(s + p); -+ -+ /* apply filter on 4 pixesl at the same time */ -+ do -+ { -+ /* load quad-byte vectors -+ * memory is 4 byte aligned -+ */ -+ p1 = *((uint32_t *)(s1)); -+ p2 = *((uint32_t *)(s2)); -+ p3 = *((uint32_t *)(s3)); -+ p4 = *((uint32_t *)(s4)); -+ -+ /* if (p1 - p4 == 0) and (p2 - p3 == 0) -+ * mask will be zero and filtering is not needed -+ */ -+ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) -+ { -+ -+ pm1 = *((uint32_t *)(sm1)); -+ p0 = *((uint32_t *)(s0)); -+ p5 = *((uint32_t *)(s5)); -+ p6 = *((uint32_t *)(s6)); -+ -+ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, -+ thresh, &hev, &mask); -+ -+ /* if mask == 0 do filtering is not needed */ -+ if (mask) -+ { -+ /* filtering */ -+ vp8_mbfilter_mips(mask, hev, &p0, &p1, &p2, &p3, &p4, &p5); -+ -+ /* unpack processed 4x4 neighborhood -+ * memory is 4 byte aligned -+ */ -+ *((uint32_t *)s0) = p0; -+ *((uint32_t *)s1) = p1; -+ *((uint32_t *)s2) = p2; -+ *((uint32_t *)s3) = p3; -+ *((uint32_t *)s4) = p4; -+ *((uint32_t *)s5) = p5; -+ } -+ } -+ -+ sm1 += 4; -+ s0 += 4; -+ s1 += 4; -+ s2 += 4; -+ s3 += 4; -+ s4 += 4; -+ s5 += 4; -+ s6 += 4; -+ -+ /* load quad-byte vectors -+ * memory is 4 byte aligned -+ */ -+ p1 = *((uint32_t *)(s1)); -+ p2 = *((uint32_t *)(s2)); -+ p3 = *((uint32_t *)(s3)); -+ p4 = *((uint32_t *)(s4)); -+ -+ /* if (p1 - p4 == 0) and (p2 - p3 == 0) -+ * mask will be zero and filtering is not needed -+ */ -+ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) -+ { -+ -+ pm1 = *((uint32_t *)(sm1)); -+ p0 = *((uint32_t *)(s0)); -+ p5 = *((uint32_t *)(s5)); -+ p6 = *((uint32_t *)(s6)); -+ -+ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, -+ thresh, &hev, &mask); -+ -+ /* if mask == 0 do filtering is not needed */ -+ if (mask) -+ { -+ /* filtering */ -+ vp8_mbfilter_mips(mask, hev, &p0, &p1, &p2, &p3, &p4, &p5); -+ -+ /* unpack processed 4x4 neighborhood -+ * memory is 4 byte aligned -+ */ -+ *((uint32_t *)s0) = p0; -+ *((uint32_t *)s1) = p1; -+ *((uint32_t *)s2) = p2; -+ *((uint32_t *)s3) = p3; -+ *((uint32_t *)s4) = p4; -+ *((uint32_t *)s5) = p5; -+ } -+ 
} -+ -+ sm1 += 4; -+ s0 += 4; -+ s1 += 4; -+ s2 += 4; -+ s3 += 4; -+ s4 += 4; -+ s5 += 4; -+ s6 += 4; -+ -+ i += 8; -+ } -+ -+ while (i < count); -+} -+ -+void vp8_mbloop_filter_uvhorizontal_edge_mips -+( -+ unsigned char *s, -+ int p, -+ unsigned int flimit, -+ unsigned int limit, -+ unsigned int thresh, -+ int count -+) -+{ -+ uint32_t mask, hev; -+ uint32_t pm1, p0, p1, p2, p3, p4, p5, p6; -+ unsigned char *sm1, *s0, *s1, *s2, *s3, *s4, *s5, *s6; -+ -+ mask = 0; -+ hev = 0; -+ p1 = 0; -+ p2 = 0; -+ p3 = 0; -+ p4 = 0; -+ -+ /* loop filter designed to work using chars so that we can make maximum use -+ * of 8 bit simd instructions. -+ */ -+ -+ sm1 = s - (p << 2); -+ s0 = s - p - p - p; -+ s1 = s - p - p; -+ s2 = s - p; -+ s3 = s; -+ s4 = s + p; -+ s5 = s + p + p; -+ s6 = s + p + p + p; -+ -+ /* load quad-byte vectors -+ * memory is 4 byte aligned -+ */ -+ p1 = *((uint32_t *)(s1)); -+ p2 = *((uint32_t *)(s2)); -+ p3 = *((uint32_t *)(s3)); -+ p4 = *((uint32_t *)(s4)); -+ -+ /* if (p1 - p4 == 0) and (p2 - p3 == 0) -+ * mask will be zero and filtering is not needed -+ */ -+ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) -+ { -+ -+ pm1 = *((uint32_t *)(sm1)); -+ p0 = *((uint32_t *)(s0)); -+ p5 = *((uint32_t *)(s5)); -+ p6 = *((uint32_t *)(s6)); -+ -+ /* if mask == 0 do filtering is not needed */ -+ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, -+ thresh, &hev, &mask); -+ -+ if (mask) -+ { -+ /* filtering */ -+ vp8_mbfilter_mips(mask, hev, &p0, &p1, &p2, &p3, &p4, &p5); -+ -+ /* unpack processed 4x4 neighborhood -+ * memory is 4 byte aligned -+ */ -+ *((uint32_t *)s0) = p0; -+ *((uint32_t *)s1) = p1; -+ *((uint32_t *)s2) = p2; -+ *((uint32_t *)s3) = p3; -+ *((uint32_t *)s4) = p4; -+ *((uint32_t *)s5) = p5; -+ } -+ } -+ -+ sm1 += 4; -+ s0 += 4; -+ s1 += 4; -+ s2 += 4; -+ s3 += 4; -+ s4 += 4; -+ s5 += 4; -+ s6 += 4; -+ -+ /* load quad-byte vectors -+ * memory is 4 byte aligned -+ */ -+ p1 = *((uint32_t *)(s1)); -+ p2 = *((uint32_t *)(s2)); -+ p3 = *((uint32_t *)(s3)); -+ p4 = *((uint32_t *)(s4)); -+ -+ /* if (p1 - p4 == 0) and (p2 - p3 == 0) -+ * mask will be zero and filtering is not needed -+ */ -+ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) -+ { -+ -+ pm1 = *((uint32_t *)(sm1)); -+ p0 = *((uint32_t *)(s0)); -+ p5 = *((uint32_t *)(s5)); -+ p6 = *((uint32_t *)(s6)); -+ -+ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, -+ thresh, &hev, &mask); -+ -+ /* if mask == 0 do filtering is not needed */ -+ if (mask) -+ { -+ /* filtering */ -+ vp8_mbfilter_mips(mask, hev, &p0, &p1, &p2, &p3, &p4, &p5); -+ -+ /* unpack processed 4x4 neighborhood -+ * memory is 4 byte aligned -+ */ -+ *((uint32_t *)s0) = p0; -+ *((uint32_t *)s1) = p1; -+ *((uint32_t *)s2) = p2; -+ *((uint32_t *)s3) = p3; -+ *((uint32_t *)s4) = p4; -+ *((uint32_t *)s5) = p5; -+ } -+ } -+} -+ -+ -+void vp8_mbloop_filter_vertical_edge_mips -+( -+ unsigned char *s, -+ int p, -+ unsigned int flimit, -+ unsigned int limit, -+ unsigned int thresh, -+ int count -+) -+{ -+ -+ int i; -+ uint32_t mask, hev; -+ uint32_t pm1, p0, p1, p2, p3, p4, p5, p6; -+ unsigned char *s1, *s2, *s3, *s4; -+ uint32_t prim1, prim2, sec3, sec4, prim3, prim4; -+ -+ mask = 0; -+ hev = 0; -+ i = 0; -+ pm1 = 0; -+ p0 = 0; -+ p1 = 0; -+ p2 = 0; -+ p3 = 0; -+ p4 = 0; -+ p5 = 0; -+ p6 = 0; -+ -+ /* loop filter designed to work using chars so that we can make maximum use -+ * of 8 bit simd instructions. 
-+ */ -+ -+ /* apply filter on 4 pixesl at the same time */ -+ do -+ { -+ s1 = s; -+ s2 = s + p; -+ s3 = s2 + p; -+ s4 = s3 + p; -+ s = s4 + p; -+ -+ /* load quad-byte vectors -+ * memory is 4 byte aligned -+ */ -+ p2 = *((uint32_t *)(s1 - 4)); -+ p6 = *((uint32_t *)(s1)); -+ p1 = *((uint32_t *)(s2 - 4)); -+ p5 = *((uint32_t *)(s2)); -+ p0 = *((uint32_t *)(s3 - 4)); -+ p4 = *((uint32_t *)(s3)); -+ pm1 = *((uint32_t *)(s4 - 4)); -+ p3 = *((uint32_t *)(s4)); -+ -+ /* transpose pm1, p0, p1, p2 */ -+ __asm__ __volatile__ ( -+ "precrq.qb.ph %[prim1], %[p2], %[p1] \n\t" -+ "precr.qb.ph %[prim2], %[p2], %[p1] \n\t" -+ "precrq.qb.ph %[prim3], %[p0], %[pm1] \n\t" -+ "precr.qb.ph %[prim4], %[p0], %[pm1] \n\t" -+ -+ "precrq.qb.ph %[p1], %[prim1], %[prim2] \n\t" -+ "precr.qb.ph %[pm1], %[prim1], %[prim2] \n\t" -+ "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" -+ "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" -+ -+ "precrq.ph.w %[p2], %[p1], %[sec3] \n\t" -+ "precrq.ph.w %[p0], %[pm1], %[sec4] \n\t" -+ "append %[p1], %[sec3], 16 \n\t" -+ "append %[pm1], %[sec4], 16 \n\t" -+ -+ : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2), -+ [prim3] "=&r" (prim3), [prim4] "=&r" (prim4), -+ [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0), [pm1] "+r" (pm1), -+ [sec3] "=&r" (sec3), [sec4] "=&r" (sec4) -+ : -+ ); -+ -+ /* transpose p3, p4, p5, p6 */ -+ __asm__ __volatile__ ( -+ "precrq.qb.ph %[prim1], %[p6], %[p5] \n\t" -+ "precr.qb.ph %[prim2], %[p6], %[p5] \n\t" -+ "precrq.qb.ph %[prim3], %[p4], %[p3] \n\t" -+ "precr.qb.ph %[prim4], %[p4], %[p3] \n\t" -+ -+ "precrq.qb.ph %[p5], %[prim1], %[prim2] \n\t" -+ "precr.qb.ph %[p3], %[prim1], %[prim2] \n\t" -+ "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" -+ "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" -+ -+ "precrq.ph.w %[p6], %[p5], %[sec3] \n\t" -+ "precrq.ph.w %[p4], %[p3], %[sec4] \n\t" -+ "append %[p5], %[sec3], 16 \n\t" -+ "append %[p3], %[sec4], 16 \n\t" -+ -+ : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2), -+ [prim3] "=&r" (prim3), [prim4] "=&r" (prim4), -+ [p6] "+r" (p6), [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3), -+ [sec3] "=&r" (sec3), [sec4] "=&r" (sec4) -+ : -+ ); -+ -+ /* if (p1 - p4 == 0) and (p2 - p3 == 0) -+ * mask will be zero and filtering is not needed -+ */ -+ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) -+ { -+ -+ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, -+ thresh, &hev, &mask); -+ -+ /* if mask == 0 do filtering is not needed */ -+ if (mask) -+ { -+ /* filtering */ -+ vp8_mbfilter_mips(mask, hev, &p0, &p1, &p2, &p3, &p4, &p5); -+ -+ /* don't use transpose on output data -+ * because memory isn't aligned -+ */ -+ __asm__ __volatile__ ( -+ "sb %[p5], 2(%[s4]) \n\t" -+ "sb %[p4], 1(%[s4]) \n\t" -+ "sb %[p3], 0(%[s4]) \n\t" -+ "sb %[p2], -1(%[s4]) \n\t" -+ "sb %[p1], -2(%[s4]) \n\t" -+ "sb %[p0], -3(%[s4]) \n\t" -+ : -+ : [p5] "r" (p5), [p4] "r" (p4), [p3] "r" (p3), [s4] "r" (s4), -+ [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0) -+ ); -+ -+ __asm__ __volatile__ ( -+ "srl %[p5], %[p5], 8 \n\t" -+ "srl %[p4], %[p4], 8 \n\t" -+ "srl %[p3], %[p3], 8 \n\t" -+ "srl %[p2], %[p2], 8 \n\t" -+ "srl %[p1], %[p1], 8 \n\t" -+ "srl %[p0], %[p0], 8 \n\t" -+ : [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3), -+ [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0) -+ : -+ ); -+ -+ __asm__ __volatile__ ( -+ "sb %[p5], 2(%[s3]) \n\t" -+ "sb %[p4], 1(%[s3]) \n\t" -+ "sb %[p3], 0(%[s3]) \n\t" -+ "sb %[p2], -1(%[s3]) \n\t" -+ "sb %[p1], -2(%[s3]) \n\t" -+ "sb %[p0], -3(%[s3]) \n\t" -+ : -+ : [p5] "r" (p5), [p4] "r" (p4), [p3] "r" (p3), [s3] "r" 
(s3), -+ [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0) -+ ); -+ -+ __asm__ __volatile__ ( -+ "srl %[p5], %[p5], 8 \n\t" -+ "srl %[p4], %[p4], 8 \n\t" -+ "srl %[p3], %[p3], 8 \n\t" -+ "srl %[p2], %[p2], 8 \n\t" -+ "srl %[p1], %[p1], 8 \n\t" -+ "srl %[p0], %[p0], 8 \n\t" -+ : [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3), -+ [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0) -+ : -+ ); -+ -+ __asm__ __volatile__ ( -+ "sb %[p5], 2(%[s2]) \n\t" -+ "sb %[p4], 1(%[s2]) \n\t" -+ "sb %[p3], 0(%[s2]) \n\t" -+ "sb %[p2], -1(%[s2]) \n\t" -+ "sb %[p1], -2(%[s2]) \n\t" -+ "sb %[p0], -3(%[s2]) \n\t" -+ : -+ : [p5] "r" (p5), [p4] "r" (p4), [p3] "r" (p3), [s2] "r" (s2), -+ [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0) -+ ); -+ -+ __asm__ __volatile__ ( -+ "srl %[p5], %[p5], 8 \n\t" -+ "srl %[p4], %[p4], 8 \n\t" -+ "srl %[p3], %[p3], 8 \n\t" -+ "srl %[p2], %[p2], 8 \n\t" -+ "srl %[p1], %[p1], 8 \n\t" -+ "srl %[p0], %[p0], 8 \n\t" -+ : [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3), -+ [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0) -+ : -+ ); -+ -+ __asm__ __volatile__ ( -+ "sb %[p5], 2(%[s1]) \n\t" -+ "sb %[p4], 1(%[s1]) \n\t" -+ "sb %[p3], 0(%[s1]) \n\t" -+ "sb %[p2], -1(%[s1]) \n\t" -+ "sb %[p1], -2(%[s1]) \n\t" -+ "sb %[p0], -3(%[s1]) \n\t" -+ : -+ : [p5] "r" (p5), [p4] "r" (p4), [p3] "r" (p3), [s1] "r" (s1), -+ [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0) -+ ); -+ } -+ } -+ -+ i += 4; -+ } -+ -+ while (i < count); -+} -+ -+void vp8_mbloop_filter_uvvertical_edge_mips -+( -+ unsigned char *s, -+ int p, -+ unsigned int flimit, -+ unsigned int limit, -+ unsigned int thresh, -+ int count -+) -+{ -+ uint32_t mask, hev; -+ uint32_t pm1, p0, p1, p2, p3, p4, p5, p6; -+ unsigned char *s1, *s2, *s3, *s4; -+ uint32_t prim1, prim2, sec3, sec4, prim3, prim4; -+ -+ mask = 0; -+ hev = 0; -+ pm1 = 0; -+ p0 = 0; -+ p1 = 0; -+ p2 = 0; -+ p3 = 0; -+ p4 = 0; -+ p5 = 0; -+ p6 = 0; -+ -+ /* loop filter designed to work using chars so that we can make maximum use -+ * of 8 bit simd instructions. 
-+ */ -+ -+ /* apply filter on 4 pixesl at the same time */ -+ -+ s1 = s; -+ s2 = s + p; -+ s3 = s2 + p; -+ s4 = s3 + p; -+ -+ /* prefetch data for load */ -+ prefetch_load_lf(s + 2 * p); -+ -+ /* load quad-byte vectors -+ * memory is 4 byte aligned -+ */ -+ p2 = *((uint32_t *)(s1 - 4)); -+ p6 = *((uint32_t *)(s1)); -+ p1 = *((uint32_t *)(s2 - 4)); -+ p5 = *((uint32_t *)(s2)); -+ p0 = *((uint32_t *)(s3 - 4)); -+ p4 = *((uint32_t *)(s3)); -+ pm1 = *((uint32_t *)(s4 - 4)); -+ p3 = *((uint32_t *)(s4)); -+ -+ /* transpose pm1, p0, p1, p2 */ -+ __asm__ __volatile__ ( -+ "precrq.qb.ph %[prim1], %[p2], %[p1] \n\t" -+ "precr.qb.ph %[prim2], %[p2], %[p1] \n\t" -+ "precrq.qb.ph %[prim3], %[p0], %[pm1] \n\t" -+ "precr.qb.ph %[prim4], %[p0], %[pm1] \n\t" -+ -+ "precrq.qb.ph %[p1], %[prim1], %[prim2] \n\t" -+ "precr.qb.ph %[pm1], %[prim1], %[prim2] \n\t" -+ "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" -+ "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" -+ -+ "precrq.ph.w %[p2], %[p1], %[sec3] \n\t" -+ "precrq.ph.w %[p0], %[pm1], %[sec4] \n\t" -+ "append %[p1], %[sec3], 16 \n\t" -+ "append %[pm1], %[sec4], 16 \n\t" -+ -+ : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2), -+ [prim3] "=&r" (prim3), [prim4] "=&r" (prim4), -+ [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0), [pm1] "+r" (pm1), -+ [sec3] "=&r" (sec3), [sec4] "=&r" (sec4) -+ : -+ ); -+ -+ /* transpose p3, p4, p5, p6 */ -+ __asm__ __volatile__ ( -+ "precrq.qb.ph %[prim1], %[p6], %[p5] \n\t" -+ "precr.qb.ph %[prim2], %[p6], %[p5] \n\t" -+ "precrq.qb.ph %[prim3], %[p4], %[p3] \n\t" -+ "precr.qb.ph %[prim4], %[p4], %[p3] \n\t" -+ -+ "precrq.qb.ph %[p5], %[prim1], %[prim2] \n\t" -+ "precr.qb.ph %[p3], %[prim1], %[prim2] \n\t" -+ "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" -+ "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" -+ -+ "precrq.ph.w %[p6], %[p5], %[sec3] \n\t" -+ "precrq.ph.w %[p4], %[p3], %[sec4] \n\t" -+ "append %[p5], %[sec3], 16 \n\t" -+ "append %[p3], %[sec4], 16 \n\t" -+ -+ : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2), -+ [prim3] "=&r" (prim3), [prim4] "=&r" (prim4), -+ [p6] "+r" (p6), [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3), -+ [sec3] "=&r" (sec3), [sec4] "=&r" (sec4) -+ : -+ ); -+ -+ /* if (p1 - p4 == 0) and (p2 - p3 == 0) -+ * mask will be zero and filtering is not needed -+ */ -+ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) -+ { -+ -+ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, -+ thresh, &hev, &mask); -+ -+ /* if mask == 0 do filtering is not needed */ -+ if (mask) -+ { -+ /* filtering */ -+ vp8_mbfilter_mips(mask, hev, &p0, &p1, &p2, &p3, &p4, &p5); -+ -+ /* don't use transpose on output data -+ * because memory isn't aligned -+ */ -+ __asm__ __volatile__ ( -+ "sb %[p5], 2(%[s4]) \n\t" -+ "sb %[p4], 1(%[s4]) \n\t" -+ "sb %[p3], 0(%[s4]) \n\t" -+ "sb %[p2], -1(%[s4]) \n\t" -+ "sb %[p1], -2(%[s4]) \n\t" -+ "sb %[p0], -3(%[s4]) \n\t" -+ : -+ : [p5] "r" (p5), [p4] "r" (p4), [p3] "r" (p3), [s4] "r" (s4), -+ [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0) -+ ); -+ -+ __asm__ __volatile__ ( -+ "srl %[p5], %[p5], 8 \n\t" -+ "srl %[p4], %[p4], 8 \n\t" -+ "srl %[p3], %[p3], 8 \n\t" -+ "srl %[p2], %[p2], 8 \n\t" -+ "srl %[p1], %[p1], 8 \n\t" -+ "srl %[p0], %[p0], 8 \n\t" -+ : [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3), -+ [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0) -+ : -+ ); -+ -+ __asm__ __volatile__ ( -+ "sb %[p5], 2(%[s3]) \n\t" -+ "sb %[p4], 1(%[s3]) \n\t" -+ "sb %[p3], 0(%[s3]) \n\t" -+ "sb %[p2], -1(%[s3]) \n\t" -+ "sb %[p1], -2(%[s3]) \n\t" -+ "sb %[p0], -3(%[s3]) \n\t" -+ : -+ : [p5] "r" 
(p5), [p4] "r" (p4), [p3] "r" (p3), [s3] "r" (s3), -+ [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0) -+ ); -+ -+ __asm__ __volatile__ ( -+ "srl %[p5], %[p5], 8 \n\t" -+ "srl %[p4], %[p4], 8 \n\t" -+ "srl %[p3], %[p3], 8 \n\t" -+ "srl %[p2], %[p2], 8 \n\t" -+ "srl %[p1], %[p1], 8 \n\t" -+ "srl %[p0], %[p0], 8 \n\t" -+ : [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3), -+ [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0) -+ : -+ ); -+ -+ __asm__ __volatile__ ( -+ "sb %[p5], 2(%[s2]) \n\t" -+ "sb %[p4], 1(%[s2]) \n\t" -+ "sb %[p3], 0(%[s2]) \n\t" -+ "sb %[p2], -1(%[s2]) \n\t" -+ "sb %[p1], -2(%[s2]) \n\t" -+ "sb %[p0], -3(%[s2]) \n\t" -+ : -+ : [p5] "r" (p5), [p4] "r" (p4), [p3] "r" (p3), [s2] "r" (s2), -+ [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0) -+ ); -+ -+ __asm__ __volatile__ ( -+ "srl %[p5], %[p5], 8 \n\t" -+ "srl %[p4], %[p4], 8 \n\t" -+ "srl %[p3], %[p3], 8 \n\t" -+ "srl %[p2], %[p2], 8 \n\t" -+ "srl %[p1], %[p1], 8 \n\t" -+ "srl %[p0], %[p0], 8 \n\t" -+ : [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3), -+ [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0) -+ : -+ ); -+ -+ __asm__ __volatile__ ( -+ "sb %[p5], 2(%[s1]) \n\t" -+ "sb %[p4], 1(%[s1]) \n\t" -+ "sb %[p3], 0(%[s1]) \n\t" -+ "sb %[p2], -1(%[s1]) \n\t" -+ "sb %[p1], -2(%[s1]) \n\t" -+ "sb %[p0], -3(%[s1]) \n\t" -+ : -+ : [p5] "r" (p5), [p4] "r" (p4), [p3] "r" (p3), [s1] "r" (s1), -+ [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0) -+ ); -+ } -+ } -+ -+ s1 = s4 + p; -+ s2 = s1 + p; -+ s3 = s2 + p; -+ s4 = s3 + p; -+ -+ /* load quad-byte vectors -+ * memory is 4 byte aligned -+ */ -+ p2 = *((uint32_t *)(s1 - 4)); -+ p6 = *((uint32_t *)(s1)); -+ p1 = *((uint32_t *)(s2 - 4)); -+ p5 = *((uint32_t *)(s2)); -+ p0 = *((uint32_t *)(s3 - 4)); -+ p4 = *((uint32_t *)(s3)); -+ pm1 = *((uint32_t *)(s4 - 4)); -+ p3 = *((uint32_t *)(s4)); -+ -+ /* transpose pm1, p0, p1, p2 */ -+ __asm__ __volatile__ ( -+ "precrq.qb.ph %[prim1], %[p2], %[p1] \n\t" -+ "precr.qb.ph %[prim2], %[p2], %[p1] \n\t" -+ "precrq.qb.ph %[prim3], %[p0], %[pm1] \n\t" -+ "precr.qb.ph %[prim4], %[p0], %[pm1] \n\t" -+ -+ "precrq.qb.ph %[p1], %[prim1], %[prim2] \n\t" -+ "precr.qb.ph %[pm1], %[prim1], %[prim2] \n\t" -+ "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" -+ "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" -+ -+ "precrq.ph.w %[p2], %[p1], %[sec3] \n\t" -+ "precrq.ph.w %[p0], %[pm1], %[sec4] \n\t" -+ "append %[p1], %[sec3], 16 \n\t" -+ "append %[pm1], %[sec4], 16 \n\t" -+ -+ : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2), -+ [prim3] "=&r" (prim3), [prim4] "=&r" (prim4), -+ [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0), [pm1] "+r" (pm1), -+ [sec3] "=&r" (sec3), [sec4] "=&r" (sec4) -+ : -+ ); -+ -+ /* transpose p3, p4, p5, p6 */ -+ __asm__ __volatile__ ( -+ "precrq.qb.ph %[prim1], %[p6], %[p5] \n\t" -+ "precr.qb.ph %[prim2], %[p6], %[p5] \n\t" -+ "precrq.qb.ph %[prim3], %[p4], %[p3] \n\t" -+ "precr.qb.ph %[prim4], %[p4], %[p3] \n\t" -+ -+ "precrq.qb.ph %[p5], %[prim1], %[prim2] \n\t" -+ "precr.qb.ph %[p3], %[prim1], %[prim2] \n\t" -+ "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" -+ "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" -+ -+ "precrq.ph.w %[p6], %[p5], %[sec3] \n\t" -+ "precrq.ph.w %[p4], %[p3], %[sec4] \n\t" -+ "append %[p5], %[sec3], 16 \n\t" -+ "append %[p3], %[sec4], 16 \n\t" -+ -+ : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2), -+ [prim3] "=&r" (prim3), [prim4] "=&r" (prim4), -+ [p6] "+r" (p6), [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3), -+ [sec3] "=&r" (sec3), [sec4] "=&r" (sec4) -+ : -+ ); -+ -+ /* if (p1 - p4 == 0) and (p2 - p3 == 0) -+ * mask will be zero and 
filtering is not needed -+ */ -+ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) -+ { -+ -+ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, thresh, &hev, &mask); -+ -+ /* if mask == 0 do filtering is not needed */ -+ if (mask) -+ { -+ /* filtering */ -+ vp8_mbfilter_mips(mask, hev, &p0, &p1, &p2, &p3, &p4, &p5); -+ -+ /* don't use transpose on output data -+ * because memory isn't aligned -+ */ -+ __asm__ __volatile__ ( -+ "sb %[p5], 2(%[s4]) \n\t" -+ "sb %[p4], 1(%[s4]) \n\t" -+ "sb %[p3], 0(%[s4]) \n\t" -+ "sb %[p2], -1(%[s4]) \n\t" -+ "sb %[p1], -2(%[s4]) \n\t" -+ "sb %[p0], -3(%[s4]) \n\t" -+ : -+ : [p5] "r" (p5), [p4] "r" (p4), [p3] "r" (p3), [s4] "r" (s4), -+ [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0) -+ ); -+ -+ __asm__ __volatile__ ( -+ "srl %[p5], %[p5], 8 \n\t" -+ "srl %[p4], %[p4], 8 \n\t" -+ "srl %[p3], %[p3], 8 \n\t" -+ "srl %[p2], %[p2], 8 \n\t" -+ "srl %[p1], %[p1], 8 \n\t" -+ "srl %[p0], %[p0], 8 \n\t" -+ : [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3), -+ [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0) -+ : -+ ); -+ -+ __asm__ __volatile__ ( -+ "sb %[p5], 2(%[s3]) \n\t" -+ "sb %[p4], 1(%[s3]) \n\t" -+ "sb %[p3], 0(%[s3]) \n\t" -+ "sb %[p2], -1(%[s3]) \n\t" -+ "sb %[p1], -2(%[s3]) \n\t" -+ "sb %[p0], -3(%[s3]) \n\t" -+ : -+ : [p5] "r" (p5), [p4] "r" (p4), [p3] "r" (p3), [s3] "r" (s3), -+ [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0) -+ ); -+ -+ __asm__ __volatile__ ( -+ "srl %[p5], %[p5], 8 \n\t" -+ "srl %[p4], %[p4], 8 \n\t" -+ "srl %[p3], %[p3], 8 \n\t" -+ "srl %[p2], %[p2], 8 \n\t" -+ "srl %[p1], %[p1], 8 \n\t" -+ "srl %[p0], %[p0], 8 \n\t" -+ : [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3), -+ [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0) -+ : -+ ); -+ -+ __asm__ __volatile__ ( -+ "sb %[p5], 2(%[s2]) \n\t" -+ "sb %[p4], 1(%[s2]) \n\t" -+ "sb %[p3], 0(%[s2]) \n\t" -+ "sb %[p2], -1(%[s2]) \n\t" -+ "sb %[p1], -2(%[s2]) \n\t" -+ "sb %[p0], -3(%[s2]) \n\t" -+ : -+ : [p5] "r" (p5), [p4] "r" (p4), [p3] "r" (p3), [s2] "r" (s2), -+ [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0) -+ ); -+ -+ __asm__ __volatile__ ( -+ "srl %[p5], %[p5], 8 \n\t" -+ "srl %[p4], %[p4], 8 \n\t" -+ "srl %[p3], %[p3], 8 \n\t" -+ "srl %[p2], %[p2], 8 \n\t" -+ "srl %[p1], %[p1], 8 \n\t" -+ "srl %[p0], %[p0], 8 \n\t" -+ : [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3), -+ [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0) -+ : -+ ); -+ -+ __asm__ __volatile__ ( -+ "sb %[p5], 2(%[s1]) \n\t" -+ "sb %[p4], 1(%[s1]) \n\t" -+ "sb %[p3], 0(%[s1]) \n\t" -+ "sb %[p2], -1(%[s1]) \n\t" -+ "sb %[p1], -2(%[s1]) \n\t" -+ "sb %[p0], -3(%[s1]) \n\t" -+ : -+ : [p5] "r" (p5), [p4] "r" (p4), [p3] "r" (p3), [s1] "r" (s1), -+ [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0) -+ ); -+ } -+ } -+} -+ -+/* Horizontal MB filtering */ -+void vp8_loop_filter_mbh_dspr2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, -+ int y_stride, int uv_stride, loop_filter_info *lfi) -+{ -+ unsigned int thresh_vec, flimit_vec, limit_vec; -+ unsigned char thresh, flimit, limit, flimit_temp; -+ -+ /* use direct value instead pointers */ -+ limit = *(lfi->lim); -+ flimit_temp = *(lfi->mblim); -+ thresh = *(lfi->hev_thr); -+ flimit = flimit_temp; -+ -+ /* create quad-byte */ -+ __asm__ __volatile__ ( -+ "replv.qb %[thresh_vec], %[thresh] \n\t" -+ "replv.qb %[flimit_vec], %[flimit] \n\t" -+ "replv.qb %[limit_vec], %[limit] \n\t" -+ : [thresh_vec] "=&r" (thresh_vec), [flimit_vec] "=&r" (flimit_vec), [limit_vec] "=r" (limit_vec) -+ : [thresh] "r" (thresh), [flimit] "r" (flimit), [limit] "r" (limit) -+ ); -+ -+ 
vp8_mbloop_filter_horizontal_edge_mips(y_ptr, y_stride, flimit_vec, limit_vec, thresh_vec, 16); -+ -+ if (u_ptr) -+ { -+ vp8_mbloop_filter_uvhorizontal_edge_mips(u_ptr, uv_stride, flimit_vec, limit_vec, thresh_vec, 0); -+ } -+ -+ if (v_ptr) -+ { -+ vp8_mbloop_filter_uvhorizontal_edge_mips(v_ptr, uv_stride, flimit_vec, limit_vec, thresh_vec, 0); -+ } -+} -+ -+ -+/* Vertical MB Filtering */ -+void vp8_loop_filter_mbv_dspr2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, -+ int y_stride, int uv_stride, loop_filter_info *lfi) -+{ -+ unsigned int thresh_vec, flimit_vec, limit_vec; -+ unsigned char thresh, flimit, limit, flimit_temp; -+ -+ /* use direct value instead pointers */ -+ limit = *(lfi->lim); -+ flimit_temp = *(lfi->mblim); -+ thresh = *(lfi->hev_thr); -+ flimit = flimit_temp; -+ -+ /* create quad-byte */ -+ __asm__ __volatile__ ( -+ "replv.qb %[thresh_vec], %[thresh] \n\t" -+ "replv.qb %[flimit_vec], %[flimit] \n\t" -+ "replv.qb %[limit_vec], %[limit] \n\t" -+ : [thresh_vec] "=&r" (thresh_vec), [flimit_vec] "=&r" (flimit_vec), [limit_vec] "=r" (limit_vec) -+ : [thresh] "r" (thresh), [flimit] "r" (flimit), [limit] "r" (limit) -+ ); -+ -+ vp8_mbloop_filter_vertical_edge_mips(y_ptr, y_stride, flimit_vec, limit_vec, thresh_vec, 16); -+ -+ if (u_ptr) -+ vp8_mbloop_filter_uvvertical_edge_mips(u_ptr, uv_stride, flimit_vec, limit_vec, thresh_vec, 0); -+ -+ if (v_ptr) -+ vp8_mbloop_filter_uvvertical_edge_mips(v_ptr, uv_stride, flimit_vec, limit_vec, thresh_vec, 0); -+} -+ -+ -+/* Horizontal B Filtering */ -+void vp8_loop_filter_bh_dspr2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, -+ int y_stride, int uv_stride, loop_filter_info *lfi) -+{ -+ unsigned int thresh_vec, flimit_vec, limit_vec; -+ unsigned char thresh, flimit, limit, flimit_temp; -+ -+ /* use direct value instead pointers */ -+ limit = *(lfi->lim); -+ flimit_temp = *(lfi->blim); -+ thresh = *(lfi->hev_thr); -+ flimit = flimit_temp; -+ -+ /* create quad-byte */ -+ __asm__ __volatile__ ( -+ "replv.qb %[thresh_vec], %[thresh] \n\t" -+ "replv.qb %[flimit_vec], %[flimit] \n\t" -+ "replv.qb %[limit_vec], %[limit] \n\t" -+ : [thresh_vec] "=&r" (thresh_vec), [flimit_vec] "=&r" (flimit_vec), [limit_vec] "=r" (limit_vec) -+ : [thresh] "r" (thresh), [flimit] "r" (flimit), [limit] "r" (limit) -+ ); -+ -+ vp8_loop_filter_horizontal_edge_mips(y_ptr + 4 * y_stride, y_stride, flimit_vec, limit_vec, thresh_vec, 16); -+ vp8_loop_filter_horizontal_edge_mips(y_ptr + 8 * y_stride, y_stride, flimit_vec, limit_vec, thresh_vec, 16); -+ vp8_loop_filter_horizontal_edge_mips(y_ptr + 12 * y_stride, y_stride, flimit_vec, limit_vec, thresh_vec, 16); -+ -+ if (u_ptr) -+ vp8_loop_filter_uvhorizontal_edge_mips(u_ptr + 4 * uv_stride, uv_stride, flimit_vec, limit_vec, thresh_vec, 0); -+ -+ if (v_ptr) -+ vp8_loop_filter_uvhorizontal_edge_mips(v_ptr + 4 * uv_stride, uv_stride, flimit_vec, limit_vec, thresh_vec, 0); -+} -+ -+ -+/* Vertical B Filtering */ -+void vp8_loop_filter_bv_dspr2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, -+ int y_stride, int uv_stride, loop_filter_info *lfi) -+{ -+ unsigned int thresh_vec, flimit_vec, limit_vec; -+ unsigned char thresh, flimit, limit, flimit_temp; -+ -+ /* use direct value instead pointers */ -+ limit = *(lfi->lim); -+ flimit_temp = *(lfi->blim); -+ thresh = *(lfi->hev_thr); -+ flimit = flimit_temp; -+ -+ /* create quad-byte */ -+ __asm__ __volatile__ ( -+ "replv.qb %[thresh_vec], %[thresh] \n\t" -+ "replv.qb %[flimit_vec], %[flimit] \n\t" -+ "replv.qb %[limit_vec], 
%[limit] \n\t" -+ : [thresh_vec] "=&r" (thresh_vec), [flimit_vec] "=&r" (flimit_vec), [limit_vec] "=r" (limit_vec) -+ : [thresh] "r" (thresh), [flimit] "r" (flimit), [limit] "r" (limit) -+ ); -+ -+ vp8_loop_filter_vertical_edge_mips(y_ptr + 4, y_stride, flimit_vec, limit_vec, thresh_vec, 16); -+ vp8_loop_filter_vertical_edge_mips(y_ptr + 8, y_stride, flimit_vec, limit_vec, thresh_vec, 16); -+ vp8_loop_filter_vertical_edge_mips(y_ptr + 12, y_stride, flimit_vec, limit_vec, thresh_vec, 16); -+ -+ if (u_ptr) -+ vp8_loop_filter_uvvertical_edge_mips(u_ptr + 4, uv_stride, flimit_vec, limit_vec, thresh_vec, 0); -+ -+ if (v_ptr) -+ vp8_loop_filter_uvvertical_edge_mips(v_ptr + 4, uv_stride, flimit_vec, limit_vec, thresh_vec, 0); -+} -+ -+#endif -diff --git a/vp8/common/mips/dspr2/reconinter_dspr2.c b/vp8/common/mips/dspr2/reconinter_dspr2.c -new file mode 100644 -index 0000000..a5239a3 ---- /dev/null -+++ b/vp8/common/mips/dspr2/reconinter_dspr2.c -@@ -0,0 +1,121 @@ -+/* -+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. -+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. -+ */ -+ -+ -+#include "vpx_config.h" -+#include "vpx_rtcd.h" -+#include "vpx/vpx_integer.h" -+ -+#if HAVE_DSPR2 -+inline void prefetch_load_int(unsigned char *src) -+{ -+ __asm__ __volatile__ ( -+ "pref 0, 0(%[src]) \n\t" -+ : -+ : [src] "r" (src) -+ ); -+} -+ -+ -+__inline void vp8_copy_mem16x16_dspr2( -+ unsigned char *RESTRICT src, -+ int src_stride, -+ unsigned char *RESTRICT dst, -+ int dst_stride) -+{ -+ int r; -+ unsigned int a0, a1, a2, a3; -+ -+ for (r = 16; r--;) -+ { -+ /* load src data in cache memory */ -+ prefetch_load_int(src + src_stride); -+ -+ /* use unaligned memory load and store */ -+ __asm__ __volatile__ ( -+ "ulw %[a0], 0(%[src]) \n\t" -+ "ulw %[a1], 4(%[src]) \n\t" -+ "ulw %[a2], 8(%[src]) \n\t" -+ "ulw %[a3], 12(%[src]) \n\t" -+ "sw %[a0], 0(%[dst]) \n\t" -+ "sw %[a1], 4(%[dst]) \n\t" -+ "sw %[a2], 8(%[dst]) \n\t" -+ "sw %[a3], 12(%[dst]) \n\t" -+ : [a0] "=&r" (a0), [a1] "=&r" (a1), -+ [a2] "=&r" (a2), [a3] "=&r" (a3) -+ : [src] "r" (src), [dst] "r" (dst) -+ ); -+ -+ src += src_stride; -+ dst += dst_stride; -+ } -+} -+ -+ -+__inline void vp8_copy_mem8x8_dspr2( -+ unsigned char *RESTRICT src, -+ int src_stride, -+ unsigned char *RESTRICT dst, -+ int dst_stride) -+{ -+ int r; -+ unsigned int a0, a1; -+ -+ /* load src data in cache memory */ -+ prefetch_load_int(src + src_stride); -+ -+ for (r = 8; r--;) -+ { -+ /* use unaligned memory load and store */ -+ __asm__ __volatile__ ( -+ "ulw %[a0], 0(%[src]) \n\t" -+ "ulw %[a1], 4(%[src]) \n\t" -+ "sw %[a0], 0(%[dst]) \n\t" -+ "sw %[a1], 4(%[dst]) \n\t" -+ : [a0] "=&r" (a0), [a1] "=&r" (a1) -+ : [src] "r" (src), [dst] "r" (dst) -+ ); -+ -+ src += src_stride; -+ dst += dst_stride; -+ } -+} -+ -+ -+__inline void vp8_copy_mem8x4_dspr2( -+ unsigned char *RESTRICT src, -+ int src_stride, -+ unsigned char *RESTRICT dst, -+ int dst_stride) -+{ -+ int r; -+ unsigned int a0, a1; -+ -+ /* load src data in cache memory */ -+ prefetch_load_int(src + src_stride); -+ -+ for (r = 4; r--;) -+ { -+ /* use unaligned memory load and store */ -+ __asm__ __volatile__ ( -+ "ulw %[a0], 0(%[src]) \n\t" -+ "ulw %[a1], 4(%[src]) \n\t" -+ "sw %[a0], 0(%[dst]) \n\t" -+ "sw %[a1], 4(%[dst]) 
\n\t" -+ : [a0] "=&r" (a0), [a1] "=&r" (a1) -+ : [src] "r" (src), [dst] "r" (dst) -+ ); -+ -+ src += src_stride; -+ dst += dst_stride; -+ } -+} -+ -+#endif -diff --git a/vp8/common/onyx.h b/vp8/common/onyx.h -index 2e282f6..766b4ea 100644 ---- a/vp8/common/onyx.h -+++ b/vp8/common/onyx.h -@@ -39,14 +39,6 @@ extern "C" - - typedef enum - { -- VP8_LAST_FLAG = 1, -- VP8_GOLD_FLAG = 2, -- VP8_ALT_FLAG = 4 -- } VP8_REFFRAME; -- -- -- typedef enum -- { - USAGE_STREAM_FROM_SERVER = 0x0, - USAGE_LOCAL_FILE_PLAYBACK = 0x1, - USAGE_CONSTRAINED_QUALITY = 0x2 -@@ -102,83 +94,101 @@ extern "C" - - typedef struct - { -- int Version; // 4 versions of bitstream defined 0 best quality/slowest decode, 3 lowest quality/fastest decode -- int Width; // width of data passed to the compressor -- int Height; // height of data passed to the compressor -+ /* 4 versions of bitstream defined: -+ * 0 best quality/slowest decode, 3 lowest quality/fastest decode -+ */ -+ int Version; -+ int Width; -+ int Height; - struct vpx_rational timebase; -- int target_bandwidth; // bandwidth to be used in kilobits per second -+ unsigned int target_bandwidth; /* kilobits per second */ -+ -+ /* parameter used for applying pre processing blur: recommendation 0 */ -+ int noise_sensitivity; - -- int noise_sensitivity; // parameter used for applying pre processing blur: recommendation 0 -- int Sharpness; // parameter used for sharpening output: recommendation 0: -+ /* parameter used for sharpening output: recommendation 0: */ -+ int Sharpness; - int cpu_used; - unsigned int rc_max_intra_bitrate_pct; - -- // mode -> -- //(0)=Realtime/Live Encoding. This mode is optimized for realtim encoding (for example, capturing -- // a television signal or feed from a live camera). ( speed setting controls how fast ) -- //(1)=Good Quality Fast Encoding. The encoder balances quality with the amount of time it takes to -- // encode the output. ( speed setting controls how fast ) -- //(2)=One Pass - Best Quality. The encoder places priority on the quality of the output over encoding -- // speed. The output is compressed at the highest possible quality. This option takes the longest -- // amount of time to encode. ( speed setting ignored ) -- //(3)=Two Pass - First Pass. The encoder generates a file of statistics for use in the second encoding -- // pass. ( speed setting controls how fast ) -- //(4)=Two Pass - Second Pass. The encoder uses the statistics that were generated in the first encoding -- // pass to create the compressed output. ( speed setting controls how fast ) -- //(5)=Two Pass - Second Pass Best. The encoder uses the statistics that were generated in the first -- // encoding pass to create the compressed output using the highest possible quality, and taking a -- // longer amount of time to encode.. ( speed setting ignored ) -- int Mode; // -- -- // Key Framing Operations -- int auto_key; // automatically detect cut scenes and set the keyframes -- int key_freq; // maximum distance to key frame. -- -- int allow_lag; // allow lagged compression (if 0 lagin frames is ignored) -- int lag_in_frames; // how many frames lag before we start encoding -- -- //---------------------------------------------------------------- -- // DATARATE CONTROL OPTIONS -- -- int end_usage; // vbr or cbr -- -- // buffer targeting aggressiveness -+ /* mode -> -+ *(0)=Realtime/Live Encoding. This mode is optimized for realtim -+ * encoding (for example, capturing a television signal or feed -+ * from a live camera). 
( speed setting controls how fast ) -+ *(1)=Good Quality Fast Encoding. The encoder balances quality with -+ * the amount of time it takes to encode the output. ( speed -+ * setting controls how fast ) -+ *(2)=One Pass - Best Quality. The encoder places priority on the -+ * quality of the output over encoding speed. The output is -+ * compressed at the highest possible quality. This option takes -+ * the longest amount of time to encode. ( speed setting ignored -+ * ) -+ *(3)=Two Pass - First Pass. The encoder generates a file of -+ * statistics for use in the second encoding pass. ( speed -+ * setting controls how fast ) -+ *(4)=Two Pass - Second Pass. The encoder uses the statistics that -+ * were generated in the first encoding pass to create the -+ * compressed output. ( speed setting controls how fast ) -+ *(5)=Two Pass - Second Pass Best. The encoder uses the statistics -+ * that were generated in the first encoding pass to create the -+ * compressed output using the highest possible quality, and -+ * taking a longer amount of time to encode.. ( speed setting -+ * ignored ) -+ */ -+ int Mode; -+ -+ /* Key Framing Operations */ -+ int auto_key; /* automatically detect cut scenes */ -+ int key_freq; /* maximum distance to key frame. */ -+ -+ /* lagged compression (if allow_lag == 0 lag_in_frames is ignored) */ -+ int allow_lag; -+ int lag_in_frames; /* how many frames lag before we start encoding */ -+ -+ /* -+ * DATARATE CONTROL OPTIONS -+ */ -+ -+ int end_usage; /* vbr or cbr */ -+ -+ /* buffer targeting aggressiveness */ - int under_shoot_pct; - int over_shoot_pct; - -- // buffering parameters -- int64_t starting_buffer_level; // in bytes -+ /* buffering parameters */ -+ int64_t starting_buffer_level; - int64_t optimal_buffer_level; - int64_t maximum_buffer_size; - -- int64_t starting_buffer_level_in_ms; // in milli-seconds -+ int64_t starting_buffer_level_in_ms; - int64_t optimal_buffer_level_in_ms; - int64_t maximum_buffer_size_in_ms; - -- // controlling quality -+ /* controlling quality */ - int fixed_q; - int worst_allowed_q; - int best_allowed_q; - int cq_level; - -- // allow internal resizing ( currently disabled in the build !!!!!) -+ /* allow internal resizing */ - int allow_spatial_resampling; - int resample_down_water_mark; - int resample_up_water_mark; - -- // allow internal frame rate alterations -+ /* allow internal frame rate alterations */ - int allow_df; - int drop_frames_water_mark; - -- // two pass datarate control -- int two_pass_vbrbias; // two pass datarate control tweaks -+ /* two pass datarate control */ -+ int two_pass_vbrbias; - int two_pass_vbrmin_section; - int two_pass_vbrmax_section; -- // END DATARATE CONTROL OPTIONS -- //---------------------------------------------------------------- - -+ /* -+ * END DATARATE CONTROL OPTIONS -+ */ - -- // these parameters aren't to be used in final build don't use!!! -+ /* these parameters aren't to be used in final build don't use!!! 
*/ - int play_alternate; - int alt_freq; - int alt_q; -@@ -186,26 +196,28 @@ extern "C" - int gold_q; - - -- int multi_threaded; // how many threads to run the encoder on -- int token_partitions; // how many token partitions to create for multi core decoding -- int encode_breakout; // early breakout encode threshold : for video conf recommend 800 -+ int multi_threaded; /* how many threads to run the encoder on */ -+ int token_partitions; /* how many token partitions to create */ -+ -+ /* early breakout threshold: for video conf recommend 800 */ -+ int encode_breakout; - -- unsigned int error_resilient_mode; // Bitfield defining the error -- // resiliency features to enable. Can provide -- // decodable frames after losses in previous -- // frames and decodable partitions after -- // losses in the same frame. -+ /* Bitfield defining the error resiliency features to enable. -+ * Can provide decodable frames after losses in previous -+ * frames and decodable partitions after losses in the same frame. -+ */ -+ unsigned int error_resilient_mode; - - int arnr_max_frames; -- int arnr_strength ; -- int arnr_type ; -+ int arnr_strength; -+ int arnr_type; - -- struct vpx_fixed_buf two_pass_stats_in; -+ struct vpx_fixed_buf two_pass_stats_in; - struct vpx_codec_pkt_list *output_pkt_list; - - vp8e_tuning tuning; - -- // Temporal scaling parameters -+ /* Temporal scaling parameters */ - unsigned int number_of_layers; - unsigned int target_bitrate[VPX_TS_MAX_PERIODICITY]; - unsigned int rate_decimator[VPX_TS_MAX_PERIODICITY]; -@@ -236,16 +248,14 @@ extern "C" - void vp8_init_config(struct VP8_COMP* onyx, VP8_CONFIG *oxcf); - void vp8_change_config(struct VP8_COMP* onyx, VP8_CONFIG *oxcf); - --// receive a frames worth of data caller can assume that a copy of this frame is made --// and not just a copy of the pointer.. 
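The error_resilient_mode bitfield documented in the configuration struct above is normally filled in from the public encoder configuration rather than set directly. Assuming the standard vpx_encoder.h interface (this sketch is illustrative and not part of the patch), the two capabilities named in that comment correspond to the VPX_ERROR_RESILIENT_DEFAULT and VPX_ERROR_RESILIENT_PARTITIONS flags:

#include "vpx/vpx_encoder.h"
#include "vpx/vp8cx.h"

/* Sketch: request error-resilient VP8 encoding through the public API.
 * VPX_ERROR_RESILIENT_DEFAULT    - frames remain decodable after losses
 *                                  in previous frames
 * VPX_ERROR_RESILIENT_PARTITIONS - partitions remain decodable after
 *                                  losses in the same frame */
static int init_resilient_encoder(vpx_codec_ctx_t *ctx, int width, int height)
{
    vpx_codec_enc_cfg_t cfg;

    if (vpx_codec_enc_config_default(vpx_codec_vp8_cx(), &cfg, 0))
        return -1;

    cfg.g_w = width;
    cfg.g_h = height;
    cfg.g_error_resilient = VPX_ERROR_RESILIENT_DEFAULT |
                            VPX_ERROR_RESILIENT_PARTITIONS;

    return vpx_codec_enc_init(ctx, vpx_codec_vp8_cx(), &cfg, 0) ? -1 : 0;
}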
- int vp8_receive_raw_frame(struct VP8_COMP* comp, unsigned int frame_flags, YV12_BUFFER_CONFIG *sd, int64_t time_stamp, int64_t end_time_stamp); - int vp8_get_compressed_data(struct VP8_COMP* comp, unsigned int *frame_flags, unsigned long *size, unsigned char *dest, unsigned char *dest_end, int64_t *time_stamp, int64_t *time_end, int flush); - int vp8_get_preview_raw_frame(struct VP8_COMP* comp, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t *flags); - - int vp8_use_as_reference(struct VP8_COMP* comp, int ref_frame_flags); - int vp8_update_reference(struct VP8_COMP* comp, int ref_frame_flags); -- int vp8_get_reference(struct VP8_COMP* comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd); -- int vp8_set_reference(struct VP8_COMP* comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd); -+ int vp8_get_reference(struct VP8_COMP* comp, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd); -+ int vp8_set_reference(struct VP8_COMP* comp, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd); - int vp8_update_entropy(struct VP8_COMP* comp, int update); - int vp8_set_roimap(struct VP8_COMP* comp, unsigned char *map, unsigned int rows, unsigned int cols, int delta_q[4], int delta_lf[4], unsigned int threshold[4]); - int vp8_set_active_map(struct VP8_COMP* comp, unsigned char *map, unsigned int rows, unsigned int cols); -diff --git a/vp8/common/onyxc_int.h b/vp8/common/onyxc_int.h -index c3215c0..5325bac 100644 ---- a/vp8/common/onyxc_int.h -+++ b/vp8/common/onyxc_int.h -@@ -42,7 +42,6 @@ typedef struct frame_contexts - vp8_prob sub_mv_ref_prob [VP8_SUBMVREFS-1]; - vp8_prob coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; - MV_CONTEXT mvc[2]; -- MV_CONTEXT pre_mvc[2]; /* not to caculate the mvcost for the frame if mvc doesn't change. */ - } FRAME_CONTEXT; - - typedef enum -@@ -59,12 +58,6 @@ typedef enum - RECON_CLAMP_NOTREQUIRED = 1 - } CLAMP_TYPE; - --typedef enum --{ -- SIXTAP = 0, -- BILINEAR = 1 --} INTERPOLATIONFILTERTYPE; -- - typedef struct VP8Common - - { -@@ -94,6 +87,7 @@ typedef struct VP8Common - YV12_BUFFER_CONFIG post_proc_buffer; - YV12_BUFFER_CONFIG post_proc_buffer_int; - int post_proc_buffer_int_used; -+ unsigned char *pp_limits_buffer; /* post-processing filter coefficients */ - #endif - - FRAME_TYPE last_frame_type; /* Save last frame's frame type for motion search. 
*/ -@@ -114,7 +108,6 @@ typedef struct VP8Common - int full_pixel; - - int base_qindex; -- int last_kf_gf_q; /* Q used on the last GF or KF */ - - int y1dc_delta_q; - int y2dc_delta_q; -@@ -130,11 +123,11 @@ typedef struct VP8Common - - MODE_INFO *mip; /* Base of allocated array */ - MODE_INFO *mi; /* Corresponds to upper left visible macroblock */ -+#if CONFIG_ERROR_CONCEALMENT - MODE_INFO *prev_mip; /* MODE_INFO array 'mip' from last decoded frame */ - MODE_INFO *prev_mi; /* 'mi' from last frame (points into prev_mip) */ -+#endif - -- -- INTERPOLATIONFILTERTYPE mcomp_filter_type; - LOOPFILTERTYPE filter_type; - - loop_filter_info_n lf_info; -@@ -158,14 +151,6 @@ typedef struct VP8Common - ENTROPY_CONTEXT_PLANES *above_context; /* row of context for each plane */ - ENTROPY_CONTEXT_PLANES left_context; /* (up to) 4 contexts "" */ - -- -- /* keyframe block modes are predicted by their above, left neighbors */ -- -- vp8_prob kf_bmode_prob [VP8_BINTRAMODES] [VP8_BINTRAMODES] [VP8_BINTRAMODES-1]; -- vp8_prob kf_ymode_prob [VP8_YMODES-1]; /* keyframe "" */ -- vp8_prob kf_uv_mode_prob [VP8_UV_MODES-1]; -- -- - FRAME_CONTEXT lfc; /* last frame entropy */ - FRAME_CONTEXT fc; /* this frame entropy */ - -diff --git a/vp8/common/onyxd.h b/vp8/common/onyxd.h -index 35a8b6e..fd7e051 100644 ---- a/vp8/common/onyxd.h -+++ b/vp8/common/onyxd.h -@@ -22,6 +22,7 @@ extern "C" - #include "ppflags.h" - #include "vpx_ports/mem.h" - #include "vpx/vpx_codec.h" -+#include "vpx/vp8.h" - - struct VP8D_COMP; - -@@ -35,12 +36,6 @@ extern "C" - int error_concealment; - int input_fragments; - } VP8D_CONFIG; -- typedef enum -- { -- VP8_LAST_FLAG = 1, -- VP8_GOLD_FLAG = 2, -- VP8_ALT_FLAG = 4 -- } VP8_REFFRAME; - - typedef enum - { -@@ -53,11 +48,13 @@ extern "C" - - int vp8dx_get_setting(struct VP8D_COMP* comp, VP8D_SETTING oxst); - -- int vp8dx_receive_compressed_data(struct VP8D_COMP* comp, unsigned long size, const unsigned char *dest, int64_t time_stamp); -+ int vp8dx_receive_compressed_data(struct VP8D_COMP* comp, -+ size_t size, const uint8_t *dest, -+ int64_t time_stamp); - int vp8dx_get_raw_frame(struct VP8D_COMP* comp, YV12_BUFFER_CONFIG *sd, int64_t *time_stamp, int64_t *time_end_stamp, vp8_ppflags_t *flags); - -- vpx_codec_err_t vp8dx_get_reference(struct VP8D_COMP* comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd); -- vpx_codec_err_t vp8dx_set_reference(struct VP8D_COMP* comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd); -+ vpx_codec_err_t vp8dx_get_reference(struct VP8D_COMP* comp, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd); -+ vpx_codec_err_t vp8dx_set_reference(struct VP8D_COMP* comp, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd); - - struct VP8D_COMP* vp8dx_create_decompressor(VP8D_CONFIG *oxcf); - -diff --git a/vp8/common/postproc.c b/vp8/common/postproc.c -index ccf6ad7..80fa530 100644 ---- a/vp8/common/postproc.c -+++ b/vp8/common/postproc.c -@@ -127,27 +127,24 @@ extern void vp8_blit_text(const char *msg, unsigned char *address, const int pit - extern void vp8_blit_line(int x0, int x1, int y0, int y1, unsigned char *image, const int pitch); - /*********************************************************************************************************** - */ --void vp8_post_proc_down_and_across_c -+void vp8_post_proc_down_and_across_mb_row_c - ( - unsigned char *src_ptr, - unsigned char *dst_ptr, - int src_pixels_per_line, - int dst_pixels_per_line, -- int rows, - int cols, -- int flimit -+ unsigned char *f, -+ int size - ) - { - unsigned char 
*p_src, *p_dst; - int row; - int col; -- int i; -- int v; -- int pitch = src_pixels_per_line; -- unsigned char d[8]; -- (void)dst_pixels_per_line; -+ unsigned char v; -+ unsigned char d[4]; - -- for (row = 0; row < rows; row++) -+ for (row = 0; row < size; row++) - { - /* post_proc_down for one row */ - p_src = src_ptr; -@@ -155,20 +152,23 @@ void vp8_post_proc_down_and_across_c - - for (col = 0; col < cols; col++) - { -+ unsigned char p_above2 = p_src[col - 2 * src_pixels_per_line]; -+ unsigned char p_above1 = p_src[col - src_pixels_per_line]; -+ unsigned char p_below1 = p_src[col + src_pixels_per_line]; -+ unsigned char p_below2 = p_src[col + 2 * src_pixels_per_line]; - -- int kernel = 4; -- int v = p_src[col]; -+ v = p_src[col]; - -- for (i = -2; i <= 2; i++) -+ if ((abs(v - p_above2) < f[col]) && (abs(v - p_above1) < f[col]) -+ && (abs(v - p_below1) < f[col]) && (abs(v - p_below2) < f[col])) - { -- if (abs(v - p_src[col+i*pitch]) > flimit) -- goto down_skip_convolve; -- -- kernel += kernel5[2+i] * p_src[col+i*pitch]; -+ unsigned char k1, k2, k3; -+ k1 = (p_above2 + p_above1 + 1) >> 1; -+ k2 = (p_below2 + p_below1 + 1) >> 1; -+ k3 = (k1 + k2 + 1) >> 1; -+ v = (k3 + v + 1) >> 1; - } - -- v = (kernel >> 3); -- down_skip_convolve: - p_dst[col] = v; - } - -@@ -176,45 +176,38 @@ void vp8_post_proc_down_and_across_c - p_src = dst_ptr; - p_dst = dst_ptr; - -- for (i = -8; i<0; i++) -- p_src[i]=p_src[0]; -- -- for (i = cols; i flimit) -- goto across_skip_convolve; -- -- kernel += kernel5[2+i] * p_src[col+i]; -+ unsigned char k1, k2, k3; -+ k1 = (p_src[col - 2] + p_src[col - 1] + 1) >> 1; -+ k2 = (p_src[col + 2] + p_src[col + 1] + 1) >> 1; -+ k3 = (k1 + k2 + 1) >> 1; -+ v = (k3 + v + 1) >> 1; - } - -- d[col&7] = (kernel >> 3); -- across_skip_convolve: -+ d[col & 3] = v; - - if (col >= 2) -- p_dst[col-2] = d[(col-2)&7]; -+ p_dst[col - 2] = d[(col - 2) & 3]; - } - - /* handle the last two pixels */ -- p_dst[col-2] = d[(col-2)&7]; -- p_dst[col-1] = d[(col-1)&7]; -- -+ p_dst[col - 2] = d[(col - 2) & 3]; -+ p_dst[col - 1] = d[(col - 1) & 3]; - - /* next row */ -- src_ptr += pitch; -- dst_ptr += pitch; -+ src_ptr += src_pixels_per_line; -+ dst_ptr += dst_pixels_per_line; - } - } - -@@ -240,8 +233,9 @@ void vp8_mbpost_proc_across_ip_c(unsigned char *src, int pitch, int rows, int co - for (i = -8; i<0; i++) - s[i]=s[0]; - -- // 17 avoids valgrind warning - we buffer values in c in d -- // and only write them when we've read 8 ahead... -+ /* 17 avoids valgrind warning - we buffer values in c in d -+ * and only write them when we've read 8 ahead... 
-+ */ - for (i = cols; iy_buffer, post->y_buffer, source->y_stride, post->y_stride, source->y_height, source->y_width, ppl); -- vp8_mbpost_proc_across_ip(post->y_buffer, post->y_stride, post->y_height, post->y_width, q2mbl(q)); -- vp8_mbpost_proc_down(post->y_buffer, post->y_stride, post->y_height, post->y_width, q2mbl(q)); -- -- vp8_post_proc_down_and_across(source->u_buffer, post->u_buffer, source->uv_stride, post->uv_stride, source->uv_height, source->uv_width, ppl); -- vp8_post_proc_down_and_across(source->v_buffer, post->v_buffer, source->uv_stride, post->uv_stride, source->uv_height, source->uv_width, ppl); -- -+ vp8_mbpost_proc_across_ip(post->y_buffer, post->y_stride, post->y_height, -+ post->y_width, q2mbl(q)); -+ vp8_mbpost_proc_down(post->y_buffer, post->y_stride, post->y_height, -+ post->y_width, q2mbl(q)); - } - --void vp8_deblock(YV12_BUFFER_CONFIG *source, -+void vp8_deblock(VP8_COMMON *cm, -+ YV12_BUFFER_CONFIG *source, - YV12_BUFFER_CONFIG *post, - int q, - int low_var_thresh, -@@ -351,16 +332,64 @@ void vp8_deblock(YV12_BUFFER_CONFIG *source, - { - double level = 6.0e-05 * q * q * q - .0067 * q * q + .306 * q + .0065; - int ppl = (int)(level + .5); -+ -+ const MODE_INFO *mode_info_context = cm->mi; -+ int mbr, mbc; -+ -+ /* The pixel thresholds are adjusted according to if or not the macroblock -+ * is a skipped block. */ -+ unsigned char *ylimits = cm->pp_limits_buffer; -+ unsigned char *uvlimits = cm->pp_limits_buffer + 16 * cm->mb_cols; - (void) low_var_thresh; - (void) flag; - -- vp8_post_proc_down_and_across(source->y_buffer, post->y_buffer, source->y_stride, post->y_stride, source->y_height, source->y_width, ppl); -- vp8_post_proc_down_and_across(source->u_buffer, post->u_buffer, source->uv_stride, post->uv_stride, source->uv_height, source->uv_width, ppl); -- vp8_post_proc_down_and_across(source->v_buffer, post->v_buffer, source->uv_stride, post->uv_stride, source->uv_height, source->uv_width, ppl); -+ if (ppl > 0) -+ { -+ for (mbr = 0; mbr < cm->mb_rows; mbr++) -+ { -+ unsigned char *ylptr = ylimits; -+ unsigned char *uvlptr = uvlimits; -+ for (mbc = 0; mbc < cm->mb_cols; mbc++) -+ { -+ unsigned char mb_ppl; -+ -+ if (mode_info_context->mbmi.mb_skip_coeff) -+ mb_ppl = (unsigned char)ppl >> 1; -+ else -+ mb_ppl = (unsigned char)ppl; -+ -+ vpx_memset(ylptr, mb_ppl, 16); -+ vpx_memset(uvlptr, mb_ppl, 8); -+ -+ ylptr += 16; -+ uvlptr += 8; -+ mode_info_context++; -+ } -+ mode_info_context++; -+ -+ vp8_post_proc_down_and_across_mb_row( -+ source->y_buffer + 16 * mbr * source->y_stride, -+ post->y_buffer + 16 * mbr * post->y_stride, source->y_stride, -+ post->y_stride, source->y_width, ylimits, 16); -+ -+ vp8_post_proc_down_and_across_mb_row( -+ source->u_buffer + 8 * mbr * source->uv_stride, -+ post->u_buffer + 8 * mbr * post->uv_stride, source->uv_stride, -+ post->uv_stride, source->uv_width, uvlimits, 8); -+ vp8_post_proc_down_and_across_mb_row( -+ source->v_buffer + 8 * mbr * source->uv_stride, -+ post->v_buffer + 8 * mbr * post->uv_stride, source->uv_stride, -+ post->uv_stride, source->uv_width, uvlimits, 8); -+ } -+ } else -+ { -+ vp8_yv12_copy_frame(source, post); -+ } - } - - #if !(CONFIG_TEMPORAL_DENOISING) --void vp8_de_noise(YV12_BUFFER_CONFIG *source, -+void vp8_de_noise(VP8_COMMON *cm, -+ YV12_BUFFER_CONFIG *source, - YV12_BUFFER_CONFIG *post, - int q, - int low_var_thresh, -@@ -368,33 +397,33 @@ void vp8_de_noise(YV12_BUFFER_CONFIG *source, - { - double level = 6.0e-05 * q * q * q - .0067 * q * q + .306 * q + .0065; - int ppl = (int)(level + .5); -+ int 
mb_rows = source->y_width >> 4; -+ int mb_cols = source->y_height >> 4; -+ unsigned char *limits = cm->pp_limits_buffer;; -+ int mbr, mbc; - (void) post; - (void) low_var_thresh; - (void) flag; - -- vp8_post_proc_down_and_across( -- source->y_buffer + 2 * source->y_stride + 2, -- source->y_buffer + 2 * source->y_stride + 2, -- source->y_stride, -- source->y_stride, -- source->y_height - 4, -- source->y_width - 4, -- ppl); -- vp8_post_proc_down_and_across( -- source->u_buffer + 2 * source->uv_stride + 2, -- source->u_buffer + 2 * source->uv_stride + 2, -- source->uv_stride, -- source->uv_stride, -- source->uv_height - 4, -- source->uv_width - 4, ppl); -- vp8_post_proc_down_and_across( -- source->v_buffer + 2 * source->uv_stride + 2, -- source->v_buffer + 2 * source->uv_stride + 2, -- source->uv_stride, -- source->uv_stride, -- source->uv_height - 4, -- source->uv_width - 4, ppl); -+ vpx_memset(limits, (unsigned char)ppl, 16 * mb_cols); - -+ /* TODO: The original code don't filter the 2 outer rows and columns. */ -+ for (mbr = 0; mbr < mb_rows; mbr++) -+ { -+ vp8_post_proc_down_and_across_mb_row( -+ source->y_buffer + 16 * mbr * source->y_stride, -+ source->y_buffer + 16 * mbr * source->y_stride, -+ source->y_stride, source->y_stride, source->y_width, limits, 16); -+ -+ vp8_post_proc_down_and_across_mb_row( -+ source->u_buffer + 8 * mbr * source->uv_stride, -+ source->u_buffer + 8 * mbr * source->uv_stride, -+ source->uv_stride, source->uv_stride, source->uv_width, limits, 8); -+ vp8_post_proc_down_and_across_mb_row( -+ source->v_buffer + 8 * mbr * source->uv_stride, -+ source->v_buffer + 8 * mbr * source->uv_stride, -+ source->uv_stride, source->uv_stride, source->uv_width, limits, 8); -+ } - } - #endif - -@@ -441,7 +470,7 @@ static void fillrd(struct postproc_state *state, int q, int a) - - } - -- for (next = next; next < 256; next++) -+ for (; next < 256; next++) - char_dist[next] = 0; - - } -@@ -731,21 +760,21 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t - - oci->post_proc_buffer_int_used = 1; - -- // insure that postproc is set to all 0's so that post proc -- // doesn't pull random data in from edge -+ /* insure that postproc is set to all 0's so that post proc -+ * doesn't pull random data in from edge -+ */ - vpx_memset((&oci->post_proc_buffer_int)->buffer_alloc,128,(&oci->post_proc_buffer)->frame_size); - - } - } - --#if ARCH_X86||ARCH_X86_64 -- vpx_reset_mmx_state(); --#endif -+ vp8_clear_system_state(); - - if ((flags & VP8D_MFQE) && - oci->postproc_state.last_frame_valid && - oci->current_video_frame >= 2 && -- oci->base_qindex - oci->postproc_state.last_base_qindex >= 10) -+ oci->postproc_state.last_base_qindex < 60 && -+ oci->base_qindex - oci->postproc_state.last_base_qindex >= 20) - { - vp8_multiframe_quality_enhance(oci); - if (((flags & VP8D_DEBLOCK) || (flags & VP8D_DEMACROBLOCK)) && -@@ -754,12 +783,14 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t - vp8_yv12_copy_frame(&oci->post_proc_buffer, &oci->post_proc_buffer_int); - if (flags & VP8D_DEMACROBLOCK) - { -- vp8_deblock_and_de_macro_block(&oci->post_proc_buffer_int, &oci->post_proc_buffer, -+ vp8_deblock(oci, &oci->post_proc_buffer_int, &oci->post_proc_buffer, - q + (deblock_level - 5) * 10, 1, 0); -+ vp8_de_mblock(&oci->post_proc_buffer, -+ q + (deblock_level - 5) * 10); - } - else if (flags & VP8D_DEBLOCK) - { -- vp8_deblock(&oci->post_proc_buffer_int, &oci->post_proc_buffer, -+ vp8_deblock(oci, &oci->post_proc_buffer_int, 
&oci->post_proc_buffer, - q, 1, 0); - } - } -@@ -768,13 +799,15 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t - } - else if (flags & VP8D_DEMACROBLOCK) - { -- vp8_deblock_and_de_macro_block(oci->frame_to_show, &oci->post_proc_buffer, -- q + (deblock_level - 5) * 10, 1, 0); -+ vp8_deblock(oci, oci->frame_to_show, &oci->post_proc_buffer, -+ q + (deblock_level - 5) * 10, 1, 0); -+ vp8_de_mblock(&oci->post_proc_buffer, q + (deblock_level - 5) * 10); -+ - oci->postproc_state.last_base_qindex = oci->base_qindex; - } - else if (flags & VP8D_DEBLOCK) - { -- vp8_deblock(oci->frame_to_show, &oci->post_proc_buffer, -+ vp8_deblock(oci, oci->frame_to_show, &oci->post_proc_buffer, - q, 1, 0); - oci->postproc_state.last_base_qindex = oci->base_qindex; - } -diff --git a/vp8/common/postproc.h b/vp8/common/postproc.h -index 6ac788c..495a2c9 100644 ---- a/vp8/common/postproc.h -+++ b/vp8/common/postproc.h -@@ -30,13 +30,15 @@ int vp8_post_proc_frame(struct VP8Common *oci, YV12_BUFFER_CONFIG *dest, - vp8_ppflags_t *flags); - - --void vp8_de_noise(YV12_BUFFER_CONFIG *source, -+void vp8_de_noise(struct VP8Common *oci, -+ YV12_BUFFER_CONFIG *source, - YV12_BUFFER_CONFIG *post, - int q, - int low_var_thresh, - int flag); - --void vp8_deblock(YV12_BUFFER_CONFIG *source, -+void vp8_deblock(struct VP8Common *oci, -+ YV12_BUFFER_CONFIG *source, - YV12_BUFFER_CONFIG *post, - int q, - int low_var_thresh, -diff --git a/vp8/common/ppc/systemdependent.c b/vp8/common/ppc/systemdependent.c -index 7046a63..87f4cac 100644 ---- a/vp8/common/ppc/systemdependent.c -+++ b/vp8/common/ppc/systemdependent.c -@@ -19,14 +19,14 @@ void (*vp8_short_idct4x4)(short *input, short *output, int pitch); - void (*vp8_short_idct4x4_1)(short *input, short *output, int pitch); - void (*vp8_dc_only_idct)(short input_dc, short *output, int pitch); - --extern void (*vp8_post_proc_down_and_across)( -+extern void (*vp8_post_proc_down_and_across_mb_row)( - unsigned char *src_ptr, - unsigned char *dst_ptr, - int src_pixels_per_line, - int dst_pixels_per_line, -- int rows, - int cols, -- int flimit -+ unsigned char *f, -+ int size - ); - - extern void (*vp8_mbpost_proc_down)(unsigned char *dst, int pitch, int rows, int cols, int flimit); -@@ -34,15 +34,15 @@ extern void vp8_mbpost_proc_down_c(unsigned char *dst, int pitch, int rows, int - extern void (*vp8_mbpost_proc_across_ip)(unsigned char *src, int pitch, int rows, int cols, int flimit); - extern void vp8_mbpost_proc_across_ip_c(unsigned char *src, int pitch, int rows, int cols, int flimit); - --extern void vp8_post_proc_down_and_across_c -+extern void vp8_post_proc_down_and_across_mb_row_c - ( - unsigned char *src_ptr, - unsigned char *dst_ptr, - int src_pixels_per_line, - int dst_pixels_per_line, -- int rows, - int cols, -- int flimit -+ unsigned char *f, -+ int size - ); - void vp8_plane_add_noise_c(unsigned char *Start, unsigned int Width, unsigned int Height, int Pitch, int q, int a); - -@@ -158,7 +158,7 @@ void vp8_machine_specific_config(void) - vp8_lf_mbhsimple = loop_filter_mbhs_ppc; - vp8_lf_bhsimple = loop_filter_bhs_ppc; - -- vp8_post_proc_down_and_across = vp8_post_proc_down_and_across_c; -+ vp8_post_proc_down_and_across_mb_row = vp8_post_proc_down_and_across_mb_row_c; - vp8_mbpost_proc_down = vp8_mbpost_proc_down_c; - vp8_mbpost_proc_across_ip = vp8_mbpost_proc_across_ip_c; - vp8_plane_add_noise = vp8_plane_add_noise_c; -diff --git a/vp8/common/quant_common.c b/vp8/common/quant_common.c -index e9833fe..05f9210 100644 ---- a/vp8/common/quant_common.c 
-+++ b/vp8/common/quant_common.c -@@ -109,7 +109,10 @@ int vp8_ac2quant(int QIndex, int Delta) - else if (QIndex < 0) - QIndex = 0; - -- retval = (ac_qlookup[ QIndex ] * 155) / 100; -+ /* For all x in [0..284], x*155/100 is bitwise equal to (x*101581) >> 16. -+ * The smallest precision for that is '(x*6349) >> 12' but 16 is a good -+ * word size. */ -+ retval = (ac_qlookup[ QIndex ] * 101581) >> 16; - - if (retval < 8) - retval = 8; -diff --git a/vp8/common/reconintra4x4.c b/vp8/common/reconintra4x4.c -index dcc35ec..7bb8d0a 100644 ---- a/vp8/common/reconintra4x4.c -+++ b/vp8/common/reconintra4x4.c -@@ -13,11 +13,11 @@ - #include "vpx_rtcd.h" - #include "blockd.h" - --void vp8_intra4x4_predict_d_c(unsigned char *Above, -- unsigned char *yleft, int left_stride, -- int b_mode, -- unsigned char *dst, int dst_stride, -- unsigned char top_left) -+void vp8_intra4x4_predict_c(unsigned char *Above, -+ unsigned char *yleft, int left_stride, -+ B_PREDICTION_MODE b_mode, -+ unsigned char *dst, int dst_stride, -+ unsigned char top_left) - { - int i, r, c; - -@@ -290,19 +290,8 @@ void vp8_intra4x4_predict_d_c(unsigned char *Above, - } - break; - -+ default: -+ break; - - } - } -- --void vp8_intra4x4_predict_c(unsigned char *src, int src_stride, -- int b_mode, -- unsigned char *dst, int dst_stride) --{ -- unsigned char *Above = src - src_stride; -- -- vp8_intra4x4_predict_d_c(Above, -- src - 1, src_stride, -- b_mode, -- dst, dst_stride, -- Above[-1]); --} -diff --git a/vp8/common/rtcd.c b/vp8/common/rtcd.c -index 232640d..01dad46 100644 ---- a/vp8/common/rtcd.c -+++ b/vp8/common/rtcd.c -@@ -10,3 +10,96 @@ - #include "vpx_config.h" - #define RTCD_C - #include "vpx_rtcd.h" -+ -+#if CONFIG_MULTITHREAD && defined(_WIN32) -+#include -+#include -+static void once(void (*func)(void)) -+{ -+ static CRITICAL_SECTION *lock; -+ static LONG waiters; -+ static int done; -+ void *lock_ptr = &lock; -+ -+ /* If the initialization is complete, return early. This isn't just an -+ * optimization, it prevents races on the destruction of the global -+ * lock. -+ */ -+ if(done) -+ return; -+ -+ InterlockedIncrement(&waiters); -+ -+ /* Get a lock. We create one and try to make it the one-true-lock, -+ * throwing it away if we lost the race. -+ */ -+ -+ { -+ /* Scope to protect access to new_lock */ -+ CRITICAL_SECTION *new_lock = malloc(sizeof(CRITICAL_SECTION)); -+ InitializeCriticalSection(new_lock); -+ if (InterlockedCompareExchangePointer(lock_ptr, new_lock, NULL) != NULL) -+ { -+ DeleteCriticalSection(new_lock); -+ free(new_lock); -+ } -+ } -+ -+ /* At this point, we have a lock that can be synchronized on. We don't -+ * care which thread actually performed the allocation. -+ */ -+ -+ EnterCriticalSection(lock); -+ -+ if (!done) -+ { -+ func(); -+ done = 1; -+ } -+ -+ LeaveCriticalSection(lock); -+ -+ /* Last one out should free resources. The destructed objects are -+ * protected by checking if(done) above. -+ */ -+ if(!InterlockedDecrement(&waiters)) -+ { -+ DeleteCriticalSection(lock); -+ free(lock); -+ lock = NULL; -+ } -+} -+ -+ -+#elif CONFIG_MULTITHREAD && HAVE_PTHREAD_H -+#include -+static void once(void (*func)(void)) -+{ -+ static pthread_once_t lock = PTHREAD_ONCE_INIT; -+ pthread_once(&lock, func); -+} -+ -+ -+#else -+/* No-op version that performs no synchronization. vpx_rtcd() is idempotent, -+ * so as long as your platform provides atomic loads/stores of pointers -+ * no synchronization is strictly necessary. 
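The vp8_ac2quant change above replaces a divide by 100 with a multiply and shift; the equivalence stated in its comment can be checked exhaustively in a few lines of C (illustrative, not part of the patch):

#include <assert.h>
#include <stdio.h>

/* Exhaustively verify the fixed-point replacement used in vp8_ac2quant:
 * (x * 101581) >> 16 must equal x * 155 / 100 for all x in [0..284]. */
int main(void)
{
    int x;

    for (x = 0; x <= 284; x++)
        assert((x * 101581) >> 16 == x * 155 / 100);

    printf("equivalence holds for x in [0..284]\n");
    return 0;
}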
-+ */ -+ -+static void once(void (*func)(void)) -+{ -+ static int done; -+ -+ if(!done) -+ { -+ func(); -+ done = 1; -+ } -+} -+#endif -+ -+ -+void vpx_rtcd() -+{ -+ once(setup_rtcd_internal); -+} -diff --git a/vp8/common/rtcd_defs.sh b/vp8/common/rtcd_defs.sh -index 33bf08b..0f950f8 100644 ---- a/vp8/common/rtcd_defs.sh -+++ b/vp8/common/rtcd_defs.sh -@@ -1,5 +1,7 @@ - common_forward_decls() { - cat < - #include - #include "vpx_config.h" - #include "vpx/vpx_integer.h" - --static --unsigned int sad_mx_n_c( -- const unsigned char *src_ptr, -- int src_stride, -- const unsigned char *ref_ptr, -- int ref_stride, -- int max_sad, -- int m, -- int n) -+static unsigned int sad_mx_n_c(const unsigned char *src_ptr, int src_stride, -+ const unsigned char *ref_ptr, int ref_stride, -+ unsigned int max_sad, int m, int n) - { -- - int r, c; - unsigned int sad = 0; - -@@ -48,298 +42,211 @@ unsigned int sad_mx_n_c( - * implementations of these functions are not required to check it. - */ - --unsigned int vp8_sad16x16_c( -- const unsigned char *src_ptr, -- int src_stride, -- const unsigned char *ref_ptr, -- int ref_stride, -- int max_sad) -+unsigned int vp8_sad16x16_c(const unsigned char *src_ptr, int src_stride, -+ const unsigned char *ref_ptr, int ref_stride, -+ unsigned int max_sad) - { -- - return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, max_sad, 16, 16); - } - -- --unsigned int vp8_sad8x8_c( -- const unsigned char *src_ptr, -- int src_stride, -- const unsigned char *ref_ptr, -- int ref_stride, -- int max_sad) -+unsigned int vp8_sad8x8_c(const unsigned char *src_ptr, int src_stride, -+ const unsigned char *ref_ptr, int ref_stride, -+ unsigned int max_sad) - { -- - return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, max_sad, 8, 8); - } - -- --unsigned int vp8_sad16x8_c( -- const unsigned char *src_ptr, -- int src_stride, -- const unsigned char *ref_ptr, -- int ref_stride, -- int max_sad) -+unsigned int vp8_sad16x8_c(const unsigned char *src_ptr, int src_stride, -+ const unsigned char *ref_ptr, int ref_stride, -+ unsigned int max_sad) - { -- - return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, max_sad, 16, 8); - - } - -- --unsigned int vp8_sad8x16_c( -- const unsigned char *src_ptr, -- int src_stride, -- const unsigned char *ref_ptr, -- int ref_stride, -- int max_sad) -+unsigned int vp8_sad8x16_c(const unsigned char *src_ptr, int src_stride, -+ const unsigned char *ref_ptr, int ref_stride, -+ unsigned int max_sad) - { -- - return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, max_sad, 8, 16); - } - -- --unsigned int vp8_sad4x4_c( -- const unsigned char *src_ptr, -- int src_stride, -- const unsigned char *ref_ptr, -- int ref_stride, -- int max_sad) -+unsigned int vp8_sad4x4_c(const unsigned char *src_ptr, int src_stride, -+ const unsigned char *ref_ptr, int ref_stride, -+ unsigned int max_sad) - { -- - return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, max_sad, 4, 4); - } - --void vp8_sad16x16x3_c( -- const unsigned char *src_ptr, -- int src_stride, -- const unsigned char *ref_ptr, -- int ref_stride, -- unsigned int *sad_array --) -+void vp8_sad16x16x3_c(const unsigned char *src_ptr, int src_stride, -+ const unsigned char *ref_ptr, int ref_stride, -+ unsigned int *sad_array) - { -- sad_array[0] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr , ref_stride, 0x7fffffff); -- sad_array[1] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff); -- sad_array[2] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff); -+ sad_array[0] 
= vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX); -+ sad_array[1] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX); -+ sad_array[2] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX); - } - --void vp8_sad16x16x8_c( -- const unsigned char *src_ptr, -- int src_stride, -- const unsigned char *ref_ptr, -- int ref_stride, -- unsigned short *sad_array --) -+void vp8_sad16x16x8_c(const unsigned char *src_ptr, int src_stride, -+ const unsigned char *ref_ptr, int ref_stride, -+ unsigned short *sad_array) - { -- sad_array[0] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr , ref_stride, 0x7fffffff); -- sad_array[1] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff); -- sad_array[2] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff); -- sad_array[3] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 3 , ref_stride, 0x7fffffff); -- sad_array[4] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, 0x7fffffff); -- sad_array[5] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, 0x7fffffff); -- sad_array[6] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 6 , ref_stride, 0x7fffffff); -- sad_array[7] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, 0x7fffffff); -+ sad_array[0] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX); -+ sad_array[1] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX); -+ sad_array[2] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX); -+ sad_array[3] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 3, ref_stride, UINT_MAX); -+ sad_array[4] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, UINT_MAX); -+ sad_array[5] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, UINT_MAX); -+ sad_array[6] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 6, ref_stride, UINT_MAX); -+ sad_array[7] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, UINT_MAX); - } - --void vp8_sad16x8x3_c( -- const unsigned char *src_ptr, -- int src_stride, -- const unsigned char *ref_ptr, -- int ref_stride, -- unsigned int *sad_array --) -+void vp8_sad16x8x3_c(const unsigned char *src_ptr, int src_stride, -+ const unsigned char *ref_ptr, int ref_stride, -+ unsigned int *sad_array) - { -- sad_array[0] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr , ref_stride, 0x7fffffff); -- sad_array[1] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff); -- sad_array[2] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff); -+ sad_array[0] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX); -+ sad_array[1] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX); -+ sad_array[2] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX); - } - --void vp8_sad16x8x8_c( -- const unsigned char *src_ptr, -- int src_stride, -- const unsigned char *ref_ptr, -- int ref_stride, -- unsigned short *sad_array --) -+void vp8_sad16x8x8_c(const unsigned char *src_ptr, int src_stride, -+ const unsigned char *ref_ptr, int ref_stride, -+ unsigned short *sad_array) - { -- sad_array[0] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr , ref_stride, 
0x7fffffff); -- sad_array[1] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff); -- sad_array[2] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff); -- sad_array[3] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 3 , ref_stride, 0x7fffffff); -- sad_array[4] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, 0x7fffffff); -- sad_array[5] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, 0x7fffffff); -- sad_array[6] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 6 , ref_stride, 0x7fffffff); -- sad_array[7] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, 0x7fffffff); -+ sad_array[0] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX); -+ sad_array[1] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX); -+ sad_array[2] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX); -+ sad_array[3] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 3, ref_stride, UINT_MAX); -+ sad_array[4] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, UINT_MAX); -+ sad_array[5] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, UINT_MAX); -+ sad_array[6] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 6, ref_stride, UINT_MAX); -+ sad_array[7] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, UINT_MAX); - } - --void vp8_sad8x8x3_c( -- const unsigned char *src_ptr, -- int src_stride, -- const unsigned char *ref_ptr, -- int ref_stride, -- unsigned int *sad_array --) -+void vp8_sad8x8x3_c(const unsigned char *src_ptr, int src_stride, -+ const unsigned char *ref_ptr, int ref_stride, -+ unsigned int *sad_array) - { -- sad_array[0] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr , ref_stride, 0x7fffffff); -- sad_array[1] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff); -- sad_array[2] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff); -+ sad_array[0] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX); -+ sad_array[1] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX); -+ sad_array[2] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX); - } - --void vp8_sad8x8x8_c( -- const unsigned char *src_ptr, -- int src_stride, -- const unsigned char *ref_ptr, -- int ref_stride, -- unsigned short *sad_array --) -+void vp8_sad8x8x8_c(const unsigned char *src_ptr, int src_stride, -+ const unsigned char *ref_ptr, int ref_stride, -+ unsigned short *sad_array) - { -- sad_array[0] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr , ref_stride, 0x7fffffff); -- sad_array[1] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff); -- sad_array[2] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff); -- sad_array[3] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 3 , ref_stride, 0x7fffffff); -- sad_array[4] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, 0x7fffffff); -- sad_array[5] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, 0x7fffffff); -- sad_array[6] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 6 , ref_stride, 0x7fffffff); -- sad_array[7] = (unsigned 
short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, 0x7fffffff); -+ sad_array[0] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX); -+ sad_array[1] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX); -+ sad_array[2] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX); -+ sad_array[3] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 3, ref_stride, UINT_MAX); -+ sad_array[4] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, UINT_MAX); -+ sad_array[5] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, UINT_MAX); -+ sad_array[6] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 6, ref_stride, UINT_MAX); -+ sad_array[7] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, UINT_MAX); - } - --void vp8_sad8x16x3_c( -- const unsigned char *src_ptr, -- int src_stride, -- const unsigned char *ref_ptr, -- int ref_stride, -- unsigned int *sad_array --) -+void vp8_sad8x16x3_c(const unsigned char *src_ptr, int src_stride, -+ const unsigned char *ref_ptr, int ref_stride, -+ unsigned int *sad_array) - { -- sad_array[0] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr , ref_stride, 0x7fffffff); -- sad_array[1] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff); -- sad_array[2] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff); -+ sad_array[0] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX); -+ sad_array[1] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX); -+ sad_array[2] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX); - } - --void vp8_sad8x16x8_c( -- const unsigned char *src_ptr, -- int src_stride, -- const unsigned char *ref_ptr, -- int ref_stride, -- unsigned short *sad_array --) -+void vp8_sad8x16x8_c(const unsigned char *src_ptr, int src_stride, -+ const unsigned char *ref_ptr, int ref_stride, -+ unsigned short *sad_array) - { -- sad_array[0] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr , ref_stride, 0x7fffffff); -- sad_array[1] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff); -- sad_array[2] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff); -- sad_array[3] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 3 , ref_stride, 0x7fffffff); -- sad_array[4] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, 0x7fffffff); -- sad_array[5] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, 0x7fffffff); -- sad_array[6] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 6 , ref_stride, 0x7fffffff); -- sad_array[7] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, 0x7fffffff); -+ sad_array[0] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX); -+ sad_array[1] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX); -+ sad_array[2] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX); -+ sad_array[3] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 3, ref_stride, UINT_MAX); -+ sad_array[4] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, UINT_MAX); -+ sad_array[5] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, 
UINT_MAX); -+ sad_array[6] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 6, ref_stride, UINT_MAX); -+ sad_array[7] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, UINT_MAX); - } - --void vp8_sad4x4x3_c( -- const unsigned char *src_ptr, -- int src_stride, -- const unsigned char *ref_ptr, -- int ref_stride, -- unsigned int *sad_array --) -+void vp8_sad4x4x3_c(const unsigned char *src_ptr, int src_stride, -+ const unsigned char *ref_ptr, int ref_stride, -+ unsigned int *sad_array) - { -- sad_array[0] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr , ref_stride, 0x7fffffff); -- sad_array[1] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff); -- sad_array[2] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff); -+ sad_array[0] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX); -+ sad_array[1] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX); -+ sad_array[2] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX); - } - --void vp8_sad4x4x8_c( -- const unsigned char *src_ptr, -- int src_stride, -- const unsigned char *ref_ptr, -- int ref_stride, -- unsigned short *sad_array --) -+void vp8_sad4x4x8_c(const unsigned char *src_ptr, int src_stride, -+ const unsigned char *ref_ptr, int ref_stride, -+ unsigned short *sad_array) - { -- sad_array[0] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr , ref_stride, 0x7fffffff); -- sad_array[1] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff); -- sad_array[2] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff); -- sad_array[3] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 3 , ref_stride, 0x7fffffff); -- sad_array[4] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, 0x7fffffff); -- sad_array[5] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, 0x7fffffff); -- sad_array[6] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 6 , ref_stride, 0x7fffffff); -- sad_array[7] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, 0x7fffffff); -+ sad_array[0] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX); -+ sad_array[1] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX); -+ sad_array[2] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX); -+ sad_array[3] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 3, ref_stride, UINT_MAX); -+ sad_array[4] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, UINT_MAX); -+ sad_array[5] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, UINT_MAX); -+ sad_array[6] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 6, ref_stride, UINT_MAX); -+ sad_array[7] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, UINT_MAX); - } - --void vp8_sad16x16x4d_c( -- const unsigned char *src_ptr, -- int src_stride, -- unsigned char *ref_ptr[], -- int ref_stride, -- unsigned int *sad_array --) -+void vp8_sad16x16x4d_c(const unsigned char *src_ptr, int src_stride, -+ const unsigned char * const ref_ptr[], int ref_stride, -+ unsigned int *sad_array) - { -- sad_array[0] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr[0], ref_stride, 0x7fffffff); -- sad_array[1] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr[1], ref_stride, 
0x7fffffff); -- sad_array[2] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr[2], ref_stride, 0x7fffffff); -- sad_array[3] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr[3], ref_stride, 0x7fffffff); -+ sad_array[0] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr[0], ref_stride, UINT_MAX); -+ sad_array[1] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr[1], ref_stride, UINT_MAX); -+ sad_array[2] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr[2], ref_stride, UINT_MAX); -+ sad_array[3] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr[3], ref_stride, UINT_MAX); - } - --void vp8_sad16x8x4d_c( -- const unsigned char *src_ptr, -- int src_stride, -- unsigned char *ref_ptr[], -- int ref_stride, -- unsigned int *sad_array --) -+void vp8_sad16x8x4d_c(const unsigned char *src_ptr, int src_stride, -+ const unsigned char * const ref_ptr[], int ref_stride, -+ unsigned int *sad_array) - { -- sad_array[0] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr[0], ref_stride, 0x7fffffff); -- sad_array[1] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr[1], ref_stride, 0x7fffffff); -- sad_array[2] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr[2], ref_stride, 0x7fffffff); -- sad_array[3] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr[3], ref_stride, 0x7fffffff); -+ sad_array[0] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr[0], ref_stride, UINT_MAX); -+ sad_array[1] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr[1], ref_stride, UINT_MAX); -+ sad_array[2] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr[2], ref_stride, UINT_MAX); -+ sad_array[3] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr[3], ref_stride, UINT_MAX); - } - --void vp8_sad8x8x4d_c( -- const unsigned char *src_ptr, -- int src_stride, -- unsigned char *ref_ptr[], -- int ref_stride, -- unsigned int *sad_array --) -+void vp8_sad8x8x4d_c(const unsigned char *src_ptr, int src_stride, -+ const unsigned char * const ref_ptr[], int ref_stride, -+ unsigned int *sad_array) - { -- sad_array[0] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr[0], ref_stride, 0x7fffffff); -- sad_array[1] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr[1], ref_stride, 0x7fffffff); -- sad_array[2] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr[2], ref_stride, 0x7fffffff); -- sad_array[3] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr[3], ref_stride, 0x7fffffff); -+ sad_array[0] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr[0], ref_stride, UINT_MAX); -+ sad_array[1] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr[1], ref_stride, UINT_MAX); -+ sad_array[2] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr[2], ref_stride, UINT_MAX); -+ sad_array[3] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr[3], ref_stride, UINT_MAX); - } - --void vp8_sad8x16x4d_c( -- const unsigned char *src_ptr, -- int src_stride, -- unsigned char *ref_ptr[], -- int ref_stride, -- unsigned int *sad_array --) -+void vp8_sad8x16x4d_c(const unsigned char *src_ptr, int src_stride, -+ const unsigned char * const ref_ptr[], int ref_stride, -+ unsigned int *sad_array) - { -- sad_array[0] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr[0], ref_stride, 0x7fffffff); -- sad_array[1] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr[1], ref_stride, 0x7fffffff); -- sad_array[2] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr[2], ref_stride, 0x7fffffff); -- sad_array[3] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr[3], ref_stride, 0x7fffffff); -+ sad_array[0] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr[0], ref_stride, UINT_MAX); -+ sad_array[1] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr[1], ref_stride, UINT_MAX); -+ sad_array[2] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr[2], ref_stride, UINT_MAX); 
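All of the vp8_sadNxMx3/x8/x4d wrappers in this file reduce to one scalar kernel: the sum of absolute differences over an m x n block, with max_sad now typed unsigned int and passed as UINT_MAX when no early-out bound is wanted. A self-contained sketch of that kernel (illustrative; the helper name is made up, the real implementation is sad_mx_n_c above):

#include <stdlib.h>

/* Illustrative scalar SAD over an m x n block, mirroring what the
 * vp8_sadWxH_c functions compute. The max_sad early-out is omitted,
 * matching the note above that C implementations need not check it. */
static unsigned int sad_block(const unsigned char *src, int src_stride,
                              const unsigned char *ref, int ref_stride,
                              int m, int n)
{
    unsigned int sad = 0;
    int r, c;

    for (r = 0; r < n; r++)
    {
        for (c = 0; c < m; c++)
            sad += abs(src[c] - ref[c]);

        src += src_stride;
        ref += ref_stride;
    }

    return sad;
}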
-+ sad_array[3] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr[3], ref_stride, UINT_MAX); - } - --void vp8_sad4x4x4d_c( -- const unsigned char *src_ptr, -- int src_stride, -- unsigned char *ref_ptr[], -- int ref_stride, -- unsigned int *sad_array --) -+void vp8_sad4x4x4d_c(const unsigned char *src_ptr, int src_stride, -+ const unsigned char * const ref_ptr[], int ref_stride, -+ unsigned int *sad_array) - { -- sad_array[0] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr[0], ref_stride, 0x7fffffff); -- sad_array[1] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr[1], ref_stride, 0x7fffffff); -- sad_array[2] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr[2], ref_stride, 0x7fffffff); -- sad_array[3] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr[3], ref_stride, 0x7fffffff); -+ sad_array[0] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr[0], ref_stride, UINT_MAX); -+ sad_array[1] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr[1], ref_stride, UINT_MAX); -+ sad_array[2] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr[2], ref_stride, UINT_MAX); -+ sad_array[3] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr[3], ref_stride, UINT_MAX); - } - - /* Copy 2 macroblocks to a buffer */ --void vp8_copy32xn_c( -- unsigned char *src_ptr, -- int src_stride, -- unsigned char *dst_ptr, -- int dst_stride, -- int height) -+void vp8_copy32xn_c(unsigned char *src_ptr, int src_stride, -+ unsigned char *dst_ptr, int dst_stride, -+ int height) - { - int r; - -diff --git a/vp8/common/setupintrarecon.c b/vp8/common/setupintrarecon.c -index 7976e25..60afe51 100644 ---- a/vp8/common/setupintrarecon.c -+++ b/vp8/common/setupintrarecon.c -@@ -30,3 +30,10 @@ void vp8_setup_intra_recon(YV12_BUFFER_CONFIG *ybf) - ybf->v_buffer[ybf->uv_stride *i - 1] = (unsigned char) 129; - - } -+ -+void vp8_setup_intra_recon_top_line(YV12_BUFFER_CONFIG *ybf) -+{ -+ vpx_memset(ybf->y_buffer - 1 - ybf->y_stride, 127, ybf->y_width + 5); -+ vpx_memset(ybf->u_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5); -+ vpx_memset(ybf->v_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5); -+} -diff --git a/vp8/common/setupintrarecon.h b/vp8/common/setupintrarecon.h -index 5264fd0..e515c3a 100644 ---- a/vp8/common/setupintrarecon.h -+++ b/vp8/common/setupintrarecon.h -@@ -11,3 +11,23 @@ - - #include "vpx_scale/yv12config.h" - extern void vp8_setup_intra_recon(YV12_BUFFER_CONFIG *ybf); -+extern void vp8_setup_intra_recon_top_line(YV12_BUFFER_CONFIG *ybf); -+ -+static -+void setup_intra_recon_left(unsigned char *y_buffer, -+ unsigned char *u_buffer, -+ unsigned char *v_buffer, -+ int y_stride, -+ int uv_stride) -+{ -+ int i; -+ -+ for (i = 0; i < 16; i++) -+ y_buffer[y_stride *i] = (unsigned char) 129; -+ -+ for (i = 0; i < 8; i++) -+ u_buffer[uv_stride *i] = (unsigned char) 129; -+ -+ for (i = 0; i < 8; i++) -+ v_buffer[uv_stride *i] = (unsigned char) 129; -+} -diff --git a/vp8/common/variance.h b/vp8/common/variance.h -index b77aa28..01193b8 100644 ---- a/vp8/common/variance.h -+++ b/vp8/common/variance.h -@@ -12,14 +12,14 @@ - #ifndef VARIANCE_H - #define VARIANCE_H - --typedef unsigned int(*vp8_sad_fn_t) -- ( -+#include "vpx_config.h" -+ -+typedef unsigned int(*vp8_sad_fn_t)( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int ref_stride, -- int max_sad -- ); -+ unsigned int max_sad); - - typedef void (*vp8_copy32xn_fn_t)( - const unsigned char *src_ptr, -@@ -48,7 +48,7 @@ typedef void (*vp8_sad_multi_d_fn_t) - ( - const unsigned char *src_ptr, - int source_stride, -- unsigned char *ref_ptr[4], -+ const unsigned char * const ref_ptr[], - int 
ref_stride, - unsigned int *sad_array - ); -diff --git a/vp8/common/variance_c.c b/vp8/common/variance_c.c -index 996404d..da08aff 100644 ---- a/vp8/common/variance_c.c -+++ b/vp8/common/variance_c.c -@@ -205,14 +205,14 @@ static void var_filter_block2d_bil_first_pass - { - for (j = 0; j < output_width; j++) - { -- // Apply bilinear filter -+ /* Apply bilinear filter */ - output_ptr[j] = (((int)src_ptr[0] * vp8_filter[0]) + - ((int)src_ptr[pixel_step] * vp8_filter[1]) + - (VP8_FILTER_WEIGHT / 2)) >> VP8_FILTER_SHIFT; - src_ptr++; - } - -- // Next row... -+ /* Next row... */ - src_ptr += src_pixels_per_line - output_width; - output_ptr += output_width; - } -@@ -264,15 +264,15 @@ static void var_filter_block2d_bil_second_pass - { - for (j = 0; j < output_width; j++) - { -- // Apply filter -- Temp = ((int)src_ptr[0] * vp8_filter[0]) + -+ /* Apply filter */ -+ Temp = ((int)src_ptr[0] * vp8_filter[0]) + - ((int)src_ptr[pixel_step] * vp8_filter[1]) + - (VP8_FILTER_WEIGHT / 2); - output_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT); - src_ptr++; - } - -- // Next row... -+ /* Next row... */ - src_ptr += src_pixels_per_line - output_width; - output_ptr += output_width; - } -@@ -292,15 +292,15 @@ unsigned int vp8_sub_pixel_variance4x4_c - { - unsigned char temp2[20*16]; - const short *HFilter, *VFilter; -- unsigned short FData3[5*4]; // Temp data bufffer used in filtering -+ unsigned short FData3[5*4]; /* Temp data bufffer used in filtering */ - - HFilter = vp8_bilinear_filters[xoffset]; - VFilter = vp8_bilinear_filters[yoffset]; - -- // First filter 1d Horizontal -+ /* First filter 1d Horizontal */ - var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 5, 4, HFilter); - -- // Now filter Verticaly -+ /* Now filter Verticaly */ - var_filter_block2d_bil_second_pass(FData3, temp2, 4, 4, 4, 4, VFilter); - - return vp8_variance4x4_c(temp2, 4, dst_ptr, dst_pixels_per_line, sse); -@@ -318,7 +318,7 @@ unsigned int vp8_sub_pixel_variance8x8_c - unsigned int *sse - ) - { -- unsigned short FData3[9*8]; // Temp data bufffer used in filtering -+ unsigned short FData3[9*8]; /* Temp data bufffer used in filtering */ - unsigned char temp2[20*16]; - const short *HFilter, *VFilter; - -@@ -342,7 +342,7 @@ unsigned int vp8_sub_pixel_variance16x16_c - unsigned int *sse - ) - { -- unsigned short FData3[17*16]; // Temp data bufffer used in filtering -+ unsigned short FData3[17*16]; /* Temp data bufffer used in filtering */ - unsigned char temp2[20*16]; - const short *HFilter, *VFilter; - -@@ -418,7 +418,7 @@ unsigned int vp8_sub_pixel_variance16x8_c - unsigned int *sse - ) - { -- unsigned short FData3[16*9]; // Temp data bufffer used in filtering -+ unsigned short FData3[16*9]; /* Temp data bufffer used in filtering */ - unsigned char temp2[20*16]; - const short *HFilter, *VFilter; - -@@ -442,7 +442,7 @@ unsigned int vp8_sub_pixel_variance8x16_c - unsigned int *sse - ) - { -- unsigned short FData3[9*16]; // Temp data bufffer used in filtering -+ unsigned short FData3[9*16]; /* Temp data bufffer used in filtering */ - unsigned char temp2[20*16]; - const short *HFilter, *VFilter; - -diff --git a/vp8/common/vp8_entropymodedata.h b/vp8/common/vp8_entropymodedata.h -old mode 100755 -new mode 100644 -diff --git a/vp8/common/x86/dequantize_mmx.asm b/vp8/common/x86/dequantize_mmx.asm -index de9eba8..4e551f0 100644 ---- a/vp8/common/x86/dequantize_mmx.asm -+++ b/vp8/common/x86/dequantize_mmx.asm -@@ -13,7 +13,7 @@ - - - ;void vp8_dequantize_b_impl_mmx(short *sq, short *dq, short *q) --global 
sym(vp8_dequantize_b_impl_mmx) -+global sym(vp8_dequantize_b_impl_mmx) PRIVATE - sym(vp8_dequantize_b_impl_mmx): - push rbp - mov rbp, rsp -@@ -55,7 +55,7 @@ sym(vp8_dequantize_b_impl_mmx): - ;short *dq, 1 - ;unsigned char *dest, 2 - ;int stride) 3 --global sym(vp8_dequant_idct_add_mmx) -+global sym(vp8_dequant_idct_add_mmx) PRIVATE - sym(vp8_dequant_idct_add_mmx): - push rbp - mov rbp, rsp -diff --git a/vp8/common/x86/idctllm_mmx.asm b/vp8/common/x86/idctllm_mmx.asm -index 0c9c205..96fa2c6 100644 ---- a/vp8/common/x86/idctllm_mmx.asm -+++ b/vp8/common/x86/idctllm_mmx.asm -@@ -34,7 +34,7 @@ - - ;void vp8_short_idct4x4llm_mmx(short *input, unsigned char *pred, - ;int pitch, unsigned char *dest,int stride) --global sym(vp8_short_idct4x4llm_mmx) -+global sym(vp8_short_idct4x4llm_mmx) PRIVATE - sym(vp8_short_idct4x4llm_mmx): - push rbp - mov rbp, rsp -@@ -224,7 +224,7 @@ sym(vp8_short_idct4x4llm_mmx): - ;int pred_stride, - ;unsigned char *dst_ptr, - ;int stride) --global sym(vp8_dc_only_idct_add_mmx) -+global sym(vp8_dc_only_idct_add_mmx) PRIVATE - sym(vp8_dc_only_idct_add_mmx): - push rbp - mov rbp, rsp -diff --git a/vp8/common/x86/idctllm_mmx_test.cc b/vp8/common/x86/idctllm_mmx_test.cc -deleted file mode 100755 -index 8c11533..0000000 ---- a/vp8/common/x86/idctllm_mmx_test.cc -+++ /dev/null -@@ -1,31 +0,0 @@ --/* -- * Copyright (c) 2010 The WebM project authors. All Rights Reserved. -- * -- * Use of this source code is governed by a BSD-style license -- * that can be found in the LICENSE file in the root of the source -- * tree. An additional intellectual property rights grant can be found -- * in the file PATENTS. All contributing project authors may -- * be found in the AUTHORS file in the root of the source tree. -- */ -- -- -- extern "C" { -- void vp8_short_idct4x4llm_mmx(short *input, unsigned char *pred_ptr, -- int pred_stride, unsigned char *dst_ptr, -- int dst_stride); --} -- --#include "vp8/common/idctllm_test.h" -- --namespace --{ -- --INSTANTIATE_TEST_CASE_P(MMX, IDCTTest, -- ::testing::Values(vp8_short_idct4x4llm_mmx)); -- --} // namespace -- --int main(int argc, char **argv) { -- ::testing::InitGoogleTest(&argc, argv); -- return RUN_ALL_TESTS(); --} -diff --git a/vp8/common/x86/idctllm_sse2.asm b/vp8/common/x86/idctllm_sse2.asm -index abeb0b6..bf8e2c4 100644 ---- a/vp8/common/x86/idctllm_sse2.asm -+++ b/vp8/common/x86/idctllm_sse2.asm -@@ -19,7 +19,7 @@ - ; int dst_stride - 3 - ; ) - --global sym(vp8_idct_dequant_0_2x_sse2) -+global sym(vp8_idct_dequant_0_2x_sse2) PRIVATE - sym(vp8_idct_dequant_0_2x_sse2): - push rbp - mov rbp, rsp -@@ -101,7 +101,7 @@ sym(vp8_idct_dequant_0_2x_sse2): - ; unsigned char *dst - 2 - ; int dst_stride - 3 - ; ) --global sym(vp8_idct_dequant_full_2x_sse2) -+global sym(vp8_idct_dequant_full_2x_sse2) PRIVATE - sym(vp8_idct_dequant_full_2x_sse2): - push rbp - mov rbp, rsp -@@ -358,7 +358,7 @@ sym(vp8_idct_dequant_full_2x_sse2): - ; int dst_stride - 3 - ; short *dc - 4 - ; ) --global sym(vp8_idct_dequant_dc_0_2x_sse2) -+global sym(vp8_idct_dequant_dc_0_2x_sse2) PRIVATE - sym(vp8_idct_dequant_dc_0_2x_sse2): - push rbp - mov rbp, rsp -@@ -434,7 +434,7 @@ sym(vp8_idct_dequant_dc_0_2x_sse2): - ; int dst_stride - 3 - ; short *dc - 4 - ; ) --global sym(vp8_idct_dequant_dc_full_2x_sse2) -+global sym(vp8_idct_dequant_dc_full_2x_sse2) PRIVATE - sym(vp8_idct_dequant_dc_full_2x_sse2): - push rbp - mov rbp, rsp -diff --git a/vp8/common/x86/iwalsh_mmx.asm b/vp8/common/x86/iwalsh_mmx.asm -index 6582687..4aac094 100644 ---- a/vp8/common/x86/iwalsh_mmx.asm -+++ 
b/vp8/common/x86/iwalsh_mmx.asm -@@ -12,7 +12,7 @@ - %include "vpx_ports/x86_abi_support.asm" - - ;void vp8_short_inv_walsh4x4_mmx(short *input, short *output) --global sym(vp8_short_inv_walsh4x4_mmx) -+global sym(vp8_short_inv_walsh4x4_mmx) PRIVATE - sym(vp8_short_inv_walsh4x4_mmx): - push rbp - mov rbp, rsp -diff --git a/vp8/common/x86/iwalsh_sse2.asm b/vp8/common/x86/iwalsh_sse2.asm -index 51cb5e2..06e86a8 100644 ---- a/vp8/common/x86/iwalsh_sse2.asm -+++ b/vp8/common/x86/iwalsh_sse2.asm -@@ -12,7 +12,7 @@ - %include "vpx_ports/x86_abi_support.asm" - - ;void vp8_short_inv_walsh4x4_sse2(short *input, short *output) --global sym(vp8_short_inv_walsh4x4_sse2) -+global sym(vp8_short_inv_walsh4x4_sse2) PRIVATE - sym(vp8_short_inv_walsh4x4_sse2): - push rbp - mov rbp, rsp -diff --git a/vp8/common/x86/loopfilter_block_sse2.asm b/vp8/common/x86/loopfilter_block_sse2.asm -index 4918eb5..3d45c61 100644 ---- a/vp8/common/x86/loopfilter_block_sse2.asm -+++ b/vp8/common/x86/loopfilter_block_sse2.asm -@@ -133,7 +133,7 @@ - ; const char *limit, - ; const char *thresh - ;) --global sym(vp8_loop_filter_bh_y_sse2) -+global sym(vp8_loop_filter_bh_y_sse2) PRIVATE - sym(vp8_loop_filter_bh_y_sse2): - - %ifidn __OUTPUT_FORMAT__,x64 -@@ -150,6 +150,7 @@ sym(vp8_loop_filter_bh_y_sse2): - - push rbp - mov rbp, rsp -+ SAVE_XMM 11 - push r12 - push r13 - mov thresh, arg(4) -@@ -258,6 +259,7 @@ LF_FILTER xmm0, xmm1, xmm3, xmm8, xmm4, xmm2 - %ifidn __OUTPUT_FORMAT__,x64 - pop r13 - pop r12 -+ RESTORE_XMM - pop rbp - %endif - -@@ -273,7 +275,7 @@ LF_FILTER xmm0, xmm1, xmm3, xmm8, xmm4, xmm2 - ; const char *thresh - ;) - --global sym(vp8_loop_filter_bv_y_sse2) -+global sym(vp8_loop_filter_bv_y_sse2) PRIVATE - sym(vp8_loop_filter_bv_y_sse2): - - %ifidn __OUTPUT_FORMAT__,x64 -diff --git a/vp8/common/x86/loopfilter_mmx.asm b/vp8/common/x86/loopfilter_mmx.asm -index 697a5de..f388d24 100644 ---- a/vp8/common/x86/loopfilter_mmx.asm -+++ b/vp8/common/x86/loopfilter_mmx.asm -@@ -21,7 +21,7 @@ - ; const char *thresh, - ; int count - ;) --global sym(vp8_loop_filter_horizontal_edge_mmx) -+global sym(vp8_loop_filter_horizontal_edge_mmx) PRIVATE - sym(vp8_loop_filter_horizontal_edge_mmx): - push rbp - mov rbp, rsp -@@ -233,7 +233,7 @@ sym(vp8_loop_filter_horizontal_edge_mmx): - ; const char *thresh, - ; int count - ;) --global sym(vp8_loop_filter_vertical_edge_mmx) -+global sym(vp8_loop_filter_vertical_edge_mmx) PRIVATE - sym(vp8_loop_filter_vertical_edge_mmx): - push rbp - mov rbp, rsp -@@ -603,7 +603,7 @@ sym(vp8_loop_filter_vertical_edge_mmx): - ; const char *thresh, - ; int count - ;) --global sym(vp8_mbloop_filter_horizontal_edge_mmx) -+global sym(vp8_mbloop_filter_horizontal_edge_mmx) PRIVATE - sym(vp8_mbloop_filter_horizontal_edge_mmx): - push rbp - mov rbp, rsp -@@ -920,7 +920,7 @@ sym(vp8_mbloop_filter_horizontal_edge_mmx): - ; const char *thresh, - ; int count - ;) --global sym(vp8_mbloop_filter_vertical_edge_mmx) -+global sym(vp8_mbloop_filter_vertical_edge_mmx) PRIVATE - sym(vp8_mbloop_filter_vertical_edge_mmx): - push rbp - mov rbp, rsp -@@ -1384,7 +1384,7 @@ sym(vp8_mbloop_filter_vertical_edge_mmx): - ; int src_pixel_step, - ; const char *blimit - ;) --global sym(vp8_loop_filter_simple_horizontal_edge_mmx) -+global sym(vp8_loop_filter_simple_horizontal_edge_mmx) PRIVATE - sym(vp8_loop_filter_simple_horizontal_edge_mmx): - push rbp - mov rbp, rsp -@@ -1500,7 +1500,7 @@ sym(vp8_loop_filter_simple_horizontal_edge_mmx): - ; int src_pixel_step, - ; const char *blimit - ;) --global 
sym(vp8_loop_filter_simple_vertical_edge_mmx) -+global sym(vp8_loop_filter_simple_vertical_edge_mmx) PRIVATE - sym(vp8_loop_filter_simple_vertical_edge_mmx): - push rbp - mov rbp, rsp -diff --git a/vp8/common/x86/loopfilter_sse2.asm b/vp8/common/x86/loopfilter_sse2.asm -index 9944c33..a66753b 100644 ---- a/vp8/common/x86/loopfilter_sse2.asm -+++ b/vp8/common/x86/loopfilter_sse2.asm -@@ -286,7 +286,7 @@ - ; const char *limit, - ; const char *thresh, - ;) --global sym(vp8_loop_filter_horizontal_edge_sse2) -+global sym(vp8_loop_filter_horizontal_edge_sse2) PRIVATE - sym(vp8_loop_filter_horizontal_edge_sse2): - push rbp - mov rbp, rsp -@@ -334,7 +334,7 @@ sym(vp8_loop_filter_horizontal_edge_sse2): - ; const char *thresh, - ; int count - ;) --global sym(vp8_loop_filter_horizontal_edge_uv_sse2) -+global sym(vp8_loop_filter_horizontal_edge_uv_sse2) PRIVATE - sym(vp8_loop_filter_horizontal_edge_uv_sse2): - push rbp - mov rbp, rsp -@@ -561,7 +561,7 @@ sym(vp8_loop_filter_horizontal_edge_uv_sse2): - ; const char *limit, - ; const char *thresh, - ;) --global sym(vp8_mbloop_filter_horizontal_edge_sse2) -+global sym(vp8_mbloop_filter_horizontal_edge_sse2) PRIVATE - sym(vp8_mbloop_filter_horizontal_edge_sse2): - push rbp - mov rbp, rsp -@@ -607,7 +607,7 @@ sym(vp8_mbloop_filter_horizontal_edge_sse2): - ; const char *thresh, - ; unsigned char *v - ;) --global sym(vp8_mbloop_filter_horizontal_edge_uv_sse2) -+global sym(vp8_mbloop_filter_horizontal_edge_uv_sse2) PRIVATE - sym(vp8_mbloop_filter_horizontal_edge_uv_sse2): - push rbp - mov rbp, rsp -@@ -928,7 +928,7 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2): - ; const char *limit, - ; const char *thresh, - ;) --global sym(vp8_loop_filter_vertical_edge_sse2) -+global sym(vp8_loop_filter_vertical_edge_sse2) PRIVATE - sym(vp8_loop_filter_vertical_edge_sse2): - push rbp - mov rbp, rsp -@@ -993,7 +993,7 @@ sym(vp8_loop_filter_vertical_edge_sse2): - ; const char *thresh, - ; unsigned char *v - ;) --global sym(vp8_loop_filter_vertical_edge_uv_sse2) -+global sym(vp8_loop_filter_vertical_edge_uv_sse2) PRIVATE - sym(vp8_loop_filter_vertical_edge_uv_sse2): - push rbp - mov rbp, rsp -@@ -1142,7 +1142,7 @@ sym(vp8_loop_filter_vertical_edge_uv_sse2): - ; const char *limit, - ; const char *thresh, - ;) --global sym(vp8_mbloop_filter_vertical_edge_sse2) -+global sym(vp8_mbloop_filter_vertical_edge_sse2) PRIVATE - sym(vp8_mbloop_filter_vertical_edge_sse2): - push rbp - mov rbp, rsp -@@ -1209,7 +1209,7 @@ sym(vp8_mbloop_filter_vertical_edge_sse2): - ; const char *thresh, - ; unsigned char *v - ;) --global sym(vp8_mbloop_filter_vertical_edge_uv_sse2) -+global sym(vp8_mbloop_filter_vertical_edge_uv_sse2) PRIVATE - sym(vp8_mbloop_filter_vertical_edge_uv_sse2): - push rbp - mov rbp, rsp -@@ -1269,7 +1269,7 @@ sym(vp8_mbloop_filter_vertical_edge_uv_sse2): - ; int src_pixel_step, - ; const char *blimit, - ;) --global sym(vp8_loop_filter_simple_horizontal_edge_sse2) -+global sym(vp8_loop_filter_simple_horizontal_edge_sse2) PRIVATE - sym(vp8_loop_filter_simple_horizontal_edge_sse2): - push rbp - mov rbp, rsp -@@ -1374,7 +1374,7 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2): - ; int src_pixel_step, - ; const char *blimit, - ;) --global sym(vp8_loop_filter_simple_vertical_edge_sse2) -+global sym(vp8_loop_filter_simple_vertical_edge_sse2) PRIVATE - sym(vp8_loop_filter_simple_vertical_edge_sse2): - push rbp ; save old base pointer value. - mov rbp, rsp ; set new base pointer value. 
-diff --git a/vp8/common/x86/mfqe_sse2.asm b/vp8/common/x86/mfqe_sse2.asm -index 10d21f3..c1d2174 100644 ---- a/vp8/common/x86/mfqe_sse2.asm -+++ b/vp8/common/x86/mfqe_sse2.asm -@@ -19,7 +19,7 @@ - ; int dst_stride, - ; int src_weight - ;) --global sym(vp8_filter_by_weight16x16_sse2) -+global sym(vp8_filter_by_weight16x16_sse2) PRIVATE - sym(vp8_filter_by_weight16x16_sse2): - push rbp - mov rbp, rsp -@@ -97,7 +97,7 @@ sym(vp8_filter_by_weight16x16_sse2): - ; int dst_stride, - ; int src_weight - ;) --global sym(vp8_filter_by_weight8x8_sse2) -+global sym(vp8_filter_by_weight8x8_sse2) PRIVATE - sym(vp8_filter_by_weight8x8_sse2): - push rbp - mov rbp, rsp -@@ -165,7 +165,7 @@ sym(vp8_filter_by_weight8x8_sse2): - ; unsigned int *variance, 4 - ; unsigned int *sad, 5 - ;) --global sym(vp8_variance_and_sad_16x16_sse2) -+global sym(vp8_variance_and_sad_16x16_sse2) PRIVATE - sym(vp8_variance_and_sad_16x16_sse2): - push rbp - mov rbp, rsp -diff --git a/vp8/common/x86/postproc_mmx.asm b/vp8/common/x86/postproc_mmx.asm -index d24f740..966c586 100644 ---- a/vp8/common/x86/postproc_mmx.asm -+++ b/vp8/common/x86/postproc_mmx.asm -@@ -14,275 +14,10 @@ - %define VP8_FILTER_WEIGHT 128 - %define VP8_FILTER_SHIFT 7 - --;void vp8_post_proc_down_and_across_mmx --;( --; unsigned char *src_ptr, --; unsigned char *dst_ptr, --; int src_pixels_per_line, --; int dst_pixels_per_line, --; int rows, --; int cols, --; int flimit --;) --global sym(vp8_post_proc_down_and_across_mmx) --sym(vp8_post_proc_down_and_across_mmx): -- push rbp -- mov rbp, rsp -- SHADOW_ARGS_TO_STACK 7 -- GET_GOT rbx -- push rsi -- push rdi -- ; end prolog -- --%if ABI_IS_32BIT=1 && CONFIG_PIC=1 -- ; move the global rd onto the stack, since we don't have enough registers -- ; to do PIC addressing -- movq mm0, [GLOBAL(rd)] -- sub rsp, 8 -- movq [rsp], mm0 --%define RD [rsp] --%else --%define RD [GLOBAL(rd)] --%endif -- -- push rbx -- lea rbx, [GLOBAL(Blur)] -- movd mm2, dword ptr arg(6) ;flimit -- punpcklwd mm2, mm2 -- punpckldq mm2, mm2 -- -- mov rsi, arg(0) ;src_ptr -- mov rdi, arg(1) ;dst_ptr -- -- movsxd rcx, DWORD PTR arg(4) ;rows -- movsxd rax, DWORD PTR arg(2) ;src_pixels_per_line ; destination pitch? 
-- pxor mm0, mm0 ; mm0 = 00000000 -- --.nextrow: -- -- xor rdx, rdx ; clear out rdx for use as loop counter --.nextcol: -- -- pxor mm7, mm7 ; mm7 = 00000000 -- movq mm6, [rbx + 32 ] ; mm6 = kernel 2 taps -- movq mm3, [rsi] ; mm4 = r0 p0..p7 -- punpcklbw mm3, mm0 ; mm3 = p0..p3 -- movq mm1, mm3 ; mm1 = p0..p3 -- pmullw mm3, mm6 ; mm3 *= kernel 2 modifiers -- -- movq mm6, [rbx + 48] ; mm6 = kernel 3 taps -- movq mm5, [rsi + rax] ; mm4 = r1 p0..p7 -- punpcklbw mm5, mm0 ; mm5 = r1 p0..p3 -- pmullw mm6, mm5 ; mm6 *= p0..p3 * kernel 3 modifiers -- paddusw mm3, mm6 ; mm3 += mm6 -- -- ; thresholding -- movq mm7, mm1 ; mm7 = r0 p0..p3 -- psubusw mm7, mm5 ; mm7 = r0 p0..p3 - r1 p0..p3 -- psubusw mm5, mm1 ; mm5 = r1 p0..p3 - r0 p0..p3 -- paddusw mm7, mm5 ; mm7 = abs(r0 p0..p3 - r1 p0..p3) -- pcmpgtw mm7, mm2 -- -- movq mm6, [rbx + 64 ] ; mm6 = kernel 4 modifiers -- movq mm5, [rsi + 2*rax] ; mm4 = r2 p0..p7 -- punpcklbw mm5, mm0 ; mm5 = r2 p0..p3 -- pmullw mm6, mm5 ; mm5 *= kernel 4 modifiers -- paddusw mm3, mm6 ; mm3 += mm5 -- -- ; thresholding -- movq mm6, mm1 ; mm6 = r0 p0..p3 -- psubusw mm6, mm5 ; mm6 = r0 p0..p3 - r2 p0..p3 -- psubusw mm5, mm1 ; mm5 = r2 p0..p3 - r2 p0..p3 -- paddusw mm6, mm5 ; mm6 = abs(r0 p0..p3 - r2 p0..p3) -- pcmpgtw mm6, mm2 -- por mm7, mm6 ; accumulate thresholds -- -- -- neg rax -- movq mm6, [rbx ] ; kernel 0 taps -- movq mm5, [rsi+2*rax] ; mm4 = r-2 p0..p7 -- punpcklbw mm5, mm0 ; mm5 = r-2 p0..p3 -- pmullw mm6, mm5 ; mm5 *= kernel 0 modifiers -- paddusw mm3, mm6 ; mm3 += mm5 -- -- ; thresholding -- movq mm6, mm1 ; mm6 = r0 p0..p3 -- psubusw mm6, mm5 ; mm6 = p0..p3 - r-2 p0..p3 -- psubusw mm5, mm1 ; mm5 = r-2 p0..p3 - p0..p3 -- paddusw mm6, mm5 ; mm6 = abs(r0 p0..p3 - r-2 p0..p3) -- pcmpgtw mm6, mm2 -- por mm7, mm6 ; accumulate thresholds -- -- movq mm6, [rbx + 16] ; kernel 1 taps -- movq mm4, [rsi+rax] ; mm4 = r-1 p0..p7 -- punpcklbw mm4, mm0 ; mm4 = r-1 p0..p3 -- pmullw mm6, mm4 ; mm4 *= kernel 1 modifiers. 
-- paddusw mm3, mm6 ; mm3 += mm5 -- -- ; thresholding -- movq mm6, mm1 ; mm6 = r0 p0..p3 -- psubusw mm6, mm4 ; mm6 = p0..p3 - r-2 p0..p3 -- psubusw mm4, mm1 ; mm5 = r-1 p0..p3 - p0..p3 -- paddusw mm6, mm4 ; mm6 = abs(r0 p0..p3 - r-1 p0..p3) -- pcmpgtw mm6, mm2 -- por mm7, mm6 ; accumulate thresholds -- -- -- paddusw mm3, RD ; mm3 += round value -- psraw mm3, VP8_FILTER_SHIFT ; mm3 /= 128 -- -- pand mm1, mm7 ; mm1 select vals > thresh from source -- pandn mm7, mm3 ; mm7 select vals < thresh from blurred result -- paddusw mm1, mm7 ; combination -- -- packuswb mm1, mm0 ; pack to bytes -- -- movd [rdi], mm1 ; -- neg rax ; pitch is positive -- -- -- add rsi, 4 -- add rdi, 4 -- add rdx, 4 -- -- cmp edx, dword ptr arg(5) ;cols -- jl .nextcol -- ; done with the all cols, start the across filtering in place -- sub rsi, rdx -- sub rdi, rdx -- -- ; dup the first byte into the left border 8 times -- movq mm1, [rdi] -- punpcklbw mm1, mm1 -- punpcklwd mm1, mm1 -- punpckldq mm1, mm1 -- -- mov rdx, -8 -- movq [rdi+rdx], mm1 -- -- ; dup the last byte into the right border -- movsxd rdx, dword arg(5) -- movq mm1, [rdi + rdx + -1] -- punpcklbw mm1, mm1 -- punpcklwd mm1, mm1 -- punpckldq mm1, mm1 -- movq [rdi+rdx], mm1 -- -- -- push rax -- xor rdx, rdx -- mov rax, [rdi-4]; -- --.acrossnextcol: -- pxor mm7, mm7 ; mm7 = 00000000 -- movq mm6, [rbx + 32 ] ; -- movq mm4, [rdi+rdx] ; mm4 = p0..p7 -- movq mm3, mm4 ; mm3 = p0..p7 -- punpcklbw mm3, mm0 ; mm3 = p0..p3 -- movq mm1, mm3 ; mm1 = p0..p3 -- pmullw mm3, mm6 ; mm3 *= kernel 2 modifiers -- -- movq mm6, [rbx + 48] -- psrlq mm4, 8 ; mm4 = p1..p7 -- movq mm5, mm4 ; mm5 = p1..p7 -- punpcklbw mm5, mm0 ; mm5 = p1..p4 -- pmullw mm6, mm5 ; mm6 *= p1..p4 * kernel 3 modifiers -- paddusw mm3, mm6 ; mm3 += mm6 -- -- ; thresholding -- movq mm7, mm1 ; mm7 = p0..p3 -- psubusw mm7, mm5 ; mm7 = p0..p3 - p1..p4 -- psubusw mm5, mm1 ; mm5 = p1..p4 - p0..p3 -- paddusw mm7, mm5 ; mm7 = abs(p0..p3 - p1..p4) -- pcmpgtw mm7, mm2 -- -- movq mm6, [rbx + 64 ] -- psrlq mm4, 8 ; mm4 = p2..p7 -- movq mm5, mm4 ; mm5 = p2..p7 -- punpcklbw mm5, mm0 ; mm5 = p2..p5 -- pmullw mm6, mm5 ; mm5 *= kernel 4 modifiers -- paddusw mm3, mm6 ; mm3 += mm5 -- -- ; thresholding -- movq mm6, mm1 ; mm6 = p0..p3 -- psubusw mm6, mm5 ; mm6 = p0..p3 - p1..p4 -- psubusw mm5, mm1 ; mm5 = p1..p4 - p0..p3 -- paddusw mm6, mm5 ; mm6 = abs(p0..p3 - p1..p4) -- pcmpgtw mm6, mm2 -- por mm7, mm6 ; accumulate thresholds -- -- -- movq mm6, [rbx ] -- movq mm4, [rdi+rdx-2] ; mm4 = p-2..p5 -- movq mm5, mm4 ; mm5 = p-2..p5 -- punpcklbw mm5, mm0 ; mm5 = p-2..p1 -- pmullw mm6, mm5 ; mm5 *= kernel 0 modifiers -- paddusw mm3, mm6 ; mm3 += mm5 -- -- ; thresholding -- movq mm6, mm1 ; mm6 = p0..p3 -- psubusw mm6, mm5 ; mm6 = p0..p3 - p1..p4 -- psubusw mm5, mm1 ; mm5 = p1..p4 - p0..p3 -- paddusw mm6, mm5 ; mm6 = abs(p0..p3 - p1..p4) -- pcmpgtw mm6, mm2 -- por mm7, mm6 ; accumulate thresholds -- -- movq mm6, [rbx + 16] -- psrlq mm4, 8 ; mm4 = p-1..p5 -- punpcklbw mm4, mm0 ; mm4 = p-1..p2 -- pmullw mm6, mm4 ; mm4 *= kernel 1 modifiers. 
-- paddusw mm3, mm6 ; mm3 += mm5 -- -- ; thresholding -- movq mm6, mm1 ; mm6 = p0..p3 -- psubusw mm6, mm4 ; mm6 = p0..p3 - p1..p4 -- psubusw mm4, mm1 ; mm5 = p1..p4 - p0..p3 -- paddusw mm6, mm4 ; mm6 = abs(p0..p3 - p1..p4) -- pcmpgtw mm6, mm2 -- por mm7, mm6 ; accumulate thresholds -- -- paddusw mm3, RD ; mm3 += round value -- psraw mm3, VP8_FILTER_SHIFT ; mm3 /= 128 -- -- pand mm1, mm7 ; mm1 select vals > thresh from source -- pandn mm7, mm3 ; mm7 select vals < thresh from blurred result -- paddusw mm1, mm7 ; combination -- -- packuswb mm1, mm0 ; pack to bytes -- mov DWORD PTR [rdi+rdx-4], eax ; store previous four bytes -- movd eax, mm1 -- -- add rdx, 4 -- cmp edx, dword ptr arg(5) ;cols -- jl .acrossnextcol; -- -- mov DWORD PTR [rdi+rdx-4], eax -- pop rax -- -- ; done with this rwo -- add rsi,rax ; next line -- movsxd rax, dword ptr arg(3) ;dst_pixels_per_line ; destination pitch? -- add rdi,rax ; next destination -- movsxd rax, dword ptr arg(2) ;src_pixels_per_line ; destination pitch? -- -- dec rcx ; decrement count -- jnz .nextrow ; next row -- pop rbx -- -- ; begin epilog -- pop rdi -- pop rsi -- RESTORE_GOT -- UNSHADOW_ARGS -- pop rbp -- ret --%undef RD -- -- - ;void vp8_mbpost_proc_down_mmx(unsigned char *dst, - ; int pitch, int rows, int cols,int flimit) - extern sym(vp8_rv) --global sym(vp8_mbpost_proc_down_mmx) -+global sym(vp8_mbpost_proc_down_mmx) PRIVATE - sym(vp8_mbpost_proc_down_mmx): - push rbp - mov rbp, rsp -@@ -510,7 +245,7 @@ sym(vp8_mbpost_proc_down_mmx): - ; unsigned char bothclamp[16], - ; unsigned int Width, unsigned int Height, int Pitch) - extern sym(rand) --global sym(vp8_plane_add_noise_mmx) -+global sym(vp8_plane_add_noise_mmx) PRIVATE - sym(vp8_plane_add_noise_mmx): - push rbp - mov rbp, rsp -diff --git a/vp8/common/x86/postproc_sse2.asm b/vp8/common/x86/postproc_sse2.asm -index 966aafd..00f84a3 100644 ---- a/vp8/common/x86/postproc_sse2.asm -+++ b/vp8/common/x86/postproc_sse2.asm -@@ -11,146 +11,159 @@ - - %include "vpx_ports/x86_abi_support.asm" - --;void vp8_post_proc_down_and_across_xmm -+;macro in deblock functions -+%macro FIRST_2_ROWS 0 -+ movdqa xmm4, xmm0 -+ movdqa xmm6, xmm0 -+ movdqa xmm5, xmm1 -+ pavgb xmm5, xmm3 -+ -+ ;calculate absolute value -+ psubusb xmm4, xmm1 -+ psubusb xmm1, xmm0 -+ psubusb xmm6, xmm3 -+ psubusb xmm3, xmm0 -+ paddusb xmm4, xmm1 -+ paddusb xmm6, xmm3 -+ -+ ;get threshold -+ movdqa xmm2, flimit -+ pxor xmm1, xmm1 -+ movdqa xmm7, xmm2 -+ -+ ;get mask -+ psubusb xmm2, xmm4 -+ psubusb xmm7, xmm6 -+ pcmpeqb xmm2, xmm1 -+ pcmpeqb xmm7, xmm1 -+ por xmm7, xmm2 -+%endmacro -+ -+%macro SECOND_2_ROWS 0 -+ movdqa xmm6, xmm0 -+ movdqa xmm4, xmm0 -+ movdqa xmm2, xmm1 -+ pavgb xmm1, xmm3 -+ -+ ;calculate absolute value -+ psubusb xmm6, xmm2 -+ psubusb xmm2, xmm0 -+ psubusb xmm4, xmm3 -+ psubusb xmm3, xmm0 -+ paddusb xmm6, xmm2 -+ paddusb xmm4, xmm3 -+ -+ pavgb xmm5, xmm1 -+ -+ ;get threshold -+ movdqa xmm2, flimit -+ pxor xmm1, xmm1 -+ movdqa xmm3, xmm2 -+ -+ ;get mask -+ psubusb xmm2, xmm6 -+ psubusb xmm3, xmm4 -+ pcmpeqb xmm2, xmm1 -+ pcmpeqb xmm3, xmm1 -+ -+ por xmm7, xmm2 -+ por xmm7, xmm3 -+ -+ pavgb xmm5, xmm0 -+ -+ ;decide if or not to use filtered value -+ pand xmm0, xmm7 -+ pandn xmm7, xmm5 -+ paddusb xmm0, xmm7 -+%endmacro -+ -+%macro UPDATE_FLIMIT 0 -+ movdqa xmm2, XMMWORD PTR [rbx] -+ movdqa [rsp], xmm2 -+ add rbx, 16 -+%endmacro -+ -+;void vp8_post_proc_down_and_across_mb_row_sse2 - ;( - ; unsigned char *src_ptr, - ; unsigned char *dst_ptr, - ; int src_pixels_per_line, - ; int dst_pixels_per_line, --; int rows, - ; int cols, 
--; int flimit -+; int *flimits, -+; int size - ;) --global sym(vp8_post_proc_down_and_across_xmm) --sym(vp8_post_proc_down_and_across_xmm): -+global sym(vp8_post_proc_down_and_across_mb_row_sse2) PRIVATE -+sym(vp8_post_proc_down_and_across_mb_row_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 7 - SAVE_XMM 7 -- GET_GOT rbx -+ push rbx - push rsi - push rdi - ; end prolog -- --%if ABI_IS_32BIT=1 && CONFIG_PIC=1 - ALIGN_STACK 16, rax -- ; move the global rd onto the stack, since we don't have enough registers -- ; to do PIC addressing -- movdqa xmm0, [GLOBAL(rd42)] - sub rsp, 16 -- movdqa [rsp], xmm0 --%define RD42 [rsp] --%else --%define RD42 [GLOBAL(rd42)] --%endif -- - -- movd xmm2, dword ptr arg(6) ;flimit -- punpcklwd xmm2, xmm2 -- punpckldq xmm2, xmm2 -- punpcklqdq xmm2, xmm2 -+ ; put flimit on stack -+ mov rbx, arg(5) ;flimits ptr -+ UPDATE_FLIMIT - -- mov rsi, arg(0) ;src_ptr -- mov rdi, arg(1) ;dst_ptr -+%define flimit [rsp] - -- movsxd rcx, DWORD PTR arg(4) ;rows -- movsxd rax, DWORD PTR arg(2) ;src_pixels_per_line ; destination pitch? -- pxor xmm0, xmm0 ; mm0 = 00000000 -+ mov rsi, arg(0) ;src_ptr -+ mov rdi, arg(1) ;dst_ptr - -+ movsxd rax, DWORD PTR arg(2) ;src_pixels_per_line -+ movsxd rcx, DWORD PTR arg(6) ;rows in a macroblock - .nextrow: -- -- xor rdx, rdx ; clear out rdx for use as loop counter -+ xor rdx, rdx ;col - .nextcol: -- movq xmm3, QWORD PTR [rsi] ; mm4 = r0 p0..p7 -- punpcklbw xmm3, xmm0 ; mm3 = p0..p3 -- movdqa xmm1, xmm3 ; mm1 = p0..p3 -- psllw xmm3, 2 ; -- -- movq xmm5, QWORD PTR [rsi + rax] ; mm4 = r1 p0..p7 -- punpcklbw xmm5, xmm0 ; mm5 = r1 p0..p3 -- paddusw xmm3, xmm5 ; mm3 += mm6 -- -- ; thresholding -- movdqa xmm7, xmm1 ; mm7 = r0 p0..p3 -- psubusw xmm7, xmm5 ; mm7 = r0 p0..p3 - r1 p0..p3 -- psubusw xmm5, xmm1 ; mm5 = r1 p0..p3 - r0 p0..p3 -- paddusw xmm7, xmm5 ; mm7 = abs(r0 p0..p3 - r1 p0..p3) -- pcmpgtw xmm7, xmm2 -- -- movq xmm5, QWORD PTR [rsi + 2*rax] ; mm4 = r2 p0..p7 -- punpcklbw xmm5, xmm0 ; mm5 = r2 p0..p3 -- paddusw xmm3, xmm5 ; mm3 += mm5 -- -- ; thresholding -- movdqa xmm6, xmm1 ; mm6 = r0 p0..p3 -- psubusw xmm6, xmm5 ; mm6 = r0 p0..p3 - r2 p0..p3 -- psubusw xmm5, xmm1 ; mm5 = r2 p0..p3 - r2 p0..p3 -- paddusw xmm6, xmm5 ; mm6 = abs(r0 p0..p3 - r2 p0..p3) -- pcmpgtw xmm6, xmm2 -- por xmm7, xmm6 ; accumulate thresholds -+ ;load current and next 2 rows -+ movdqu xmm0, XMMWORD PTR [rsi] -+ movdqu xmm1, XMMWORD PTR [rsi + rax] -+ movdqu xmm3, XMMWORD PTR [rsi + 2*rax] - -+ FIRST_2_ROWS - -+ ;load above 2 rows - neg rax -- movq xmm5, QWORD PTR [rsi+2*rax] ; mm4 = r-2 p0..p7 -- punpcklbw xmm5, xmm0 ; mm5 = r-2 p0..p3 -- paddusw xmm3, xmm5 ; mm3 += mm5 -- -- ; thresholding -- movdqa xmm6, xmm1 ; mm6 = r0 p0..p3 -- psubusw xmm6, xmm5 ; mm6 = p0..p3 - r-2 p0..p3 -- psubusw xmm5, xmm1 ; mm5 = r-2 p0..p3 - p0..p3 -- paddusw xmm6, xmm5 ; mm6 = abs(r0 p0..p3 - r-2 p0..p3) -- pcmpgtw xmm6, xmm2 -- por xmm7, xmm6 ; accumulate thresholds -- -- movq xmm4, QWORD PTR [rsi+rax] ; mm4 = r-1 p0..p7 -- punpcklbw xmm4, xmm0 ; mm4 = r-1 p0..p3 -- paddusw xmm3, xmm4 ; mm3 += mm5 -- -- ; thresholding -- movdqa xmm6, xmm1 ; mm6 = r0 p0..p3 -- psubusw xmm6, xmm4 ; mm6 = p0..p3 - r-2 p0..p3 -- psubusw xmm4, xmm1 ; mm5 = r-1 p0..p3 - p0..p3 -- paddusw xmm6, xmm4 ; mm6 = abs(r0 p0..p3 - r-1 p0..p3) -- pcmpgtw xmm6, xmm2 -- por xmm7, xmm6 ; accumulate thresholds -- -- -- paddusw xmm3, RD42 ; mm3 += round value -- psraw xmm3, 3 ; mm3 /= 8 -- -- pand xmm1, xmm7 ; mm1 select vals > thresh from source -- pandn xmm7, xmm3 ; mm7 select vals < thresh from blurred result -- 
paddusw xmm1, xmm7 ; combination -+ movdqu xmm1, XMMWORD PTR [rsi + 2*rax] -+ movdqu xmm3, XMMWORD PTR [rsi + rax] - -- packuswb xmm1, xmm0 ; pack to bytes -- movq QWORD PTR [rdi], xmm1 ; -+ SECOND_2_ROWS - -- neg rax ; pitch is positive -- add rsi, 8 -- add rdi, 8 -+ movdqu XMMWORD PTR [rdi], xmm0 - -- add rdx, 8 -- cmp edx, dword arg(5) ;cols -+ neg rax ; positive stride -+ add rsi, 16 -+ add rdi, 16 - -- jl .nextcol -+ add rdx, 16 -+ cmp edx, dword arg(4) ;cols -+ jge .downdone -+ UPDATE_FLIMIT -+ jmp .nextcol - -+.downdone: - ; done with the all cols, start the across filtering in place - sub rsi, rdx - sub rdi, rdx - -+ mov rbx, arg(5) ; flimits -+ UPDATE_FLIMIT - - ; dup the first byte into the left border 8 times - movq mm1, [rdi] - punpcklbw mm1, mm1 - punpcklwd mm1, mm1 - punpckldq mm1, mm1 -- - mov rdx, -8 - movq [rdi+rdx], mm1 - - ; dup the last byte into the right border -- movsxd rdx, dword arg(5) -+ movsxd rdx, dword arg(4) - movq mm1, [rdi + rdx + -1] - punpcklbw mm1, mm1 - punpcklwd mm1, mm1 -@@ -158,118 +171,69 @@ sym(vp8_post_proc_down_and_across_xmm): - movq [rdi+rdx], mm1 - - xor rdx, rdx -- movq mm0, QWORD PTR [rdi-8]; -+ movq mm0, QWORD PTR [rdi-16]; -+ movq mm1, QWORD PTR [rdi-8]; - - .acrossnextcol: -- movq xmm7, QWORD PTR [rdi +rdx -2] -- movd xmm4, DWORD PTR [rdi +rdx +6] -- -- pslldq xmm4, 8 -- por xmm4, xmm7 -- -- movdqa xmm3, xmm4 -- psrldq xmm3, 2 -- punpcklbw xmm3, xmm0 ; mm3 = p0..p3 -- movdqa xmm1, xmm3 ; mm1 = p0..p3 -- psllw xmm3, 2 -- -- -- movdqa xmm5, xmm4 -- psrldq xmm5, 3 -- punpcklbw xmm5, xmm0 ; mm5 = p1..p4 -- paddusw xmm3, xmm5 ; mm3 += mm6 -- -- ; thresholding -- movdqa xmm7, xmm1 ; mm7 = p0..p3 -- psubusw xmm7, xmm5 ; mm7 = p0..p3 - p1..p4 -- psubusw xmm5, xmm1 ; mm5 = p1..p4 - p0..p3 -- paddusw xmm7, xmm5 ; mm7 = abs(p0..p3 - p1..p4) -- pcmpgtw xmm7, xmm2 -- -- movdqa xmm5, xmm4 -- psrldq xmm5, 4 -- punpcklbw xmm5, xmm0 ; mm5 = p2..p5 -- paddusw xmm3, xmm5 ; mm3 += mm5 -- -- ; thresholding -- movdqa xmm6, xmm1 ; mm6 = p0..p3 -- psubusw xmm6, xmm5 ; mm6 = p0..p3 - p1..p4 -- psubusw xmm5, xmm1 ; mm5 = p1..p4 - p0..p3 -- paddusw xmm6, xmm5 ; mm6 = abs(p0..p3 - p1..p4) -- pcmpgtw xmm6, xmm2 -- por xmm7, xmm6 ; accumulate thresholds -- -- -- movdqa xmm5, xmm4 ; mm5 = p-2..p5 -- punpcklbw xmm5, xmm0 ; mm5 = p-2..p1 -- paddusw xmm3, xmm5 ; mm3 += mm5 -- -- ; thresholding -- movdqa xmm6, xmm1 ; mm6 = p0..p3 -- psubusw xmm6, xmm5 ; mm6 = p0..p3 - p1..p4 -- psubusw xmm5, xmm1 ; mm5 = p1..p4 - p0..p3 -- paddusw xmm6, xmm5 ; mm6 = abs(p0..p3 - p1..p4) -- pcmpgtw xmm6, xmm2 -- por xmm7, xmm6 ; accumulate thresholds -- -- psrldq xmm4, 1 ; mm4 = p-1..p5 -- punpcklbw xmm4, xmm0 ; mm4 = p-1..p2 -- paddusw xmm3, xmm4 ; mm3 += mm5 -- -- ; thresholding -- movdqa xmm6, xmm1 ; mm6 = p0..p3 -- psubusw xmm6, xmm4 ; mm6 = p0..p3 - p1..p4 -- psubusw xmm4, xmm1 ; mm5 = p1..p4 - p0..p3 -- paddusw xmm6, xmm4 ; mm6 = abs(p0..p3 - p1..p4) -- pcmpgtw xmm6, xmm2 -- por xmm7, xmm6 ; accumulate thresholds -- -- paddusw xmm3, RD42 ; mm3 += round value -- psraw xmm3, 3 ; mm3 /= 8 -- -- pand xmm1, xmm7 ; mm1 select vals > thresh from source -- pandn xmm7, xmm3 ; mm7 select vals < thresh from blurred result -- paddusw xmm1, xmm7 ; combination -- -- packuswb xmm1, xmm0 ; pack to bytes -- movq QWORD PTR [rdi+rdx-8], mm0 ; store previous four bytes -- movdq2q mm0, xmm1 -- -- add rdx, 8 -- cmp edx, dword arg(5) ;cols -- jl .acrossnextcol; -- -- ; last 8 pixels -- movq QWORD PTR [rdi+rdx-8], mm0 -+ movdqu xmm0, XMMWORD PTR [rdi + rdx] -+ movdqu xmm1, XMMWORD PTR [rdi + rdx -2] -+ movdqu 
xmm3, XMMWORD PTR [rdi + rdx -1] -+ -+ FIRST_2_ROWS -+ -+ movdqu xmm1, XMMWORD PTR [rdi + rdx +1] -+ movdqu xmm3, XMMWORD PTR [rdi + rdx +2] -+ -+ SECOND_2_ROWS -+ -+ movq QWORD PTR [rdi+rdx-16], mm0 ; store previous 8 bytes -+ movq QWORD PTR [rdi+rdx-8], mm1 ; store previous 8 bytes -+ movdq2q mm0, xmm0 -+ psrldq xmm0, 8 -+ movdq2q mm1, xmm0 -+ -+ add rdx, 16 -+ cmp edx, dword arg(4) ;cols -+ jge .acrossdone -+ UPDATE_FLIMIT -+ jmp .acrossnextcol - -+.acrossdone -+ ; last 16 pixels -+ movq QWORD PTR [rdi+rdx-16], mm0 -+ -+ cmp edx, dword arg(4) -+ jne .throw_last_8 -+ movq QWORD PTR [rdi+rdx-8], mm1 -+.throw_last_8: - ; done with this rwo -- add rsi,rax ; next line -- mov eax, dword arg(3) ;dst_pixels_per_line ; destination pitch? -- add rdi,rax ; next destination -- mov eax, dword arg(2) ;src_pixels_per_line ; destination pitch? -+ add rsi,rax ;next src line -+ mov eax, dword arg(3) ;dst_pixels_per_line -+ add rdi,rax ;next destination -+ mov eax, dword arg(2) ;src_pixels_per_line - -- dec rcx ; decrement count -- jnz .nextrow ; next row -+ mov rbx, arg(5) ;flimits -+ UPDATE_FLIMIT - --%if ABI_IS_32BIT=1 && CONFIG_PIC=1 -- add rsp,16 -+ dec rcx ;decrement count -+ jnz .nextrow ;next row -+ -+ add rsp, 16 - pop rsp --%endif - ; begin epilog - pop rdi - pop rsi -- RESTORE_GOT -+ pop rbx - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret --%undef RD42 -- -+%undef flimit - - ;void vp8_mbpost_proc_down_xmm(unsigned char *dst, - ; int pitch, int rows, int cols,int flimit) - extern sym(vp8_rv) --global sym(vp8_mbpost_proc_down_xmm) -+global sym(vp8_mbpost_proc_down_xmm) PRIVATE - sym(vp8_mbpost_proc_down_xmm): - push rbp - mov rbp, rsp -@@ -497,7 +461,7 @@ sym(vp8_mbpost_proc_down_xmm): - - ;void vp8_mbpost_proc_across_ip_xmm(unsigned char *src, - ; int pitch, int rows, int cols,int flimit) --global sym(vp8_mbpost_proc_across_ip_xmm) -+global sym(vp8_mbpost_proc_across_ip_xmm) PRIVATE - sym(vp8_mbpost_proc_across_ip_xmm): - push rbp - mov rbp, rsp -@@ -694,7 +658,7 @@ sym(vp8_mbpost_proc_across_ip_xmm): - ; unsigned char bothclamp[16], - ; unsigned int Width, unsigned int Height, int Pitch) - extern sym(rand) --global sym(vp8_plane_add_noise_wmt) -+global sym(vp8_plane_add_noise_wmt) PRIVATE - sym(vp8_plane_add_noise_wmt): - push rbp - mov rbp, rsp -@@ -753,7 +717,5 @@ sym(vp8_plane_add_noise_wmt): - - SECTION_RODATA - align 16 --rd42: -- times 8 dw 0x04 - four8s: - times 4 dd 8 -diff --git a/vp8/common/x86/postproc_x86.c b/vp8/common/x86/postproc_x86.c -index a25921b..3ec0106 100644 ---- a/vp8/common/x86/postproc_x86.c -+++ b/vp8/common/x86/postproc_x86.c -@@ -18,4 +18,7 @@ extern int rand(void) - { - return __rand(); - } -+#else -+/* ISO C forbids an empty translation unit. 
*/ -+int vp8_unused; - #endif -diff --git a/vp8/common/x86/recon_mmx.asm b/vp8/common/x86/recon_mmx.asm -index 19c0faf..15e9871 100644 ---- a/vp8/common/x86/recon_mmx.asm -+++ b/vp8/common/x86/recon_mmx.asm -@@ -18,7 +18,7 @@ - ; unsigned char *dst, - ; int dst_stride - ; ) --global sym(vp8_copy_mem8x8_mmx) -+global sym(vp8_copy_mem8x8_mmx) PRIVATE - sym(vp8_copy_mem8x8_mmx): - push rbp - mov rbp, rsp -@@ -81,7 +81,7 @@ sym(vp8_copy_mem8x8_mmx): - ; unsigned char *dst, - ; int dst_stride - ; ) --global sym(vp8_copy_mem8x4_mmx) -+global sym(vp8_copy_mem8x4_mmx) PRIVATE - sym(vp8_copy_mem8x4_mmx): - push rbp - mov rbp, rsp -@@ -125,7 +125,7 @@ sym(vp8_copy_mem8x4_mmx): - ; unsigned char *dst, - ; int dst_stride - ; ) --global sym(vp8_copy_mem16x16_mmx) -+global sym(vp8_copy_mem16x16_mmx) PRIVATE - sym(vp8_copy_mem16x16_mmx): - push rbp - mov rbp, rsp -diff --git a/vp8/common/x86/recon_sse2.asm b/vp8/common/x86/recon_sse2.asm -index 7b6e3cf..1434bcd 100644 ---- a/vp8/common/x86/recon_sse2.asm -+++ b/vp8/common/x86/recon_sse2.asm -@@ -17,7 +17,7 @@ - ; unsigned char *dst, - ; int dst_stride - ; ) --global sym(vp8_copy_mem16x16_sse2) -+global sym(vp8_copy_mem16x16_sse2) PRIVATE - sym(vp8_copy_mem16x16_sse2): - push rbp - mov rbp, rsp -@@ -123,7 +123,7 @@ sym(vp8_copy_mem16x16_sse2): - ; unsigned char *left, - ; int left_stride, - ; ) --global sym(vp8_intra_pred_uv_dc_mmx2) -+global sym(vp8_intra_pred_uv_dc_mmx2) PRIVATE - sym(vp8_intra_pred_uv_dc_mmx2): - push rbp - mov rbp, rsp -@@ -196,7 +196,7 @@ sym(vp8_intra_pred_uv_dc_mmx2): - ; unsigned char *left, - ; int left_stride, - ; ) --global sym(vp8_intra_pred_uv_dctop_mmx2) -+global sym(vp8_intra_pred_uv_dctop_mmx2) PRIVATE - sym(vp8_intra_pred_uv_dctop_mmx2): - push rbp - mov rbp, rsp -@@ -250,7 +250,7 @@ sym(vp8_intra_pred_uv_dctop_mmx2): - ; unsigned char *left, - ; int left_stride, - ; ) --global sym(vp8_intra_pred_uv_dcleft_mmx2) -+global sym(vp8_intra_pred_uv_dcleft_mmx2) PRIVATE - sym(vp8_intra_pred_uv_dcleft_mmx2): - push rbp - mov rbp, rsp -@@ -317,7 +317,7 @@ sym(vp8_intra_pred_uv_dcleft_mmx2): - ; unsigned char *left, - ; int left_stride, - ; ) --global sym(vp8_intra_pred_uv_dc128_mmx) -+global sym(vp8_intra_pred_uv_dc128_mmx) PRIVATE - sym(vp8_intra_pred_uv_dc128_mmx): - push rbp - mov rbp, rsp -@@ -357,7 +357,7 @@ sym(vp8_intra_pred_uv_dc128_mmx): - ; int left_stride, - ; ) - %macro vp8_intra_pred_uv_tm 1 --global sym(vp8_intra_pred_uv_tm_%1) -+global sym(vp8_intra_pred_uv_tm_%1) PRIVATE - sym(vp8_intra_pred_uv_tm_%1): - push rbp - mov rbp, rsp -@@ -437,7 +437,7 @@ vp8_intra_pred_uv_tm ssse3 - ; unsigned char *left, - ; int left_stride, - ; ) --global sym(vp8_intra_pred_uv_ve_mmx) -+global sym(vp8_intra_pred_uv_ve_mmx) PRIVATE - sym(vp8_intra_pred_uv_ve_mmx): - push rbp - mov rbp, rsp -@@ -479,7 +479,7 @@ sym(vp8_intra_pred_uv_ve_mmx): - ; int left_stride - ; ) - %macro vp8_intra_pred_uv_ho 1 --global sym(vp8_intra_pred_uv_ho_%1) -+global sym(vp8_intra_pred_uv_ho_%1) PRIVATE - sym(vp8_intra_pred_uv_ho_%1): - push rbp - mov rbp, rsp -@@ -577,7 +577,7 @@ vp8_intra_pred_uv_ho ssse3 - ; unsigned char *left, - ; int left_stride - ; ) --global sym(vp8_intra_pred_y_dc_sse2) -+global sym(vp8_intra_pred_y_dc_sse2) PRIVATE - sym(vp8_intra_pred_y_dc_sse2): - push rbp - mov rbp, rsp -@@ -683,7 +683,7 @@ sym(vp8_intra_pred_y_dc_sse2): - ; unsigned char *left, - ; int left_stride - ; ) --global sym(vp8_intra_pred_y_dctop_sse2) -+global sym(vp8_intra_pred_y_dctop_sse2) PRIVATE - sym(vp8_intra_pred_y_dctop_sse2): - push rbp - mov rbp, rsp -@@ 
-745,7 +745,7 @@ sym(vp8_intra_pred_y_dctop_sse2): - ; unsigned char *left, - ; int left_stride - ; ) --global sym(vp8_intra_pred_y_dcleft_sse2) -+global sym(vp8_intra_pred_y_dcleft_sse2) PRIVATE - sym(vp8_intra_pred_y_dcleft_sse2): - push rbp - mov rbp, rsp -@@ -838,7 +838,7 @@ sym(vp8_intra_pred_y_dcleft_sse2): - ; unsigned char *left, - ; int left_stride - ; ) --global sym(vp8_intra_pred_y_dc128_sse2) -+global sym(vp8_intra_pred_y_dc128_sse2) PRIVATE - sym(vp8_intra_pred_y_dc128_sse2): - push rbp - mov rbp, rsp -@@ -885,11 +885,12 @@ sym(vp8_intra_pred_y_dc128_sse2): - ; int left_stride - ; ) - %macro vp8_intra_pred_y_tm 1 --global sym(vp8_intra_pred_y_tm_%1) -+global sym(vp8_intra_pred_y_tm_%1) PRIVATE - sym(vp8_intra_pred_y_tm_%1): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 -+ SAVE_XMM 7 - push rsi - push rdi - GET_GOT rbx -@@ -957,6 +958,7 @@ vp8_intra_pred_y_tm_%1_loop: - RESTORE_GOT - pop rdi - pop rsi -+ RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret -@@ -972,7 +974,7 @@ vp8_intra_pred_y_tm ssse3 - ; unsigned char *left, - ; int left_stride - ; ) --global sym(vp8_intra_pred_y_ve_sse2) -+global sym(vp8_intra_pred_y_ve_sse2) PRIVATE - sym(vp8_intra_pred_y_ve_sse2): - push rbp - mov rbp, rsp -@@ -1020,7 +1022,7 @@ sym(vp8_intra_pred_y_ve_sse2): - ; unsigned char *left, - ; int left_stride, - ; ) --global sym(vp8_intra_pred_y_ho_sse2) -+global sym(vp8_intra_pred_y_ho_sse2) PRIVATE - sym(vp8_intra_pred_y_ho_sse2): - push rbp - mov rbp, rsp -diff --git a/vp8/common/x86/sad_mmx.asm b/vp8/common/x86/sad_mmx.asm -index 407b399..592112f 100644 ---- a/vp8/common/x86/sad_mmx.asm -+++ b/vp8/common/x86/sad_mmx.asm -@@ -11,11 +11,11 @@ - - %include "vpx_ports/x86_abi_support.asm" - --global sym(vp8_sad16x16_mmx) --global sym(vp8_sad8x16_mmx) --global sym(vp8_sad8x8_mmx) --global sym(vp8_sad4x4_mmx) --global sym(vp8_sad16x8_mmx) -+global sym(vp8_sad16x16_mmx) PRIVATE -+global sym(vp8_sad8x16_mmx) PRIVATE -+global sym(vp8_sad8x8_mmx) PRIVATE -+global sym(vp8_sad4x4_mmx) PRIVATE -+global sym(vp8_sad16x8_mmx) PRIVATE - - ;unsigned int vp8_sad16x16_mmx( - ; unsigned char *src_ptr, -diff --git a/vp8/common/x86/sad_sse2.asm b/vp8/common/x86/sad_sse2.asm -index 0b01d7b..8d86abc 100644 ---- a/vp8/common/x86/sad_sse2.asm -+++ b/vp8/common/x86/sad_sse2.asm -@@ -16,7 +16,7 @@ - ; int src_stride, - ; unsigned char *ref_ptr, - ; int ref_stride) --global sym(vp8_sad16x16_wmt) -+global sym(vp8_sad16x16_wmt) PRIVATE - sym(vp8_sad16x16_wmt): - push rbp - mov rbp, rsp -@@ -90,7 +90,7 @@ sym(vp8_sad16x16_wmt): - ; unsigned char *ref_ptr, - ; int ref_stride, - ; int max_sad) --global sym(vp8_sad8x16_wmt) -+global sym(vp8_sad8x16_wmt) PRIVATE - sym(vp8_sad8x16_wmt): - push rbp - mov rbp, rsp -@@ -115,7 +115,7 @@ sym(vp8_sad8x16_wmt): - - movq rax, mm7 - cmp eax, arg(4) -- jg .x8x16sad_wmt_early_exit -+ ja .x8x16sad_wmt_early_exit - - movq mm0, QWORD PTR [rsi] - movq mm1, QWORD PTR [rdi] -@@ -153,7 +153,7 @@ sym(vp8_sad8x16_wmt): - ; int src_stride, - ; unsigned char *ref_ptr, - ; int ref_stride) --global sym(vp8_sad8x8_wmt) -+global sym(vp8_sad8x8_wmt) PRIVATE - sym(vp8_sad8x8_wmt): - push rbp - mov rbp, rsp -@@ -176,7 +176,7 @@ sym(vp8_sad8x8_wmt): - - movq rax, mm7 - cmp eax, arg(4) -- jg .x8x8sad_wmt_early_exit -+ ja .x8x8sad_wmt_early_exit - - movq mm0, QWORD PTR [rsi] - movq mm1, QWORD PTR [rdi] -@@ -206,7 +206,7 @@ sym(vp8_sad8x8_wmt): - ; int src_stride, - ; unsigned char *ref_ptr, - ; int ref_stride) --global sym(vp8_sad4x4_wmt) -+global sym(vp8_sad4x4_wmt) PRIVATE - sym(vp8_sad4x4_wmt): - push rbp - mov 
rbp, rsp -@@ -261,7 +261,7 @@ sym(vp8_sad4x4_wmt): - ; int src_stride, - ; unsigned char *ref_ptr, - ; int ref_stride) --global sym(vp8_sad16x8_wmt) -+global sym(vp8_sad16x8_wmt) PRIVATE - sym(vp8_sad16x8_wmt): - push rbp - mov rbp, rsp -@@ -285,7 +285,7 @@ sym(vp8_sad16x8_wmt): - - movq rax, mm7 - cmp eax, arg(4) -- jg .x16x8sad_wmt_early_exit -+ ja .x16x8sad_wmt_early_exit - - movq mm0, QWORD PTR [rsi] - movq mm2, QWORD PTR [rsi+8] -@@ -335,7 +335,7 @@ sym(vp8_sad16x8_wmt): - ; unsigned char *dst_ptr, - ; int dst_stride, - ; int height); --global sym(vp8_copy32xn_sse2) -+global sym(vp8_copy32xn_sse2) PRIVATE - sym(vp8_copy32xn_sse2): - push rbp - mov rbp, rsp -diff --git a/vp8/common/x86/sad_sse3.asm b/vp8/common/x86/sad_sse3.asm -index c2af3c8..f90a589 100644 ---- a/vp8/common/x86/sad_sse3.asm -+++ b/vp8/common/x86/sad_sse3.asm -@@ -380,7 +380,7 @@ - ; unsigned char *ref_ptr, - ; int ref_stride, - ; int *results) --global sym(vp8_sad16x16x3_sse3) -+global sym(vp8_sad16x16x3_sse3) PRIVATE - sym(vp8_sad16x16x3_sse3): - - STACK_FRAME_CREATE_X3 -@@ -422,7 +422,7 @@ sym(vp8_sad16x16x3_sse3): - ; unsigned char *ref_ptr, - ; int ref_stride, - ; int *results) --global sym(vp8_sad16x8x3_sse3) -+global sym(vp8_sad16x8x3_sse3) PRIVATE - sym(vp8_sad16x8x3_sse3): - - STACK_FRAME_CREATE_X3 -@@ -460,7 +460,7 @@ sym(vp8_sad16x8x3_sse3): - ; unsigned char *ref_ptr, - ; int ref_stride, - ; int *results) --global sym(vp8_sad8x16x3_sse3) -+global sym(vp8_sad8x16x3_sse3) PRIVATE - sym(vp8_sad8x16x3_sse3): - - STACK_FRAME_CREATE_X3 -@@ -489,7 +489,7 @@ sym(vp8_sad8x16x3_sse3): - ; unsigned char *ref_ptr, - ; int ref_stride, - ; int *results) --global sym(vp8_sad8x8x3_sse3) -+global sym(vp8_sad8x8x3_sse3) PRIVATE - sym(vp8_sad8x8x3_sse3): - - STACK_FRAME_CREATE_X3 -@@ -514,7 +514,7 @@ sym(vp8_sad8x8x3_sse3): - ; unsigned char *ref_ptr, - ; int ref_stride, - ; int *results) --global sym(vp8_sad4x4x3_sse3) -+global sym(vp8_sad4x4x3_sse3) PRIVATE - sym(vp8_sad4x4x3_sse3): - - STACK_FRAME_CREATE_X3 -@@ -589,7 +589,7 @@ sym(vp8_sad4x4x3_sse3): - ; int ref_stride, - ; int max_sad) - ;%define lddqu movdqu --global sym(vp8_sad16x16_sse3) -+global sym(vp8_sad16x16_sse3) PRIVATE - sym(vp8_sad16x16_sse3): - - STACK_FRAME_CREATE_X3 -@@ -642,7 +642,7 @@ sym(vp8_sad16x16_sse3): - ; unsigned char *dst_ptr, - ; int dst_stride, - ; int height); --global sym(vp8_copy32xn_sse3) -+global sym(vp8_copy32xn_sse3) PRIVATE - sym(vp8_copy32xn_sse3): - - STACK_FRAME_CREATE_X3 -@@ -703,7 +703,7 @@ sym(vp8_copy32xn_sse3): - ; unsigned char *ref_ptr_base, - ; int ref_stride, - ; int *results) --global sym(vp8_sad16x16x4d_sse3) -+global sym(vp8_sad16x16x4d_sse3) PRIVATE - sym(vp8_sad16x16x4d_sse3): - - STACK_FRAME_CREATE_X4 -@@ -754,7 +754,7 @@ sym(vp8_sad16x16x4d_sse3): - ; unsigned char *ref_ptr_base, - ; int ref_stride, - ; int *results) --global sym(vp8_sad16x8x4d_sse3) -+global sym(vp8_sad16x8x4d_sse3) PRIVATE - sym(vp8_sad16x8x4d_sse3): - - STACK_FRAME_CREATE_X4 -@@ -801,7 +801,7 @@ sym(vp8_sad16x8x4d_sse3): - ; unsigned char *ref_ptr, - ; int ref_stride, - ; int *results) --global sym(vp8_sad8x16x4d_sse3) -+global sym(vp8_sad8x16x4d_sse3) PRIVATE - sym(vp8_sad8x16x4d_sse3): - - STACK_FRAME_CREATE_X4 -@@ -834,7 +834,7 @@ sym(vp8_sad8x16x4d_sse3): - ; unsigned char *ref_ptr, - ; int ref_stride, - ; int *results) --global sym(vp8_sad8x8x4d_sse3) -+global sym(vp8_sad8x8x4d_sse3) PRIVATE - sym(vp8_sad8x8x4d_sse3): - - STACK_FRAME_CREATE_X4 -@@ -863,7 +863,7 @@ sym(vp8_sad8x8x4d_sse3): - ; unsigned char *ref_ptr, - ; int ref_stride, - ; 
int *results) --global sym(vp8_sad4x4x4d_sse3) -+global sym(vp8_sad4x4x4d_sse3) PRIVATE - sym(vp8_sad4x4x4d_sse3): - - STACK_FRAME_CREATE_X4 -diff --git a/vp8/common/x86/sad_sse4.asm b/vp8/common/x86/sad_sse4.asm -index 03ecec4..f7fccd7 100644 ---- a/vp8/common/x86/sad_sse4.asm -+++ b/vp8/common/x86/sad_sse4.asm -@@ -161,7 +161,7 @@ - ; const unsigned char *ref_ptr, - ; int ref_stride, - ; unsigned short *sad_array); --global sym(vp8_sad16x16x8_sse4) -+global sym(vp8_sad16x16x8_sse4) PRIVATE - sym(vp8_sad16x16x8_sse4): - push rbp - mov rbp, rsp -@@ -203,7 +203,7 @@ sym(vp8_sad16x16x8_sse4): - ; int ref_stride, - ; unsigned short *sad_array - ;); --global sym(vp8_sad16x8x8_sse4) -+global sym(vp8_sad16x8x8_sse4) PRIVATE - sym(vp8_sad16x8x8_sse4): - push rbp - mov rbp, rsp -@@ -241,7 +241,7 @@ sym(vp8_sad16x8x8_sse4): - ; int ref_stride, - ; unsigned short *sad_array - ;); --global sym(vp8_sad8x8x8_sse4) -+global sym(vp8_sad8x8x8_sse4) PRIVATE - sym(vp8_sad8x8x8_sse4): - push rbp - mov rbp, rsp -@@ -279,7 +279,7 @@ sym(vp8_sad8x8x8_sse4): - ; int ref_stride, - ; unsigned short *sad_array - ;); --global sym(vp8_sad8x16x8_sse4) -+global sym(vp8_sad8x16x8_sse4) PRIVATE - sym(vp8_sad8x16x8_sse4): - push rbp - mov rbp, rsp -@@ -320,7 +320,7 @@ sym(vp8_sad8x16x8_sse4): - ; int ref_stride, - ; unsigned short *sad_array - ;); --global sym(vp8_sad4x4x8_sse4) -+global sym(vp8_sad4x4x8_sse4) PRIVATE - sym(vp8_sad4x4x8_sse4): - push rbp - mov rbp, rsp -diff --git a/vp8/common/x86/sad_ssse3.asm b/vp8/common/x86/sad_ssse3.asm -index 95b6c89..278fc06 100644 ---- a/vp8/common/x86/sad_ssse3.asm -+++ b/vp8/common/x86/sad_ssse3.asm -@@ -152,7 +152,7 @@ - ; unsigned char *ref_ptr, - ; int ref_stride, - ; int *results) --global sym(vp8_sad16x16x3_ssse3) -+global sym(vp8_sad16x16x3_ssse3) PRIVATE - sym(vp8_sad16x16x3_ssse3): - push rbp - mov rbp, rsp -@@ -265,7 +265,7 @@ sym(vp8_sad16x16x3_ssse3): - ; unsigned char *ref_ptr, - ; int ref_stride, - ; int *results) --global sym(vp8_sad16x8x3_ssse3) -+global sym(vp8_sad16x8x3_ssse3) PRIVATE - sym(vp8_sad16x8x3_ssse3): - push rbp - mov rbp, rsp -diff --git a/vp8/common/x86/subpixel_mmx.asm b/vp8/common/x86/subpixel_mmx.asm -index 5528fd0..47dd452 100644 ---- a/vp8/common/x86/subpixel_mmx.asm -+++ b/vp8/common/x86/subpixel_mmx.asm -@@ -28,7 +28,7 @@ extern sym(vp8_bilinear_filters_x86_8) - ; unsigned int output_width, - ; short * vp8_filter - ;) --global sym(vp8_filter_block1d_h6_mmx) -+global sym(vp8_filter_block1d_h6_mmx) PRIVATE - sym(vp8_filter_block1d_h6_mmx): - push rbp - mov rbp, rsp -@@ -125,7 +125,7 @@ sym(vp8_filter_block1d_h6_mmx): - ; unsigned int output_width, - ; short * vp8_filter - ;) --global sym(vp8_filter_block1dc_v6_mmx) -+global sym(vp8_filter_block1dc_v6_mmx) PRIVATE - sym(vp8_filter_block1dc_v6_mmx): - push rbp - mov rbp, rsp -@@ -213,7 +213,7 @@ sym(vp8_filter_block1dc_v6_mmx): - ; unsigned char *dst_ptr, - ; int dst_pitch - ;) --global sym(vp8_bilinear_predict8x8_mmx) -+global sym(vp8_bilinear_predict8x8_mmx) PRIVATE - sym(vp8_bilinear_predict8x8_mmx): - push rbp - mov rbp, rsp -@@ -370,7 +370,7 @@ sym(vp8_bilinear_predict8x8_mmx): - ; unsigned char *dst_ptr, - ; int dst_pitch - ;) --global sym(vp8_bilinear_predict8x4_mmx) -+global sym(vp8_bilinear_predict8x4_mmx) PRIVATE - sym(vp8_bilinear_predict8x4_mmx): - push rbp - mov rbp, rsp -@@ -525,7 +525,7 @@ sym(vp8_bilinear_predict8x4_mmx): - ; unsigned char *dst_ptr, - ; int dst_pitch - ;) --global sym(vp8_bilinear_predict4x4_mmx) -+global sym(vp8_bilinear_predict4x4_mmx) PRIVATE - 
sym(vp8_bilinear_predict4x4_mmx): - push rbp - mov rbp, rsp -diff --git a/vp8/common/x86/subpixel_sse2.asm b/vp8/common/x86/subpixel_sse2.asm -index cb550af..69f8d10 100644 ---- a/vp8/common/x86/subpixel_sse2.asm -+++ b/vp8/common/x86/subpixel_sse2.asm -@@ -33,7 +33,7 @@ extern sym(vp8_bilinear_filters_x86_8) - ; unsigned int output_width, - ; short *vp8_filter - ;) --global sym(vp8_filter_block1d8_h6_sse2) -+global sym(vp8_filter_block1d8_h6_sse2) PRIVATE - sym(vp8_filter_block1d8_h6_sse2): - push rbp - mov rbp, rsp -@@ -153,7 +153,7 @@ sym(vp8_filter_block1d8_h6_sse2): - ; even number. This function handles 8 pixels in horizontal direction, calculating ONE - ; rows each iteration to take advantage of the 128 bits operations. - ;*************************************************************************************/ --global sym(vp8_filter_block1d16_h6_sse2) -+global sym(vp8_filter_block1d16_h6_sse2) PRIVATE - sym(vp8_filter_block1d16_h6_sse2): - push rbp - mov rbp, rsp -@@ -329,7 +329,7 @@ sym(vp8_filter_block1d16_h6_sse2): - ; Notes: filter_block1d8_v6 applies a 6 tap filter vertically to the input pixels. The - ; input pixel array has output_height rows. - ;*************************************************************************************/ --global sym(vp8_filter_block1d8_v6_sse2) -+global sym(vp8_filter_block1d8_v6_sse2) PRIVATE - sym(vp8_filter_block1d8_v6_sse2): - push rbp - mov rbp, rsp -@@ -424,7 +424,7 @@ sym(vp8_filter_block1d8_v6_sse2): - ; Notes: filter_block1d16_v6 applies a 6 tap filter vertically to the input pixels. The - ; input pixel array has output_height rows. - ;*************************************************************************************/ --global sym(vp8_filter_block1d16_v6_sse2) -+global sym(vp8_filter_block1d16_v6_sse2) PRIVATE - sym(vp8_filter_block1d16_v6_sse2): - push rbp - mov rbp, rsp -@@ -534,7 +534,7 @@ sym(vp8_filter_block1d16_v6_sse2): - ; const short *vp8_filter - ;) - ; First-pass filter only when yoffset==0 --global sym(vp8_filter_block1d8_h6_only_sse2) -+global sym(vp8_filter_block1d8_h6_only_sse2) PRIVATE - sym(vp8_filter_block1d8_h6_only_sse2): - push rbp - mov rbp, rsp -@@ -647,7 +647,7 @@ sym(vp8_filter_block1d8_h6_only_sse2): - ; const short *vp8_filter - ;) - ; First-pass filter only when yoffset==0 --global sym(vp8_filter_block1d16_h6_only_sse2) -+global sym(vp8_filter_block1d16_h6_only_sse2) PRIVATE - sym(vp8_filter_block1d16_h6_only_sse2): - push rbp - mov rbp, rsp -@@ -812,7 +812,7 @@ sym(vp8_filter_block1d16_h6_only_sse2): - ; const short *vp8_filter - ;) - ; Second-pass filter only when xoffset==0 --global sym(vp8_filter_block1d8_v6_only_sse2) -+global sym(vp8_filter_block1d8_v6_only_sse2) PRIVATE - sym(vp8_filter_block1d8_v6_only_sse2): - push rbp - mov rbp, rsp -@@ -904,7 +904,7 @@ sym(vp8_filter_block1d8_v6_only_sse2): - ; unsigned int output_height, - ; unsigned int output_width - ;) --global sym(vp8_unpack_block1d16_h6_sse2) -+global sym(vp8_unpack_block1d16_h6_sse2) PRIVATE - sym(vp8_unpack_block1d16_h6_sse2): - push rbp - mov rbp, rsp -@@ -963,7 +963,7 @@ sym(vp8_unpack_block1d16_h6_sse2): - ; int dst_pitch - ;) - extern sym(vp8_bilinear_filters_x86_8) --global sym(vp8_bilinear_predict16x16_sse2) -+global sym(vp8_bilinear_predict16x16_sse2) PRIVATE - sym(vp8_bilinear_predict16x16_sse2): - push rbp - mov rbp, rsp -@@ -1231,7 +1231,7 @@ sym(vp8_bilinear_predict16x16_sse2): - ; unsigned char *dst_ptr, - ; int dst_pitch - ;) --global sym(vp8_bilinear_predict8x8_sse2) -+global sym(vp8_bilinear_predict8x8_sse2) PRIVATE - 
sym(vp8_bilinear_predict8x8_sse2): - push rbp - mov rbp, rsp -diff --git a/vp8/common/x86/subpixel_ssse3.asm b/vp8/common/x86/subpixel_ssse3.asm -index 6bca82b..c06f245 100644 ---- a/vp8/common/x86/subpixel_ssse3.asm -+++ b/vp8/common/x86/subpixel_ssse3.asm -@@ -34,7 +34,7 @@ - ; unsigned int output_height, - ; unsigned int vp8_filter_index - ;) --global sym(vp8_filter_block1d8_h6_ssse3) -+global sym(vp8_filter_block1d8_h6_ssse3) PRIVATE - sym(vp8_filter_block1d8_h6_ssse3): - push rbp - mov rbp, rsp -@@ -177,7 +177,7 @@ vp8_filter_block1d8_h4_ssse3: - ; unsigned int output_height, - ; unsigned int vp8_filter_index - ;) --global sym(vp8_filter_block1d16_h6_ssse3) -+global sym(vp8_filter_block1d16_h6_ssse3) PRIVATE - sym(vp8_filter_block1d16_h6_ssse3): - push rbp - mov rbp, rsp -@@ -284,7 +284,7 @@ sym(vp8_filter_block1d16_h6_ssse3): - ; unsigned int output_height, - ; unsigned int vp8_filter_index - ;) --global sym(vp8_filter_block1d4_h6_ssse3) -+global sym(vp8_filter_block1d4_h6_ssse3) PRIVATE - sym(vp8_filter_block1d4_h6_ssse3): - push rbp - mov rbp, rsp -@@ -352,6 +352,7 @@ sym(vp8_filter_block1d4_h6_ssse3): - pop rdi - pop rsi - RESTORE_GOT -+ RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret -@@ -413,7 +414,7 @@ sym(vp8_filter_block1d4_h6_ssse3): - ; unsigned int output_height, - ; unsigned int vp8_filter_index - ;) --global sym(vp8_filter_block1d16_v6_ssse3) -+global sym(vp8_filter_block1d16_v6_ssse3) PRIVATE - sym(vp8_filter_block1d16_v6_ssse3): - push rbp - mov rbp, rsp -@@ -601,7 +602,7 @@ sym(vp8_filter_block1d16_v6_ssse3): - ; unsigned int output_height, - ; unsigned int vp8_filter_index - ;) --global sym(vp8_filter_block1d8_v6_ssse3) -+global sym(vp8_filter_block1d8_v6_ssse3) PRIVATE - sym(vp8_filter_block1d8_v6_ssse3): - push rbp - mov rbp, rsp -@@ -741,7 +742,7 @@ sym(vp8_filter_block1d8_v6_ssse3): - ; unsigned int output_height, - ; unsigned int vp8_filter_index - ;) --global sym(vp8_filter_block1d4_v6_ssse3) -+global sym(vp8_filter_block1d4_v6_ssse3) PRIVATE - sym(vp8_filter_block1d4_v6_ssse3): - push rbp - mov rbp, rsp -@@ -880,7 +881,7 @@ sym(vp8_filter_block1d4_v6_ssse3): - ; unsigned char *dst_ptr, - ; int dst_pitch - ;) --global sym(vp8_bilinear_predict16x16_ssse3) -+global sym(vp8_bilinear_predict16x16_ssse3) PRIVATE - sym(vp8_bilinear_predict16x16_ssse3): - push rbp - mov rbp, rsp -@@ -1143,7 +1144,7 @@ sym(vp8_bilinear_predict16x16_ssse3): - ; unsigned char *dst_ptr, - ; int dst_pitch - ;) --global sym(vp8_bilinear_predict8x8_ssse3) -+global sym(vp8_bilinear_predict8x8_ssse3) PRIVATE - sym(vp8_bilinear_predict8x8_ssse3): - push rbp - mov rbp, rsp -diff --git a/vp8/common/x86/variance_impl_mmx.asm b/vp8/common/x86/variance_impl_mmx.asm -index 2be8bbe..d9120d0 100644 ---- a/vp8/common/x86/variance_impl_mmx.asm -+++ b/vp8/common/x86/variance_impl_mmx.asm -@@ -12,7 +12,7 @@ - %include "vpx_ports/x86_abi_support.asm" - - ;unsigned int vp8_get_mb_ss_mmx( short *src_ptr ) --global sym(vp8_get_mb_ss_mmx) -+global sym(vp8_get_mb_ss_mmx) PRIVATE - sym(vp8_get_mb_ss_mmx): - push rbp - mov rbp, rsp -@@ -72,7 +72,7 @@ sym(vp8_get_mb_ss_mmx): - ; unsigned int *SSE, - ; int *Sum - ;) --global sym(vp8_get8x8var_mmx) -+global sym(vp8_get8x8var_mmx) PRIVATE - sym(vp8_get8x8var_mmx): - push rbp - mov rbp, rsp -@@ -320,7 +320,7 @@ sym(vp8_get8x8var_mmx): - ; unsigned int *SSE, - ; int *Sum - ;) --global sym(vp8_get4x4var_mmx) -+global sym(vp8_get4x4var_mmx) PRIVATE - sym(vp8_get4x4var_mmx): - push rbp - mov rbp, rsp -@@ -433,7 +433,7 @@ sym(vp8_get4x4var_mmx): - ; unsigned char *ref_ptr, - ; 
int recon_stride - ;) --global sym(vp8_get4x4sse_cs_mmx) -+global sym(vp8_get4x4sse_cs_mmx) PRIVATE - sym(vp8_get4x4sse_cs_mmx): - push rbp - mov rbp, rsp -@@ -522,7 +522,7 @@ sym(vp8_get4x4sse_cs_mmx): - ; int *sum, - ; unsigned int *sumsquared - ;) --global sym(vp8_filter_block2d_bil4x4_var_mmx) -+global sym(vp8_filter_block2d_bil4x4_var_mmx) PRIVATE - sym(vp8_filter_block2d_bil4x4_var_mmx): - push rbp - mov rbp, rsp -@@ -667,7 +667,7 @@ sym(vp8_filter_block2d_bil4x4_var_mmx): - ; int *sum, - ; unsigned int *sumsquared - ;) --global sym(vp8_filter_block2d_bil_var_mmx) -+global sym(vp8_filter_block2d_bil_var_mmx) PRIVATE - sym(vp8_filter_block2d_bil_var_mmx): - push rbp - mov rbp, rsp -diff --git a/vp8/common/x86/variance_impl_sse2.asm b/vp8/common/x86/variance_impl_sse2.asm -index 7629220..761433c 100644 ---- a/vp8/common/x86/variance_impl_sse2.asm -+++ b/vp8/common/x86/variance_impl_sse2.asm -@@ -17,7 +17,7 @@ - ;( - ; short *src_ptr - ;) --global sym(vp8_get_mb_ss_sse2) -+global sym(vp8_get_mb_ss_sse2) PRIVATE - sym(vp8_get_mb_ss_sse2): - push rbp - mov rbp, rsp -@@ -80,7 +80,7 @@ sym(vp8_get_mb_ss_sse2): - ; unsigned int * SSE, - ; int * Sum - ;) --global sym(vp8_get16x16var_sse2) -+global sym(vp8_get16x16var_sse2) PRIVATE - sym(vp8_get16x16var_sse2): - push rbp - mov rbp, rsp -@@ -224,7 +224,7 @@ sym(vp8_get16x16var_sse2): - ; unsigned int * SSE, - ; int * Sum - ;) --global sym(vp8_get8x8var_sse2) -+global sym(vp8_get8x8var_sse2) PRIVATE - sym(vp8_get8x8var_sse2): - push rbp - mov rbp, rsp -@@ -413,7 +413,7 @@ sym(vp8_get8x8var_sse2): - ; unsigned int *sumsquared;; - ; - ;) --global sym(vp8_filter_block2d_bil_var_sse2) -+global sym(vp8_filter_block2d_bil_var_sse2) PRIVATE - sym(vp8_filter_block2d_bil_var_sse2): - push rbp - mov rbp, rsp -@@ -690,7 +690,7 @@ filter_block2d_bil_variance: - ; int *sum, - ; unsigned int *sumsquared - ;) --global sym(vp8_half_horiz_vert_variance8x_h_sse2) -+global sym(vp8_half_horiz_vert_variance8x_h_sse2) PRIVATE - sym(vp8_half_horiz_vert_variance8x_h_sse2): - push rbp - mov rbp, rsp -@@ -812,7 +812,7 @@ vp8_half_horiz_vert_variance8x_h_1: - ; int *sum, - ; unsigned int *sumsquared - ;) --global sym(vp8_half_horiz_vert_variance16x_h_sse2) -+global sym(vp8_half_horiz_vert_variance16x_h_sse2) PRIVATE - sym(vp8_half_horiz_vert_variance16x_h_sse2): - push rbp - mov rbp, rsp -@@ -928,7 +928,7 @@ vp8_half_horiz_vert_variance16x_h_1: - ; int *sum, - ; unsigned int *sumsquared - ;) --global sym(vp8_half_vert_variance8x_h_sse2) -+global sym(vp8_half_vert_variance8x_h_sse2) PRIVATE - sym(vp8_half_vert_variance8x_h_sse2): - push rbp - mov rbp, rsp -@@ -1035,7 +1035,7 @@ vp8_half_vert_variance8x_h_1: - ; int *sum, - ; unsigned int *sumsquared - ;) --global sym(vp8_half_vert_variance16x_h_sse2) -+global sym(vp8_half_vert_variance16x_h_sse2) PRIVATE - sym(vp8_half_vert_variance16x_h_sse2): - push rbp - mov rbp, rsp -@@ -1143,7 +1143,7 @@ vp8_half_vert_variance16x_h_1: - ; int *sum, - ; unsigned int *sumsquared - ;) --global sym(vp8_half_horiz_variance8x_h_sse2) -+global sym(vp8_half_horiz_variance8x_h_sse2) PRIVATE - sym(vp8_half_horiz_variance8x_h_sse2): - push rbp - mov rbp, rsp -@@ -1248,7 +1248,7 @@ vp8_half_horiz_variance8x_h_1: - ; int *sum, - ; unsigned int *sumsquared - ;) --global sym(vp8_half_horiz_variance16x_h_sse2) -+global sym(vp8_half_horiz_variance16x_h_sse2) PRIVATE - sym(vp8_half_horiz_variance16x_h_sse2): - push rbp - mov rbp, rsp -diff --git a/vp8/common/x86/variance_impl_ssse3.asm b/vp8/common/x86/variance_impl_ssse3.asm -index 97e8b0e..686b4a9 
100644 ---- a/vp8/common/x86/variance_impl_ssse3.asm -+++ b/vp8/common/x86/variance_impl_ssse3.asm -@@ -29,7 +29,7 @@ - ;) - ;Note: The filter coefficient at offset=0 is 128. Since the second register - ;for Pmaddubsw is signed bytes, we must calculate zero offset seperately. --global sym(vp8_filter_block2d_bil_var_ssse3) -+global sym(vp8_filter_block2d_bil_var_ssse3) PRIVATE - sym(vp8_filter_block2d_bil_var_ssse3): - push rbp - mov rbp, rsp -diff --git a/vp8/common/x86/variance_sse2.c b/vp8/common/x86/variance_sse2.c -index 2769a30..afd6429 100644 ---- a/vp8/common/x86/variance_sse2.c -+++ b/vp8/common/x86/variance_sse2.c -@@ -332,8 +332,9 @@ unsigned int vp8_sub_pixel_variance16x16_wmt - unsigned int xxsum0, xxsum1; - - -- // note we could avoid these if statements if the calling function -- // just called the appropriate functions inside. -+ /* note we could avoid these if statements if the calling function -+ * just called the appropriate functions inside. -+ */ - if (xoffset == 4 && yoffset == 0) - { - vp8_half_horiz_variance16x_h_sse2( -diff --git a/vp8/common/x86/variance_ssse3.c b/vp8/common/x86/variance_ssse3.c -index 1be0d92..ba2055c 100644 ---- a/vp8/common/x86/variance_ssse3.c -+++ b/vp8/common/x86/variance_ssse3.c -@@ -79,8 +79,9 @@ unsigned int vp8_sub_pixel_variance16x16_ssse3 - int xsum0; - unsigned int xxsum0; - -- // note we could avoid these if statements if the calling function -- // just called the appropriate functions inside. -+ /* note we could avoid these if statements if the calling function -+ * just called the appropriate functions inside. -+ */ - if (xoffset == 4 && yoffset == 0) - { - vp8_half_horiz_variance16x_h_sse2( -diff --git a/vp8/common/x86/vp8_asm_stubs.c b/vp8/common/x86/vp8_asm_stubs.c -index 23a7fdc..3437a23 100644 ---- a/vp8/common/x86/vp8_asm_stubs.c -+++ b/vp8/common/x86/vp8_asm_stubs.c -@@ -438,19 +438,35 @@ void vp8_sixtap_predict16x16_ssse3 - { - if (yoffset) - { -- vp8_filter_block1d16_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 16, 21, xoffset); -- vp8_filter_block1d16_v6_ssse3(FData2 , 16, dst_ptr, dst_pitch, 16, yoffset); -+ vp8_filter_block1d16_h6_ssse3(src_ptr - (2 * src_pixels_per_line), -+ src_pixels_per_line, FData2, -+ 16, 21, xoffset); -+ vp8_filter_block1d16_v6_ssse3(FData2 , 16, dst_ptr, dst_pitch, -+ 16, yoffset); - } - else - { - /* First-pass only */ -- vp8_filter_block1d16_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 16, xoffset); -+ vp8_filter_block1d16_h6_ssse3(src_ptr, src_pixels_per_line, -+ dst_ptr, dst_pitch, 16, xoffset); - } - } - else - { -- /* Second-pass only */ -- vp8_filter_block1d16_v6_ssse3(src_ptr - (2 * src_pixels_per_line) , src_pixels_per_line, dst_ptr, dst_pitch, 16, yoffset); -+ if (yoffset) -+ { -+ /* Second-pass only */ -+ vp8_filter_block1d16_v6_ssse3(src_ptr - (2 * src_pixels_per_line), -+ src_pixels_per_line, -+ dst_ptr, dst_pitch, 16, yoffset); -+ } -+ else -+ { -+ /* ssse3 second-pass only function couldn't handle (xoffset==0 && -+ * yoffset==0) case correctly. Add copy function here to guarantee -+ * six-tap function handles all possible offsets. 
*/ -+ vp8_copy_mem16x16(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch); -+ } - } - } - -@@ -470,18 +486,34 @@ void vp8_sixtap_predict8x8_ssse3 - { - if (yoffset) - { -- vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 8, 13, xoffset); -- vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch, 8, yoffset); -+ vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line), -+ src_pixels_per_line, FData2, -+ 8, 13, xoffset); -+ vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch, -+ 8, yoffset); - } - else - { -- vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 8, xoffset); -+ vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line, -+ dst_ptr, dst_pitch, 8, xoffset); - } - } - else - { -- /* Second-pass only */ -- vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 8, yoffset); -+ if (yoffset) -+ { -+ /* Second-pass only */ -+ vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line), -+ src_pixels_per_line, -+ dst_ptr, dst_pitch, 8, yoffset); -+ } -+ else -+ { -+ /* ssse3 second-pass only function couldn't handle (xoffset==0 && -+ * yoffset==0) case correctly. Add copy function here to guarantee -+ * six-tap function handles all possible offsets. */ -+ vp8_copy_mem8x8(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch); -+ } - } - } - -@@ -502,19 +534,35 @@ void vp8_sixtap_predict8x4_ssse3 - { - if (yoffset) - { -- vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 8, 9, xoffset); -- vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch, 4, yoffset); -+ vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line), -+ src_pixels_per_line, FData2, -+ 8, 9, xoffset); -+ vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch, -+ 4, yoffset); - } - else - { - /* First-pass only */ -- vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 4, xoffset); -+ vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line, -+ dst_ptr, dst_pitch, 4, xoffset); - } - } - else - { -- /* Second-pass only */ -- vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 4, yoffset); -+ if (yoffset) -+ { -+ /* Second-pass only */ -+ vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line), -+ src_pixels_per_line, -+ dst_ptr, dst_pitch, 4, yoffset); -+ } -+ else -+ { -+ /* ssse3 second-pass only function couldn't handle (xoffset==0 && -+ * yoffset==0) case correctly. Add copy function here to guarantee -+ * six-tap function handles all possible offsets. 
*/ -+ vp8_copy_mem8x4(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch); -+ } - } - } - -@@ -534,19 +582,48 @@ void vp8_sixtap_predict4x4_ssse3 - { - if (yoffset) - { -- vp8_filter_block1d4_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 4, 9, xoffset); -- vp8_filter_block1d4_v6_ssse3(FData2, 4, dst_ptr, dst_pitch, 4, yoffset); -+ vp8_filter_block1d4_h6_ssse3(src_ptr - (2 * src_pixels_per_line), -+ src_pixels_per_line, -+ FData2, 4, 9, xoffset); -+ vp8_filter_block1d4_v6_ssse3(FData2, 4, dst_ptr, dst_pitch, -+ 4, yoffset); - } - else - { -- vp8_filter_block1d4_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 4, xoffset); -+ vp8_filter_block1d4_h6_ssse3(src_ptr, src_pixels_per_line, -+ dst_ptr, dst_pitch, 4, xoffset); - } - } - else - { -- vp8_filter_block1d4_v6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 4, yoffset); -+ if (yoffset) -+ { -+ vp8_filter_block1d4_v6_ssse3(src_ptr - (2 * src_pixels_per_line), -+ src_pixels_per_line, -+ dst_ptr, dst_pitch, 4, yoffset); -+ } -+ else -+ { -+ /* ssse3 second-pass only function couldn't handle (xoffset==0 && -+ * yoffset==0) case correctly. Add copy function here to guarantee -+ * six-tap function handles all possible offsets. */ -+ int r; -+ -+ for (r = 0; r < 4; r++) -+ { -+ #if !(CONFIG_FAST_UNALIGNED) -+ dst_ptr[0] = src_ptr[0]; -+ dst_ptr[1] = src_ptr[1]; -+ dst_ptr[2] = src_ptr[2]; -+ dst_ptr[3] = src_ptr[3]; -+ #else -+ *(uint32_t *)dst_ptr = *(uint32_t *)src_ptr ; -+ #endif -+ dst_ptr += dst_pitch; -+ src_ptr += src_pixels_per_line; -+ } -+ } - } -- - } - - #endif -diff --git a/vp8/decoder/dboolhuff.h b/vp8/decoder/dboolhuff.h -index 880c185..1a08c05 100644 ---- a/vp8/decoder/dboolhuff.h -+++ b/vp8/decoder/dboolhuff.h -@@ -55,7 +55,7 @@ void vp8dx_bool_decoder_fill(BOOL_DECODER *br); - int loop_end, x; \ - size_t bits_left = ((_bufend)-(_bufptr))*CHAR_BIT; \ - \ -- x = shift + CHAR_BIT - bits_left; \ -+ x = (int)(shift + CHAR_BIT - bits_left); \ - loop_end = 0; \ - if(x >= 0) \ - { \ -diff --git a/vp8/decoder/decodemv.c b/vp8/decoder/decodemv.c -index 51e2420..8027a07 100644 ---- a/vp8/decoder/decodemv.c -+++ b/vp8/decoder/decodemv.c -@@ -48,11 +48,11 @@ static MB_PREDICTION_MODE read_uv_mode(vp8_reader *bc, const vp8_prob *p) - - static void read_kf_modes(VP8D_COMP *pbi, MODE_INFO *mi) - { -- vp8_reader *const bc = & pbi->bc; -+ vp8_reader *const bc = & pbi->mbc[8]; - const int mis = pbi->common.mode_info_stride; - - mi->mbmi.ref_frame = INTRA_FRAME; -- mi->mbmi.mode = read_kf_ymode(bc, pbi->common.kf_ymode_prob); -+ mi->mbmi.mode = read_kf_ymode(bc, vp8_kf_ymode_prob); - - if (mi->mbmi.mode == B_PRED) - { -@@ -65,12 +65,12 @@ static void read_kf_modes(VP8D_COMP *pbi, MODE_INFO *mi) - const B_PREDICTION_MODE L = left_block_mode(mi, i); - - mi->bmi[i].as_mode = -- read_bmode(bc, pbi->common.kf_bmode_prob [A] [L]); -+ read_bmode(bc, vp8_kf_bmode_prob [A] [L]); - } - while (++i < 16); - } - -- mi->mbmi.uv_mode = read_uv_mode(bc, pbi->common.kf_uv_mode_prob); -+ mi->mbmi.uv_mode = read_uv_mode(bc, vp8_kf_uv_mode_prob); - } - - static int read_mvcomponent(vp8_reader *r, const MV_CONTEXT *mvc) -@@ -150,7 +150,7 @@ static const unsigned char mbsplit_fill_offset[4][16] = { - - static void mb_mode_mv_init(VP8D_COMP *pbi) - { -- vp8_reader *const bc = & pbi->bc; -+ vp8_reader *const bc = & pbi->mbc[8]; - MV_CONTEXT *const mvc = pbi->common.fc.mvc; - - #if CONFIG_ERROR_CONCEALMENT -@@ -159,6 +159,9 @@ static void mb_mode_mv_init(VP8D_COMP *pbi) - * outside the frame. 
*/ - pbi->mvs_corrupt_from_mb = UINT_MAX; - #endif -+ /* Read the mb_no_coeff_skip flag */ -+ pbi->common.mb_no_coeff_skip = (int)vp8_read_bit(bc); -+ - pbi->prob_skip_false = 0; - if (pbi->common.mb_no_coeff_skip) - pbi->prob_skip_false = (vp8_prob)vp8_read_literal(bc, 8); -@@ -293,26 +296,24 @@ static void decode_split_mv(vp8_reader *const bc, MODE_INFO *mi, - blockmv.as_mv.row += best_mv.as_mv.row; - blockmv.as_mv.col = read_mvcomponent(bc, &mvc[1]) << 1; - blockmv.as_mv.col += best_mv.as_mv.col; -- -- mbmi->need_to_clamp_mvs |= vp8_check_mv_bounds(&blockmv, -- mb_to_left_edge, -- mb_to_right_edge, -- mb_to_top_edge, -- mb_to_bottom_edge); - } - } - else - { - blockmv.as_int = abovemv.as_int; -- mbmi->need_to_clamp_mvs |= above_mb->mbmi.need_to_clamp_mvs; - } - } - else - { - blockmv.as_int = leftmv.as_int; -- mbmi->need_to_clamp_mvs |= left_mb->mbmi.need_to_clamp_mvs; - } - -+ mbmi->need_to_clamp_mvs |= vp8_check_mv_bounds(&blockmv, -+ mb_to_left_edge, -+ mb_to_right_edge, -+ mb_to_top_edge, -+ mb_to_bottom_edge); -+ - { - /* Fill (uniform) modes, mvs of jth subset. - Must do it here because ensuing subsets can -@@ -337,7 +338,7 @@ static void decode_split_mv(vp8_reader *const bc, MODE_INFO *mi, - - static void read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi) - { -- vp8_reader *const bc = & pbi->bc; -+ vp8_reader *const bc = & pbi->mbc[8]; - mbmi->ref_frame = (MV_REFERENCE_FRAME) vp8_read(bc, pbi->prob_intra); - if (mbmi->ref_frame) /* inter MB */ - { -@@ -595,14 +596,14 @@ static void decode_mb_mode_mvs(VP8D_COMP *pbi, MODE_INFO *mi, - * By default on a key frame reset all MBs to segment 0 - */ - if (pbi->mb.update_mb_segmentation_map) -- read_mb_features(&pbi->bc, &mi->mbmi, &pbi->mb); -+ read_mb_features(&pbi->mbc[8], &mi->mbmi, &pbi->mb); - else if(pbi->common.frame_type == KEY_FRAME) - mi->mbmi.segment_id = 0; - - /* Read the macroblock coeff skip flag if this feature is in use, - * else default to 0 */ - if (pbi->common.mb_no_coeff_skip) -- mi->mbmi.mb_skip_coeff = vp8_read(&pbi->bc, pbi->prob_skip_false); -+ mi->mbmi.mb_skip_coeff = vp8_read(&pbi->mbc[8], pbi->prob_skip_false); - else - mi->mbmi.mb_skip_coeff = 0; - -@@ -644,7 +645,8 @@ void vp8_decode_mode_mvs(VP8D_COMP *pbi) - #if CONFIG_ERROR_CONCEALMENT - /* look for corruption. set mvs_corrupt_from_mb to the current - * mb_num if the frame is corrupt from this macroblock. 
*/ -- if (vp8dx_bool_error(&pbi->bc) && mb_num < pbi->mvs_corrupt_from_mb) -+ if (vp8dx_bool_error(&pbi->mbc[8]) && mb_num < -+ (int)pbi->mvs_corrupt_from_mb) - { - pbi->mvs_corrupt_from_mb = mb_num; - /* no need to continue since the partition is corrupt from -diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c -index 62a068b..a4a00f6 100644 ---- a/vp8/decoder/decodframe.c -+++ b/vp8/decoder/decodframe.c -@@ -177,7 +177,6 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, - { - short *DQC = xd->dequant_y1; - int dst_stride = xd->dst.y_stride; -- unsigned char *base_dst = xd->dst.y_buffer; - - /* clear out residual eob info */ - if(xd->mode_info_context->mbmi.mb_skip_coeff) -@@ -188,38 +187,29 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, - for (i = 0; i < 16; i++) - { - BLOCKD *b = &xd->block[i]; -- int b_mode = xd->mode_info_context->bmi[i].as_mode; -- unsigned char *yabove; -- unsigned char *yleft; -- int left_stride; -- unsigned char top_left; -- -- yabove = base_dst + b->offset - dst_stride; -- yleft = base_dst + b->offset - 1; -- left_stride = dst_stride; -- top_left = yabove[-1]; -- -- // vp8_intra4x4_predict (base_dst + b->offset, dst_stride, b_mode, -- // base_dst + b->offset, dst_stride ); -- vp8_intra4x4_predict_d_c(yabove, yleft, left_stride, -- b_mode, -- base_dst + b->offset, dst_stride, -- top_left); -+ unsigned char *dst = xd->dst.y_buffer + b->offset; -+ B_PREDICTION_MODE b_mode = -+ xd->mode_info_context->bmi[i].as_mode; -+ unsigned char *Above = dst - dst_stride; -+ unsigned char *yleft = dst - 1; -+ int left_stride = dst_stride; -+ unsigned char top_left = Above[-1]; -+ -+ vp8_intra4x4_predict(Above, yleft, left_stride, b_mode, -+ dst, dst_stride, top_left); - - if (xd->eobs[i]) - { - if (xd->eobs[i] > 1) - { -- vp8_dequant_idct_add -- (b->qcoeff, DQC, -- base_dst + b->offset, dst_stride); -+ vp8_dequant_idct_add(b->qcoeff, DQC, dst, dst_stride); - } - else - { - vp8_dc_only_idct_add - (b->qcoeff[0] * DQC[0], -- base_dst + b->offset, dst_stride, -- base_dst + b->offset, dst_stride); -+ dst, dst_stride, -+ dst, dst_stride); - ((int *)b->qcoeff)[0] = 0; - } - } -@@ -317,48 +307,253 @@ static int get_delta_q(vp8_reader *bc, int prev, int *q_update) - FILE *vpxlog = 0; - #endif - -+static void yv12_extend_frame_top_c(YV12_BUFFER_CONFIG *ybf) -+{ -+ int i; -+ unsigned char *src_ptr1; -+ unsigned char *dest_ptr1; -+ -+ unsigned int Border; -+ int plane_stride; -+ -+ /***********/ -+ /* Y Plane */ -+ /***********/ -+ Border = ybf->border; -+ plane_stride = ybf->y_stride; -+ src_ptr1 = ybf->y_buffer - Border; -+ dest_ptr1 = src_ptr1 - (Border * plane_stride); -+ -+ for (i = 0; i < (int)Border; i++) -+ { -+ vpx_memcpy(dest_ptr1, src_ptr1, plane_stride); -+ dest_ptr1 += plane_stride; -+ } -+ -+ -+ /***********/ -+ /* U Plane */ -+ /***********/ -+ plane_stride = ybf->uv_stride; -+ Border /= 2; -+ src_ptr1 = ybf->u_buffer - Border; -+ dest_ptr1 = src_ptr1 - (Border * plane_stride); -+ -+ for (i = 0; i < (int)(Border); i++) -+ { -+ vpx_memcpy(dest_ptr1, src_ptr1, plane_stride); -+ dest_ptr1 += plane_stride; -+ } -+ -+ /***********/ -+ /* V Plane */ -+ /***********/ -+ -+ src_ptr1 = ybf->v_buffer - Border; -+ dest_ptr1 = src_ptr1 - (Border * plane_stride); -+ -+ for (i = 0; i < (int)(Border); i++) -+ { -+ vpx_memcpy(dest_ptr1, src_ptr1, plane_stride); -+ dest_ptr1 += plane_stride; -+ } -+} -+ -+static void yv12_extend_frame_bottom_c(YV12_BUFFER_CONFIG *ybf) -+{ -+ int i; -+ unsigned char *src_ptr1, *src_ptr2; -+ unsigned char 
*dest_ptr2; -+ -+ unsigned int Border; -+ int plane_stride; -+ int plane_height; -+ -+ /***********/ -+ /* Y Plane */ -+ /***********/ -+ Border = ybf->border; -+ plane_stride = ybf->y_stride; -+ plane_height = ybf->y_height; -+ -+ src_ptr1 = ybf->y_buffer - Border; -+ src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; -+ dest_ptr2 = src_ptr2 + plane_stride; -+ -+ for (i = 0; i < (int)Border; i++) -+ { -+ vpx_memcpy(dest_ptr2, src_ptr2, plane_stride); -+ dest_ptr2 += plane_stride; -+ } -+ -+ -+ /***********/ -+ /* U Plane */ -+ /***********/ -+ plane_stride = ybf->uv_stride; -+ plane_height = ybf->uv_height; -+ Border /= 2; -+ -+ src_ptr1 = ybf->u_buffer - Border; -+ src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; -+ dest_ptr2 = src_ptr2 + plane_stride; -+ -+ for (i = 0; i < (int)(Border); i++) -+ { -+ vpx_memcpy(dest_ptr2, src_ptr2, plane_stride); -+ dest_ptr2 += plane_stride; -+ } -+ -+ /***********/ -+ /* V Plane */ -+ /***********/ -+ -+ src_ptr1 = ybf->v_buffer - Border; -+ src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; -+ dest_ptr2 = src_ptr2 + plane_stride; -+ -+ for (i = 0; i < (int)(Border); i++) -+ { -+ vpx_memcpy(dest_ptr2, src_ptr2, plane_stride); -+ dest_ptr2 += plane_stride; -+ } -+} -+ -+static void yv12_extend_frame_left_right_c(YV12_BUFFER_CONFIG *ybf, -+ unsigned char *y_src, -+ unsigned char *u_src, -+ unsigned char *v_src) -+{ -+ int i; -+ unsigned char *src_ptr1, *src_ptr2; -+ unsigned char *dest_ptr1, *dest_ptr2; -+ -+ unsigned int Border; -+ int plane_stride; -+ int plane_height; -+ int plane_width; -+ -+ /***********/ -+ /* Y Plane */ -+ /***********/ -+ Border = ybf->border; -+ plane_stride = ybf->y_stride; -+ plane_height = 16; -+ plane_width = ybf->y_width; -+ -+ /* copy the left and right most columns out */ -+ src_ptr1 = y_src; -+ src_ptr2 = src_ptr1 + plane_width - 1; -+ dest_ptr1 = src_ptr1 - Border; -+ dest_ptr2 = src_ptr2 + 1; -+ -+ for (i = 0; i < plane_height; i++) -+ { -+ vpx_memset(dest_ptr1, src_ptr1[0], Border); -+ vpx_memset(dest_ptr2, src_ptr2[0], Border); -+ src_ptr1 += plane_stride; -+ src_ptr2 += plane_stride; -+ dest_ptr1 += plane_stride; -+ dest_ptr2 += plane_stride; -+ } -+ -+ /***********/ -+ /* U Plane */ -+ /***********/ -+ plane_stride = ybf->uv_stride; -+ plane_height = 8; -+ plane_width = ybf->uv_width; -+ Border /= 2; -+ -+ /* copy the left and right most columns out */ -+ src_ptr1 = u_src; -+ src_ptr2 = src_ptr1 + plane_width - 1; -+ dest_ptr1 = src_ptr1 - Border; -+ dest_ptr2 = src_ptr2 + 1; -+ -+ for (i = 0; i < plane_height; i++) -+ { -+ vpx_memset(dest_ptr1, src_ptr1[0], Border); -+ vpx_memset(dest_ptr2, src_ptr2[0], Border); -+ src_ptr1 += plane_stride; -+ src_ptr2 += plane_stride; -+ dest_ptr1 += plane_stride; -+ dest_ptr2 += plane_stride; -+ } -+ -+ /***********/ -+ /* V Plane */ -+ /***********/ -+ -+ /* copy the left and right most columns out */ -+ src_ptr1 = v_src; -+ src_ptr2 = src_ptr1 + plane_width - 1; -+ dest_ptr1 = src_ptr1 - Border; -+ dest_ptr2 = src_ptr2 + 1; -+ -+ for (i = 0; i < plane_height; i++) -+ { -+ vpx_memset(dest_ptr1, src_ptr1[0], Border); -+ vpx_memset(dest_ptr2, src_ptr2[0], Border); -+ src_ptr1 += plane_stride; -+ src_ptr2 += plane_stride; -+ dest_ptr1 += plane_stride; -+ dest_ptr2 += plane_stride; -+ } -+} -+ - static void decode_mb_rows(VP8D_COMP *pbi) - { - VP8_COMMON *const pc = & pbi->common; - MACROBLOCKD *const xd = & pbi->mb; - -+ MODE_INFO *lf_mic = xd->mode_info_context; -+ - int ibc = 0; - int num_part = 1 << 
pc->multi_token_partition; - - int recon_yoffset, recon_uvoffset; - int mb_row, mb_col; - int mb_idx = 0; -- int dst_fb_idx = pc->new_fb_idx; -- int recon_y_stride = pc->yv12_fb[dst_fb_idx].y_stride; -- int recon_uv_stride = pc->yv12_fb[dst_fb_idx].uv_stride; -+ -+ YV12_BUFFER_CONFIG *yv12_fb_new = pbi->dec_fb_ref[INTRA_FRAME]; -+ -+ int recon_y_stride = yv12_fb_new->y_stride; -+ int recon_uv_stride = yv12_fb_new->uv_stride; - - unsigned char *ref_buffer[MAX_REF_FRAMES][3]; - unsigned char *dst_buffer[3]; -+ unsigned char *lf_dst[3]; -+ unsigned char *eb_dst[3]; - int i; -- int ref_fb_index[MAX_REF_FRAMES]; - int ref_fb_corrupted[MAX_REF_FRAMES]; - - ref_fb_corrupted[INTRA_FRAME] = 0; - -- ref_fb_index[LAST_FRAME] = pc->lst_fb_idx; -- ref_fb_index[GOLDEN_FRAME] = pc->gld_fb_idx; -- ref_fb_index[ALTREF_FRAME] = pc->alt_fb_idx; -- - for(i = 1; i < MAX_REF_FRAMES; i++) - { -- ref_buffer[i][0] = pc->yv12_fb[ref_fb_index[i]].y_buffer; -- ref_buffer[i][1] = pc->yv12_fb[ref_fb_index[i]].u_buffer; -- ref_buffer[i][2] = pc->yv12_fb[ref_fb_index[i]].v_buffer; -+ YV12_BUFFER_CONFIG *this_fb = pbi->dec_fb_ref[i]; -+ -+ ref_buffer[i][0] = this_fb->y_buffer; -+ ref_buffer[i][1] = this_fb->u_buffer; -+ ref_buffer[i][2] = this_fb->v_buffer; - -- ref_fb_corrupted[i] = pc->yv12_fb[ref_fb_index[i]].corrupted; -+ ref_fb_corrupted[i] = this_fb->corrupted; - } - -- dst_buffer[0] = pc->yv12_fb[dst_fb_idx].y_buffer; -- dst_buffer[1] = pc->yv12_fb[dst_fb_idx].u_buffer; -- dst_buffer[2] = pc->yv12_fb[dst_fb_idx].v_buffer; -+ /* Set up the buffer pointers */ -+ eb_dst[0] = lf_dst[0] = dst_buffer[0] = yv12_fb_new->y_buffer; -+ eb_dst[1] = lf_dst[1] = dst_buffer[1] = yv12_fb_new->u_buffer; -+ eb_dst[2] = lf_dst[2] = dst_buffer[2] = yv12_fb_new->v_buffer; - - xd->up_available = 0; - -+ /* Initialize the loop filter for this frame. */ -+ if(pc->filter_level) -+ vp8_loop_filter_frame_init(pc, xd, pc->filter_level); -+ -+ vp8_setup_intra_recon_top_line(yv12_fb_new); -+ - /* Decode the individual macro block */ - for (mb_row = 0; mb_row < pc->mb_rows; mb_row++) - { -@@ -395,10 +590,14 @@ static void decode_mb_rows(VP8D_COMP *pbi) - xd->recon_above[1] -= xd->dst.uv_stride; - xd->recon_above[2] -= xd->dst.uv_stride; - -- //TODO: move to outside row loop -+ /* TODO: move to outside row loop */ - xd->recon_left_stride[0] = xd->dst.y_stride; - xd->recon_left_stride[1] = xd->dst.uv_stride; - -+ setup_intra_recon_left(xd->recon_left[0], xd->recon_left[1], -+ xd->recon_left[2], xd->dst.y_stride, -+ xd->dst.uv_stride); -+ - for (mb_col = 0; mb_col < pc->mb_cols; mb_col++) - { - /* Distance of Mb to the various image edges. 
-@@ -460,26 +659,103 @@ static void decode_mb_rows(VP8D_COMP *pbi) - xd->recon_left[1] += 8; - xd->recon_left[2] += 8; - -- - recon_yoffset += 16; - recon_uvoffset += 8; - - ++xd->mode_info_context; /* next mb */ - - xd->above_context++; -- - } - - /* adjust to the next row of mbs */ -- vp8_extend_mb_row( -- &pc->yv12_fb[dst_fb_idx], -- xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8 -- ); -+ vp8_extend_mb_row(yv12_fb_new, xd->dst.y_buffer + 16, -+ xd->dst.u_buffer + 8, xd->dst.v_buffer + 8); - - ++xd->mode_info_context; /* skip prediction column */ - xd->up_available = 1; - -+ if(pc->filter_level) -+ { -+ if(mb_row > 0) -+ { -+ if (pc->filter_type == NORMAL_LOOPFILTER) -+ vp8_loop_filter_row_normal(pc, lf_mic, mb_row-1, -+ recon_y_stride, recon_uv_stride, -+ lf_dst[0], lf_dst[1], lf_dst[2]); -+ else -+ vp8_loop_filter_row_simple(pc, lf_mic, mb_row-1, -+ recon_y_stride, recon_uv_stride, -+ lf_dst[0], lf_dst[1], lf_dst[2]); -+ -+ if(mb_row > 1) -+ { -+ yv12_extend_frame_left_right_c(yv12_fb_new, -+ eb_dst[0], -+ eb_dst[1], -+ eb_dst[2]); -+ -+ eb_dst[0] += recon_y_stride * 16; -+ eb_dst[1] += recon_uv_stride * 8; -+ eb_dst[2] += recon_uv_stride * 8; -+ -+ if(mb_row == 2) -+ yv12_extend_frame_top_c(yv12_fb_new); -+ -+ } -+ -+ lf_dst[0] += recon_y_stride * 16; -+ lf_dst[1] += recon_uv_stride * 8; -+ lf_dst[2] += recon_uv_stride * 8; -+ lf_mic += pc->mb_cols; -+ lf_mic++; /* Skip border mb */ -+ } -+ } -+ else -+ { -+ if(mb_row > 0) -+ { -+ /**/ -+ yv12_extend_frame_left_right_c(yv12_fb_new, -+ eb_dst[0], -+ eb_dst[1], -+ eb_dst[2]); -+ -+ eb_dst[0] += recon_y_stride * 16; -+ eb_dst[1] += recon_uv_stride * 8; -+ eb_dst[2] += recon_uv_stride * 8; -+ -+ if(mb_row == 1) -+ yv12_extend_frame_top_c(yv12_fb_new); -+ } -+ } -+ } -+ -+ if(pc->filter_level) -+ { -+ if (pc->filter_type == NORMAL_LOOPFILTER) -+ vp8_loop_filter_row_normal(pc, lf_mic, mb_row-1, recon_y_stride, -+ recon_uv_stride, lf_dst[0], lf_dst[1], -+ lf_dst[2]); -+ else -+ vp8_loop_filter_row_simple(pc, lf_mic, mb_row-1, recon_y_stride, -+ recon_uv_stride, lf_dst[0], lf_dst[1], -+ lf_dst[2]); -+ -+ yv12_extend_frame_left_right_c(yv12_fb_new, -+ eb_dst[0], -+ eb_dst[1], -+ eb_dst[2]); -+ eb_dst[0] += recon_y_stride * 16; -+ eb_dst[1] += recon_uv_stride * 8; -+ eb_dst[2] += recon_uv_stride * 8; - } -+ yv12_extend_frame_left_right_c(yv12_fb_new, -+ eb_dst[0], -+ eb_dst[1], -+ eb_dst[2]); -+ -+ yv12_extend_frame_bottom_c(yv12_fb_new); -+ - } - - static unsigned int read_partition_size(const unsigned char *cx_size) -@@ -519,13 +795,13 @@ static unsigned int read_available_partition_size( - if (read_is_valid(partition_size_ptr, 3, first_fragment_end)) - partition_size = read_partition_size(partition_size_ptr); - else if (pbi->ec_active) -- partition_size = bytes_left; -+ partition_size = (unsigned int)bytes_left; - else - vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, - "Truncated partition size data"); - } - else -- partition_size = bytes_left; -+ partition_size = (unsigned int)bytes_left; - - /* Validate the calculated partition length. 
If the buffer - * described by the partition can't be fully read, then restrict -@@ -534,7 +810,7 @@ static unsigned int read_available_partition_size( - if (!read_is_valid(fragment_start, partition_size, fragment_end)) - { - if (pbi->ec_active) -- partition_size = bytes_left; -+ partition_size = (unsigned int)bytes_left; - else - vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, - "Truncated packet or corrupt partition " -@@ -547,24 +823,18 @@ static unsigned int read_available_partition_size( - static void setup_token_decoder(VP8D_COMP *pbi, - const unsigned char* token_part_sizes) - { -- vp8_reader *bool_decoder = &pbi->bc2; -+ vp8_reader *bool_decoder = &pbi->mbc[0]; - unsigned int partition_idx; -- int fragment_idx; -- int num_token_partitions; -+ unsigned int fragment_idx; -+ unsigned int num_token_partitions; - const unsigned char *first_fragment_end = pbi->fragments[0] + - pbi->fragment_sizes[0]; - - TOKEN_PARTITION multi_token_partition = -- (TOKEN_PARTITION)vp8_read_literal(&pbi->bc, 2); -- if (!vp8dx_bool_error(&pbi->bc)) -+ (TOKEN_PARTITION)vp8_read_literal(&pbi->mbc[8], 2); -+ if (!vp8dx_bool_error(&pbi->mbc[8])) - pbi->common.multi_token_partition = multi_token_partition; - num_token_partitions = 1 << pbi->common.multi_token_partition; -- if (num_token_partitions > 1) -- { -- CHECK_MEM_ERROR(pbi->mbc, vpx_malloc(num_token_partitions * -- sizeof(vp8_reader))); -- bool_decoder = pbi->mbc; -- } - - /* Check for partitions within the fragments and unpack the fragments - * so that each fragment pointer points to its corresponding partition. */ -@@ -580,10 +850,10 @@ static void setup_token_decoder(VP8D_COMP *pbi, - /* Size of first partition + token partition sizes element */ - ptrdiff_t ext_first_part_size = token_part_sizes - - pbi->fragments[0] + 3 * (num_token_partitions - 1); -- fragment_size -= ext_first_part_size; -+ fragment_size -= (unsigned int)ext_first_part_size; - if (fragment_size > 0) - { -- pbi->fragment_sizes[0] = ext_first_part_size; -+ pbi->fragment_sizes[0] = (unsigned int)ext_first_part_size; - /* The fragment contains an additional partition. Move to - * next. */ - fragment_idx++; -@@ -602,8 +872,8 @@ static void setup_token_decoder(VP8D_COMP *pbi, - fragment_end, - fragment_idx - 1, - num_token_partitions); -- pbi->fragment_sizes[fragment_idx] = partition_size; -- fragment_size -= partition_size; -+ pbi->fragment_sizes[fragment_idx] = (unsigned int)partition_size; -+ fragment_size -= (unsigned int)partition_size; - assert(fragment_idx <= num_token_partitions); - if (fragment_size > 0) - { -@@ -637,16 +907,6 @@ static void setup_token_decoder(VP8D_COMP *pbi, - #endif - } - --static void stop_token_decoder(VP8D_COMP *pbi) --{ -- VP8_COMMON *pc = &pbi->common; -- -- if (pc->multi_token_partition != ONE_PARTITION) -- { -- vpx_free(pbi->mbc); -- pbi->mbc = NULL; -- } --} - - static void init_frame(VP8D_COMP *pbi) - { -@@ -661,7 +921,6 @@ static void init_frame(VP8D_COMP *pbi) - vp8_init_mbmode_probs(pc); - - vp8_default_coef_probs(pc); -- vp8_kf_default_bmode_probs(pc->kf_bmode_prob); - - /* reset the segment feature data to 0 with delta coding (Default state). 
*/ - vpx_memset(xd->segment_feature_data, 0, sizeof(xd->segment_feature_data)); -@@ -685,13 +944,8 @@ static void init_frame(VP8D_COMP *pbi) - } - else - { -- if (!pc->use_bilinear_mc_filter) -- pc->mcomp_filter_type = SIXTAP; -- else -- pc->mcomp_filter_type = BILINEAR; -- - /* To enable choice of different interploation filters */ -- if (pc->mcomp_filter_type == SIXTAP) -+ if (!pc->use_bilinear_mc_filter) - { - xd->subpixel_predict = vp8_sixtap_predict4x4; - xd->subpixel_predict8x4 = vp8_sixtap_predict8x4; -@@ -725,7 +979,7 @@ static void init_frame(VP8D_COMP *pbi) - - int vp8_decode_frame(VP8D_COMP *pbi) - { -- vp8_reader *const bc = & pbi->bc; -+ vp8_reader *const bc = & pbi->mbc[8]; - VP8_COMMON *const pc = & pbi->common; - MACROBLOCKD *const xd = & pbi->mb; - const unsigned char *data = pbi->fragments[0]; -@@ -737,9 +991,11 @@ int vp8_decode_frame(VP8D_COMP *pbi) - int corrupt_tokens = 0; - int prev_independent_partitions = pbi->independent_partitions; - -+ YV12_BUFFER_CONFIG *yv12_fb_new = pbi->dec_fb_ref[INTRA_FRAME]; -+ - /* start with no corruption of current frame */ - xd->corrupted = 0; -- pc->yv12_fb[pc->new_fb_idx].corrupted = 0; -+ yv12_fb_new->corrupted = 0; - - if (data_end - data < 3) - { -@@ -774,11 +1030,9 @@ int vp8_decode_frame(VP8D_COMP *pbi) - - vp8_setup_version(pc); - -+ - if (pc->frame_type == KEY_FRAME) - { -- const int Width = pc->Width; -- const int Height = pc->Height; -- - /* vet via sync code */ - /* When error concealment is enabled we should only check the sync - * code if we have enough bits available -@@ -803,56 +1057,21 @@ int vp8_decode_frame(VP8D_COMP *pbi) - } - data += 7; - -- if (Width != pc->Width || Height != pc->Height) -- { -- int prev_mb_rows = pc->mb_rows; -- -- if (pc->Width <= 0) -- { -- pc->Width = Width; -- vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, -- "Invalid frame width"); -- } -- -- if (pc->Height <= 0) -- { -- pc->Height = Height; -- vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, -- "Invalid frame height"); -- } -- -- if (vp8_alloc_frame_buffers(pc, pc->Width, pc->Height)) -- vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR, -- "Failed to allocate frame buffers"); -- --#if CONFIG_ERROR_CONCEALMENT -- pbi->overlaps = NULL; -- if (pbi->ec_enabled) -- { -- if (vp8_alloc_overlap_lists(pbi)) -- vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR, -- "Failed to allocate overlap lists " -- "for error concealment"); -- } --#endif -- --#if CONFIG_MULTITHREAD -- if (pbi->b_multithreaded_rd) -- vp8mt_alloc_temp_buffers(pbi, pc->Width, prev_mb_rows); --#endif -- } -+ } -+ else -+ { -+ vpx_memcpy(&xd->pre, yv12_fb_new, sizeof(YV12_BUFFER_CONFIG)); -+ vpx_memcpy(&xd->dst, yv12_fb_new, sizeof(YV12_BUFFER_CONFIG)); - } - } -- -- if ((!pbi->decoded_key_frame && pc->frame_type != KEY_FRAME) || -- pc->Width == 0 || pc->Height == 0) -+ if ((!pbi->decoded_key_frame && pc->frame_type != KEY_FRAME)) - { - return -1; - } - - init_frame(pbi); - -- if (vp8dx_start_decode(bc, data, data_end - data)) -+ if (vp8dx_start_decode(bc, data, (unsigned int)(data_end - data))) - vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR, - "Failed to allocate bool decoder 0"); - if (pc->frame_type == KEY_FRAME) { -@@ -961,7 +1180,7 @@ int vp8_decode_frame(VP8D_COMP *pbi) - - setup_token_decoder(pbi, data + first_partition_length_in_bytes); - -- xd->current_bc = &pbi->bc2; -+ xd->current_bc = &pbi->mbc[0]; - - /* Read the default quantizers. 
*/ - { -@@ -1094,26 +1313,9 @@ int vp8_decode_frame(VP8D_COMP *pbi) - } - } - -- vpx_memcpy(&xd->pre, &pc->yv12_fb[pc->lst_fb_idx], sizeof(YV12_BUFFER_CONFIG)); -- vpx_memcpy(&xd->dst, &pc->yv12_fb[pc->new_fb_idx], sizeof(YV12_BUFFER_CONFIG)); -- -- /* set up frame new frame for intra coded blocks */ --#if CONFIG_MULTITHREAD -- if (!(pbi->b_multithreaded_rd) || pc->multi_token_partition == ONE_PARTITION || !(pc->filter_level)) --#endif -- vp8_setup_intra_recon(&pc->yv12_fb[pc->new_fb_idx]); -- -- vp8_setup_block_dptrs(xd); -- -- vp8_build_block_doffsets(xd); -- - /* clear out the coeff buffer */ - vpx_memset(xd->qcoeff, 0, sizeof(xd->qcoeff)); - -- /* Read the mb_no_coeff_skip flag */ -- pc->mb_no_coeff_skip = (int)vp8_read_bit(bc); -- -- - vp8_decode_mode_mvs(pbi); - - #if CONFIG_ERROR_CONCEALMENT -@@ -1132,9 +1334,9 @@ int vp8_decode_frame(VP8D_COMP *pbi) - #if CONFIG_MULTITHREAD - if (pbi->b_multithreaded_rd && pc->multi_token_partition != ONE_PARTITION) - { -- int i; -+ unsigned int i; - vp8mt_decode_mb_rows(pbi, xd); -- vp8_yv12_extend_frame_borders(&pc->yv12_fb[pc->new_fb_idx]); /*cm->frame_to_show);*/ -+ vp8_yv12_extend_frame_borders(yv12_fb_new); - for (i = 0; i < pbi->decoding_thread_count; ++i) - corrupt_tokens |= pbi->mb_row_di[i].mbd.corrupted; - } -@@ -1145,18 +1347,16 @@ int vp8_decode_frame(VP8D_COMP *pbi) - corrupt_tokens |= xd->corrupted; - } - -- stop_token_decoder(pbi); -- - /* Collect information about decoder corruption. */ - /* 1. Check first boolean decoder for errors. */ -- pc->yv12_fb[pc->new_fb_idx].corrupted = vp8dx_bool_error(bc); -+ yv12_fb_new->corrupted = vp8dx_bool_error(bc); - /* 2. Check the macroblock information */ -- pc->yv12_fb[pc->new_fb_idx].corrupted |= corrupt_tokens; -+ yv12_fb_new->corrupted |= corrupt_tokens; - - if (!pbi->decoded_key_frame) - { - if (pc->frame_type == KEY_FRAME && -- !pc->yv12_fb[pc->new_fb_idx].corrupted) -+ !yv12_fb_new->corrupted) - pbi->decoded_key_frame = 1; - else - vpx_internal_error(&pbi->common.error, VPX_CODEC_CORRUPT_FRAME, -@@ -1165,13 +1365,6 @@ int vp8_decode_frame(VP8D_COMP *pbi) - - /* vpx_log("Decoder: Frame Decoded, Size Roughly:%d bytes \n",bc->pos+pbi->bc2.pos); */ - -- /* If this was a kf or Gf note the Q used */ -- if ((pc->frame_type == KEY_FRAME) || -- pc->refresh_golden_frame || pc->refresh_alt_ref_frame) -- { -- pc->last_kf_gf_q = pc->base_qindex; -- } -- - if (pc->refresh_entropy_probs == 0) - { - vpx_memcpy(&pc->fc, &pc->lfc, sizeof(pc->fc)); -diff --git a/vp8/decoder/detokenize.c b/vp8/decoder/detokenize.c -index 0c39848..452ff6c 100644 ---- a/vp8/decoder/detokenize.c -+++ b/vp8/decoder/detokenize.c -@@ -53,7 +53,8 @@ static const uint8_t kZigzag[16] = { - #define NUM_PROBAS 11 - #define NUM_CTX 3 - --typedef const uint8_t (*ProbaArray)[NUM_CTX][NUM_PROBAS]; // for const-casting -+/* for const-casting */ -+typedef const uint8_t (*ProbaArray)[NUM_CTX][NUM_PROBAS]; - - static int GetSigned(BOOL_DECODER *br, int value_to_sign) - { -diff --git a/vp8/decoder/error_concealment.c b/vp8/decoder/error_concealment.c -index 7750728..8b2e32b 100644 ---- a/vp8/decoder/error_concealment.c -+++ b/vp8/decoder/error_concealment.c -@@ -51,12 +51,13 @@ int vp8_alloc_overlap_lists(VP8D_COMP *pbi) - vpx_free(pbi->overlaps); - pbi->overlaps = NULL; - } -+ - pbi->overlaps = vpx_calloc(pbi->common.mb_rows * pbi->common.mb_cols, - sizeof(MB_OVERLAP)); -+ - if (pbi->overlaps == NULL) - return -1; -- vpx_memset(pbi->overlaps, 0, -- sizeof(MB_OVERLAP) * pbi->common.mb_rows * pbi->common.mb_cols); -+ - return 0; - } - -diff 
--git a/vp8/decoder/onyxd_if.c b/vp8/decoder/onyxd_if.c -index c59ce25..8d6871b 100644 ---- a/vp8/decoder/onyxd_if.c -+++ b/vp8/decoder/onyxd_if.c -@@ -80,6 +80,7 @@ struct VP8D_COMP * vp8dx_create_decompressor(VP8D_CONFIG *oxcf) - - #if CONFIG_ERROR_CONCEALMENT - pbi->ec_enabled = oxcf->error_concealment; -+ pbi->overlaps = NULL; - #else - pbi->ec_enabled = 0; - #endif -@@ -99,6 +100,8 @@ struct VP8D_COMP * vp8dx_create_decompressor(VP8D_CONFIG *oxcf) - */ - pbi->independent_partitions = 0; - -+ vp8_setup_block_dptrs(&pbi->mb); -+ - return pbi; - } - -@@ -117,21 +120,20 @@ void vp8dx_remove_decompressor(VP8D_COMP *pbi) - vp8_de_alloc_overlap_lists(pbi); - #endif - vp8_remove_common(&pbi->common); -- vpx_free(pbi->mbc); - vpx_free(pbi); - } - - --vpx_codec_err_t vp8dx_get_reference(VP8D_COMP *pbi, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd) -+vpx_codec_err_t vp8dx_get_reference(VP8D_COMP *pbi, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd) - { - VP8_COMMON *cm = &pbi->common; - int ref_fb_idx; - -- if (ref_frame_flag == VP8_LAST_FLAG) -+ if (ref_frame_flag == VP8_LAST_FRAME) - ref_fb_idx = cm->lst_fb_idx; -- else if (ref_frame_flag == VP8_GOLD_FLAG) -+ else if (ref_frame_flag == VP8_GOLD_FRAME) - ref_fb_idx = cm->gld_fb_idx; -- else if (ref_frame_flag == VP8_ALT_FLAG) -+ else if (ref_frame_flag == VP8_ALTR_FRAME) - ref_fb_idx = cm->alt_fb_idx; - else{ - vpx_internal_error(&pbi->common.error, VPX_CODEC_ERROR, -@@ -153,17 +155,17 @@ vpx_codec_err_t vp8dx_get_reference(VP8D_COMP *pbi, VP8_REFFRAME ref_frame_flag, - } - - --vpx_codec_err_t vp8dx_set_reference(VP8D_COMP *pbi, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd) -+vpx_codec_err_t vp8dx_set_reference(VP8D_COMP *pbi, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd) - { - VP8_COMMON *cm = &pbi->common; - int *ref_fb_ptr = NULL; - int free_fb; - -- if (ref_frame_flag == VP8_LAST_FLAG) -+ if (ref_frame_flag == VP8_LAST_FRAME) - ref_fb_ptr = &cm->lst_fb_idx; -- else if (ref_frame_flag == VP8_GOLD_FLAG) -+ else if (ref_frame_flag == VP8_GOLD_FRAME) - ref_fb_ptr = &cm->gld_fb_idx; -- else if (ref_frame_flag == VP8_ALT_FLAG) -+ else if (ref_frame_flag == VP8_ALTR_FRAME) - ref_fb_ptr = &cm->alt_fb_idx; - else{ - vpx_internal_error(&pbi->common.error, VPX_CODEC_ERROR, -@@ -279,28 +281,22 @@ static int swap_frame_buffers (VP8_COMMON *cm) - return err; - } - --int vp8dx_receive_compressed_data(VP8D_COMP *pbi, unsigned long size, const unsigned char *source, int64_t time_stamp) -+int vp8dx_receive_compressed_data(VP8D_COMP *pbi, size_t size, -+ const uint8_t *source, -+ int64_t time_stamp) - { - #if HAVE_NEON - int64_t dx_store_reg[8]; - #endif - VP8_COMMON *cm = &pbi->common; -- int retcode = 0; -- -- /*if(pbi->ready_for_new_data == 0) -- return -1;*/ -- -- if (pbi == 0) -- { -- return -1; -- } -+ int retcode = -1; - - pbi->common.error.error_code = VPX_CODEC_OK; - - if (pbi->num_fragments == 0) - { - /* New frame, reset fragment pointers and sizes */ -- vpx_memset(pbi->fragments, 0, sizeof(pbi->fragments)); -+ vpx_memset((void*)pbi->fragments, 0, sizeof(pbi->fragments)); - vpx_memset(pbi->fragment_sizes, 0, sizeof(pbi->fragment_sizes)); - } - if (pbi->input_fragments && !(source == NULL && size == 0)) -@@ -381,20 +377,14 @@ int vp8dx_receive_compressed_data(VP8D_COMP *pbi, unsigned long size, const unsi - - cm->new_fb_idx = get_free_fb (cm); - -+ /* setup reference frames for vp8_decode_frame */ -+ pbi->dec_fb_ref[INTRA_FRAME] = &cm->yv12_fb[cm->new_fb_idx]; -+ pbi->dec_fb_ref[LAST_FRAME] = 
&cm->yv12_fb[cm->lst_fb_idx]; -+ pbi->dec_fb_ref[GOLDEN_FRAME] = &cm->yv12_fb[cm->gld_fb_idx]; -+ pbi->dec_fb_ref[ALTREF_FRAME] = &cm->yv12_fb[cm->alt_fb_idx]; -+ - if (setjmp(pbi->common.error.jmp)) - { --#if HAVE_NEON --#if CONFIG_RUNTIME_CPU_DETECT -- if (cm->cpu_caps & HAS_NEON) --#endif -- { -- vp8_pop_neon(dx_store_reg); -- } --#endif -- pbi->common.error.setjmp = 0; -- -- pbi->num_fragments = 0; -- - /* We do not know if the missing frame(s) was supposed to update - * any of the reference buffers, but we act conservative and - * mark only the last buffer as corrupted. -@@ -403,7 +393,8 @@ int vp8dx_receive_compressed_data(VP8D_COMP *pbi, unsigned long size, const unsi - - if (cm->fb_idx_ref_cnt[cm->new_fb_idx] > 0) - cm->fb_idx_ref_cnt[cm->new_fb_idx]--; -- return -1; -+ -+ goto decode_exit; - } - - pbi->common.error.setjmp = 1; -@@ -412,68 +403,19 @@ int vp8dx_receive_compressed_data(VP8D_COMP *pbi, unsigned long size, const unsi - - if (retcode < 0) - { --#if HAVE_NEON --#if CONFIG_RUNTIME_CPU_DETECT -- if (cm->cpu_caps & HAS_NEON) --#endif -- { -- vp8_pop_neon(dx_store_reg); -- } --#endif -- pbi->common.error.error_code = VPX_CODEC_ERROR; -- pbi->common.error.setjmp = 0; -- pbi->num_fragments = 0; - if (cm->fb_idx_ref_cnt[cm->new_fb_idx] > 0) - cm->fb_idx_ref_cnt[cm->new_fb_idx]--; -- return retcode; -+ -+ pbi->common.error.error_code = VPX_CODEC_ERROR; -+ goto decode_exit; - } - --#if CONFIG_MULTITHREAD -- if (pbi->b_multithreaded_rd && cm->multi_token_partition != ONE_PARTITION) -- { -- if (swap_frame_buffers (cm)) -- { --#if HAVE_NEON --#if CONFIG_RUNTIME_CPU_DETECT -- if (cm->cpu_caps & HAS_NEON) --#endif -- { -- vp8_pop_neon(dx_store_reg); -- } --#endif -- pbi->common.error.error_code = VPX_CODEC_ERROR; -- pbi->common.error.setjmp = 0; -- pbi->num_fragments = 0; -- return -1; -- } -- } else --#endif -+ if (swap_frame_buffers (cm)) - { -- if (swap_frame_buffers (cm)) -- { --#if HAVE_NEON --#if CONFIG_RUNTIME_CPU_DETECT -- if (cm->cpu_caps & HAS_NEON) --#endif -- { -- vp8_pop_neon(dx_store_reg); -- } --#endif -- pbi->common.error.error_code = VPX_CODEC_ERROR; -- pbi->common.error.setjmp = 0; -- pbi->num_fragments = 0; -- return -1; -- } -- -- if(cm->filter_level) -- { -- /* Apply the loop filter if appropriate. 
*/ -- vp8_loop_filter_frame(cm, &pbi->mb); -- } -- vp8_yv12_extend_frame_borders(cm->frame_to_show); -+ pbi->common.error.error_code = VPX_CODEC_ERROR; -+ goto decode_exit; - } - -- - vp8_clear_system_state(); - - #if CONFIG_ERROR_CONCEALMENT -@@ -498,49 +440,13 @@ int vp8dx_receive_compressed_data(VP8D_COMP *pbi, unsigned long size, const unsi - } - #endif - -- /*vp8_print_modes_and_motion_vectors( cm->mi, cm->mb_rows,cm->mb_cols, cm->current_video_frame);*/ -- - if (cm->show_frame) - cm->current_video_frame++; - - pbi->ready_for_new_data = 0; - pbi->last_time_stamp = time_stamp; -- pbi->num_fragments = 0; -- --#if 0 -- { -- int i; -- int64_t earliest_time = pbi->dr[0].time_stamp; -- int64_t latest_time = pbi->dr[0].time_stamp; -- int64_t time_diff = 0; -- int bytes = 0; -- -- pbi->dr[pbi->common.current_video_frame&0xf].size = pbi->bc.pos + pbi->bc2.pos + 4;; -- pbi->dr[pbi->common.current_video_frame&0xf].time_stamp = time_stamp; -- -- for (i = 0; i < 16; i++) -- { -- -- bytes += pbi->dr[i].size; -- -- if (pbi->dr[i].time_stamp < earliest_time) -- earliest_time = pbi->dr[i].time_stamp; -- -- if (pbi->dr[i].time_stamp > latest_time) -- latest_time = pbi->dr[i].time_stamp; -- } -- -- time_diff = latest_time - earliest_time; -- -- if (time_diff > 0) -- { -- pbi->common.bitrate = 80000.00 * bytes / time_diff ; -- pbi->common.framerate = 160000000.00 / time_diff ; -- } -- -- } --#endif - -+decode_exit: - #if HAVE_NEON - #if CONFIG_RUNTIME_CPU_DETECT - if (cm->cpu_caps & HAS_NEON) -@@ -549,7 +455,9 @@ int vp8dx_receive_compressed_data(VP8D_COMP *pbi, unsigned long size, const unsi - vp8_pop_neon(dx_store_reg); - } - #endif -+ - pbi->common.error.setjmp = 0; -+ pbi->num_fragments = 0; - return retcode; - } - int vp8dx_get_raw_frame(VP8D_COMP *pbi, YV12_BUFFER_CONFIG *sd, int64_t *time_stamp, int64_t *time_end_stamp, vp8_ppflags_t *flags) -diff --git a/vp8/decoder/onyxd_int.h b/vp8/decoder/onyxd_int.h -index 97cf0dc..0063beb 100644 ---- a/vp8/decoder/onyxd_int.h -+++ b/vp8/decoder/onyxd_int.h -@@ -31,23 +31,18 @@ typedef struct - typedef struct - { - MACROBLOCKD mbd; -- int mb_row; - } MB_ROW_DEC; - --typedef struct --{ -- int64_t time_stamp; -- int size; --} DATARATE; -- -- - typedef struct VP8D_COMP - { - DECLARE_ALIGNED(16, MACROBLOCKD, mb); - -+ YV12_BUFFER_CONFIG *dec_fb_ref[NUM_YV12_BUFFERS]; -+ - DECLARE_ALIGNED(16, VP8_COMMON, common); - -- vp8_reader bc, bc2; -+ /* the last partition will be used for the modes/mvs */ -+ vp8_reader mbc[MAX_PARTITIONS]; - - VP8D_CONFIG oxcf; - -@@ -62,7 +57,7 @@ typedef struct VP8D_COMP - volatile int b_multithreaded_rd; - int max_threads; - int current_mb_col_main; -- int decoding_thread_count; -+ unsigned int decoding_thread_count; - int allocated_decoding_thread_count; - - int mt_baseline_filter_level[MAX_MB_SEGMENTS]; -@@ -85,12 +80,9 @@ typedef struct VP8D_COMP - /* end of threading data */ - #endif - -- vp8_reader *mbc; - int64_t last_time_stamp; - int ready_for_new_data; - -- DATARATE dr[16]; -- - vp8_prob prob_intra; - vp8_prob prob_last; - vp8_prob prob_gf; -diff --git a/vp8/decoder/threading.c b/vp8/decoder/threading.c -index 47a0349..88c06be 100644 ---- a/vp8/decoder/threading.c -+++ b/vp8/decoder/threading.c -@@ -24,10 +24,18 @@ - #include "detokenize.h" - #include "vp8/common/reconintra4x4.h" - #include "vp8/common/reconinter.h" -+#include "vp8/common/setupintrarecon.h" - #if CONFIG_ERROR_CONCEALMENT - #include "error_concealment.h" - #endif - -+#define CALLOC_ARRAY(p, n) CHECK_MEM_ERROR((p), vpx_calloc(sizeof(*(p)), (n))) -+#define 
CALLOC_ARRAY_ALIGNED(p, n, algn) do { \ -+ CHECK_MEM_ERROR((p), vpx_memalign((algn), sizeof(*(p)) * (n))); \ -+ memset((p), 0, (n) * sizeof(*(p))); \ -+} while (0) -+ -+ - extern void vp8_mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd); - - static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC *mbrd, int count) -@@ -47,11 +55,9 @@ static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_D - mbd->mode_info_stride = pc->mode_info_stride; - - mbd->frame_type = pc->frame_type; -- mbd->pre = pc->yv12_fb[pc->lst_fb_idx]; -- mbd->dst = pc->yv12_fb[pc->new_fb_idx]; -+ mbd->pre = xd->pre; -+ mbd->dst = xd->dst; - -- vp8_setup_block_dptrs(mbd); -- vp8_build_block_doffsets(mbd); - mbd->segmentation_enabled = xd->segmentation_enabled; - mbd->mb_segement_abs_delta = xd->mb_segement_abs_delta; - vpx_memcpy(mbd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data)); -@@ -65,7 +71,7 @@ static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_D - mbd->mode_ref_lf_delta_enabled = xd->mode_ref_lf_delta_enabled; - mbd->mode_ref_lf_delta_update = xd->mode_ref_lf_delta_update; - -- mbd->current_bc = &pbi->bc2; -+ mbd->current_bc = &pbi->mbc[0]; - - vpx_memcpy(mbd->dequant_y1_dc, xd->dequant_y1_dc, sizeof(xd->dequant_y1_dc)); - vpx_memcpy(mbd->dequant_y1, xd->dequant_y1, sizeof(xd->dequant_y1)); -@@ -73,16 +79,18 @@ static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_D - vpx_memcpy(mbd->dequant_uv, xd->dequant_uv, sizeof(xd->dequant_uv)); - - mbd->fullpixel_mask = 0xffffffff; -- if(pc->full_pixel) -+ -+ if (pc->full_pixel) - mbd->fullpixel_mask = 0xfffffff8; - - } - -- for (i=0; i< pc->mb_rows; i++) -- pbi->mt_current_mb_col[i]=-1; -+ for (i = 0; i < pc->mb_rows; i++) -+ pbi->mt_current_mb_col[i] = -1; - } - --static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, unsigned int mb_idx) -+static void mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, -+ unsigned int mb_idx) - { - MB_PREDICTION_MODE mode; - int i; -@@ -166,7 +174,6 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, unsigned int mb_i - { - short *DQC = xd->dequant_y1; - int dst_stride = xd->dst.y_stride; -- unsigned char *base_dst = xd->dst.y_buffer; - - /* clear out residual eob info */ - if(xd->mode_info_context->mbmi.mb_skip_coeff) -@@ -177,17 +184,19 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, unsigned int mb_i - for (i = 0; i < 16; i++) - { - BLOCKD *b = &xd->block[i]; -- int b_mode = xd->mode_info_context->bmi[i].as_mode; -- unsigned char *yabove; -+ unsigned char *dst = xd->dst.y_buffer + b->offset; -+ B_PREDICTION_MODE b_mode = -+ xd->mode_info_context->bmi[i].as_mode; -+ unsigned char *Above; - unsigned char *yleft; - int left_stride; - unsigned char top_left; - - /*Caution: For some b_mode, it needs 8 pixels (4 above + 4 above-right).*/ - if (i < 4 && pbi->common.filter_level) -- yabove = xd->recon_above[0] + b->offset; //i*4; -+ Above = xd->recon_above[0] + b->offset; - else -- yabove = (base_dst - dst_stride) + b->offset; -+ Above = dst - dst_stride; - - if (i%4==0 && pbi->common.filter_level) - { -@@ -196,34 +205,28 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, unsigned int mb_i - } - else - { -- yleft = (base_dst - 1) + b->offset; -+ yleft = dst - 1; - left_stride = dst_stride; - } - - if ((i==4 || i==8 || i==12) && pbi->common.filter_level) - top_left = *(xd->recon_left[0] + i - 1); - else -- top_left = yabove[-1]; -+ top_left = 
Above[-1]; - -- vp8_intra4x4_predict_d_c(yabove, yleft, left_stride, -- b_mode, -- base_dst + b->offset, dst_stride, -- top_left); -+ vp8_intra4x4_predict(Above, yleft, left_stride, -+ b_mode, dst, dst_stride, top_left); - - if (xd->eobs[i] ) - { - if (xd->eobs[i] > 1) - { -- vp8_dequant_idct_add -- (b->qcoeff, DQC, -- base_dst + b->offset, dst_stride); -+ vp8_dequant_idct_add(b->qcoeff, DQC, dst, dst_stride); - } - else - { -- vp8_dc_only_idct_add -- (b->qcoeff[0] * DQC[0], -- base_dst + b->offset, dst_stride, -- base_dst + b->offset, dst_stride); -+ vp8_dc_only_idct_add(b->qcoeff[0] * DQC[0], -+ dst, dst_stride, dst, dst_stride); - ((int *)b->qcoeff)[0] = 0; - } - } -@@ -297,60 +300,44 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, unsigned int mb_i - } - } - --typedef void (*init_current_bc_fn_t)(VP8D_COMP *pbi, MACROBLOCKD *xd, -- int start_mb_row, int mb_row, int num_part); -- --static void init_current_bc(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row, -- int mb_row, int num_part) -+static void mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row) - { -- (void) start_mb_row; -- -- xd->current_bc = &pbi->mbc[mb_row%num_part]; --} -- --static void init_current_bc_threads(VP8D_COMP *pbi, MACROBLOCKD *xd, -- int start_mb_row, int mb_row, int num_part) --{ -- (void) xd; -- pbi->mb_row_di[start_mb_row - 1].mb_row = mb_row; -- pbi->mb_row_di[start_mb_row - 1].mbd.current_bc = &pbi->mbc[mb_row%num_part]; --} -- -- --static void decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row, -- init_current_bc_fn_t init_current_bc_fn) --{ -- volatile int *last_row_current_mb_col = NULL; -+ volatile const int *last_row_current_mb_col; -+ volatile int *current_mb_col; - int mb_row; - VP8_COMMON *pc = &pbi->common; -- int nsync = pbi->sync_range; -+ const int nsync = pbi->sync_range; -+ const int first_row_no_sync_above = pc->mb_cols + nsync; - int num_part = 1 << pbi->common.multi_token_partition; -+ int last_mb_row = start_mb_row; -+ -+ YV12_BUFFER_CONFIG *yv12_fb_new = pbi->dec_fb_ref[INTRA_FRAME]; -+ YV12_BUFFER_CONFIG *yv12_fb_lst = pbi->dec_fb_ref[LAST_FRAME]; -+ -+ int recon_y_stride = yv12_fb_new->y_stride; -+ int recon_uv_stride = yv12_fb_new->uv_stride; - -- int dst_fb_idx = pc->new_fb_idx; - unsigned char *ref_buffer[MAX_REF_FRAMES][3]; - unsigned char *dst_buffer[3]; - int i; -- int ref_fb_index[MAX_REF_FRAMES]; - int ref_fb_corrupted[MAX_REF_FRAMES]; - - ref_fb_corrupted[INTRA_FRAME] = 0; - -- ref_fb_index[LAST_FRAME] = pc->lst_fb_idx; -- ref_fb_index[GOLDEN_FRAME] = pc->gld_fb_idx; -- ref_fb_index[ALTREF_FRAME] = pc->alt_fb_idx; -- - for(i = 1; i < MAX_REF_FRAMES; i++) - { -- ref_buffer[i][0] = pc->yv12_fb[ref_fb_index[i]].y_buffer; -- ref_buffer[i][1] = pc->yv12_fb[ref_fb_index[i]].u_buffer; -- ref_buffer[i][2] = pc->yv12_fb[ref_fb_index[i]].v_buffer; -+ YV12_BUFFER_CONFIG *this_fb = pbi->dec_fb_ref[i]; - -- ref_fb_corrupted[i] = pc->yv12_fb[ref_fb_index[i]].corrupted; -+ ref_buffer[i][0] = this_fb->y_buffer; -+ ref_buffer[i][1] = this_fb->u_buffer; -+ ref_buffer[i][2] = this_fb->v_buffer; -+ -+ ref_fb_corrupted[i] = this_fb->corrupted; - } - -- dst_buffer[0] = pc->yv12_fb[dst_fb_idx].y_buffer; -- dst_buffer[1] = pc->yv12_fb[dst_fb_idx].u_buffer; -- dst_buffer[2] = pc->yv12_fb[dst_fb_idx].v_buffer; -+ dst_buffer[0] = yv12_fb_new->y_buffer; -+ dst_buffer[1] = yv12_fb_new->u_buffer; -+ dst_buffer[2] = yv12_fb_new->v_buffer; - - xd->up_available = (start_mb_row != 0); - -@@ -359,18 +346,20 @@ static void decode_mb_rows(VP8D_COMP *pbi, 
MACROBLOCKD *xd, int start_mb_row, - int i; - int recon_yoffset, recon_uvoffset; - int mb_col; -- int ref_fb_idx = pc->lst_fb_idx; -- int dst_fb_idx = pc->new_fb_idx; -- int recon_y_stride = pc->yv12_fb[ref_fb_idx].y_stride; -- int recon_uv_stride = pc->yv12_fb[ref_fb_idx].uv_stride; -- - int filter_level; - loop_filter_info_n *lfi_n = &pc->lf_info; - -- init_current_bc_fn(pbi, xd, start_mb_row, mb_row, num_part); -+ /* save last row processed by this thread */ -+ last_mb_row = mb_row; -+ /* select bool coder for current partition */ -+ xd->current_bc = &pbi->mbc[mb_row%num_part]; - - if (mb_row > 0) - last_row_current_mb_col = &pbi->mt_current_mb_col[mb_row -1]; -+ else -+ last_row_current_mb_col = &first_row_no_sync_above; -+ -+ current_mb_col = &pbi->mt_current_mb_col[mb_row]; - - recon_yoffset = mb_row * recon_y_stride * 16; - recon_uvoffset = mb_row * recon_uv_stride * 8; -@@ -394,7 +383,7 @@ static void decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row, - xd->recon_left[1] = pbi->mt_uleft_col[mb_row]; - xd->recon_left[2] = pbi->mt_vleft_col[mb_row]; - -- //TODO: move to outside row loop -+ /* TODO: move to outside row loop */ - xd->recon_left_stride[0] = 1; - xd->recon_left_stride[1] = 1; - } -@@ -412,16 +401,22 @@ static void decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row, - xd->recon_above[1] -= xd->dst.uv_stride; - xd->recon_above[2] -= xd->dst.uv_stride; - -- //TODO: move to outside row loop -+ /* TODO: move to outside row loop */ - xd->recon_left_stride[0] = xd->dst.y_stride; - xd->recon_left_stride[1] = xd->dst.uv_stride; -+ -+ setup_intra_recon_left(xd->recon_left[0], xd->recon_left[1], -+ xd->recon_left[2], xd->dst.y_stride, -+ xd->dst.uv_stride); - } - - for (mb_col = 0; mb_col < pc->mb_cols; mb_col++) - { -- if ( mb_row > 0 && (mb_col & (nsync-1)) == 0) -+ *current_mb_col = mb_col - 1; -+ -+ if ((mb_col & (nsync - 1)) == 0) - { -- while (mb_col > (*last_row_current_mb_col - nsync) && *last_row_current_mb_col != pc->mb_cols - 1) -+ while (mb_col > (*last_row_current_mb_col - nsync)) - { - x86_pause_hint(); - thread_sleep(0); -@@ -477,7 +472,7 @@ static void decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row, - /* propagate errors from reference frames */ - xd->corrupted |= ref_fb_corrupted[xd->mode_info_context->mbmi.ref_frame]; - -- decode_macroblock(pbi, xd, 0); -+ mt_decode_macroblock(pbi, xd, 0); - - xd->left_available = 1; - -@@ -591,9 +586,6 @@ static void decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row, - ++xd->mode_info_context; /* next mb */ - - xd->above_context++; -- -- /*pbi->mb_row_di[ithread].current_mb_col = mb_col;*/ -- pbi->mt_current_mb_col[mb_row] = mb_col; - } - - /* adjust to the next row of mbs */ -@@ -601,8 +593,8 @@ static void decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row, - { - if(mb_row != pc->mb_rows-1) - { -- int lasty = pc->yv12_fb[ref_fb_idx].y_width + VP8BORDERINPIXELS; -- int lastuv = (pc->yv12_fb[ref_fb_idx].y_width>>1) + (VP8BORDERINPIXELS>>1); -+ int lasty = yv12_fb_lst->y_width + VP8BORDERINPIXELS; -+ int lastuv = (yv12_fb_lst->y_width>>1) + (VP8BORDERINPIXELS>>1); - - for (i = 0; i < 4; i++) - { -@@ -611,8 +603,13 @@ static void decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row, - pbi->mt_vabove_row[mb_row +1][lastuv + i] = pbi->mt_vabove_row[mb_row +1][lastuv -1]; - } - } -- } else -- vp8_extend_mb_row(&pc->yv12_fb[dst_fb_idx], xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8); -+ } -+ else -+ vp8_extend_mb_row(yv12_fb_new, 
xd->dst.y_buffer + 16, -+ xd->dst.u_buffer + 8, xd->dst.v_buffer + 8); -+ -+ /* last MB of row is ready just after extension is done */ -+ *current_mb_col = mb_col + nsync; - - ++xd->mode_info_context; /* skip prediction column */ - xd->up_available = 1; -@@ -620,6 +617,11 @@ static void decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row, - /* since we have multithread */ - xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count; - } -+ -+ /* signal end of frame decoding if this thread processed the last mb_row */ -+ if (last_mb_row == (pc->mb_rows - 1)) -+ sem_post(&pbi->h_event_end_decoding); -+ - } - - -@@ -635,7 +637,6 @@ static THREAD_FUNCTION thread_decoding_proc(void *p_data) - if (pbi->b_multithreaded_rd == 0) - break; - -- /*if(WaitForSingleObject(pbi->h_event_start_decoding[ithread], INFINITE) == WAIT_OBJECT_0)*/ - if (sem_wait(&pbi->h_event_start_decoding[ithread]) == 0) - { - if (pbi->b_multithreaded_rd == 0) -@@ -643,21 +644,11 @@ static THREAD_FUNCTION thread_decoding_proc(void *p_data) - else - { - MACROBLOCKD *xd = &mbrd->mbd; -- - xd->left_context = &mb_row_left_context; - -- decode_mb_rows(pbi, xd, ithread+1, init_current_bc_threads); -+ mt_decode_mb_rows(pbi, xd, ithread+1); - } - } -- -- /* add this to each frame */ -- if ((mbrd->mb_row == pbi->common.mb_rows-1) || -- ((mbrd->mb_row == pbi->common.mb_rows-2) && -- (pbi->common.mb_rows % (pbi->decoding_thread_count+1))==1)) -- { -- /*SetEvent(pbi->h_event_end_decoding);*/ -- sem_post(&pbi->h_event_end_decoding); -- } - } - - return 0 ; -@@ -667,7 +658,7 @@ static THREAD_FUNCTION thread_decoding_proc(void *p_data) - void vp8_decoder_create_threads(VP8D_COMP *pbi) - { - int core_count = 0; -- int ithread; -+ unsigned int ithread; - - pbi->b_multithreaded_rd = 0; - pbi->allocated_decoding_thread_count = 0; -@@ -684,16 +675,17 @@ void vp8_decoder_create_threads(VP8D_COMP *pbi) - pbi->b_multithreaded_rd = 1; - pbi->decoding_thread_count = core_count - 1; - -- CHECK_MEM_ERROR(pbi->h_decoding_thread, vpx_malloc(sizeof(pthread_t) * pbi->decoding_thread_count)); -- CHECK_MEM_ERROR(pbi->h_event_start_decoding, vpx_malloc(sizeof(sem_t) * pbi->decoding_thread_count)); -- CHECK_MEM_ERROR(pbi->mb_row_di, vpx_memalign(32, sizeof(MB_ROW_DEC) * pbi->decoding_thread_count)); -- vpx_memset(pbi->mb_row_di, 0, sizeof(MB_ROW_DEC) * pbi->decoding_thread_count); -- CHECK_MEM_ERROR(pbi->de_thread_data, vpx_malloc(sizeof(DECODETHREAD_DATA) * pbi->decoding_thread_count)); -+ CALLOC_ARRAY(pbi->h_decoding_thread, pbi->decoding_thread_count); -+ CALLOC_ARRAY(pbi->h_event_start_decoding, pbi->decoding_thread_count); -+ CALLOC_ARRAY_ALIGNED(pbi->mb_row_di, pbi->decoding_thread_count, 32); -+ CALLOC_ARRAY(pbi->de_thread_data, pbi->decoding_thread_count); - - for (ithread = 0; ithread < pbi->decoding_thread_count; ithread++) - { - sem_init(&pbi->h_event_start_decoding[ithread], 0, 0); - -+ vp8_setup_block_dptrs(&pbi->mb_row_di[ithread].mbd); -+ - pbi->de_thread_data[ithread].ithread = ithread; - pbi->de_thread_data[ithread].ptr1 = (void *)pbi; - pbi->de_thread_data[ithread].ptr2 = (void *) &pbi->mb_row_di[ithread]; -@@ -810,32 +802,32 @@ void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows) - uv_width = width >>1; - - /* Allocate an int for each mb row. */ -- CHECK_MEM_ERROR(pbi->mt_current_mb_col, vpx_malloc(sizeof(int) * pc->mb_rows)); -+ CALLOC_ARRAY(pbi->mt_current_mb_col, pc->mb_rows); - - /* Allocate memory for above_row buffers. 
*/ -- CHECK_MEM_ERROR(pbi->mt_yabove_row, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows)); -- for (i=0; i< pc->mb_rows; i++) -+ CALLOC_ARRAY(pbi->mt_yabove_row, pc->mb_rows); -+ for (i = 0; i < pc->mb_rows; i++) - CHECK_MEM_ERROR(pbi->mt_yabove_row[i], vpx_memalign(16,sizeof(unsigned char) * (width + (VP8BORDERINPIXELS<<1)))); - -- CHECK_MEM_ERROR(pbi->mt_uabove_row, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows)); -- for (i=0; i< pc->mb_rows; i++) -+ CALLOC_ARRAY(pbi->mt_uabove_row, pc->mb_rows); -+ for (i = 0; i < pc->mb_rows; i++) - CHECK_MEM_ERROR(pbi->mt_uabove_row[i], vpx_memalign(16,sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS))); - -- CHECK_MEM_ERROR(pbi->mt_vabove_row, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows)); -- for (i=0; i< pc->mb_rows; i++) -+ CALLOC_ARRAY(pbi->mt_vabove_row, pc->mb_rows); -+ for (i = 0; i < pc->mb_rows; i++) - CHECK_MEM_ERROR(pbi->mt_vabove_row[i], vpx_memalign(16,sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS))); - - /* Allocate memory for left_col buffers. */ -- CHECK_MEM_ERROR(pbi->mt_yleft_col, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows)); -- for (i=0; i< pc->mb_rows; i++) -+ CALLOC_ARRAY(pbi->mt_yleft_col, pc->mb_rows); -+ for (i = 0; i < pc->mb_rows; i++) - CHECK_MEM_ERROR(pbi->mt_yleft_col[i], vpx_calloc(sizeof(unsigned char) * 16, 1)); - -- CHECK_MEM_ERROR(pbi->mt_uleft_col, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows)); -- for (i=0; i< pc->mb_rows; i++) -+ CALLOC_ARRAY(pbi->mt_uleft_col, pc->mb_rows); -+ for (i = 0; i < pc->mb_rows; i++) - CHECK_MEM_ERROR(pbi->mt_uleft_col[i], vpx_calloc(sizeof(unsigned char) * 8, 1)); - -- CHECK_MEM_ERROR(pbi->mt_vleft_col, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows)); -- for (i=0; i< pc->mb_rows; i++) -+ CALLOC_ARRAY(pbi->mt_vleft_col, pc->mb_rows); -+ for (i = 0; i < pc->mb_rows; i++) - CHECK_MEM_ERROR(pbi->mt_vleft_col[i], vpx_calloc(sizeof(unsigned char) * 8, 1)); - } - } -@@ -881,42 +873,46 @@ void vp8_decoder_remove_threads(VP8D_COMP *pbi) - void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd) - { - VP8_COMMON *pc = &pbi->common; -- int i; -+ unsigned int i; -+ int j; - - int filter_level = pc->filter_level; -+ YV12_BUFFER_CONFIG *yv12_fb_new = pbi->dec_fb_ref[INTRA_FRAME]; - - if (filter_level) - { - /* Set above_row buffer to 127 for decoding first MB row */ -- vpx_memset(pbi->mt_yabove_row[0] + VP8BORDERINPIXELS-1, 127, pc->yv12_fb[pc->lst_fb_idx].y_width + 5); -- vpx_memset(pbi->mt_uabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (pc->yv12_fb[pc->lst_fb_idx].y_width>>1) +5); -- vpx_memset(pbi->mt_vabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (pc->yv12_fb[pc->lst_fb_idx].y_width>>1) +5); -+ vpx_memset(pbi->mt_yabove_row[0] + VP8BORDERINPIXELS-1, 127, yv12_fb_new->y_width + 5); -+ vpx_memset(pbi->mt_uabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (yv12_fb_new->y_width>>1) +5); -+ vpx_memset(pbi->mt_vabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (yv12_fb_new->y_width>>1) +5); - -- for (i=1; imb_rows; i++) -+ for (j=1; jmb_rows; j++) - { -- vpx_memset(pbi->mt_yabove_row[i] + VP8BORDERINPIXELS-1, (unsigned char)129, 1); -- vpx_memset(pbi->mt_uabove_row[i] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1); -- vpx_memset(pbi->mt_vabove_row[i] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1); -+ vpx_memset(pbi->mt_yabove_row[j] + VP8BORDERINPIXELS-1, (unsigned char)129, 1); -+ vpx_memset(pbi->mt_uabove_row[j] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1); -+ vpx_memset(pbi->mt_vabove_row[j] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1); - } - - /* 
Set left_col to 129 initially */ -- for (i=0; imb_rows; i++) -+ for (j=0; jmb_rows; j++) - { -- vpx_memset(pbi->mt_yleft_col[i], (unsigned char)129, 16); -- vpx_memset(pbi->mt_uleft_col[i], (unsigned char)129, 8); -- vpx_memset(pbi->mt_vleft_col[i], (unsigned char)129, 8); -+ vpx_memset(pbi->mt_yleft_col[j], (unsigned char)129, 16); -+ vpx_memset(pbi->mt_uleft_col[j], (unsigned char)129, 8); -+ vpx_memset(pbi->mt_vleft_col[j], (unsigned char)129, 8); - } - - /* Initialize the loop filter for this frame. */ - vp8_loop_filter_frame_init(pc, &pbi->mb, filter_level); - } -+ else -+ vp8_setup_intra_recon_top_line(yv12_fb_new); - - setup_decoding_thread_data(pbi, xd, pbi->mb_row_di, pbi->decoding_thread_count); - - for (i = 0; i < pbi->decoding_thread_count; i++) - sem_post(&pbi->h_event_start_decoding[i]); - -- decode_mb_rows(pbi, xd, 0, init_current_bc); -+ mt_decode_mb_rows(pbi, xd, 0); - - sem_wait(&pbi->h_event_end_decoding); /* add back for each frame */ - } -diff --git a/vp8/encoder/bitstream.c b/vp8/encoder/bitstream.c -index 3824294..e666b6c 100644 ---- a/vp8/encoder/bitstream.c -+++ b/vp8/encoder/bitstream.c -@@ -118,7 +118,7 @@ static void update_mbintra_mode_probs(VP8_COMP *cpi) - - update_mode( - w, VP8_YMODES, vp8_ymode_encodings, vp8_ymode_tree, -- Pnew, x->fc.ymode_prob, bct, (unsigned int *)cpi->ymode_count -+ Pnew, x->fc.ymode_prob, bct, (unsigned int *)cpi->mb.ymode_count - ); - } - { -@@ -127,7 +127,7 @@ static void update_mbintra_mode_probs(VP8_COMP *cpi) - - update_mode( - w, VP8_UV_MODES, vp8_uv_mode_encodings, vp8_uv_mode_tree, -- Pnew, x->fc.uv_mode_prob, bct, (unsigned int *)cpi->uv_mode_count -+ Pnew, x->fc.uv_mode_prob, bct, (unsigned int *)cpi->mb.uv_mode_count - ); - } - } -@@ -172,7 +172,7 @@ void vp8_pack_tokens_c(vp8_writer *w, const TOKENEXTRA *p, int xcount) - while (p < stop) - { - const int t = p->Token; -- const vp8_token *a = vp8_coef_encodings + t; -+ vp8_token *a = vp8_coef_encodings + t; - const vp8_extra_bit_struct *b = vp8_extra_bits + t; - int i = 0; - const unsigned char *pp = p->context_tree; -@@ -397,7 +397,7 @@ static void pack_tokens_into_partitions_c(VP8_COMP *cpi, unsigned char *cx_data, - { - const TOKENEXTRA *p = cpi->tplist[mb_row].start; - const TOKENEXTRA *stop = cpi->tplist[mb_row].stop; -- int tokens = stop - p; -+ int tokens = (int)(stop - p); - - vp8_pack_tokens_c(w, p, tokens); - } -@@ -416,7 +416,7 @@ static void pack_mb_row_tokens_c(VP8_COMP *cpi, vp8_writer *w) - { - const TOKENEXTRA *p = cpi->tplist[mb_row].start; - const TOKENEXTRA *stop = cpi->tplist[mb_row].stop; -- int tokens = stop - p; -+ int tokens = (int)(stop - p); - - vp8_pack_tokens_c(w, p, tokens); - } -@@ -461,7 +461,7 @@ static void write_mv - - static void write_mb_features(vp8_writer *w, const MB_MODE_INFO *mi, const MACROBLOCKD *x) - { -- // Encode the MB segment id. -+ /* Encode the MB segment id. */ - if (x->segmentation_enabled && x->update_mb_segmentation_map) - { - switch (mi->segment_id) -@@ -483,7 +483,7 @@ static void write_mb_features(vp8_writer *w, const MB_MODE_INFO *mi, const MACRO - vp8_write(w, 1, x->mb_segment_tree_probs[2]); - break; - -- // TRAP.. This should not happen -+ /* TRAP.. 
This should not happen */ - default: - vp8_write(w, 0, x->mb_segment_tree_probs[0]); - vp8_write(w, 0, x->mb_segment_tree_probs[1]); -@@ -493,11 +493,11 @@ static void write_mb_features(vp8_writer *w, const MB_MODE_INFO *mi, const MACRO - } - void vp8_convert_rfct_to_prob(VP8_COMP *const cpi) - { -- const int *const rfct = cpi->count_mb_ref_frame_usage; -+ const int *const rfct = cpi->mb.count_mb_ref_frame_usage; - const int rf_intra = rfct[INTRA_FRAME]; - const int rf_inter = rfct[LAST_FRAME] + rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME]; - -- // Calculate the probabilities used to code the ref frame based on useage -+ /* Calculate the probabilities used to code the ref frame based on usage */ - if (!(cpi->prob_intra_coded = rf_intra * 255 / (rf_intra + rf_inter))) - cpi->prob_intra_coded = 1; - -@@ -539,7 +539,7 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi) - { - int total_mbs = pc->mb_rows * pc->mb_cols; - -- prob_skip_false = (total_mbs - cpi->skip_true_count ) * 256 / total_mbs; -+ prob_skip_false = (total_mbs - cpi->mb.skip_true_count ) * 256 / total_mbs; - - if (prob_skip_false <= 1) - prob_skip_false = 1; -@@ -571,8 +571,10 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi) - - MACROBLOCKD *xd = &cpi->mb.e_mbd; - -- // Distance of Mb to the various image edges. -- // These specified to 8th pel as they are always compared to MV values that are in 1/8th pel units -+ /* Distance of Mb to the various image edges. -+ * These specified to 8th pel as they are always compared to MV -+ * values that are in 1/8th pel units -+ */ - xd->mb_to_left_edge = -((mb_col * 16) << 3); - xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3; - xd->mb_to_top_edge = -((mb_row * 16)) << 3; -@@ -728,7 +730,7 @@ static void write_kfmodes(VP8_COMP *cpi) - { - int total_mbs = c->mb_rows * c->mb_cols; - -- prob_skip_false = (total_mbs - cpi->skip_true_count ) * 256 / total_mbs; -+ prob_skip_false = (total_mbs - cpi->mb.skip_true_count ) * 256 / total_mbs; - - if (prob_skip_false <= 1) - prob_skip_false = 1; -@@ -754,7 +756,7 @@ static void write_kfmodes(VP8_COMP *cpi) - if (c->mb_no_coeff_skip) - vp8_encode_bool(bc, m->mbmi.mb_skip_coeff, prob_skip_false); - -- kfwrite_ymode(bc, ym, c->kf_ymode_prob); -+ kfwrite_ymode(bc, ym, vp8_kf_ymode_prob); - - if (ym == B_PRED) - { -@@ -771,15 +773,15 @@ static void write_kfmodes(VP8_COMP *cpi) - ++intra_mode_stats [A] [L] [bm]; - #endif - -- write_bmode(bc, bm, c->kf_bmode_prob [A] [L]); -+ write_bmode(bc, bm, vp8_kf_bmode_prob [A] [L]); - } - while (++i < 16); - } - -- write_uv_mode(bc, (m++)->mbmi.uv_mode, c->kf_uv_mode_prob); -+ write_uv_mode(bc, (m++)->mbmi.uv_mode, vp8_kf_uv_mode_prob); - } - -- m++; // skip L prediction border -+ m++; /* skip L prediction border */ - } - } - -@@ -849,6 +851,7 @@ static int prob_update_savings(const unsigned int *ct, - - static int independent_coef_context_savings(VP8_COMP *cpi) - { -+ MACROBLOCK *const x = & cpi->mb; - int savings = 0; - int i = 0; - do -@@ -865,7 +868,7 @@ static int independent_coef_context_savings(VP8_COMP *cpi) - */ - - probs = (const unsigned int (*)[MAX_ENTROPY_TOKENS]) -- cpi->coef_counts[i][j]; -+ x->coef_counts[i][j]; - - /* Reset to default probabilities at key frames */ - if (cpi->common.frame_type == KEY_FRAME) -@@ -878,9 +881,6 @@ static int independent_coef_context_savings(VP8_COMP *cpi) - /* at every context */ - - /* calc probs and branch cts for this frame only */ -- //vp8_prob new_p [ENTROPY_NODES]; -- //unsigned int branch_ct [ENTROPY_NODES] [2]; -- - int t = 0; /* token/prob 
index */ - - vp8_tree_probs_from_distribution( -@@ -927,6 +927,7 @@ static int independent_coef_context_savings(VP8_COMP *cpi) - - static int default_coef_context_savings(VP8_COMP *cpi) - { -+ MACROBLOCK *const x = & cpi->mb; - int savings = 0; - int i = 0; - do -@@ -940,16 +941,13 @@ static int default_coef_context_savings(VP8_COMP *cpi) - /* at every context */ - - /* calc probs and branch cts for this frame only */ -- //vp8_prob new_p [ENTROPY_NODES]; -- //unsigned int branch_ct [ENTROPY_NODES] [2]; -- - int t = 0; /* token/prob index */ - - vp8_tree_probs_from_distribution( - MAX_ENTROPY_TOKENS, vp8_coef_encodings, vp8_coef_tree, - cpi->frame_coef_probs [i][j][k], - cpi->frame_branch_ct [i][j][k], -- cpi->coef_counts [i][j][k], -+ x->coef_counts [i][j][k], - 256, 1 - ); - -@@ -998,13 +996,13 @@ int vp8_estimate_entropy_savings(VP8_COMP *cpi) - { - int savings = 0; - -- const int *const rfct = cpi->count_mb_ref_frame_usage; -+ const int *const rfct = cpi->mb.count_mb_ref_frame_usage; - const int rf_intra = rfct[INTRA_FRAME]; - const int rf_inter = rfct[LAST_FRAME] + rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME]; - int new_intra, new_last, new_garf, oldtotal, newtotal; - int ref_frame_cost[MAX_REF_FRAMES]; - -- vp8_clear_system_state(); //__asm emms; -+ vp8_clear_system_state(); - - if (cpi->common.frame_type != KEY_FRAME) - { -@@ -1026,7 +1024,7 @@ int vp8_estimate_entropy_savings(VP8_COMP *cpi) - rfct[ALTREF_FRAME] * ref_frame_cost[ALTREF_FRAME]; - - -- // old costs -+ /* old costs */ - vp8_calc_ref_frame_costs(ref_frame_cost,cpi->prob_intra_coded, - cpi->prob_last_coded,cpi->prob_gf_coded); - -@@ -1078,7 +1076,7 @@ void vp8_update_coef_probs(VP8_COMP *cpi) - #endif - int savings = 0; - -- vp8_clear_system_state(); //__asm emms; -+ vp8_clear_system_state(); - - do - { -@@ -1110,21 +1108,15 @@ void vp8_update_coef_probs(VP8_COMP *cpi) - } - do - { -- //note: use result from vp8_estimate_entropy_savings, so no need to call vp8_tree_probs_from_distribution here. -+ /* note: use result from vp8_estimate_entropy_savings, so no -+ * need to call vp8_tree_probs_from_distribution here. -+ */ -+ - /* at every context */ - - /* calc probs and branch cts for this frame only */ -- //vp8_prob new_p [ENTROPY_NODES]; -- //unsigned int branch_ct [ENTROPY_NODES] [2]; -- - int t = 0; /* token/prob index */ - -- //vp8_tree_probs_from_distribution( -- // MAX_ENTROPY_TOKENS, vp8_coef_encodings, vp8_coef_tree, -- // new_p, branch_ct, (unsigned int *)cpi->coef_counts [i][j][k], -- // 256, 1 -- // ); -- - do - { - const vp8_prob newp = cpi->frame_coef_probs [i][j][k][t]; -@@ -1295,19 +1287,16 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest - Sectionbits[active_section = 1] += sizeof(VP8_HEADER) * 8 * 256; - #endif - -- //vp8_kf_default_bmode_probs() is called in vp8_setup_key_frame() once for each -- //K frame before encode frame. pc->kf_bmode_prob doesn't get changed anywhere -- //else. No need to call it again here. 
--yw -- //vp8_kf_default_bmode_probs( pc->kf_bmode_prob); -- -- // every keyframe send startcode, width, height, scale factor, clamp and color type -+ /* every keyframe send startcode, width, height, scale factor, clamp -+ * and color type -+ */ - if (oh.type == KEY_FRAME) - { - int v; - - validate_buffer(cx_data, 7, cx_data_end, &cpi->common.error); - -- // Start / synch code -+ /* Start / synch code */ - cx_data[0] = 0x9D; - cx_data[1] = 0x01; - cx_data[2] = 0x2a; -@@ -1326,7 +1315,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest - - vp8_start_encode(bc, cx_data, cx_data_end); - -- // signal clr type -+ /* signal clr type */ - vp8_write_bit(bc, pc->clr_type); - vp8_write_bit(bc, pc->clamp_type); - -@@ -1335,13 +1324,13 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest - vp8_start_encode(bc, cx_data, cx_data_end); - - -- // Signal whether or not Segmentation is enabled -+ /* Signal whether or not Segmentation is enabled */ - vp8_write_bit(bc, xd->segmentation_enabled); - -- // Indicate which features are enabled -+ /* Indicate which features are enabled */ - if (xd->segmentation_enabled) - { -- // Signal whether or not the segmentation map is being updated. -+ /* Signal whether or not the segmentation map is being updated. */ - vp8_write_bit(bc, xd->update_mb_segmentation_map); - vp8_write_bit(bc, xd->update_mb_segmentation_data); - -@@ -1351,15 +1340,15 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest - - vp8_write_bit(bc, xd->mb_segement_abs_delta); - -- // For each segmentation feature (Quant and loop filter level) -+ /* For each segmentation feature (Quant and loop filter level) */ - for (i = 0; i < MB_LVL_MAX; i++) - { -- // For each of the segments -+ /* For each of the segments */ - for (j = 0; j < MAX_MB_SEGMENTS; j++) - { - Data = xd->segment_feature_data[i][j]; - -- // Frame level data -+ /* Frame level data */ - if (Data) - { - vp8_write_bit(bc, 1); -@@ -1384,7 +1373,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest - - if (xd->update_mb_segmentation_map) - { -- // Write the probs used to decode the segment id for each macro block. -+ /* Write the probs used to decode the segment id for each mb */ - for (i = 0; i < MB_FEATURE_TREE_PROBS; i++) - { - int Data = xd->mb_segment_tree_probs[i]; -@@ -1400,17 +1389,18 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest - } - } - -- // Code to determine whether or not to update the scan order. - vp8_write_bit(bc, pc->filter_type); - vp8_write_literal(bc, pc->filter_level, 6); - vp8_write_literal(bc, pc->sharpness_level, 3); - -- // Write out loop filter deltas applied at the MB level based on mode or ref frame (if they are enabled). -+ /* Write out loop filter deltas applied at the MB level based on mode -+ * or ref frame (if they are enabled). 
-+ */ - vp8_write_bit(bc, xd->mode_ref_lf_delta_enabled); - - if (xd->mode_ref_lf_delta_enabled) - { -- // Do the deltas need to be updated -+ /* Do the deltas need to be updated */ - int send_update = xd->mode_ref_lf_delta_update - || cpi->oxcf.error_resilient_mode; - -@@ -1419,12 +1409,12 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest - { - int Data; - -- // Send update -+ /* Send update */ - for (i = 0; i < MAX_REF_LF_DELTAS; i++) - { - Data = xd->ref_lf_deltas[i]; - -- // Frame level data -+ /* Frame level data */ - if (xd->ref_lf_deltas[i] != xd->last_ref_lf_deltas[i] - || cpi->oxcf.error_resilient_mode) - { -@@ -1434,20 +1424,20 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest - if (Data > 0) - { - vp8_write_literal(bc, (Data & 0x3F), 6); -- vp8_write_bit(bc, 0); // sign -+ vp8_write_bit(bc, 0); /* sign */ - } - else - { - Data = -Data; - vp8_write_literal(bc, (Data & 0x3F), 6); -- vp8_write_bit(bc, 1); // sign -+ vp8_write_bit(bc, 1); /* sign */ - } - } - else - vp8_write_bit(bc, 0); - } - -- // Send update -+ /* Send update */ - for (i = 0; i < MAX_MODE_LF_DELTAS; i++) - { - Data = xd->mode_lf_deltas[i]; -@@ -1461,13 +1451,13 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest - if (Data > 0) - { - vp8_write_literal(bc, (Data & 0x3F), 6); -- vp8_write_bit(bc, 0); // sign -+ vp8_write_bit(bc, 0); /* sign */ - } - else - { - Data = -Data; - vp8_write_literal(bc, (Data & 0x3F), 6); -- vp8_write_bit(bc, 1); // sign -+ vp8_write_bit(bc, 1); /* sign */ - } - } - else -@@ -1476,34 +1466,42 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest - } - } - -- //signal here is multi token partition is enabled -+ /* signal here is multi token partition is enabled */ - vp8_write_literal(bc, pc->multi_token_partition, 2); - -- // Frame Qbaseline quantizer index -+ /* Frame Qbaseline quantizer index */ - vp8_write_literal(bc, pc->base_qindex, 7); - -- // Transmit Dc, Second order and Uv quantizer delta information -+ /* Transmit Dc, Second order and Uv quantizer delta information */ - put_delta_q(bc, pc->y1dc_delta_q); - put_delta_q(bc, pc->y2dc_delta_q); - put_delta_q(bc, pc->y2ac_delta_q); - put_delta_q(bc, pc->uvdc_delta_q); - put_delta_q(bc, pc->uvac_delta_q); - -- // When there is a key frame all reference buffers are updated using the new key frame -+ /* When there is a key frame all reference buffers are updated using -+ * the new key frame -+ */ - if (pc->frame_type != KEY_FRAME) - { -- // Should the GF or ARF be updated using the transmitted frame or buffer -+ /* Should the GF or ARF be updated using the transmitted frame -+ * or buffer -+ */ - vp8_write_bit(bc, pc->refresh_golden_frame); - vp8_write_bit(bc, pc->refresh_alt_ref_frame); - -- // If not being updated from current frame should either GF or ARF be updated from another buffer -+ /* If not being updated from current frame should either GF or ARF -+ * be updated from another buffer -+ */ - if (!pc->refresh_golden_frame) - vp8_write_literal(bc, pc->copy_buffer_to_gf, 2); - - if (!pc->refresh_alt_ref_frame) - vp8_write_literal(bc, pc->copy_buffer_to_arf, 2); - -- // Indicate reference frame sign bias for Golden and ARF frames (always 0 for last frame buffer) -+ /* Indicate reference frame sign bias for Golden and ARF frames -+ * (always 0 for last frame buffer) -+ */ - vp8_write_bit(bc, pc->ref_frame_sign_bias[GOLDEN_FRAME]); - vp8_write_bit(bc, pc->ref_frame_sign_bias[ALTREF_FRAME]); - } -@@ 
-1532,14 +1530,14 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest - - #endif - -- vp8_clear_system_state(); //__asm emms; -+ vp8_clear_system_state(); - - #if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING - pack_coef_probs(cpi); - #else - if (pc->refresh_entropy_probs == 0) - { -- // save a copy for later refresh -+ /* save a copy for later refresh */ - vpx_memcpy(&cpi->common.lfc, &cpi->common.fc, sizeof(cpi->common.fc)); - } - -@@ -1550,7 +1548,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest - active_section = 2; - #endif - -- // Write out the mb_no_coeff_skip flag -+ /* Write out the mb_no_coeff_skip flag */ - vp8_write_bit(bc, pc->mb_no_coeff_skip); - - if (pc->frame_type == KEY_FRAME) -diff --git a/vp8/encoder/block.h b/vp8/encoder/block.h -index 6165d04..a30f888 100644 ---- a/vp8/encoder/block.h -+++ b/vp8/encoder/block.h -@@ -18,7 +18,10 @@ - #include "vp8/common/entropy.h" - #include "vpx_ports/mem.h" - --// motion search site -+#define MAX_MODES 20 -+#define MAX_ERROR_BINS 1024 -+ -+/* motion search site */ - typedef struct - { - MV mv; -@@ -27,11 +30,11 @@ typedef struct - - typedef struct block - { -- // 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries -+ /* 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries */ - short *src_diff; - short *coeff; - -- // 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries -+ /* 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries */ - short *quant; - short *quant_fast; - unsigned char *quant_shift; -@@ -39,7 +42,7 @@ typedef struct block - short *zrun_zbin_boost; - short *round; - -- // Zbin Over Quant value -+ /* Zbin Over Quant value */ - short zbin_extra; - - unsigned char **base_src; -@@ -59,12 +62,12 @@ typedef struct - - typedef struct macroblock - { -- DECLARE_ALIGNED(16, short, src_diff[400]); // 16x16 Y 8x8 U 8x8 V 4x4 2nd Y -- DECLARE_ALIGNED(16, short, coeff[400]); // 16x16 Y 8x8 U 8x8 V 4x4 2nd Y -+ DECLARE_ALIGNED(16, short, src_diff[400]); /* 25 blocks Y,U,V,Y2 */ -+ DECLARE_ALIGNED(16, short, coeff[400]); /* 25 blocks Y,U,V,Y2 */ - DECLARE_ALIGNED(16, unsigned char, thismb[256]); - - unsigned char *thismb_ptr; -- // 16 Y blocks, 4 U blocks, 4 V blocks, 1 DC 2nd order block each with 16 entries -+ /* 16 Y, 4 U, 4 V, 1 DC 2nd order block */ - BLOCK block[25]; - - YV12_BUFFER_CONFIG src; -@@ -90,16 +93,18 @@ typedef struct macroblock - signed int act_zbin_adj; - signed int last_act_zbin_adj; - -- int mvcosts[2][MVvals+1]; - int *mvcost[2]; -- int mvsadcosts[2][MVfpvals+1]; - int *mvsadcost[2]; -- int mbmode_cost[2][MB_MODE_COUNT]; -- int intra_uv_mode_cost[2][MB_MODE_COUNT]; -- unsigned int bmode_costs[10][10][10]; -- unsigned int inter_bmode_costs[B_MODE_COUNT]; -- -- // These define limits to motion vector components to prevent them from extending outside the UMV borders -+ int (*mbmode_cost)[MB_MODE_COUNT]; -+ int (*intra_uv_mode_cost)[MB_MODE_COUNT]; -+ int (*bmode_costs)[10][10]; -+ int *inter_bmode_costs; -+ int (*token_costs)[COEF_BANDS][PREV_COEF_CONTEXTS] -+ [MAX_ENTROPY_TOKENS]; -+ -+ /* These define limits to motion vector components to prevent -+ * them from extending outside the UMV borders. 
-+ */ - int mv_col_min; - int mv_col_max; - int mv_row_min; -@@ -107,18 +112,45 @@ typedef struct macroblock - - int skip; - -- int encode_breakout; -+ unsigned int encode_breakout; - -- //char * gf_active_ptr; - signed char *gf_active_ptr; - - unsigned char *active_ptr; - MV_CONTEXT *mvc; - -- unsigned int token_costs[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; - int optimize; - int q_index; - -+#if CONFIG_TEMPORAL_DENOISING -+ MB_PREDICTION_MODE best_sse_inter_mode; -+ int_mv best_sse_mv; -+ MV_REFERENCE_FRAME best_reference_frame; -+ MV_REFERENCE_FRAME best_zeromv_reference_frame; -+ unsigned char need_to_clamp_best_mvs; -+#endif -+ -+ int skip_true_count; -+ unsigned int coef_counts [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; -+ unsigned int MVcount [2] [MVvals]; /* (row,col) MV cts this frame */ -+ int ymode_count [VP8_YMODES]; /* intra MB type cts this frame */ -+ int uv_mode_count[VP8_UV_MODES]; /* intra MB type cts this frame */ -+ int64_t prediction_error; -+ int64_t intra_error; -+ int count_mb_ref_frame_usage[MAX_REF_FRAMES]; -+ -+ int rd_thresh_mult[MAX_MODES]; -+ int rd_threshes[MAX_MODES]; -+ unsigned int mbs_tested_so_far; -+ unsigned int mode_test_hit_counts[MAX_MODES]; -+ int zbin_mode_boost_enabled; -+ int zbin_mode_boost; -+ int last_zbin_mode_boost; -+ -+ int last_zbin_over_quant; -+ int zbin_over_quant; -+ int error_bins[MAX_ERROR_BINS]; -+ - void (*short_fdct4x4)(short *input, short *output, int pitch); - void (*short_fdct8x4)(short *input, short *output, int pitch); - void (*short_walsh4x4)(short *input, short *output, int pitch); -diff --git a/vp8/encoder/boolhuff.h b/vp8/encoder/boolhuff.h -index fb6cbaf..8309063 100644 ---- a/vp8/encoder/boolhuff.h -+++ b/vp8/encoder/boolhuff.h -@@ -32,7 +32,7 @@ typedef struct - unsigned char *buffer_end; - struct vpx_internal_error_info *error; - -- // Variables used to track bit costs without outputing to the bitstream -+ /* Variables used to track bit costs without outputing to the bitstream */ - unsigned int measure_cost; - unsigned long bit_counter; - } BOOL_CODER; -diff --git a/vp8/encoder/denoising.c b/vp8/encoder/denoising.c -index 09ed9dd..f3faa22 100644 ---- a/vp8/encoder/denoising.c -+++ b/vp8/encoder/denoising.c -@@ -15,198 +15,293 @@ - #include "vpx_mem/vpx_mem.h" - #include "vpx_rtcd.h" - --static const unsigned int NOISE_MOTION_THRESHOLD = 20*20; --static const unsigned int NOISE_DIFF2_THRESHOLD = 75; --// SSE_DIFF_THRESHOLD is selected as ~95% confidence assuming var(noise) ~= 100. --static const unsigned int SSE_DIFF_THRESHOLD = 16*16*20; --static const unsigned int SSE_THRESHOLD = 16*16*40; -+static const unsigned int NOISE_MOTION_THRESHOLD = 25 * 25; -+/* SSE_DIFF_THRESHOLD is selected as ~95% confidence assuming -+ * var(noise) ~= 100. -+ */ -+static const unsigned int SSE_DIFF_THRESHOLD = 16 * 16 * 20; -+static const unsigned int SSE_THRESHOLD = 16 * 16 * 40; - --static uint8_t blend(uint8_t state, uint8_t sample, uint8_t factor_q8) --{ -- return (uint8_t)( -- (((uint16_t)factor_q8 * ((uint16_t)state) + // Q8 -- (uint16_t)(256 - factor_q8) * ((uint16_t)sample)) + 128) // Q8 -- >> 8); --} -+/* -+ * The filter function was modified to reduce the computational complexity. -+ * Step 1: -+ * Instead of applying tap coefficients for each pixel, we calculated the -+ * pixel adjustments vs. pixel diff value ahead of time. 
-+ * adjustment = filtered_value - current_raw -+ * = (filter_coefficient * diff + 128) >> 8 -+ * where -+ * filter_coefficient = (255 << 8) / (256 + ((absdiff * 330) >> 3)); -+ * filter_coefficient += filter_coefficient / -+ * (3 + motion_magnitude_adjustment); -+ * filter_coefficient is clamped to 0 ~ 255. -+ * -+ * Step 2: -+ * The adjustment vs. diff curve becomes flat very quick when diff increases. -+ * This allowed us to use only several levels to approximate the curve without -+ * changing the filtering algorithm too much. -+ * The adjustments were further corrected by checking the motion magnitude. -+ * The levels used are: -+ * diff adjustment w/o motion correction adjustment w/ motion correction -+ * [-255, -16] -6 -7 -+ * [-15, -8] -4 -5 -+ * [-7, -4] -3 -4 -+ * [-3, 3] diff diff -+ * [4, 7] 3 4 -+ * [8, 15] 4 5 -+ * [16, 255] 6 7 -+ */ - --static unsigned int denoiser_motion_compensate(YV12_BUFFER_CONFIG* src, -- YV12_BUFFER_CONFIG* dst, -- MACROBLOCK* x, -- unsigned int best_sse, -- unsigned int zero_mv_sse, -- int recon_yoffset, -- int recon_uvoffset) -+int vp8_denoiser_filter_c(YV12_BUFFER_CONFIG *mc_running_avg, -+ YV12_BUFFER_CONFIG *running_avg, MACROBLOCK *signal, -+ unsigned int motion_magnitude, int y_offset, -+ int uv_offset) - { -- MACROBLOCKD filter_xd = x->e_mbd; -- int mv_col; -- int mv_row; -- int sse_diff = zero_mv_sse - best_sse; -- // Compensate the running average. -- filter_xd.pre.y_buffer = src->y_buffer + recon_yoffset; -- filter_xd.pre.u_buffer = src->u_buffer + recon_uvoffset; -- filter_xd.pre.v_buffer = src->v_buffer + recon_uvoffset; -- // Write the compensated running average to the destination buffer. -- filter_xd.dst.y_buffer = dst->y_buffer + recon_yoffset; -- filter_xd.dst.u_buffer = dst->u_buffer + recon_uvoffset; -- filter_xd.dst.v_buffer = dst->v_buffer + recon_uvoffset; -- // Use the best MV for the compensation. -- filter_xd.mode_info_context->mbmi.ref_frame = LAST_FRAME; -- filter_xd.mode_info_context->mbmi.mode = filter_xd.best_sse_inter_mode; -- filter_xd.mode_info_context->mbmi.mv = filter_xd.best_sse_mv; -- filter_xd.mode_info_context->mbmi.need_to_clamp_mvs = -- filter_xd.need_to_clamp_best_mvs; -- mv_col = filter_xd.best_sse_mv.as_mv.col; -- mv_row = filter_xd.best_sse_mv.as_mv.row; -- if (filter_xd.mode_info_context->mbmi.mode <= B_PRED || -- (mv_row*mv_row + mv_col*mv_col <= NOISE_MOTION_THRESHOLD && -- sse_diff < SSE_DIFF_THRESHOLD)) -- { -- // Handle intra blocks as referring to last frame with zero motion and -- // let the absolute pixel difference affect the filter factor. -- // Also consider small amount of motion as being random walk due to noise, -- // if it doesn't mean that we get a much bigger error. -- // Note that any changes to the mode info only affects the denoising. 
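The comment block above spells out the precomputed adjustment curve that replaces the old per-pixel blend() multiply. A rough standalone sketch of that lookup follows; the function name and the low_motion flag are illustrative stand-ins rather than libvpx identifiers, and the +1 boost corresponds to the code path taken when the block's motion magnitude is at or below the low-motion threshold.

#include <stdlib.h>

/* Illustrative sketch, not library code: map the difference between the
 * motion-compensated running average and the raw pixel to the adjustment
 * levels described in the table above ({3, 4, 6}, each raised by 1 for
 * near-static blocks). */
static int denoise_adjustment_sketch(int diff, int low_motion)
{
    int absdiff = abs(diff);
    int adjustment;

    if (absdiff <= 3)
        return diff;          /* small diffs: take the filtered value as-is */
    else if (absdiff <= 7)
        adjustment = 3;
    else if (absdiff <= 15)
        adjustment = 4;
    else
        adjustment = 6;

    if (low_motion)
        adjustment += 1;      /* filter static blocks more aggressively */

    return diff > 0 ? adjustment : -adjustment;
}

The filtered pixel is then the raw pixel plus this adjustment, clamped to the 0..255 range, which is what the new vp8_denoiser_filter_c does level by level.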
-- filter_xd.mode_info_context->mbmi.ref_frame = LAST_FRAME; -- filter_xd.mode_info_context->mbmi.mode = ZEROMV; -- filter_xd.mode_info_context->mbmi.mv.as_int = 0; -- x->e_mbd.best_sse_inter_mode = ZEROMV; -- x->e_mbd.best_sse_mv.as_int = 0; -- best_sse = zero_mv_sse; -- } -- if (!x->skip) -- { -- vp8_build_inter_predictors_mb(&filter_xd); -- } -- else -- { -- vp8_build_inter16x16_predictors_mb(&filter_xd, -- filter_xd.dst.y_buffer, -- filter_xd.dst.u_buffer, -- filter_xd.dst.v_buffer, -- filter_xd.dst.y_stride, -- filter_xd.dst.uv_stride); -- } -- return best_sse; --} -+ unsigned char *sig = signal->thismb; -+ int sig_stride = 16; -+ unsigned char *mc_running_avg_y = mc_running_avg->y_buffer + y_offset; -+ int mc_avg_y_stride = mc_running_avg->y_stride; -+ unsigned char *running_avg_y = running_avg->y_buffer + y_offset; -+ int avg_y_stride = running_avg->y_stride; -+ int r, c, i; -+ int sum_diff = 0; -+ int adj_val[3] = {3, 4, 6}; - --static void denoiser_filter(YV12_BUFFER_CONFIG* mc_running_avg, -- YV12_BUFFER_CONFIG* running_avg, -- MACROBLOCK* signal, -- unsigned int motion_magnitude2, -- int y_offset, -- int uv_offset) --{ -- unsigned char* sig = signal->thismb; -- int sig_stride = 16; -- unsigned char* mc_running_avg_y = mc_running_avg->y_buffer + y_offset; -- int mc_avg_y_stride = mc_running_avg->y_stride; -- unsigned char* running_avg_y = running_avg->y_buffer + y_offset; -- int avg_y_stride = running_avg->y_stride; -- int r, c; -- for (r = 0; r < 16; r++) -- { -- for (c = 0; c < 16; c++) -+ /* If motion_magnitude is small, making the denoiser more aggressive by -+ * increasing the adjustment for each level. */ -+ if (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) -+ { -+ for (i = 0; i < 3; i++) -+ adj_val[i] += 1; -+ } -+ -+ for (r = 0; r < 16; ++r) - { -- int diff; -- int absdiff = 0; -- unsigned int filter_coefficient; -- absdiff = sig[c] - mc_running_avg_y[c]; -- absdiff = absdiff > 0 ? absdiff : -absdiff; -- assert(absdiff >= 0 && absdiff < 256); -- filter_coefficient = (255 << 8) / (256 + ((absdiff * 330) >> 3)); -- // Allow some additional filtering of static blocks, or blocks with very -- // small motion vectors. -- filter_coefficient += filter_coefficient / (3 + (motion_magnitude2 >> 3)); -- filter_coefficient = filter_coefficient > 255 ? 255 : filter_coefficient; -- -- running_avg_y[c] = blend(mc_running_avg_y[c], sig[c], filter_coefficient); -- diff = sig[c] - running_avg_y[c]; -- -- if (diff * diff < NOISE_DIFF2_THRESHOLD) -- { -- // Replace with mean to suppress the noise. -- sig[c] = running_avg_y[c]; -- } -- else -- { -- // Replace the filter state with the signal since the change in this -- // pixel isn't classified as noise. -- running_avg_y[c] = sig[c]; -- } -+ for (c = 0; c < 16; ++c) -+ { -+ int diff = 0; -+ int adjustment = 0; -+ int absdiff = 0; -+ -+ diff = mc_running_avg_y[c] - sig[c]; -+ absdiff = abs(diff); -+ -+ /* When |diff| < 4, use pixel value from last denoised raw. 
*/ -+ if (absdiff <= 3) -+ { -+ running_avg_y[c] = mc_running_avg_y[c]; -+ sum_diff += diff; -+ } -+ else -+ { -+ if (absdiff >= 4 && absdiff <= 7) -+ adjustment = adj_val[0]; -+ else if (absdiff >= 8 && absdiff <= 15) -+ adjustment = adj_val[1]; -+ else -+ adjustment = adj_val[2]; -+ -+ if (diff > 0) -+ { -+ if ((sig[c] + adjustment) > 255) -+ running_avg_y[c] = 255; -+ else -+ running_avg_y[c] = sig[c] + adjustment; -+ -+ sum_diff += adjustment; -+ } -+ else -+ { -+ if ((sig[c] - adjustment) < 0) -+ running_avg_y[c] = 0; -+ else -+ running_avg_y[c] = sig[c] - adjustment; -+ -+ sum_diff -= adjustment; -+ } -+ } -+ } -+ -+ /* Update pointers for next iteration. */ -+ sig += sig_stride; -+ mc_running_avg_y += mc_avg_y_stride; -+ running_avg_y += avg_y_stride; - } -- sig += sig_stride; -- mc_running_avg_y += mc_avg_y_stride; -- running_avg_y += avg_y_stride; -- } -+ -+ if (abs(sum_diff) > SUM_DIFF_THRESHOLD) -+ return COPY_BLOCK; -+ -+ vp8_copy_mem16x16(running_avg->y_buffer + y_offset, avg_y_stride, -+ signal->thismb, sig_stride); -+ return FILTER_BLOCK; - } - - int vp8_denoiser_allocate(VP8_DENOISER *denoiser, int width, int height) - { -- assert(denoiser); -- denoiser->yv12_running_avg.flags = 0; -- if (vp8_yv12_alloc_frame_buffer(&(denoiser->yv12_running_avg), width, -- height, VP8BORDERINPIXELS) < 0) -- { -- vp8_denoiser_free(denoiser); -- return 1; -- } -- denoiser->yv12_mc_running_avg.flags = 0; -- if (vp8_yv12_alloc_frame_buffer(&(denoiser->yv12_mc_running_avg), width, -- height, VP8BORDERINPIXELS) < 0) -- { -- vp8_denoiser_free(denoiser); -- return 1; -- } -- vpx_memset(denoiser->yv12_running_avg.buffer_alloc, 0, -- denoiser->yv12_running_avg.frame_size); -- vpx_memset(denoiser->yv12_mc_running_avg.buffer_alloc, 0, -- denoiser->yv12_mc_running_avg.frame_size); -- return 0; -+ int i; -+ assert(denoiser); -+ -+ for (i = 0; i < MAX_REF_FRAMES; i++) -+ { -+ denoiser->yv12_running_avg[i].flags = 0; -+ -+ if (vp8_yv12_alloc_frame_buffer(&(denoiser->yv12_running_avg[i]), width, -+ height, VP8BORDERINPIXELS) -+ < 0) -+ { -+ vp8_denoiser_free(denoiser); -+ return 1; -+ } -+ vpx_memset(denoiser->yv12_running_avg[i].buffer_alloc, 0, -+ denoiser->yv12_running_avg[i].frame_size); -+ -+ } -+ denoiser->yv12_mc_running_avg.flags = 0; -+ -+ if (vp8_yv12_alloc_frame_buffer(&(denoiser->yv12_mc_running_avg), width, -+ height, VP8BORDERINPIXELS) < 0) -+ { -+ vp8_denoiser_free(denoiser); -+ return 1; -+ } -+ -+ vpx_memset(denoiser->yv12_mc_running_avg.buffer_alloc, 0, -+ denoiser->yv12_mc_running_avg.frame_size); -+ return 0; - } - - void vp8_denoiser_free(VP8_DENOISER *denoiser) - { -- assert(denoiser); -- vp8_yv12_de_alloc_frame_buffer(&denoiser->yv12_running_avg); -- vp8_yv12_de_alloc_frame_buffer(&denoiser->yv12_mc_running_avg); -+ int i; -+ assert(denoiser); -+ -+ for (i = 0; i < MAX_REF_FRAMES ; i++) -+ { -+ vp8_yv12_de_alloc_frame_buffer(&denoiser->yv12_running_avg[i]); -+ } -+ vp8_yv12_de_alloc_frame_buffer(&denoiser->yv12_mc_running_avg); - } - -+ - void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser, - MACROBLOCK *x, - unsigned int best_sse, - unsigned int zero_mv_sse, - int recon_yoffset, -- int recon_uvoffset) { -- int mv_row; -- int mv_col; -- unsigned int motion_magnitude2; -- // Motion compensate the running average. 
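The rewritten filter also accumulates a signed sum of the adjustments applied across the 16x16 block and rejects the whole block when that sum drifts too far, copying the raw pixels back instead. A minimal sketch of that accept/reject step, assuming the SUM_DIFF_THRESHOLD value of 16*16*2 that the new denoising.h defines further down; the SKETCH_ names are placeholders for the real COPY_BLOCK/FILTER_BLOCK enum.

#include <stdlib.h>

/* Sketch only: mirrors the block-level outcome of the new filter, using
 * the threshold value added in denoising.h (16 * 16 * 2). */
enum denoise_decision_sketch { SKETCH_COPY_BLOCK, SKETCH_FILTER_BLOCK };

static enum denoise_decision_sketch
denoise_block_decision_sketch(int sum_diff)
{
    const int sum_diff_threshold = 16 * 16 * 2;  /* SUM_DIFF_THRESHOLD */

    /* A large net drift means the "denoised" block no longer resembles
     * the source block, so keep the raw pixels instead. */
    if (abs(sum_diff) > sum_diff_threshold)
        return SKETCH_COPY_BLOCK;
    return SKETCH_FILTER_BLOCK;
}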
-- best_sse = denoiser_motion_compensate(&denoiser->yv12_running_avg, -- &denoiser->yv12_mc_running_avg, -- x, -- best_sse, -- zero_mv_sse, -- recon_yoffset, -- recon_uvoffset); -- -- mv_row = x->e_mbd.best_sse_mv.as_mv.row; -- mv_col = x->e_mbd.best_sse_mv.as_mv.col; -- motion_magnitude2 = mv_row*mv_row + mv_col*mv_col; -- if (best_sse > SSE_THRESHOLD || -- motion_magnitude2 > 8 * NOISE_MOTION_THRESHOLD) -- { -- // No filtering of this block since it differs too much from the predictor, -- // or the motion vector magnitude is considered too big. -- vp8_copy_mem16x16(x->thismb, 16, -- denoiser->yv12_running_avg.y_buffer + recon_yoffset, -- denoiser->yv12_running_avg.y_stride); -- return; -- } -- // Filter. -- denoiser_filter(&denoiser->yv12_mc_running_avg, -- &denoiser->yv12_running_avg, -- x, -- motion_magnitude2, -- recon_yoffset, -- recon_uvoffset); -+ int recon_uvoffset) -+{ -+ int mv_row; -+ int mv_col; -+ unsigned int motion_magnitude2; -+ -+ MV_REFERENCE_FRAME frame = x->best_reference_frame; -+ MV_REFERENCE_FRAME zero_frame = x->best_zeromv_reference_frame; -+ -+ enum vp8_denoiser_decision decision = FILTER_BLOCK; -+ -+ if (zero_frame) -+ { -+ YV12_BUFFER_CONFIG *src = &denoiser->yv12_running_avg[frame]; -+ YV12_BUFFER_CONFIG *dst = &denoiser->yv12_mc_running_avg; -+ YV12_BUFFER_CONFIG saved_pre,saved_dst; -+ MB_MODE_INFO saved_mbmi; -+ MACROBLOCKD *filter_xd = &x->e_mbd; -+ MB_MODE_INFO *mbmi = &filter_xd->mode_info_context->mbmi; -+ int mv_col; -+ int mv_row; -+ int sse_diff = zero_mv_sse - best_sse; -+ -+ saved_mbmi = *mbmi; -+ -+ /* Use the best MV for the compensation. */ -+ mbmi->ref_frame = x->best_reference_frame; -+ mbmi->mode = x->best_sse_inter_mode; -+ mbmi->mv = x->best_sse_mv; -+ mbmi->need_to_clamp_mvs = x->need_to_clamp_best_mvs; -+ mv_col = x->best_sse_mv.as_mv.col; -+ mv_row = x->best_sse_mv.as_mv.row; -+ -+ if (frame == INTRA_FRAME || -+ ((unsigned int)(mv_row *mv_row + mv_col *mv_col) -+ <= NOISE_MOTION_THRESHOLD && -+ sse_diff < (int)SSE_DIFF_THRESHOLD)) -+ { -+ /* -+ * Handle intra blocks as referring to last frame with zero motion -+ * and let the absolute pixel difference affect the filter factor. -+ * Also consider small amount of motion as being random walk due -+ * to noise, if it doesn't mean that we get a much bigger error. -+ * Note that any changes to the mode info only affects the -+ * denoising. -+ */ -+ mbmi->ref_frame = -+ x->best_zeromv_reference_frame; -+ -+ src = &denoiser->yv12_running_avg[zero_frame]; -+ -+ mbmi->mode = ZEROMV; -+ mbmi->mv.as_int = 0; -+ x->best_sse_inter_mode = ZEROMV; -+ x->best_sse_mv.as_int = 0; -+ best_sse = zero_mv_sse; -+ } -+ -+ saved_pre = filter_xd->pre; -+ saved_dst = filter_xd->dst; -+ -+ /* Compensate the running average. */ -+ filter_xd->pre.y_buffer = src->y_buffer + recon_yoffset; -+ filter_xd->pre.u_buffer = src->u_buffer + recon_uvoffset; -+ filter_xd->pre.v_buffer = src->v_buffer + recon_uvoffset; -+ /* Write the compensated running average to the destination buffer. 
*/ -+ filter_xd->dst.y_buffer = dst->y_buffer + recon_yoffset; -+ filter_xd->dst.u_buffer = dst->u_buffer + recon_uvoffset; -+ filter_xd->dst.v_buffer = dst->v_buffer + recon_uvoffset; -+ -+ if (!x->skip) -+ { -+ vp8_build_inter_predictors_mb(filter_xd); -+ } -+ else -+ { -+ vp8_build_inter16x16_predictors_mb(filter_xd, -+ filter_xd->dst.y_buffer, -+ filter_xd->dst.u_buffer, -+ filter_xd->dst.v_buffer, -+ filter_xd->dst.y_stride, -+ filter_xd->dst.uv_stride); -+ } -+ filter_xd->pre = saved_pre; -+ filter_xd->dst = saved_dst; -+ *mbmi = saved_mbmi; -+ -+ } -+ -+ mv_row = x->best_sse_mv.as_mv.row; -+ mv_col = x->best_sse_mv.as_mv.col; -+ motion_magnitude2 = mv_row * mv_row + mv_col * mv_col; -+ if (best_sse > SSE_THRESHOLD || motion_magnitude2 -+ > 8 * NOISE_MOTION_THRESHOLD) -+ { -+ decision = COPY_BLOCK; -+ } -+ -+ if (decision == FILTER_BLOCK) -+ { -+ /* Filter. */ -+ decision = vp8_denoiser_filter(&denoiser->yv12_mc_running_avg, -+ &denoiser->yv12_running_avg[INTRA_FRAME], -+ x, -+ motion_magnitude2, -+ recon_yoffset, recon_uvoffset); -+ } -+ if (decision == COPY_BLOCK) -+ { -+ /* No filtering of this block; it differs too much from the predictor, -+ * or the motion vector magnitude is considered too big. -+ */ -+ vp8_copy_mem16x16( -+ x->thismb, 16, -+ denoiser->yv12_running_avg[INTRA_FRAME].y_buffer + recon_yoffset, -+ denoiser->yv12_running_avg[INTRA_FRAME].y_stride); -+ } - } -diff --git a/vp8/encoder/denoising.h b/vp8/encoder/denoising.h -index 343531b..b025f5c 100644 ---- a/vp8/encoder/denoising.h -+++ b/vp8/encoder/denoising.h -@@ -13,10 +13,19 @@ - - #include "block.h" - -+#define SUM_DIFF_THRESHOLD (16 * 16 * 2) -+#define MOTION_MAGNITUDE_THRESHOLD (8*3) -+ -+enum vp8_denoiser_decision -+{ -+ COPY_BLOCK, -+ FILTER_BLOCK -+}; -+ - typedef struct vp8_denoiser - { -- YV12_BUFFER_CONFIG yv12_running_avg; -- YV12_BUFFER_CONFIG yv12_mc_running_avg; -+ YV12_BUFFER_CONFIG yv12_running_avg[MAX_REF_FRAMES]; -+ YV12_BUFFER_CONFIG yv12_mc_running_avg; - } VP8_DENOISER; - - int vp8_denoiser_allocate(VP8_DENOISER *denoiser, int width, int height); -@@ -30,4 +39,4 @@ void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser, - int recon_yoffset, - int recon_uvoffset); - --#endif // VP8_ENCODER_DENOISING_H_ -+#endif /* VP8_ENCODER_DENOISING_H_ */ -diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c -index 8233873..d1b647b 100644 ---- a/vp8/encoder/encodeframe.c -+++ b/vp8/encoder/encodeframe.c -@@ -33,7 +33,7 @@ - #endif - #include "encodeframe.h" - --extern void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) ; -+extern void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t) ; - extern void vp8_calc_ref_frame_costs(int *ref_frame_cost, - int prob_intra, - int prob_last, -@@ -45,7 +45,6 @@ extern void vp8_auto_select_speed(VP8_COMP *cpi); - extern void vp8cx_init_mbrthread_data(VP8_COMP *cpi, - MACROBLOCK *x, - MB_ROW_COMP *mbr_ei, -- int mb_row, - int count); - static void adjust_act_zbin( VP8_COMP *cpi, MACROBLOCK *x ); - -@@ -77,7 +76,7 @@ static const unsigned char VP8_VAR_OFFS[16]= - }; - - --// Original activity measure from Tim T's code. -+/* Original activity measure from Tim T's code. */ - static unsigned int tt_activity_measure( VP8_COMP *cpi, MACROBLOCK *x ) - { - unsigned int act; -@@ -100,7 +99,7 @@ static unsigned int tt_activity_measure( VP8_COMP *cpi, MACROBLOCK *x ) - return act; - } - --// Stub for alternative experimental activity measures. -+/* Stub for alternative experimental activity measures. 
*/ - static unsigned int alt_activity_measure( VP8_COMP *cpi, - MACROBLOCK *x, int use_dc_pred ) - { -@@ -108,8 +107,9 @@ static unsigned int alt_activity_measure( VP8_COMP *cpi, - } - - --// Measure the activity of the current macroblock --// What we measure here is TBD so abstracted to this function -+/* Measure the activity of the current macroblock -+ * What we measure here is TBD so abstracted to this function -+ */ - #define ALT_ACT_MEASURE 1 - static unsigned int mb_activity_measure( VP8_COMP *cpi, MACROBLOCK *x, - int mb_row, int mb_col) -@@ -120,12 +120,12 @@ static unsigned int mb_activity_measure( VP8_COMP *cpi, MACROBLOCK *x, - { - int use_dc_pred = (mb_col || mb_row) && (!mb_col || !mb_row); - -- // Or use and alternative. -+ /* Or use and alternative. */ - mb_activity = alt_activity_measure( cpi, x, use_dc_pred ); - } - else - { -- // Original activity measure from Tim T's code. -+ /* Original activity measure from Tim T's code. */ - mb_activity = tt_activity_measure( cpi, x ); - } - -@@ -135,36 +135,36 @@ static unsigned int mb_activity_measure( VP8_COMP *cpi, MACROBLOCK *x, - return mb_activity; - } - --// Calculate an "average" mb activity value for the frame -+/* Calculate an "average" mb activity value for the frame */ - #define ACT_MEDIAN 0 - static void calc_av_activity( VP8_COMP *cpi, int64_t activity_sum ) - { - #if ACT_MEDIAN -- // Find median: Simple n^2 algorithm for experimentation -+ /* Find median: Simple n^2 algorithm for experimentation */ - { - unsigned int median; - unsigned int i,j; - unsigned int * sortlist; - unsigned int tmp; - -- // Create a list to sort to -+ /* Create a list to sort to */ - CHECK_MEM_ERROR(sortlist, - vpx_calloc(sizeof(unsigned int), - cpi->common.MBs)); - -- // Copy map to sort list -+ /* Copy map to sort list */ - vpx_memcpy( sortlist, cpi->mb_activity_map, - sizeof(unsigned int) * cpi->common.MBs ); - - -- // Ripple each value down to its correct position -+ /* Ripple each value down to its correct position */ - for ( i = 1; i < cpi->common.MBs; i ++ ) - { - for ( j = i; j > 0; j -- ) - { - if ( sortlist[j] < sortlist[j-1] ) - { -- // Swap values -+ /* Swap values */ - tmp = sortlist[j-1]; - sortlist[j-1] = sortlist[j]; - sortlist[j] = tmp; -@@ -174,7 +174,7 @@ static void calc_av_activity( VP8_COMP *cpi, int64_t activity_sum ) - } - } - -- // Even number MBs so estimate median as mean of two either side. -+ /* Even number MBs so estimate median as mean of two either side. 
*/ - median = ( 1 + sortlist[cpi->common.MBs >> 1] + - sortlist[(cpi->common.MBs >> 1) + 1] ) >> 1; - -@@ -183,14 +183,14 @@ static void calc_av_activity( VP8_COMP *cpi, int64_t activity_sum ) - vpx_free(sortlist); - } - #else -- // Simple mean for now -+ /* Simple mean for now */ - cpi->activity_avg = (unsigned int)(activity_sum/cpi->common.MBs); - #endif - - if (cpi->activity_avg < VP8_ACTIVITY_AVG_MIN) - cpi->activity_avg = VP8_ACTIVITY_AVG_MIN; - -- // Experimental code: return fixed value normalized for several clips -+ /* Experimental code: return fixed value normalized for several clips */ - if ( ALT_ACT_MEASURE ) - cpi->activity_avg = 100000; - } -@@ -199,7 +199,7 @@ static void calc_av_activity( VP8_COMP *cpi, int64_t activity_sum ) - #define OUTPUT_NORM_ACT_STATS 0 - - #if USE_ACT_INDEX --// Calculate and activity index for each mb -+/* Calculate and activity index for each mb */ - static void calc_activity_index( VP8_COMP *cpi, MACROBLOCK *x ) - { - VP8_COMMON *const cm = & cpi->common; -@@ -214,19 +214,19 @@ static void calc_activity_index( VP8_COMP *cpi, MACROBLOCK *x ) - fprintf(f, "\n%12d\n", cpi->activity_avg ); - #endif - -- // Reset pointers to start of activity map -+ /* Reset pointers to start of activity map */ - x->mb_activity_ptr = cpi->mb_activity_map; - -- // Calculate normalized mb activity number. -+ /* Calculate normalized mb activity number. */ - for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) - { -- // for each macroblock col in image -+ /* for each macroblock col in image */ - for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) - { -- // Read activity from the map -+ /* Read activity from the map */ - act = *(x->mb_activity_ptr); - -- // Calculate a normalized activity number -+ /* Calculate a normalized activity number */ - a = act + 4*cpi->activity_avg; - b = 4*act + cpi->activity_avg; - -@@ -238,7 +238,7 @@ static void calc_activity_index( VP8_COMP *cpi, MACROBLOCK *x ) - #if OUTPUT_NORM_ACT_STATS - fprintf(f, " %6d", *(x->mb_activity_ptr)); - #endif -- // Increment activity map pointers -+ /* Increment activity map pointers */ - x->mb_activity_ptr++; - } - -@@ -255,8 +255,9 @@ static void calc_activity_index( VP8_COMP *cpi, MACROBLOCK *x ) - } - #endif - --// Loop through all MBs. Note activity of each, average activity and --// calculate a normalized activity for each -+/* Loop through all MBs. 
Note activity of each, average activity and -+ * calculate a normalized activity for each -+ */ - static void build_activity_map( VP8_COMP *cpi ) - { - MACROBLOCK *const x = & cpi->mb; -@@ -273,15 +274,15 @@ static void build_activity_map( VP8_COMP *cpi ) - unsigned int mb_activity; - int64_t activity_sum = 0; - -- // for each macroblock row in image -+ /* for each macroblock row in image */ - for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) - { - #if ALT_ACT_MEASURE -- // reset above block coeffs -+ /* reset above block coeffs */ - xd->up_available = (mb_row != 0); - recon_yoffset = (mb_row * recon_y_stride * 16); - #endif -- // for each macroblock col in image -+ /* for each macroblock col in image */ - for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) - { - #if ALT_ACT_MEASURE -@@ -289,48 +290,48 @@ static void build_activity_map( VP8_COMP *cpi ) - xd->left_available = (mb_col != 0); - recon_yoffset += 16; - #endif -- //Copy current mb to a buffer -+ /* Copy current mb to a buffer */ - vp8_copy_mem16x16(x->src.y_buffer, x->src.y_stride, x->thismb, 16); - -- // measure activity -+ /* measure activity */ - mb_activity = mb_activity_measure( cpi, x, mb_row, mb_col ); - -- // Keep frame sum -+ /* Keep frame sum */ - activity_sum += mb_activity; - -- // Store MB level activity details. -+ /* Store MB level activity details. */ - *x->mb_activity_ptr = mb_activity; - -- // Increment activity map pointer -+ /* Increment activity map pointer */ - x->mb_activity_ptr++; - -- // adjust to the next column of source macroblocks -+ /* adjust to the next column of source macroblocks */ - x->src.y_buffer += 16; - } - - -- // adjust to the next row of mbs -+ /* adjust to the next row of mbs */ - x->src.y_buffer += 16 * x->src.y_stride - 16 * cm->mb_cols; - - #if ALT_ACT_MEASURE -- //extend the recon for intra prediction -+ /* extend the recon for intra prediction */ - vp8_extend_mb_row(new_yv12, xd->dst.y_buffer + 16, - xd->dst.u_buffer + 8, xd->dst.v_buffer + 8); - #endif - - } - -- // Calculate an "average" MB activity -+ /* Calculate an "average" MB activity */ - calc_av_activity(cpi, activity_sum); - - #if USE_ACT_INDEX -- // Calculate an activity index number of each mb -+ /* Calculate an activity index number of each mb */ - calc_activity_index( cpi, x ); - #endif - - } - --// Macroblock activity masking -+/* Macroblock activity masking */ - void vp8_activity_masking(VP8_COMP *cpi, MACROBLOCK *x) - { - #if USE_ACT_INDEX -@@ -342,7 +343,7 @@ void vp8_activity_masking(VP8_COMP *cpi, MACROBLOCK *x) - int64_t b; - int64_t act = *(x->mb_activity_ptr); - -- // Apply the masking to the RD multiplier. -+ /* Apply the masking to the RD multiplier. 
*/ - a = act + (2*cpi->activity_avg); - b = (2*act) + cpi->activity_avg; - -@@ -351,7 +352,7 @@ void vp8_activity_masking(VP8_COMP *cpi, MACROBLOCK *x) - x->errorperbit += (x->errorperbit==0); - #endif - -- // Activity based Zbin adjustment -+ /* Activity based Zbin adjustment */ - adjust_act_zbin(cpi, x); - } - -@@ -398,7 +399,7 @@ void encode_mb_row(VP8_COMP *cpi, - w = &cpi->bc[1]; - #endif - -- // reset above block coeffs -+ /* reset above block coeffs */ - xd->above_context = cm->above_context; - - xd->up_available = (mb_row != 0); -@@ -406,37 +407,41 @@ void encode_mb_row(VP8_COMP *cpi, - recon_uvoffset = (mb_row * recon_uv_stride * 8); - - cpi->tplist[mb_row].start = *tp; -- //printf("Main mb_row = %d\n", mb_row); -+ /* printf("Main mb_row = %d\n", mb_row); */ - -- // Distance of Mb to the top & bottom edges, specified in 1/8th pel -- // units as they are always compared to values that are in 1/8th pel units -+ /* Distance of Mb to the top & bottom edges, specified in 1/8th pel -+ * units as they are always compared to values that are in 1/8th pel -+ */ - xd->mb_to_top_edge = -((mb_row * 16) << 3); - xd->mb_to_bottom_edge = ((cm->mb_rows - 1 - mb_row) * 16) << 3; - -- // Set up limit values for vertical motion vector components -- // to prevent them extending beyond the UMV borders -+ /* Set up limit values for vertical motion vector components -+ * to prevent them extending beyond the UMV borders -+ */ - x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16)); - x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16) - + (VP8BORDERINPIXELS - 16); - -- // Set the mb activity pointer to the start of the row. -+ /* Set the mb activity pointer to the start of the row. */ - x->mb_activity_ptr = &cpi->mb_activity_map[map_index]; - -- // for each macroblock col in image -+ /* for each macroblock col in image */ - for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) - { - - #if (CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING) - *tp = cpi->tok; - #endif -- // Distance of Mb to the left & right edges, specified in -- // 1/8th pel units as they are always compared to values -- // that are in 1/8th pel units -+ /* Distance of Mb to the left & right edges, specified in -+ * 1/8th pel units as they are always compared to values -+ * that are in 1/8th pel units -+ */ - xd->mb_to_left_edge = -((mb_col * 16) << 3); - xd->mb_to_right_edge = ((cm->mb_cols - 1 - mb_col) * 16) << 3; - -- // Set up limit values for horizontal motion vector components -- // to prevent them extending beyond the UMV borders -+ /* Set up limit values for horizontal motion vector components -+ * to prevent them extending beyond the UMV borders -+ */ - x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16)); - x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16) - + (VP8BORDERINPIXELS - 16); -@@ -449,13 +454,13 @@ void encode_mb_row(VP8_COMP *cpi, - x->rddiv = cpi->RDDIV; - x->rdmult = cpi->RDMULT; - -- //Copy current mb to a buffer -+ /* Copy current mb to a buffer */ - vp8_copy_mem16x16(x->src.y_buffer, x->src.y_stride, x->thismb, 16); - - #if CONFIG_MULTITHREAD - if (cpi->b_multi_threaded != 0) - { -- *current_mb_col = mb_col - 1; // set previous MB done -+ *current_mb_col = mb_col - 1; /* set previous MB done */ - - if ((mb_col & (nsync - 1)) == 0) - { -@@ -471,11 +476,13 @@ void encode_mb_row(VP8_COMP *cpi, - if(cpi->oxcf.tuning == VP8_TUNE_SSIM) - vp8_activity_masking(cpi, x); - -- // Is segmentation enabled -- // MB level adjustment to quantizer -+ /* Is segmentation enabled */ -+ /* MB level adjustment to quantizer */ - 
if (xd->segmentation_enabled) - { -- // Code to set segment id in xd->mbmi.segment_id for current MB (with range checking) -+ /* Code to set segment id in xd->mbmi.segment_id for current MB -+ * (with range checking) -+ */ - if (cpi->segmentation_map[map_index+mb_col] <= 3) - xd->mode_info_context->mbmi.segment_id = cpi->segmentation_map[map_index+mb_col]; - else -@@ -484,7 +491,8 @@ void encode_mb_row(VP8_COMP *cpi, - vp8cx_mb_init_quantizer(cpi, x, 1); - } - else -- xd->mode_info_context->mbmi.segment_id = 0; // Set to Segment 0 by default -+ /* Set to Segment 0 by default */ -+ xd->mode_info_context->mbmi.segment_id = 0; - - x->active_ptr = cpi->active_map + map_index + mb_col; - -@@ -514,21 +522,25 @@ void encode_mb_row(VP8_COMP *cpi, - - #endif - -- // Count of last ref frame 0,0 usage -- if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)) -- cpi->inter_zz_count ++; -- -- // Special case code for cyclic refresh -- // If cyclic update enabled then copy xd->mbmi.segment_id; (which may have been updated based on mode -- // during vp8cx_encode_inter_macroblock()) back into the global segmentation map -+ /* Special case code for cyclic refresh -+ * If cyclic update enabled then copy xd->mbmi.segment_id; (which -+ * may have been updated based on mode during -+ * vp8cx_encode_inter_macroblock()) back into the global -+ * segmentation map -+ */ - if ((cpi->current_layer == 0) && -- (cpi->cyclic_refresh_mode_enabled && xd->segmentation_enabled)) -+ (cpi->cyclic_refresh_mode_enabled && -+ xd->segmentation_enabled)) - { - cpi->segmentation_map[map_index+mb_col] = xd->mode_info_context->mbmi.segment_id; - -- // If the block has been refreshed mark it as clean (the magnitude of the -ve influences how long it will be before we consider another refresh): -- // Else if it was coded (last frame 0,0) and has not already been refreshed then mark it as a candidate for cleanup next time (marked 0) -- // else mark it as dirty (1). -+ /* If the block has been refreshed mark it as clean (the -+ * magnitude of the -ve influences how long it will be before -+ * we consider another refresh): -+ * Else if it was coded (last frame 0,0) and has not already -+ * been refreshed then mark it as a candidate for cleanup -+ * next time (marked 0) else mark it as dirty (1). -+ */ - if (xd->mode_info_context->mbmi.segment_id) - cpi->cyclic_refresh_map[map_index+mb_col] = -1; - else if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)) -@@ -551,13 +563,13 @@ void encode_mb_row(VP8_COMP *cpi, - pack_tokens(w, tp_start, tok_count); - } - #endif -- // Increment pointer into gf usage flags structure. -+ /* Increment pointer into gf usage flags structure. */ - x->gf_active_ptr++; - -- // Increment the activity mask pointers. -+ /* Increment the activity mask pointers. 
*/ - x->mb_activity_ptr++; - -- // adjust to the next column of macroblocks -+ /* adjust to the next column of macroblocks */ - x->src.y_buffer += 16; - x->src.u_buffer += 8; - x->src.v_buffer += 8; -@@ -565,16 +577,16 @@ void encode_mb_row(VP8_COMP *cpi, - recon_yoffset += 16; - recon_uvoffset += 8; - -- // Keep track of segment usage -+ /* Keep track of segment usage */ - segment_counts[xd->mode_info_context->mbmi.segment_id] ++; - -- // skip to next mb -+ /* skip to next mb */ - xd->mode_info_context++; - x->partition_info++; - xd->above_context++; - } - -- //extend the recon for intra prediction -+ /* extend the recon for intra prediction */ - vp8_extend_mb_row( &cm->yv12_fb[dst_fb_idx], - xd->dst.y_buffer + 16, - xd->dst.u_buffer + 8, -@@ -585,7 +597,7 @@ void encode_mb_row(VP8_COMP *cpi, - *current_mb_col = rightmost_col; - #endif - -- // this is to account for the border -+ /* this is to account for the border */ - xd->mode_info_context++; - x->partition_info++; - } -@@ -596,10 +608,10 @@ static void init_encode_frame_mb_context(VP8_COMP *cpi) - VP8_COMMON *const cm = & cpi->common; - MACROBLOCKD *const xd = & x->e_mbd; - -- // GF active flags data structure -+ /* GF active flags data structure */ - x->gf_active_ptr = (signed char *)cpi->gf_active_flags; - -- // Activity map pointer -+ /* Activity map pointer */ - x->mb_activity_ptr = cpi->mb_activity_map; - - x->act_zbin_adj = 0; -@@ -611,48 +623,42 @@ static void init_encode_frame_mb_context(VP8_COMP *cpi) - - xd->frame_type = cm->frame_type; - -- // reset intra mode contexts -+ /* reset intra mode contexts */ - if (cm->frame_type == KEY_FRAME) - vp8_init_mbmode_probs(cm); - -- // Copy data over into macro block data structures. -+ /* Copy data over into macro block data structures. */ - x->src = * cpi->Source; - xd->pre = cm->yv12_fb[cm->lst_fb_idx]; - xd->dst = cm->yv12_fb[cm->new_fb_idx]; - -- // set up frame for intra coded blocks -+ /* set up frame for intra coded blocks */ - vp8_setup_intra_recon(&cm->yv12_fb[cm->new_fb_idx]); - - vp8_build_block_offsets(x); - -- vp8_setup_block_dptrs(&x->e_mbd); -- -- vp8_setup_block_ptrs(x); -- - xd->mode_info_context->mbmi.mode = DC_PRED; - xd->mode_info_context->mbmi.uv_mode = DC_PRED; - - xd->left_context = &cm->left_context; - -- vp8_zero(cpi->count_mb_ref_frame_usage) -- vp8_zero(cpi->ymode_count) -- vp8_zero(cpi->uv_mode_count) -- - x->mvc = cm->fc.mvc; - - vpx_memset(cm->above_context, 0, - sizeof(ENTROPY_CONTEXT_PLANES) * cm->mb_cols); - -- // Special case treatment when GF and ARF are not sensible options for reference -- if (cpi->ref_frame_flags == VP8_LAST_FLAG) -+ /* Special case treatment when GF and ARF are not sensible options -+ * for reference -+ */ -+ if (cpi->ref_frame_flags == VP8_LAST_FRAME) - vp8_calc_ref_frame_costs(x->ref_frame_cost, - cpi->prob_intra_coded,255,128); - else if ((cpi->oxcf.number_of_layers > 1) && -- (cpi->ref_frame_flags == VP8_GOLD_FLAG)) -+ (cpi->ref_frame_flags == VP8_GOLD_FRAME)) - vp8_calc_ref_frame_costs(x->ref_frame_cost, - cpi->prob_intra_coded,1,255); - else if ((cpi->oxcf.number_of_layers > 1) && -- (cpi->ref_frame_flags == VP8_ALT_FLAG)) -+ (cpi->ref_frame_flags == VP8_ALTR_FRAME)) - vp8_calc_ref_frame_costs(x->ref_frame_cost, - cpi->prob_intra_coded,1,1); - else -@@ -664,6 +670,43 @@ static void init_encode_frame_mb_context(VP8_COMP *cpi) - xd->fullpixel_mask = 0xffffffff; - if(cm->full_pixel) - xd->fullpixel_mask = 0xfffffff8; -+ -+ vp8_zero(x->coef_counts); -+ vp8_zero(x->ymode_count); -+ vp8_zero(x->uv_mode_count) -+ 
x->prediction_error = 0; -+ x->intra_error = 0; -+ vp8_zero(x->count_mb_ref_frame_usage); -+} -+ -+static void sum_coef_counts(MACROBLOCK *x, MACROBLOCK *x_thread) -+{ -+ int i = 0; -+ do -+ { -+ int j = 0; -+ do -+ { -+ int k = 0; -+ do -+ { -+ /* at every context */ -+ -+ /* calc probs and branch cts for this frame only */ -+ int t = 0; /* token/prob index */ -+ -+ do -+ { -+ x->coef_counts [i][j][k][t] += -+ x_thread->coef_counts [i][j][k][t]; -+ } -+ while (++t < ENTROPY_NODES); -+ } -+ while (++k < PREV_COEF_CONTEXTS); -+ } -+ while (++j < COEF_BANDS); -+ } -+ while (++i < BLOCK_TYPES); - } - - void vp8_encode_frame(VP8_COMP *cpi) -@@ -676,7 +719,7 @@ void vp8_encode_frame(VP8_COMP *cpi) - int segment_counts[MAX_MB_SEGMENTS]; - int totalrate; - #if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING -- BOOL_CODER * bc = &cpi->bc[1]; // bc[0] is for control partition -+ BOOL_CODER * bc = &cpi->bc[1]; /* bc[0] is for control partition */ - const int num_part = (1 << cm->multi_token_partition); - #endif - -@@ -691,8 +734,8 @@ void vp8_encode_frame(VP8_COMP *cpi) - vp8_auto_select_speed(cpi); - } - -- // Functions setup for all frame types so we can use MC in AltRef -- if (cm->mcomp_filter_type == SIXTAP) -+ /* Functions setup for all frame types so we can use MC in AltRef */ -+ if(!cm->use_bilinear_mc_filter) - { - xd->subpixel_predict = vp8_sixtap_predict4x4; - xd->subpixel_predict8x4 = vp8_sixtap_predict8x4; -@@ -707,43 +750,36 @@ void vp8_encode_frame(VP8_COMP *cpi) - xd->subpixel_predict16x16 = vp8_bilinear_predict16x16; - } - -- // Reset frame count of inter 0,0 motion vector usage. -- cpi->inter_zz_count = 0; -- -- cpi->prediction_error = 0; -- cpi->intra_error = 0; -- cpi->skip_true_count = 0; -+ cpi->mb.skip_true_count = 0; - cpi->tok_count = 0; - - #if 0 -- // Experimental code -+ /* Experimental code */ - cpi->frame_distortion = 0; - cpi->last_mb_distortion = 0; - #endif - - xd->mode_info_context = cm->mi; - -- vp8_zero(cpi->MVcount); -- -- vp8_zero(cpi->coef_counts); -+ vp8_zero(cpi->mb.MVcount); - - vp8cx_frame_init_quantizer(cpi); - -- vp8_initialize_rd_consts(cpi, -+ vp8_initialize_rd_consts(cpi, x, - vp8_dc_quant(cm->base_qindex, cm->y1dc_delta_q)); - - vp8cx_initialize_me_consts(cpi, cm->base_qindex); - - if(cpi->oxcf.tuning == VP8_TUNE_SSIM) - { -- // Initialize encode frame context. -+ /* Initialize encode frame context. */ - init_encode_frame_mb_context(cpi); - -- // Build a frame level activity map -+ /* Build a frame level activity map */ - build_activity_map(cpi); - } - -- // re-init encode frame context. -+ /* re-init encode frame context. 
*/ - init_encode_frame_mb_context(cpi); - - #if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING -@@ -768,7 +804,8 @@ void vp8_encode_frame(VP8_COMP *cpi) - { - int i; - -- vp8cx_init_mbrthread_data(cpi, x, cpi->mb_row_ei, 1, cpi->encoding_thread_count); -+ vp8cx_init_mbrthread_data(cpi, x, cpi->mb_row_ei, -+ cpi->encoding_thread_count); - - for (i = 0; i < cm->mb_rows; i++) - cpi->mt_current_mb_col[i] = -1; -@@ -790,7 +827,7 @@ void vp8_encode_frame(VP8_COMP *cpi) - - encode_mb_row(cpi, cm, mb_row, x, xd, &tp, segment_counts, &totalrate); - -- // adjust to the next row of mbs -+ /* adjust to the next row of mbs */ - x->src.y_buffer += 16 * x->src.y_stride * (cpi->encoding_thread_count + 1) - 16 * cm->mb_cols; - x->src.u_buffer += 8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols; - x->src.v_buffer += 8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols; -@@ -809,7 +846,8 @@ void vp8_encode_frame(VP8_COMP *cpi) - - for (mb_row = 0; mb_row < cm->mb_rows; mb_row ++) - { -- cpi->tok_count += cpi->tplist[mb_row].stop - cpi->tplist[mb_row].start; -+ cpi->tok_count += (unsigned int) -+ (cpi->tplist[mb_row].stop - cpi->tplist[mb_row].start); - } - - if (xd->segmentation_enabled) -@@ -829,14 +867,50 @@ void vp8_encode_frame(VP8_COMP *cpi) - - for (i = 0; i < cpi->encoding_thread_count; i++) - { -+ int mode_count; -+ int c_idx; - totalrate += cpi->mb_row_ei[i].totalrate; -+ -+ cpi->mb.skip_true_count += cpi->mb_row_ei[i].mb.skip_true_count; -+ -+ for(mode_count = 0; mode_count < VP8_YMODES; mode_count++) -+ cpi->mb.ymode_count[mode_count] += -+ cpi->mb_row_ei[i].mb.ymode_count[mode_count]; -+ -+ for(mode_count = 0; mode_count < VP8_UV_MODES; mode_count++) -+ cpi->mb.uv_mode_count[mode_count] += -+ cpi->mb_row_ei[i].mb.uv_mode_count[mode_count]; -+ -+ for(c_idx = 0; c_idx < MVvals; c_idx++) -+ { -+ cpi->mb.MVcount[0][c_idx] += -+ cpi->mb_row_ei[i].mb.MVcount[0][c_idx]; -+ cpi->mb.MVcount[1][c_idx] += -+ cpi->mb_row_ei[i].mb.MVcount[1][c_idx]; -+ } -+ -+ cpi->mb.prediction_error += -+ cpi->mb_row_ei[i].mb.prediction_error; -+ cpi->mb.intra_error += cpi->mb_row_ei[i].mb.intra_error; -+ -+ for(c_idx = 0; c_idx < MAX_REF_FRAMES; c_idx++) -+ cpi->mb.count_mb_ref_frame_usage[c_idx] += -+ cpi->mb_row_ei[i].mb.count_mb_ref_frame_usage[c_idx]; -+ -+ for(c_idx = 0; c_idx < MAX_ERROR_BINS; c_idx++) -+ cpi->mb.error_bins[c_idx] += -+ cpi->mb_row_ei[i].mb.error_bins[c_idx]; -+ -+ /* add up counts for each thread */ -+ sum_coef_counts(x, &cpi->mb_row_ei[i].mb); - } - - } - else - #endif - { -- // for each macroblock row in image -+ -+ /* for each macroblock row in image */ - for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) - { - vp8_zero(cm->left_context) -@@ -847,13 +921,13 @@ void vp8_encode_frame(VP8_COMP *cpi) - - encode_mb_row(cpi, cm, mb_row, x, xd, &tp, segment_counts, &totalrate); - -- // adjust to the next row of mbs -+ /* adjust to the next row of mbs */ - x->src.y_buffer += 16 * x->src.y_stride - 16 * cm->mb_cols; - x->src.u_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols; - x->src.v_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols; - } - -- cpi->tok_count = tp - cpi->tok; -+ cpi->tok_count = (unsigned int)(tp - cpi->tok); - } - - #if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING -@@ -873,12 +947,13 @@ void vp8_encode_frame(VP8_COMP *cpi) - - - // Work out the segment probabilities if segmentation is enabled -- if (xd->segmentation_enabled) -+ // and needs to be updated -+ if (xd->segmentation_enabled && xd->update_mb_segmentation_map) - { - int 
tot_count; - int i; - -- // Set to defaults -+ /* Set to defaults */ - vpx_memset(xd->mb_segment_tree_probs, 255 , sizeof(xd->mb_segment_tree_probs)); - - tot_count = segment_counts[0] + segment_counts[1] + segment_counts[2] + segment_counts[3]; -@@ -899,7 +974,7 @@ void vp8_encode_frame(VP8_COMP *cpi) - if (tot_count > 0) - xd->mb_segment_tree_probs[2] = (segment_counts[2] * 255) / tot_count; - -- // Zero probabilities not allowed -+ /* Zero probabilities not allowed */ - for (i = 0; i < MB_FEATURE_TREE_PROBS; i ++) - { - if (xd->mb_segment_tree_probs[i] == 0) -@@ -908,10 +983,10 @@ void vp8_encode_frame(VP8_COMP *cpi) - } - } - -- // 256 rate units to the bit -- cpi->projected_frame_size = totalrate >> 8; // projected_frame_size in units of BYTES -+ /* projected_frame_size in units of BYTES */ -+ cpi->projected_frame_size = totalrate >> 8; - -- // Make a note of the percentage MBs coded Intra. -+ /* Make a note of the percentage MBs coded Intra. */ - if (cm->frame_type == KEY_FRAME) - { - cpi->this_frame_percent_intra = 100; -@@ -920,50 +995,23 @@ void vp8_encode_frame(VP8_COMP *cpi) - { - int tot_modes; - -- tot_modes = cpi->count_mb_ref_frame_usage[INTRA_FRAME] -- + cpi->count_mb_ref_frame_usage[LAST_FRAME] -- + cpi->count_mb_ref_frame_usage[GOLDEN_FRAME] -- + cpi->count_mb_ref_frame_usage[ALTREF_FRAME]; -+ tot_modes = cpi->mb.count_mb_ref_frame_usage[INTRA_FRAME] -+ + cpi->mb.count_mb_ref_frame_usage[LAST_FRAME] -+ + cpi->mb.count_mb_ref_frame_usage[GOLDEN_FRAME] -+ + cpi->mb.count_mb_ref_frame_usage[ALTREF_FRAME]; - - if (tot_modes) -- cpi->this_frame_percent_intra = cpi->count_mb_ref_frame_usage[INTRA_FRAME] * 100 / tot_modes; -- -- } -- --#if 0 -- { -- int cnt = 0; -- int flag[2] = {0, 0}; -- -- for (cnt = 0; cnt < MVPcount; cnt++) -- { -- if (cm->fc.pre_mvc[0][cnt] != cm->fc.mvc[0][cnt]) -- { -- flag[0] = 1; -- vpx_memcpy(cm->fc.pre_mvc[0], cm->fc.mvc[0], MVPcount); -- break; -- } -- } -- -- for (cnt = 0; cnt < MVPcount; cnt++) -- { -- if (cm->fc.pre_mvc[1][cnt] != cm->fc.mvc[1][cnt]) -- { -- flag[1] = 1; -- vpx_memcpy(cm->fc.pre_mvc[1], cm->fc.mvc[1], MVPcount); -- break; -- } -- } -+ cpi->this_frame_percent_intra = -+ cpi->mb.count_mb_ref_frame_usage[INTRA_FRAME] * 100 / tot_modes; - -- if (flag[0] || flag[1]) -- vp8_build_component_cost_table(cpi->mb.mvcost, (const MV_CONTEXT *) cm->fc.mvc, flag); - } --#endif - - #if ! CONFIG_REALTIME_ONLY -- // Adjust the projected reference frame usage probability numbers to reflect -- // what we have just seen. This may be useful when we make multiple iterations -- // of the recode loop rather than continuing to use values from the previous frame. -+ /* Adjust the projected reference frame usage probability numbers to -+ * reflect what we have just seen. This may be useful when we make -+ * multiple iterations of the recode loop rather than continuing to use -+ * values from the previous frame. 
-+ */ - if ((cm->frame_type != KEY_FRAME) && ((cpi->oxcf.number_of_layers > 1) || - (!cm->refresh_alt_ref_frame && !cm->refresh_golden_frame))) - { -@@ -1017,16 +1065,13 @@ void vp8_build_block_offsets(MACROBLOCK *x) - - vp8_build_block_doffsets(&x->e_mbd); - -- // y blocks -+ /* y blocks */ - x->thismb_ptr = &x->thismb[0]; - for (br = 0; br < 4; br++) - { - for (bc = 0; bc < 4; bc++) - { - BLOCK *this_block = &x->block[block]; -- //this_block->base_src = &x->src.y_buffer; -- //this_block->src_stride = x->src.y_stride; -- //this_block->src = 4 * br * this_block->src_stride + 4 * bc; - this_block->base_src = &x->thismb_ptr; - this_block->src_stride = 16; - this_block->src = 4 * br * 16 + 4 * bc; -@@ -1034,7 +1079,7 @@ void vp8_build_block_offsets(MACROBLOCK *x) - } - } - -- // u blocks -+ /* u blocks */ - for (br = 0; br < 2; br++) - { - for (bc = 0; bc < 2; bc++) -@@ -1047,7 +1092,7 @@ void vp8_build_block_offsets(MACROBLOCK *x) - } - } - -- // v blocks -+ /* v blocks */ - for (br = 0; br < 2; br++) - { - for (bc = 0; bc < 2; bc++) -@@ -1087,13 +1132,14 @@ static void sum_intra_stats(VP8_COMP *cpi, MACROBLOCK *x) - - #endif - -- ++cpi->ymode_count[m]; -- ++cpi->uv_mode_count[uvm]; -+ ++x->ymode_count[m]; -+ ++x->uv_mode_count[uvm]; - - } - --// Experimental stub function to create a per MB zbin adjustment based on --// some previously calculated measure of MB activity. -+/* Experimental stub function to create a per MB zbin adjustment based on -+ * some previously calculated measure of MB activity. -+ */ - static void adjust_act_zbin( VP8_COMP *cpi, MACROBLOCK *x ) - { - #if USE_ACT_INDEX -@@ -1103,7 +1149,7 @@ static void adjust_act_zbin( VP8_COMP *cpi, MACROBLOCK *x ) - int64_t b; - int64_t act = *(x->mb_activity_ptr); - -- // Apply the masking to the RD multiplier. -+ /* Apply the masking to the RD multiplier. */ - a = act + 4*cpi->activity_avg; - b = 4*act + cpi->activity_avg; - -@@ -1114,15 +1160,16 @@ static void adjust_act_zbin( VP8_COMP *cpi, MACROBLOCK *x ) - #endif - } - --int vp8cx_encode_intra_macroblock(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t) -+int vp8cx_encode_intra_macroblock(VP8_COMP *cpi, MACROBLOCK *x, -+ TOKENEXTRA **t) - { - MACROBLOCKD *xd = &x->e_mbd; - int rate; - - if (cpi->sf.RD && cpi->compressor_speed != 2) -- vp8_rd_pick_intra_mode(cpi, x, &rate); -+ vp8_rd_pick_intra_mode(x, &rate); - else -- vp8_pick_intra_mode(cpi, x, &rate); -+ vp8_pick_intra_mode(x, &rate); - - if(cpi->oxcf.tuning == VP8_TUNE_SSIM) - { -@@ -1139,7 +1186,7 @@ int vp8cx_encode_intra_macroblock(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t) - - sum_intra_stats(cpi, x); - -- vp8_tokenize_mb(cpi, &x->e_mbd, t); -+ vp8_tokenize_mb(cpi, x, t); - - if (xd->mode_info_context->mbmi.mode != B_PRED) - vp8_inverse_transform_mby(xd); -@@ -1176,25 +1223,27 @@ int vp8cx_encode_inter_macroblock - x->encode_breakout = cpi->oxcf.encode_breakout; - - #if CONFIG_TEMPORAL_DENOISING -- // Reset the best sse mode/mv for each macroblock. -- x->e_mbd.best_sse_inter_mode = 0; -- x->e_mbd.best_sse_mv.as_int = 0; -- x->e_mbd.need_to_clamp_best_mvs = 0; -+ /* Reset the best sse mode/mv for each macroblock. */ -+ x->best_reference_frame = INTRA_FRAME; -+ x->best_zeromv_reference_frame = INTRA_FRAME; -+ x->best_sse_inter_mode = 0; -+ x->best_sse_mv.as_int = 0; -+ x->need_to_clamp_best_mvs = 0; - #endif - - if (cpi->sf.RD) - { -- int zbin_mode_boost_enabled = cpi->zbin_mode_boost_enabled; -+ int zbin_mode_boost_enabled = x->zbin_mode_boost_enabled; - - /* Are we using the fast quantizer for the mode selection? 
*/ - if(cpi->sf.use_fastquant_for_pick) - { -- cpi->mb.quantize_b = vp8_fast_quantize_b; -- cpi->mb.quantize_b_pair = vp8_fast_quantize_b_pair; -+ x->quantize_b = vp8_fast_quantize_b; -+ x->quantize_b_pair = vp8_fast_quantize_b_pair; - - /* the fast quantizer does not use zbin_extra, so - * do not recalculate */ -- cpi->zbin_mode_boost_enabled = 0; -+ x->zbin_mode_boost_enabled = 0; - } - vp8_rd_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate, - &distortion, &intra_error); -@@ -1202,12 +1251,12 @@ int vp8cx_encode_inter_macroblock - /* switch back to the regular quantizer for the encode */ - if (cpi->sf.improved_quant) - { -- cpi->mb.quantize_b = vp8_regular_quantize_b; -- cpi->mb.quantize_b_pair = vp8_regular_quantize_b_pair; -+ x->quantize_b = vp8_regular_quantize_b; -+ x->quantize_b_pair = vp8_regular_quantize_b_pair; - } - - /* restore cpi->zbin_mode_boost_enabled */ -- cpi->zbin_mode_boost_enabled = zbin_mode_boost_enabled; -+ x->zbin_mode_boost_enabled = zbin_mode_boost_enabled; - - } - else -@@ -1216,28 +1265,28 @@ int vp8cx_encode_inter_macroblock - &distortion, &intra_error, mb_row, mb_col); - } - -- cpi->prediction_error += distortion; -- cpi->intra_error += intra_error; -+ x->prediction_error += distortion; -+ x->intra_error += intra_error; - - if(cpi->oxcf.tuning == VP8_TUNE_SSIM) - { -- // Adjust the zbin based on this MB rate. -+ /* Adjust the zbin based on this MB rate. */ - adjust_act_zbin( cpi, x ); - } - - #if 0 -- // Experimental RD code -+ /* Experimental RD code */ - cpi->frame_distortion += distortion; - cpi->last_mb_distortion = distortion; - #endif - -- // MB level adjutment to quantizer setup -+ /* MB level adjutment to quantizer setup */ - if (xd->segmentation_enabled) - { -- // If cyclic update enabled -+ /* If cyclic update enabled */ - if (cpi->current_layer == 0 && cpi->cyclic_refresh_mode_enabled) - { -- // Clear segment_id back to 0 if not coded (last frame 0,0) -+ /* Clear segment_id back to 0 if not coded (last frame 0,0) */ - if ((xd->mode_info_context->mbmi.segment_id == 1) && - ((xd->mode_info_context->mbmi.ref_frame != LAST_FRAME) || (xd->mode_info_context->mbmi.mode != ZEROMV))) - { -@@ -1250,24 +1299,25 @@ int vp8cx_encode_inter_macroblock - } - - { -- // Experimental code. Special case for gf and arf zeromv modes. -- // Increase zbin size to supress noise -- cpi->zbin_mode_boost = 0; -- if (cpi->zbin_mode_boost_enabled) -+ /* Experimental code. Special case for gf and arf zeromv modes. 
-+ * Increase zbin size to supress noise -+ */ -+ x->zbin_mode_boost = 0; -+ if (x->zbin_mode_boost_enabled) - { - if ( xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME ) - { - if (xd->mode_info_context->mbmi.mode == ZEROMV) - { - if (xd->mode_info_context->mbmi.ref_frame != LAST_FRAME) -- cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST; -+ x->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST; - else -- cpi->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST; -+ x->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST; - } - else if (xd->mode_info_context->mbmi.mode == SPLITMV) -- cpi->zbin_mode_boost = 0; -+ x->zbin_mode_boost = 0; - else -- cpi->zbin_mode_boost = MV_ZBIN_BOOST; -+ x->zbin_mode_boost = MV_ZBIN_BOOST; - } - } - -@@ -1277,7 +1327,7 @@ int vp8cx_encode_inter_macroblock - vp8_update_zbin_extra(cpi, x); - } - -- cpi->count_mb_ref_frame_usage[xd->mode_info_context->mbmi.ref_frame] ++; -+ x->count_mb_ref_frame_usage[xd->mode_info_context->mbmi.ref_frame] ++; - - if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) - { -@@ -1322,7 +1372,7 @@ int vp8cx_encode_inter_macroblock - - if (!x->skip) - { -- vp8_tokenize_mb(cpi, xd, t); -+ vp8_tokenize_mb(cpi, x, t); - - if (xd->mode_info_context->mbmi.mode != B_PRED) - vp8_inverse_transform_mby(xd); -@@ -1339,12 +1389,12 @@ int vp8cx_encode_inter_macroblock - - if (cpi->common.mb_no_coeff_skip) - { -- cpi->skip_true_count ++; -+ x->skip_true_count ++; - vp8_fix_contexts(xd); - } - else - { -- vp8_stuff_mb(cpi, xd, t); -+ vp8_stuff_mb(cpi, x, t); - } - } - -diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c -index 1f445b7..340dd63 100644 ---- a/vp8/encoder/encodeintra.c -+++ b/vp8/encoder/encodeintra.c -@@ -54,10 +54,13 @@ void vp8_encode_intra4x4block(MACROBLOCK *x, int ib) - BLOCKD *b = &x->e_mbd.block[ib]; - BLOCK *be = &x->block[ib]; - int dst_stride = x->e_mbd.dst.y_stride; -- unsigned char *base_dst = x->e_mbd.dst.y_buffer; -+ unsigned char *dst = x->e_mbd.dst.y_buffer + b->offset; -+ unsigned char *Above = dst - dst_stride; -+ unsigned char *yleft = dst - 1; -+ unsigned char top_left = Above[-1]; - -- vp8_intra4x4_predict(base_dst + b->offset, dst_stride, -- b->bmi.as_mode, b->predictor, 16); -+ vp8_intra4x4_predict(Above, yleft, dst_stride, b->bmi.as_mode, -+ b->predictor, 16, top_left); - - vp8_subtract_b(be, b, 16); - -@@ -67,14 +70,11 @@ void vp8_encode_intra4x4block(MACROBLOCK *x, int ib) - - if (*b->eob > 1) - { -- vp8_short_idct4x4llm(b->dqcoeff, -- b->predictor, 16, base_dst + b->offset, dst_stride); -+ vp8_short_idct4x4llm(b->dqcoeff, b->predictor, 16, dst, dst_stride); - } - else - { -- vp8_dc_only_idct_add -- (b->dqcoeff[0], b->predictor, 16, base_dst + b->offset, -- dst_stride); -+ vp8_dc_only_idct_add(b->dqcoeff[0], b->predictor, 16, dst, dst_stride); - } - } - -diff --git a/vp8/encoder/encodemb.c b/vp8/encoder/encodemb.c -index f89e4f7..7d494f2 100644 ---- a/vp8/encoder/encodemb.c -+++ b/vp8/encoder/encodemb.c -@@ -137,10 +137,10 @@ void vp8_transform_intra_mby(MACROBLOCK *x) - &x->block[i].coeff[0], 32); - } - -- // build dc block from 16 y dc values -+ /* build dc block from 16 y dc values */ - build_dcblock(x); - -- // do 2nd order transform on the dc block -+ /* do 2nd order transform on the dc block */ - x->short_walsh4x4(&x->block[24].src_diff[0], - &x->block[24].coeff[0], 8); - -@@ -157,7 +157,7 @@ static void transform_mb(MACROBLOCK *x) - &x->block[i].coeff[0], 32); - } - -- // build dc block from 16 y dc values -+ /* build dc block from 16 y dc values */ - if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV) - 
build_dcblock(x); - -@@ -167,7 +167,7 @@ static void transform_mb(MACROBLOCK *x) - &x->block[i].coeff[0], 16); - } - -- // do 2nd order transform on the dc block -+ /* do 2nd order transform on the dc block */ - if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV) - x->short_walsh4x4(&x->block[24].src_diff[0], - &x->block[24].coeff[0], 8); -@@ -185,7 +185,7 @@ static void transform_mby(MACROBLOCK *x) - &x->block[i].coeff[0], 32); - } - -- // build dc block from 16 y dc values -+ /* build dc block from 16 y dc values */ - if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV) - { - build_dcblock(x); -@@ -208,7 +208,7 @@ struct vp8_token_state{ - short qc; - }; - --// TODO: experiments to find optimal multiple numbers -+/* TODO: experiments to find optimal multiple numbers */ - #define Y1_RD_MULT 4 - #define UV_RD_MULT 2 - #define Y2_RD_MULT 16 -diff --git a/vp8/encoder/encodemv.c b/vp8/encoder/encodemv.c -index 0145f6d..0c43d06 100644 ---- a/vp8/encoder/encodemv.c -+++ b/vp8/encoder/encodemv.c -@@ -29,15 +29,15 @@ static void encode_mvcomponent( - const vp8_prob *p = mvc->prob; - const int x = v < 0 ? -v : v; - -- if (x < mvnum_short) // Small -+ if (x < mvnum_short) /* Small */ - { - vp8_write(w, 0, p [mvpis_short]); - vp8_treed_write(w, vp8_small_mvtree, p + MVPshort, x, 3); - - if (!x) -- return; // no sign bit -+ return; /* no sign bit */ - } -- else // Large -+ else /* Large */ - { - int i = 0; - -@@ -100,7 +100,7 @@ void vp8_encode_motion_vector(vp8_writer *w, const MV *mv, const MV_CONTEXT *mvc - static unsigned int cost_mvcomponent(const int v, const struct mv_context *mvc) - { - const vp8_prob *p = mvc->prob; -- const int x = v; //v<0? -v:v; -+ const int x = v; - unsigned int cost; - - if (x < mvnum_short) -@@ -132,12 +132,12 @@ static unsigned int cost_mvcomponent(const int v, const struct mv_context *mvc) - cost += vp8_cost_bit(p [MVPbits + 3], (x >> 3) & 1); - } - -- return cost; // + vp8_cost_bit( p [MVPsign], v < 0); -+ return cost; /* + vp8_cost_bit( p [MVPsign], v < 0); */ - } - - void vp8_build_component_cost_table(int *mvcost[2], const MV_CONTEXT *mvc, int mvc_flag[2]) - { -- int i = 1; //-mv_max; -+ int i = 1; - unsigned int cost0 = 0; - unsigned int cost1 = 0; - -@@ -151,7 +151,6 @@ void vp8_build_component_cost_table(int *mvcost[2], const MV_CONTEXT *mvc, int m - - do - { -- //mvcost [0] [i] = cost_mvcomponent( i, &mvc[0]); - cost0 = cost_mvcomponent(i, &mvc[0]); - - mvcost [0] [i] = cost0 + vp8_cost_zero(mvc[0].prob[MVPsign]); -@@ -168,7 +167,6 @@ void vp8_build_component_cost_table(int *mvcost[2], const MV_CONTEXT *mvc, int m - - do - { -- //mvcost [1] [i] = cost_mvcomponent( i, mvc[1]); - cost1 = cost_mvcomponent(i, &mvc[1]); - - mvcost [1] [i] = cost1 + vp8_cost_zero(mvc[1].prob[MVPsign]); -@@ -179,10 +177,10 @@ void vp8_build_component_cost_table(int *mvcost[2], const MV_CONTEXT *mvc, int m - } - - --// Motion vector probability table update depends on benefit. --// Small correction allows for the fact that an update to an MV probability --// may have benefit in subsequent frames as well as the current one. -- -+/* Motion vector probability table update depends on benefit. -+ * Small correction allows for the fact that an update to an MV probability -+ * may have benefit in subsequent frames as well as the current one. 
-+ */ - #define MV_PROB_UPDATE_CORRECTION -1 - - -@@ -254,22 +252,22 @@ static void write_component_probs( - vp8_zero(short_bct) - - -- //j=0 -+ /* j=0 */ - { - const int c = events [mv_max]; - -- is_short_ct [0] += c; // Short vector -- short_ct [0] += c; // Magnitude distribution -+ is_short_ct [0] += c; /* Short vector */ -+ short_ct [0] += c; /* Magnitude distribution */ - } - -- //j: 1 ~ mv_max (1023) -+ /* j: 1 ~ mv_max (1023) */ - { - int j = 1; - - do - { -- const int c1 = events [mv_max + j]; //positive -- const int c2 = events [mv_max - j]; //negative -+ const int c1 = events [mv_max + j]; /* positive */ -+ const int c2 = events [mv_max - j]; /* negative */ - const int c = c1 + c2; - int a = j; - -@@ -278,13 +276,13 @@ static void write_component_probs( - - if (a < mvnum_short) - { -- is_short_ct [0] += c; // Short vector -- short_ct [a] += c; // Magnitude distribution -+ is_short_ct [0] += c; /* Short vector */ -+ short_ct [a] += c; /* Magnitude distribution */ - } - else - { - int k = mvlong_width - 1; -- is_short_ct [1] += c; // Long vector -+ is_short_ct [1] += c; /* Long vector */ - - /* bit 3 not always encoded. */ - do -@@ -296,43 +294,6 @@ static void write_component_probs( - while (++j <= mv_max); - } - -- /* -- { -- int j = -mv_max; -- do -- { -- -- const int c = events [mv_max + j]; -- int a = j; -- -- if( j < 0) -- { -- sign_ct [1] += c; -- a = -j; -- } -- else if( j) -- sign_ct [0] += c; -- -- if( a < mvnum_short) -- { -- is_short_ct [0] += c; // Short vector -- short_ct [a] += c; // Magnitude distribution -- } -- else -- { -- int k = mvlong_width - 1; -- is_short_ct [1] += c; // Long vector -- -- // bit 3 not always encoded. -- -- do -- bit_ct [k] [(a >> k) & 1] += c; -- while( --k >= 0); -- } -- } while( ++j <= mv_max); -- } -- */ -- - calc_prob(Pnew + mvpis_short, is_short_ct); - - calc_prob(Pnew + MVPsign, sign_ct); -@@ -402,10 +363,12 @@ void vp8_write_mvprobs(VP8_COMP *cpi) - active_section = 4; - #endif - write_component_probs( -- w, &mvc[0], &vp8_default_mv_context[0], &vp8_mv_update_probs[0], cpi->MVcount[0], 0, &flags[0] -+ w, &mvc[0], &vp8_default_mv_context[0], &vp8_mv_update_probs[0], -+ cpi->mb.MVcount[0], 0, &flags[0] - ); - write_component_probs( -- w, &mvc[1], &vp8_default_mv_context[1], &vp8_mv_update_probs[1], cpi->MVcount[1], 1, &flags[1] -+ w, &mvc[1], &vp8_default_mv_context[1], &vp8_mv_update_probs[1], -+ cpi->mb.MVcount[1], 1, &flags[1] - ); - - if (flags[0] || flags[1]) -diff --git a/vp8/encoder/ethreading.c b/vp8/encoder/ethreading.c -index 2a2cb2f..d4b17ce 100644 ---- a/vp8/encoder/ethreading.c -+++ b/vp8/encoder/ethreading.c -@@ -17,12 +17,6 @@ - - #if CONFIG_MULTITHREAD - --extern int vp8cx_encode_inter_macroblock(VP8_COMP *cpi, MACROBLOCK *x, -- TOKENEXTRA **t, -- int recon_yoffset, int recon_uvoffset, -- int mb_row, int mb_col); --extern int vp8cx_encode_intra_macroblock(VP8_COMP *cpi, MACROBLOCK *x, -- TOKENEXTRA **t); - extern void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip); - - extern void vp8_loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm); -@@ -39,7 +33,7 @@ static THREAD_FUNCTION thread_loopfilter(void *p_data) - - if (sem_wait(&cpi->h_event_start_lpf) == 0) - { -- if (cpi->b_multi_threaded == 0) // we're shutting down -+ if (cpi->b_multi_threaded == 0) /* we're shutting down */ - break; - - vp8_loopfilter_frame(cpi, cm); -@@ -59,17 +53,14 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) - MB_ROW_COMP *mbri = (MB_ROW_COMP *)(((ENCODETHREAD_DATA *)p_data)->ptr2); - ENTROPY_CONTEXT_PLANES 
mb_row_left_context; - -- const int nsync = cpi->mt_sync_range; -- //printf("Started thread %d\n", ithread); -- - while (1) - { - if (cpi->b_multi_threaded == 0) - break; - -- //if(WaitForSingleObject(cpi->h_event_mbrencoding[ithread], INFINITE) == WAIT_OBJECT_0) - if (sem_wait(&cpi->h_event_start_encoding[ithread]) == 0) - { -+ const int nsync = cpi->mt_sync_range; - VP8_COMMON *cm = &cpi->common; - int mb_row; - MACROBLOCK *x = &mbri->mb; -@@ -83,7 +74,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) - int *segment_counts = mbri->segment_counts; - int *totalrate = &mbri->totalrate; - -- if (cpi->b_multi_threaded == 0) // we're shutting down -+ if (cpi->b_multi_threaded == 0) /* we're shutting down */ - break; - - for (mb_row = ithread + 1; mb_row < cm->mb_rows; mb_row += (cpi->encoding_thread_count + 1)) -@@ -108,7 +99,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) - - last_row_current_mb_col = &cpi->mt_current_mb_col[mb_row - 1]; - -- // reset above block coeffs -+ /* reset above block coeffs */ - xd->above_context = cm->above_context; - xd->left_context = &mb_row_left_context; - -@@ -118,10 +109,10 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) - recon_yoffset = (mb_row * recon_y_stride * 16); - recon_uvoffset = (mb_row * recon_uv_stride * 8); - -- // Set the mb activity pointer to the start of the row. -+ /* Set the mb activity pointer to the start of the row. */ - x->mb_activity_ptr = &cpi->mb_activity_map[map_index]; - -- // for each macroblock col in image -+ /* for each macroblock col in image */ - for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) - { - *current_mb_col = mb_col - 1; -@@ -139,14 +130,18 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) - tp = tp_start; - #endif - -- // Distance of Mb to the various image edges. -- // These specified to 8th pel as they are always compared to values that are in 1/8th pel units -+ /* Distance of Mb to the various image edges. 
-+ * These specified to 8th pel as they are always compared -+ * to values that are in 1/8th pel units -+ */ - xd->mb_to_left_edge = -((mb_col * 16) << 3); - xd->mb_to_right_edge = ((cm->mb_cols - 1 - mb_col) * 16) << 3; - xd->mb_to_top_edge = -((mb_row * 16) << 3); - xd->mb_to_bottom_edge = ((cm->mb_rows - 1 - mb_row) * 16) << 3; - -- // Set up limit values for motion vectors used to prevent them extending outside the UMV borders -+ /* Set up limit values for motion vectors used to prevent -+ * them extending outside the UMV borders -+ */ - x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16)); - x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16) + (VP8BORDERINPIXELS - 16); - x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16)); -@@ -160,17 +155,19 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) - x->rddiv = cpi->RDDIV; - x->rdmult = cpi->RDMULT; - -- //Copy current mb to a buffer -+ /* Copy current mb to a buffer */ - vp8_copy_mem16x16(x->src.y_buffer, x->src.y_stride, x->thismb, 16); - - if (cpi->oxcf.tuning == VP8_TUNE_SSIM) - vp8_activity_masking(cpi, x); - -- // Is segmentation enabled -- // MB level adjustment to quantizer -+ /* Is segmentation enabled */ -+ /* MB level adjustment to quantizer */ - if (xd->segmentation_enabled) - { -- // Code to set segment id in xd->mbmi.segment_id for current MB (with range checking) -+ /* Code to set segment id in xd->mbmi.segment_id for -+ * current MB (with range checking) -+ */ - if (cpi->segmentation_map[map_index + mb_col] <= 3) - xd->mode_info_context->mbmi.segment_id = cpi->segmentation_map[map_index + mb_col]; - else -@@ -179,7 +176,8 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) - vp8cx_mb_init_quantizer(cpi, x, 1); - } - else -- xd->mode_info_context->mbmi.segment_id = 0; // Set to Segment 0 by default -+ /* Set to Segment 0 by default */ -+ xd->mode_info_context->mbmi.segment_id = 0; - - x->active_ptr = cpi->active_map + map_index + mb_col; - -@@ -209,21 +207,28 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) - - #endif - -- // Count of last ref frame 0,0 usage -- if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)) -- cpi->inter_zz_count++; -- -- // Special case code for cyclic refresh -- // If cyclic update enabled then copy xd->mbmi.segment_id; (which may have been updated based on mode -- // during vp8cx_encode_inter_macroblock()) back into the global segmentation map -- if (cpi->cyclic_refresh_mode_enabled && xd->segmentation_enabled) -+ /* Special case code for cyclic refresh -+ * If cyclic update enabled then copy -+ * xd->mbmi.segment_id; (which may have been updated -+ * based on mode during -+ * vp8cx_encode_inter_macroblock()) back into the -+ * global segmentation map -+ */ -+ if ((cpi->current_layer == 0) && -+ (cpi->cyclic_refresh_mode_enabled && -+ xd->segmentation_enabled)) - { - const MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi; - cpi->segmentation_map[map_index + mb_col] = mbmi->segment_id; - -- // If the block has been refreshed mark it as clean (the magnitude of the -ve influences how long it will be before we consider another refresh): -- // Else if it was coded (last frame 0,0) and has not already been refreshed then mark it as a candidate for cleanup next time (marked 0) -- // else mark it as dirty (1). 
-+ /* If the block has been refreshed mark it as clean -+ * (the magnitude of the -ve influences how long it -+ * will be before we consider another refresh): -+ * Else if it was coded (last frame 0,0) and has -+ * not already been refreshed then mark it as a -+ * candidate for cleanup next time (marked 0) else -+ * mark it as dirty (1). -+ */ - if (mbmi->segment_id) - cpi->cyclic_refresh_map[map_index + mb_col] = -1; - else if ((mbmi->mode == ZEROMV) && (mbmi->ref_frame == LAST_FRAME)) -@@ -246,13 +251,13 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) - #else - cpi->tplist[mb_row].stop = tp; - #endif -- // Increment pointer into gf usage flags structure. -+ /* Increment pointer into gf usage flags structure. */ - x->gf_active_ptr++; - -- // Increment the activity mask pointers. -+ /* Increment the activity mask pointers. */ - x->mb_activity_ptr++; - -- // adjust to the next column of macroblocks -+ /* adjust to the next column of macroblocks */ - x->src.y_buffer += 16; - x->src.u_buffer += 8; - x->src.v_buffer += 8; -@@ -260,10 +265,10 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) - recon_yoffset += 16; - recon_uvoffset += 8; - -- // Keep track of segment usage -+ /* Keep track of segment usage */ - segment_counts[xd->mode_info_context->mbmi.segment_id]++; - -- // skip to next mb -+ /* skip to next mb */ - xd->mode_info_context++; - x->partition_info++; - xd->above_context++; -@@ -276,7 +281,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) - - *current_mb_col = mb_col + nsync; - -- // this is to account for the border -+ /* this is to account for the border */ - xd->mode_info_context++; - x->partition_info++; - -@@ -296,7 +301,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) - } - } - -- //printf("exit thread %d\n", ithread); -+ /* printf("exit thread %d\n", ithread); */ - return 0; - } - -@@ -336,21 +341,16 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc) - z->src.v_buffer = x->src.v_buffer; - */ - -+ z->mvcost[0] = x->mvcost[0]; -+ z->mvcost[1] = x->mvcost[1]; -+ z->mvsadcost[0] = x->mvsadcost[0]; -+ z->mvsadcost[1] = x->mvsadcost[1]; - -- vpx_memcpy(z->mvcosts, x->mvcosts, sizeof(x->mvcosts)); -- z->mvcost[0] = &z->mvcosts[0][mv_max+1]; -- z->mvcost[1] = &z->mvcosts[1][mv_max+1]; -- z->mvsadcost[0] = &z->mvsadcosts[0][mvfp_max+1]; -- z->mvsadcost[1] = &z->mvsadcosts[1][mvfp_max+1]; -- -- -- vpx_memcpy(z->token_costs, x->token_costs, sizeof(x->token_costs)); -- vpx_memcpy(z->inter_bmode_costs, x->inter_bmode_costs, sizeof(x->inter_bmode_costs)); -- //memcpy(z->mvcosts, x->mvcosts, sizeof(x->mvcosts)); -- //memcpy(z->mvcost, x->mvcost, sizeof(x->mvcost)); -- vpx_memcpy(z->mbmode_cost, x->mbmode_cost, sizeof(x->mbmode_cost)); -- vpx_memcpy(z->intra_uv_mode_cost, x->intra_uv_mode_cost, sizeof(x->intra_uv_mode_cost)); -- vpx_memcpy(z->bmode_costs, x->bmode_costs, sizeof(x->bmode_costs)); -+ z->token_costs = x->token_costs; -+ z->inter_bmode_costs = x->inter_bmode_costs; -+ z->mbmode_cost = x->mbmode_cost; -+ z->intra_uv_mode_cost = x->intra_uv_mode_cost; -+ z->bmode_costs = x->bmode_costs; - - for (i = 0; i < 25; i++) - { -@@ -358,17 +358,15 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc) - z->block[i].quant_fast = x->block[i].quant_fast; - z->block[i].quant_shift = x->block[i].quant_shift; - z->block[i].zbin = x->block[i].zbin; -- z->block[i].zrun_zbin_boost = x->block[i].zrun_zbin_boost; -+ z->block[i].zrun_zbin_boost = x->block[i].zrun_zbin_boost; - z->block[i].round = x->block[i].round; -- z->q_index = x->q_index; -- 
z->act_zbin_adj = x->act_zbin_adj; -- z->last_act_zbin_adj = x->last_act_zbin_adj; -- /* -- z->block[i].src = x->block[i].src; -- */ -- z->block[i].src_stride = x->block[i].src_stride; -+ z->block[i].src_stride = x->block[i].src_stride; - } - -+ z->q_index = x->q_index; -+ z->act_zbin_adj = x->act_zbin_adj; -+ z->last_act_zbin_adj = x->last_act_zbin_adj; -+ - { - MACROBLOCKD *xd = &x->e_mbd; - MACROBLOCKD *zd = &z->e_mbd; -@@ -400,9 +398,11 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc) - zd->subpixel_predict16x16 = xd->subpixel_predict16x16; - zd->segmentation_enabled = xd->segmentation_enabled; - zd->mb_segement_abs_delta = xd->mb_segement_abs_delta; -- vpx_memcpy(zd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data)); -+ vpx_memcpy(zd->segment_feature_data, xd->segment_feature_data, -+ sizeof(xd->segment_feature_data)); - -- vpx_memcpy(zd->dequant_y1_dc, xd->dequant_y1_dc, sizeof(xd->dequant_y1_dc)); -+ vpx_memcpy(zd->dequant_y1_dc, xd->dequant_y1_dc, -+ sizeof(xd->dequant_y1_dc)); - vpx_memcpy(zd->dequant_y1, xd->dequant_y1, sizeof(xd->dequant_y1)); - vpx_memcpy(zd->dequant_y2, xd->dequant_y2, sizeof(xd->dequant_y2)); - vpx_memcpy(zd->dequant_uv, xd->dequant_uv, sizeof(xd->dequant_uv)); -@@ -418,13 +418,23 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc) - zd->block[i].dequant = zd->dequant_uv; - zd->block[24].dequant = zd->dequant_y2; - #endif -+ -+ -+ vpx_memcpy(z->rd_threshes, x->rd_threshes, sizeof(x->rd_threshes)); -+ vpx_memcpy(z->rd_thresh_mult, x->rd_thresh_mult, -+ sizeof(x->rd_thresh_mult)); -+ -+ z->zbin_over_quant = x->zbin_over_quant; -+ z->zbin_mode_boost_enabled = x->zbin_mode_boost_enabled; -+ z->zbin_mode_boost = x->zbin_mode_boost; -+ -+ vpx_memset(z->error_bins, 0, sizeof(z->error_bins)); - } - } - - void vp8cx_init_mbrthread_data(VP8_COMP *cpi, - MACROBLOCK *x, - MB_ROW_COMP *mbr_ei, -- int mb_row, - int count - ) - { -@@ -432,7 +442,6 @@ void vp8cx_init_mbrthread_data(VP8_COMP *cpi, - VP8_COMMON *const cm = & cpi->common; - MACROBLOCKD *const xd = & x->e_mbd; - int i; -- (void) mb_row; - - for (i = 0; i < count; i++) - { -@@ -465,10 +474,6 @@ void vp8cx_init_mbrthread_data(VP8_COMP *cpi, - - vp8_build_block_offsets(mb); - -- vp8_setup_block_dptrs(mbd); -- -- vp8_setup_block_ptrs(mb); -- - mbd->left_context = &cm->left_context; - mb->mvc = cm->fc.mvc; - -@@ -477,10 +482,19 @@ void vp8cx_init_mbrthread_data(VP8_COMP *cpi, - mbd->fullpixel_mask = 0xffffffff; - if(cm->full_pixel) - mbd->fullpixel_mask = 0xfffffff8; -+ -+ vp8_zero(mb->coef_counts); -+ vp8_zero(x->ymode_count); -+ mb->skip_true_count = 0; -+ vp8_zero(mb->MVcount); -+ mb->prediction_error = 0; -+ mb->intra_error = 0; -+ vp8_zero(mb->count_mb_ref_frame_usage); -+ mb->mbs_tested_so_far = 0; - } - } - --void vp8cx_create_encoder_threads(VP8_COMP *cpi) -+int vp8cx_create_encoder_threads(VP8_COMP *cpi) - { - const VP8_COMMON * cm = &cpi->common; - -@@ -492,6 +506,7 @@ void vp8cx_create_encoder_threads(VP8_COMP *cpi) - { - int ithread; - int th_count = cpi->oxcf.multi_threaded - 1; -+ int rc = 0; - - /* don't allocate more threads than cores available */ - if (cpi->oxcf.multi_threaded > cm->processor_core_count) -@@ -505,16 +520,17 @@ void vp8cx_create_encoder_threads(VP8_COMP *cpi) - } - - if(th_count == 0) -- return; -- -- CHECK_MEM_ERROR(cpi->h_encoding_thread, vpx_malloc(sizeof(pthread_t) * th_count)); -- CHECK_MEM_ERROR(cpi->h_event_start_encoding, vpx_malloc(sizeof(sem_t) * th_count)); -- CHECK_MEM_ERROR(cpi->mb_row_ei, 
vpx_memalign(32, sizeof(MB_ROW_COMP) * th_count)); -+ return 0; -+ -+ CHECK_MEM_ERROR(cpi->h_encoding_thread, -+ vpx_malloc(sizeof(pthread_t) * th_count)); -+ CHECK_MEM_ERROR(cpi->h_event_start_encoding, -+ vpx_malloc(sizeof(sem_t) * th_count)); -+ CHECK_MEM_ERROR(cpi->mb_row_ei, -+ vpx_memalign(32, sizeof(MB_ROW_COMP) * th_count)); - vpx_memset(cpi->mb_row_ei, 0, sizeof(MB_ROW_COMP) * th_count); - CHECK_MEM_ERROR(cpi->en_thread_data, - vpx_malloc(sizeof(ENCODETHREAD_DATA) * th_count)); -- CHECK_MEM_ERROR(cpi->mt_current_mb_col, -- vpx_malloc(sizeof(*cpi->mt_current_mb_col) * cm->mb_rows)); - - sem_init(&cpi->h_event_end_encoding, 0, 0); - -@@ -528,16 +544,45 @@ void vp8cx_create_encoder_threads(VP8_COMP *cpi) - - for (ithread = 0; ithread < th_count; ithread++) - { -- ENCODETHREAD_DATA * ethd = &cpi->en_thread_data[ithread]; -+ ENCODETHREAD_DATA *ethd = &cpi->en_thread_data[ithread]; -+ -+ /* Setup block ptrs and offsets */ -+ vp8_setup_block_ptrs(&cpi->mb_row_ei[ithread].mb); -+ vp8_setup_block_dptrs(&cpi->mb_row_ei[ithread].mb.e_mbd); - - sem_init(&cpi->h_event_start_encoding[ithread], 0, 0); -+ - ethd->ithread = ithread; - ethd->ptr1 = (void *)cpi; - ethd->ptr2 = (void *)&cpi->mb_row_ei[ithread]; - -- pthread_create(&cpi->h_encoding_thread[ithread], 0, thread_encoding_proc, ethd); -+ rc = pthread_create(&cpi->h_encoding_thread[ithread], 0, -+ thread_encoding_proc, ethd); -+ if(rc) -+ break; - } - -+ if(rc) -+ { -+ /* shutdown other threads */ -+ cpi->b_multi_threaded = 0; -+ for(--ithread; ithread >= 0; ithread--) -+ { -+ pthread_join(cpi->h_encoding_thread[ithread], 0); -+ sem_destroy(&cpi->h_event_start_encoding[ithread]); -+ } -+ sem_destroy(&cpi->h_event_end_encoding); -+ -+ /* free thread related resources */ -+ vpx_free(cpi->h_event_start_encoding); -+ vpx_free(cpi->h_encoding_thread); -+ vpx_free(cpi->mb_row_ei); -+ vpx_free(cpi->en_thread_data); -+ -+ return -1; -+ } -+ -+ - { - LPFTHREAD_DATA * lpfthd = &cpi->lpf_thread_data; - -@@ -545,24 +590,47 @@ void vp8cx_create_encoder_threads(VP8_COMP *cpi) - sem_init(&cpi->h_event_end_lpf, 0, 0); - - lpfthd->ptr1 = (void *)cpi; -- pthread_create(&cpi->h_filter_thread, 0, thread_loopfilter, lpfthd); -+ rc = pthread_create(&cpi->h_filter_thread, 0, thread_loopfilter, -+ lpfthd); -+ -+ if(rc) -+ { -+ /* shutdown other threads */ -+ cpi->b_multi_threaded = 0; -+ for(--ithread; ithread >= 0; ithread--) -+ { -+ sem_post(&cpi->h_event_start_encoding[ithread]); -+ pthread_join(cpi->h_encoding_thread[ithread], 0); -+ sem_destroy(&cpi->h_event_start_encoding[ithread]); -+ } -+ sem_destroy(&cpi->h_event_end_encoding); -+ sem_destroy(&cpi->h_event_end_lpf); -+ sem_destroy(&cpi->h_event_start_lpf); -+ -+ /* free thread related resources */ -+ vpx_free(cpi->h_event_start_encoding); -+ vpx_free(cpi->h_encoding_thread); -+ vpx_free(cpi->mb_row_ei); -+ vpx_free(cpi->en_thread_data); -+ -+ return -2; -+ } - } - } -- -+ return 0; - } - - void vp8cx_remove_encoder_threads(VP8_COMP *cpi) - { - if (cpi->b_multi_threaded) - { -- //shutdown other threads -+ /* shutdown other threads */ - cpi->b_multi_threaded = 0; - { - int i; - - for (i = 0; i < cpi->encoding_thread_count; i++) - { -- //SetEvent(cpi->h_event_mbrencoding[i]); - sem_post(&cpi->h_event_start_encoding[i]); - pthread_join(cpi->h_encoding_thread[i], 0); - -@@ -577,12 +645,11 @@ void vp8cx_remove_encoder_threads(VP8_COMP *cpi) - sem_destroy(&cpi->h_event_end_lpf); - sem_destroy(&cpi->h_event_start_lpf); - -- //free thread related resources -+ /* free thread related resources */ - 
vpx_free(cpi->h_event_start_encoding); - vpx_free(cpi->h_encoding_thread); - vpx_free(cpi->mb_row_ei); - vpx_free(cpi->en_thread_data); -- vpx_free(cpi->mt_current_mb_col); - } - } - #endif -diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c -index 8de1a6a..30bf8a6 100644 ---- a/vp8/encoder/firstpass.c -+++ b/vp8/encoder/firstpass.c -@@ -30,14 +30,12 @@ - #include "encodemv.h" - #include "encodeframe.h" - --//#define OUTPUT_FPF 1 -+/* #define OUTPUT_FPF 1 */ - - extern void vp8cx_frame_init_quantizer(VP8_COMP *cpi); - extern void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv); - extern void vp8_alloc_compressor_data(VP8_COMP *cpi); - --//#define GFQ_ADJUSTMENT (40 + ((15*Q)/10)) --//#define GFQ_ADJUSTMENT (80 + ((15*Q)/10)) - #define GFQ_ADJUSTMENT vp8_gf_boost_qadjustment[Q] - extern int vp8_kf_boost_qadjustment[QINDEX_RANGE]; - -@@ -77,7 +75,9 @@ static const int cq_level[QINDEX_RANGE] = - - static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame); - --// Resets the first pass file to the given position using a relative seek from the current position -+/* Resets the first pass file to the given position using a relative seek -+ * from the current position -+ */ - static void reset_fpf_position(VP8_COMP *cpi, FIRSTPASS_STATS *Position) - { - cpi->twopass.stats_in = Position; -@@ -92,14 +92,14 @@ static int lookup_next_frame_stats(VP8_COMP *cpi, FIRSTPASS_STATS *next_frame) - return 1; - } - --// Read frame stats at an offset from the current position -+/* Read frame stats at an offset from the current position */ - static int read_frame_stats( VP8_COMP *cpi, - FIRSTPASS_STATS *frame_stats, - int offset ) - { - FIRSTPASS_STATS * fps_ptr = cpi->twopass.stats_in; - -- // Check legality of offset -+ /* Check legality of offset */ - if ( offset >= 0 ) - { - if ( &fps_ptr[offset] >= cpi->twopass.stats_in_end ) -@@ -136,7 +136,7 @@ static void output_stats(const VP8_COMP *cpi, - pkt.data.twopass_stats.sz = sizeof(FIRSTPASS_STATS); - vpx_codec_pkt_list_add(pktlist, &pkt); - --// TEMP debug code -+/* TEMP debug code */ - #if OUTPUT_FPF - - { -@@ -257,7 +257,9 @@ static void avg_stats(FIRSTPASS_STATS *section) - section->duration /= section->count; - } - --// Calculate a modified Error used in distributing bits between easier and harder frames -+/* Calculate a modified Error used in distributing bits between easier -+ * and harder frames -+ */ - static double calculate_modified_err(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - { - double av_err = ( cpi->twopass.total_stats.ssim_weighted_pred_err / -@@ -315,7 +317,9 @@ static double simple_weight(YV12_BUFFER_CONFIG *source) - unsigned char *src = source->y_buffer; - double sum_weights = 0.0; - -- // Loop throught the Y plane raw examining levels and creating a weight for the image -+ /* Loop throught the Y plane raw examining levels and creating a weight -+ * for the image -+ */ - i = source->y_height; - do - { -@@ -335,41 +339,52 @@ static double simple_weight(YV12_BUFFER_CONFIG *source) - } - - --// This function returns the current per frame maximum bitrate target -+/* This function returns the current per frame maximum bitrate target */ - static int frame_max_bits(VP8_COMP *cpi) - { -- // Max allocation for a single frame based on the max section guidelines passed in and how many bits are left -+ /* Max allocation for a single frame based on the max section guidelines -+ * passed in and how many bits are left -+ */ - int max_bits; - -- // For CBR we need to also consider buffer fullness. 
-- // If we are running below the optimal level then we need to gradually tighten up on max_bits. -+ /* For CBR we need to also consider buffer fullness. -+ * If we are running below the optimal level then we need to gradually -+ * tighten up on max_bits. -+ */ - if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) - { - double buffer_fullness_ratio = (double)cpi->buffer_level / DOUBLE_DIVIDE_CHECK((double)cpi->oxcf.optimal_buffer_level); - -- // For CBR base this on the target average bits per frame plus the maximum sedction rate passed in by the user -+ /* For CBR base this on the target average bits per frame plus the -+ * maximum sedction rate passed in by the user -+ */ - max_bits = (int)(cpi->av_per_frame_bandwidth * ((double)cpi->oxcf.two_pass_vbrmax_section / 100.0)); - -- // If our buffer is below the optimum level -+ /* If our buffer is below the optimum level */ - if (buffer_fullness_ratio < 1.0) - { -- // The lower of max_bits / 4 or cpi->av_per_frame_bandwidth / 4. -+ /* The lower of max_bits / 4 or cpi->av_per_frame_bandwidth / 4. */ - int min_max_bits = ((cpi->av_per_frame_bandwidth >> 2) < (max_bits >> 2)) ? cpi->av_per_frame_bandwidth >> 2 : max_bits >> 2; - - max_bits = (int)(max_bits * buffer_fullness_ratio); - -+ /* Lowest value we will set ... which should allow the buffer to -+ * refill. -+ */ - if (max_bits < min_max_bits) -- max_bits = min_max_bits; // Lowest value we will set ... which should allow the buffer to refil. -+ max_bits = min_max_bits; - } - } -- // VBR -+ /* VBR */ - else - { -- // For VBR base this on the bits and frames left plus the two_pass_vbrmax_section rate passed in by the user -+ /* For VBR base this on the bits and frames left plus the -+ * two_pass_vbrmax_section rate passed in by the user -+ */ - max_bits = (int)(((double)cpi->twopass.bits_left / (cpi->twopass.total_stats.count - (double)cpi->common.current_video_frame)) * ((double)cpi->oxcf.two_pass_vbrmax_section / 100.0)); - } - -- // Trap case where we are out of bits -+ /* Trap case where we are out of bits */ - if (max_bits < 0) - max_bits = 0; - -@@ -403,13 +418,13 @@ static void zz_motion_search( VP8_COMP *cpi, MACROBLOCK * x, - unsigned char *ref_ptr; - int ref_stride = x->e_mbd.pre.y_stride; - -- // Set up pointers for this macro block raw buffer -+ /* Set up pointers for this macro block raw buffer */ - raw_ptr = (unsigned char *)(raw_buffer->y_buffer + recon_yoffset - + d->offset); - vp8_mse16x16 ( src_ptr, src_stride, raw_ptr, raw_stride, - (unsigned int *)(raw_motion_err)); - -- // Set up pointers for this macro block recon buffer -+ /* Set up pointers for this macro block recon buffer */ - xd->pre.y_buffer = recon_buffer->y_buffer + recon_yoffset; - ref_ptr = (unsigned char *)(xd->pre.y_buffer + d->offset ); - vp8_mse16x16 ( src_ptr, src_stride, ref_ptr, ref_stride, -@@ -430,19 +445,19 @@ static void first_pass_motion_search(VP8_COMP *cpi, MACROBLOCK *x, - int_mv ref_mv_full; - - int tmp_err; -- int step_param = 3; //3; // Dont search over full range for first pass -- int further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param; //3; -+ int step_param = 3; /* Dont search over full range for first pass */ -+ int further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param; - int n; - vp8_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[BLOCK_16X16]; - int new_mv_mode_penalty = 256; - -- // override the default variance function to use MSE -+ /* override the default variance function to use MSE */ - v_fn_ptr.vf = vp8_mse16x16; - -- // Set up pointers for this macro block recon buffer -+ /* Set up 
pointers for this macro block recon buffer */ - xd->pre.y_buffer = recon_buffer->y_buffer + recon_yoffset; - -- // Initial step/diamond search centred on best mv -+ /* Initial step/diamond search centred on best mv */ - tmp_mv.as_int = 0; - ref_mv_full.as_mv.col = ref_mv->as_mv.col>>3; - ref_mv_full.as_mv.row = ref_mv->as_mv.row>>3; -@@ -459,7 +474,7 @@ static void first_pass_motion_search(VP8_COMP *cpi, MACROBLOCK *x, - best_mv->col = tmp_mv.as_mv.col; - } - -- // Further step/diamond searches as necessary -+ /* Further step/diamond searches as necessary */ - n = num00; - num00 = 0; - -@@ -520,7 +535,7 @@ void vp8_first_pass(VP8_COMP *cpi) - - zero_ref_mv.as_int = 0; - -- vp8_clear_system_state(); //__asm emms; -+ vp8_clear_system_state(); - - x->src = * cpi->Source; - xd->pre = *lst_yv12; -@@ -530,44 +545,55 @@ void vp8_first_pass(VP8_COMP *cpi) - - xd->mode_info_context = cm->mi; - -- vp8_build_block_offsets(x); -- -- vp8_setup_block_dptrs(&x->e_mbd); -+ if(!cm->use_bilinear_mc_filter) -+ { -+ xd->subpixel_predict = vp8_sixtap_predict4x4; -+ xd->subpixel_predict8x4 = vp8_sixtap_predict8x4; -+ xd->subpixel_predict8x8 = vp8_sixtap_predict8x8; -+ xd->subpixel_predict16x16 = vp8_sixtap_predict16x16; -+ } -+ else -+ { -+ xd->subpixel_predict = vp8_bilinear_predict4x4; -+ xd->subpixel_predict8x4 = vp8_bilinear_predict8x4; -+ xd->subpixel_predict8x8 = vp8_bilinear_predict8x8; -+ xd->subpixel_predict16x16 = vp8_bilinear_predict16x16; -+ } - -- vp8_setup_block_ptrs(x); -+ vp8_build_block_offsets(x); - -- // set up frame new frame for intra coded blocks -+ /* set up frame new frame for intra coded blocks */ - vp8_setup_intra_recon(new_yv12); - vp8cx_frame_init_quantizer(cpi); - -- // Initialise the MV cost table to the defaults -- //if( cm->current_video_frame == 0) -- //if ( 0 ) -+ /* Initialise the MV cost table to the defaults */ - { - int flag[2] = {1, 1}; -- vp8_initialize_rd_consts(cpi, vp8_dc_quant(cm->base_qindex, cm->y1dc_delta_q)); -+ vp8_initialize_rd_consts(cpi, x, vp8_dc_quant(cm->base_qindex, cm->y1dc_delta_q)); - vpx_memcpy(cm->fc.mvc, vp8_default_mv_context, sizeof(vp8_default_mv_context)); - vp8_build_component_cost_table(cpi->mb.mvcost, (const MV_CONTEXT *) cm->fc.mvc, flag); - } - -- // for each macroblock row in image -+ /* for each macroblock row in image */ - for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) - { - int_mv best_ref_mv; - - best_ref_mv.as_int = 0; - -- // reset above block coeffs -+ /* reset above block coeffs */ - xd->up_available = (mb_row != 0); - recon_yoffset = (mb_row * recon_y_stride * 16); - recon_uvoffset = (mb_row * recon_uv_stride * 8); - -- // Set up limit values for motion vectors to prevent them extending outside the UMV borders -+ /* Set up limit values for motion vectors to prevent them extending -+ * outside the UMV borders -+ */ - x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16)); - x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16) + (VP8BORDERINPIXELS - 16); - - -- // for each macroblock col in image -+ /* for each macroblock col in image */ - for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) - { - int this_error; -@@ -579,26 +605,33 @@ void vp8_first_pass(VP8_COMP *cpi) - xd->dst.v_buffer = new_yv12->v_buffer + recon_uvoffset; - xd->left_available = (mb_col != 0); - -- //Copy current mb to a buffer -+ /* Copy current mb to a buffer */ - vp8_copy_mem16x16(x->src.y_buffer, x->src.y_stride, x->thismb, 16); - -- // do intra 16x16 prediction -+ /* do intra 16x16 prediction */ - this_error = vp8_encode_intra(cpi, x, use_dc_pred); - -- // 
"intrapenalty" below deals with situations where the intra and inter error scores are very low (eg a plain black frame) -- // We do not have special cases in first pass for 0,0 and nearest etc so all inter modes carry an overhead cost estimate fot the mv. -- // When the error score is very low this causes us to pick all or lots of INTRA modes and throw lots of key frames. -- // This penalty adds a cost matching that of a 0,0 mv to the intra case. -+ /* "intrapenalty" below deals with situations where the intra -+ * and inter error scores are very low (eg a plain black frame) -+ * We do not have special cases in first pass for 0,0 and -+ * nearest etc so all inter modes carry an overhead cost -+ * estimate fot the mv. When the error score is very low this -+ * causes us to pick all or lots of INTRA modes and throw lots -+ * of key frames. This penalty adds a cost matching that of a -+ * 0,0 mv to the intra case. -+ */ - this_error += intrapenalty; - -- // Cumulative intra error total -+ /* Cumulative intra error total */ - intra_error += (int64_t)this_error; - -- // Set up limit values for motion vectors to prevent them extending outside the UMV borders -+ /* Set up limit values for motion vectors to prevent them -+ * extending outside the UMV borders -+ */ - x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16)); - x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16) + (VP8BORDERINPIXELS - 16); - -- // Other than for the first frame do a motion search -+ /* Other than for the first frame do a motion search */ - if (cm->current_video_frame > 0) - { - BLOCKD *d = &x->e_mbd.block[0]; -@@ -607,7 +640,7 @@ void vp8_first_pass(VP8_COMP *cpi) - int motion_error = INT_MAX; - int raw_motion_error = INT_MAX; - -- // Simple 0,0 motion with no mv overhead -+ /* Simple 0,0 motion with no mv overhead */ - zz_motion_search( cpi, x, cpi->last_frame_unscaled_source, - &raw_motion_error, lst_yv12, &motion_error, - recon_yoffset ); -@@ -617,13 +650,16 @@ void vp8_first_pass(VP8_COMP *cpi) - if (raw_motion_error < cpi->oxcf.encode_breakout) - goto skip_motion_search; - -- // Test last reference frame using the previous best mv as the -- // starting point (best reference) for the search -+ /* Test last reference frame using the previous best mv as the -+ * starting point (best reference) for the search -+ */ - first_pass_motion_search(cpi, x, &best_ref_mv, - &d->bmi.mv.as_mv, lst_yv12, - &motion_error, recon_yoffset); - -- // If the current best reference mv is not centred on 0,0 then do a 0,0 based search as well -+ /* If the current best reference mv is not centred on 0,0 -+ * then do a 0,0 based search as well -+ */ - if (best_ref_mv.as_int) - { - tmp_err = INT_MAX; -@@ -638,7 +674,9 @@ void vp8_first_pass(VP8_COMP *cpi) - } - } - -- // Experimental search in a second reference frame ((0,0) based only) -+ /* Experimental search in a second reference frame ((0,0) -+ * based only) -+ */ - if (cm->current_video_frame > 1) - { - first_pass_motion_search(cpi, x, &zero_ref_mv, &tmp_mv, gld_yv12, &gf_motion_error, recon_yoffset); -@@ -646,19 +684,9 @@ void vp8_first_pass(VP8_COMP *cpi) - if ((gf_motion_error < motion_error) && (gf_motion_error < this_error)) - { - second_ref_count++; -- //motion_error = gf_motion_error; -- //d->bmi.mv.as_mv.row = tmp_mv.row; -- //d->bmi.mv.as_mv.col = tmp_mv.col; - } -- /*else -- { -- xd->pre.y_buffer = cm->last_frame.y_buffer + recon_yoffset; -- xd->pre.u_buffer = cm->last_frame.u_buffer + recon_uvoffset; -- xd->pre.v_buffer = cm->last_frame.v_buffer + recon_uvoffset; -- }*/ -- 
- -- // Reset to last frame as reference buffer -+ /* Reset to last frame as reference buffer */ - xd->pre.y_buffer = lst_yv12->y_buffer + recon_yoffset; - xd->pre.u_buffer = lst_yv12->u_buffer + recon_uvoffset; - xd->pre.v_buffer = lst_yv12->v_buffer + recon_uvoffset; -@@ -670,10 +698,11 @@ skip_motion_search: - - if (motion_error <= this_error) - { -- // Keep a count of cases where the inter and intra were -- // very close and very low. This helps with scene cut -- // detection for example in cropped clips with black bars -- // at the sides or top and bottom. -+ /* Keep a count of cases where the inter and intra were -+ * very close and very low. This helps with scene cut -+ * detection for example in cropped clips with black bars -+ * at the sides or top and bottom. -+ */ - if( (((this_error-intrapenalty) * 9) <= - (motion_error*10)) && - (this_error < (2*intrapenalty)) ) -@@ -696,17 +725,17 @@ skip_motion_search: - - best_ref_mv.as_int = d->bmi.mv.as_int; - -- // Was the vector non-zero -+ /* Was the vector non-zero */ - if (d->bmi.mv.as_int) - { - mvcount++; - -- // Was it different from the last non zero vector -+ /* Was it different from the last non zero vector */ - if ( d->bmi.mv.as_int != lastmv_as_int ) - new_mv_count++; - lastmv_as_int = d->bmi.mv.as_int; - -- // Does the Row vector point inwards or outwards -+ /* Does the Row vector point inwards or outwards */ - if (mb_row < cm->mb_rows / 2) - { - if (d->bmi.mv.as_mv.row > 0) -@@ -722,7 +751,7 @@ skip_motion_search: - sum_in_vectors--; - } - -- // Does the Row vector point inwards or outwards -+ /* Does the Row vector point inwards or outwards */ - if (mb_col < cm->mb_cols / 2) - { - if (d->bmi.mv.as_mv.col > 0) -@@ -743,7 +772,7 @@ skip_motion_search: - - coded_error += (int64_t)this_error; - -- // adjust to the next column of macroblocks -+ /* adjust to the next column of macroblocks */ - x->src.y_buffer += 16; - x->src.u_buffer += 8; - x->src.v_buffer += 8; -@@ -752,25 +781,25 @@ skip_motion_search: - recon_uvoffset += 8; - } - -- // adjust to the next row of mbs -+ /* adjust to the next row of mbs */ - x->src.y_buffer += 16 * x->src.y_stride - 16 * cm->mb_cols; - x->src.u_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols; - x->src.v_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols; - -- //extend the recon for intra prediction -+ /* extend the recon for intra prediction */ - vp8_extend_mb_row(new_yv12, xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8); -- vp8_clear_system_state(); //__asm emms; -+ vp8_clear_system_state(); - } - -- vp8_clear_system_state(); //__asm emms; -+ vp8_clear_system_state(); - { - double weight = 0.0; - - FIRSTPASS_STATS fps; - - fps.frame = cm->current_video_frame ; -- fps.intra_error = intra_error >> 8; -- fps.coded_error = coded_error >> 8; -+ fps.intra_error = (double)(intra_error >> 8); -+ fps.coded_error = (double)(coded_error >> 8); - weight = simple_weight(cpi->Source); - - -@@ -809,12 +838,13 @@ skip_motion_search: - fps.pcnt_motion = 1.0 * (double)mvcount / cpi->common.MBs; - } - -- // TODO: handle the case when duration is set to 0, or something less -- // than the full time between subsequent cpi->source_time_stamp s . -- fps.duration = cpi->source->ts_end -- - cpi->source->ts_start; -+ /* TODO: handle the case when duration is set to 0, or something less -+ * than the full time between subsequent cpi->source_time_stamps -+ */ -+ fps.duration = (double)(cpi->source->ts_end -+ - cpi->source->ts_start); - -- // don't want to do output stats with a stack variable! 
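/* Standalone sketch, not part of the patch above: a self-contained
 * illustration of the first-pass heuristic described in these hunks.  An
 * "intrapenalty" is added to the intra error so that flat frames do not make
 * intra coding look artificially cheap, and macroblocks whose inter and intra
 * errors are both small and close together are counted as neutral, which
 * feeds the scene-cut logic.  All constants below are invented for the
 * example and are not the real libvpx values. */
#include <stdio.h>

static int is_neutral_mb(int this_error, int motion_error, int intrapenalty)
{
    /* Same shape as the check in the hunk: intra error (net of the penalty)
     * at most ~10% above the best inter error, and small in absolute terms. */
    return (((this_error - intrapenalty) * 9) <= (motion_error * 10)) &&
           (this_error < (2 * intrapenalty));
}

int main(void)
{
    const int intrapenalty = 256;             /* illustrative value only */
    int intra_error  = 150;                   /* raw intra error for the MB */
    int this_error   = intra_error + intrapenalty;
    int motion_error = 180;                   /* best inter error */

    printf("neutral mb: %d\n",
           is_neutral_mb(this_error, motion_error, intrapenalty));
    return 0;
}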
-+ /* don't want to do output stats with a stack variable! */ - memcpy(&cpi->twopass.this_frame_stats, - &fps, - sizeof(FIRSTPASS_STATS)); -@@ -822,7 +852,9 @@ skip_motion_search: - accumulate_stats(&cpi->twopass.total_stats, &fps); - } - -- // Copy the previous Last Frame into the GF buffer if specific conditions for doing so are met -+ /* Copy the previous Last Frame into the GF buffer if specific -+ * conditions for doing so are met -+ */ - if ((cm->current_video_frame > 0) && - (cpi->twopass.this_frame_stats.pcnt_inter > 0.20) && - ((cpi->twopass.this_frame_stats.intra_error / cpi->twopass.this_frame_stats.coded_error) > 2.0)) -@@ -830,18 +862,22 @@ skip_motion_search: - vp8_yv12_copy_frame(lst_yv12, gld_yv12); - } - -- // swap frame pointers so last frame refers to the frame we just compressed -+ /* swap frame pointers so last frame refers to the frame we just -+ * compressed -+ */ - vp8_swap_yv12_buffer(lst_yv12, new_yv12); - vp8_yv12_extend_frame_borders(lst_yv12); - -- // Special case for the first frame. Copy into the GF buffer as a second reference. -+ /* Special case for the first frame. Copy into the GF buffer as a -+ * second reference. -+ */ - if (cm->current_video_frame == 0) - { - vp8_yv12_copy_frame(lst_yv12, gld_yv12); - } - - -- // use this to see what the first pass reconstruction looks like -+ /* use this to see what the first pass reconstruction looks like */ - if (0) - { - char filename[512]; -@@ -853,7 +889,8 @@ skip_motion_search: - else - recon_file = fopen(filename, "ab"); - -- if(fwrite(lst_yv12->buffer_alloc, lst_yv12->frame_size, 1, recon_file)); -+ (void) fwrite(lst_yv12->buffer_alloc, lst_yv12->frame_size, 1, -+ recon_file); - fclose(recon_file); - } - -@@ -862,11 +899,10 @@ skip_motion_search: - } - extern const int vp8_bits_per_mb[2][QINDEX_RANGE]; - --// Estimate a cost per mb attributable to overheads such as the coding of --// modes and motion vectors. --// Currently simplistic in its assumptions for testing. --// -- -+/* Estimate a cost per mb attributable to overheads such as the coding of -+ * modes and motion vectors. -+ * Currently simplistic in its assumptions for testing. -+ */ - - static double bitcost( double prob ) - { -@@ -890,12 +926,14 @@ static int64_t estimate_modemvcost(VP8_COMP *cpi, - motion_cost = bitcost(av_pct_motion); - intra_cost = bitcost(av_intra); - -- // Estimate of extra bits per mv overhead for mbs -- // << 9 is the normalization to the (bits * 512) used in vp8_bits_per_mb -+ /* Estimate of extra bits per mv overhead for mbs -+ * << 9 is the normalization to the (bits * 512) used in vp8_bits_per_mb -+ */ - mv_cost = ((int)(fpstats->new_mv_count / fpstats->count) * 8) << 9; - -- // Crude estimate of overhead cost from modes -- // << 9 is the normalization to (bits * 512) used in vp8_bits_per_mb -+ /* Crude estimate of overhead cost from modes -+ * << 9 is the normalization to (bits * 512) used in vp8_bits_per_mb -+ */ - mode_cost = - (int)( ( ((av_pct_inter - av_pct_motion) * zz_cost) + - (av_pct_motion * motion_cost) + -@@ -914,17 +952,17 @@ static double calc_correction_factor( double err_per_mb, - double error_term = err_per_mb / err_devisor; - double correction_factor; - -- // Adjustment based on Q to power term. -+ /* Adjustment based on Q to power term. */ - power_term = pt_low + (Q * 0.01); - power_term = (power_term > pt_high) ? 
pt_high : power_term; - -- // Adjustments to error term -- // TBD -+ /* Adjustments to error term */ -+ /* TBD */ - -- // Calculate correction factor -+ /* Calculate correction factor */ - correction_factor = pow(error_term, power_term); - -- // Clip range -+ /* Clip range */ - correction_factor = - (correction_factor < 0.05) - ? 0.05 : (correction_factor > 5.0) ? 5.0 : correction_factor; -@@ -948,15 +986,16 @@ static int estimate_max_q(VP8_COMP *cpi, - int overhead_bits_per_mb; - - if (section_target_bandwitdh <= 0) -- return cpi->twopass.maxq_max_limit; // Highest value allowed -+ return cpi->twopass.maxq_max_limit; /* Highest value allowed */ - - target_norm_bits_per_mb = - (section_target_bandwitdh < (1 << 20)) - ? (512 * section_target_bandwitdh) / num_mbs - : 512 * (section_target_bandwitdh / num_mbs); - -- // Calculate a corrective factor based on a rolling ratio of bits spent -- // vs target bits -+ /* Calculate a corrective factor based on a rolling ratio of bits spent -+ * vs target bits -+ */ - if ((cpi->rolling_target_bits > 0) && - (cpi->active_worst_quality < cpi->worst_quality)) - { -@@ -977,8 +1016,9 @@ static int estimate_max_q(VP8_COMP *cpi, - ? 10.0 : cpi->twopass.est_max_qcorrection_factor; - } - -- // Corrections for higher compression speed settings -- // (reduced compression expected) -+ /* Corrections for higher compression speed settings -+ * (reduced compression expected) -+ */ - if ((cpi->compressor_speed == 3) || (cpi->compressor_speed == 1)) - { - if (cpi->oxcf.cpu_used <= 5) -@@ -987,18 +1027,20 @@ static int estimate_max_q(VP8_COMP *cpi, - speed_correction = 1.25; - } - -- // Estimate of overhead bits per mb -- // Correction to overhead bits for min allowed Q. -+ /* Estimate of overhead bits per mb */ -+ /* Correction to overhead bits for min allowed Q. */ - overhead_bits_per_mb = overhead_bits / num_mbs; -- overhead_bits_per_mb *= pow( 0.98, (double)cpi->twopass.maxq_min_limit ); -+ overhead_bits_per_mb = (int)(overhead_bits_per_mb * -+ pow( 0.98, (double)cpi->twopass.maxq_min_limit )); - -- // Try and pick a max Q that will be high enough to encode the -- // content at the given rate. -+ /* Try and pick a max Q that will be high enough to encode the -+ * content at the given rate. -+ */ - for (Q = cpi->twopass.maxq_min_limit; Q < cpi->twopass.maxq_max_limit; Q++) - { - int bits_per_mb_at_this_q; - -- // Error per MB based correction factor -+ /* Error per MB based correction factor */ - err_correction_factor = - calc_correction_factor(err_per_mb, 150.0, 0.40, 0.90, Q); - -@@ -1010,27 +1052,29 @@ static int estimate_max_q(VP8_COMP *cpi, - * cpi->twopass.section_max_qfactor - * (double)bits_per_mb_at_this_q); - -- // Mode and motion overhead -- // As Q rises in real encode loop rd code will force overhead down -- // We make a crude adjustment for this here as *.98 per Q step. -+ /* Mode and motion overhead */ -+ /* As Q rises in real encode loop rd code will force overhead down -+ * We make a crude adjustment for this here as *.98 per Q step. -+ */ - overhead_bits_per_mb = (int)((double)overhead_bits_per_mb * 0.98); - - if (bits_per_mb_at_this_q <= target_norm_bits_per_mb) - break; - } - -- // Restriction on active max q for constrained quality mode. -+ /* Restriction on active max q for constrained quality mode. 
*/ - if ( (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) && - (Q < cpi->cq_target_quality) ) - { - Q = cpi->cq_target_quality; - } - -- // Adjust maxq_min_limit and maxq_max_limit limits based on -- // averaga q observed in clip for non kf/gf.arf frames -- // Give average a chance to settle though. -+ /* Adjust maxq_min_limit and maxq_max_limit limits based on -+ * average q observed in clip for non kf/gf.arf frames -+ * Give average a chance to settle though. -+ */ - if ( (cpi->ni_frames > -- ((unsigned int)cpi->twopass.total_stats.count >> 8)) && -+ ((int)cpi->twopass.total_stats.count >> 8)) && - (cpi->ni_frames > 150) ) - { - cpi->twopass.maxq_max_limit = ((cpi->ni_av_qi + 32) < cpi->worst_quality) -@@ -1042,8 +1086,9 @@ static int estimate_max_q(VP8_COMP *cpi, - return Q; - } - --// For cq mode estimate a cq level that matches the observed --// complexity and data rate. -+/* For cq mode estimate a cq level that matches the observed -+ * complexity and data rate. -+ */ - static int estimate_cq( VP8_COMP *cpi, - FIRSTPASS_STATS * fpstats, - int section_target_bandwitdh, -@@ -1072,11 +1117,12 @@ static int estimate_cq( VP8_COMP *cpi, - ? (512 * section_target_bandwitdh) / num_mbs - : 512 * (section_target_bandwitdh / num_mbs); - -- // Estimate of overhead bits per mb -+ /* Estimate of overhead bits per mb */ - overhead_bits_per_mb = overhead_bits / num_mbs; - -- // Corrections for higher compression speed settings -- // (reduced compression expected) -+ /* Corrections for higher compression speed settings -+ * (reduced compression expected) -+ */ - if ((cpi->compressor_speed == 3) || (cpi->compressor_speed == 1)) - { - if (cpi->oxcf.cpu_used <= 5) -@@ -1085,19 +1131,19 @@ static int estimate_cq( VP8_COMP *cpi, - speed_correction = 1.25; - } - -- // II ratio correction factor for clip as a whole -+ /* II ratio correction factor for clip as a whole */ - clip_iiratio = cpi->twopass.total_stats.intra_error / - DOUBLE_DIVIDE_CHECK(cpi->twopass.total_stats.coded_error); - clip_iifactor = 1.0 - ((clip_iiratio - 10.0) * 0.025); - if (clip_iifactor < 0.80) - clip_iifactor = 0.80; - -- // Try and pick a Q that can encode the content at the given rate. -+ /* Try and pick a Q that can encode the content at the given rate. */ - for (Q = 0; Q < MAXQ; Q++) - { - int bits_per_mb_at_this_q; - -- // Error per MB based correction factor -+ /* Error per MB based correction factor */ - err_correction_factor = - calc_correction_factor(err_per_mb, 100.0, 0.40, 0.90, Q); - -@@ -1110,16 +1156,17 @@ static int estimate_cq( VP8_COMP *cpi, - clip_iifactor * - (double)bits_per_mb_at_this_q); - -- // Mode and motion overhead -- // As Q rises in real encode loop rd code will force overhead down -- // We make a crude adjustment for this here as *.98 per Q step. -+ /* Mode and motion overhead */ -+ /* As Q rises in real encode loop rd code will force overhead down -+ * We make a crude adjustment for this here as *.98 per Q step. -+ */ - overhead_bits_per_mb = (int)((double)overhead_bits_per_mb * 0.98); - - if (bits_per_mb_at_this_q <= target_norm_bits_per_mb) - break; - } - -- // Clip value to range "best allowed to (worst allowed - 1)" -+ /* Clip value to range "best allowed to (worst allowed - 1)" */ - Q = cq_level[Q]; - if ( Q >= cpi->worst_quality ) - Q = cpi->worst_quality - 1; -@@ -1141,7 +1188,9 @@ static int estimate_q(VP8_COMP *cpi, double section_err, int section_target_band - - target_norm_bits_per_mb = (section_target_bandwitdh < (1 << 20)) ? 
(512 * section_target_bandwitdh) / num_mbs : 512 * (section_target_bandwitdh / num_mbs); - -- // Corrections for higher compression speed settings (reduced compression expected) -+ /* Corrections for higher compression speed settings -+ * (reduced compression expected) -+ */ - if ((cpi->compressor_speed == 3) || (cpi->compressor_speed == 1)) - { - if (cpi->oxcf.cpu_used <= 5) -@@ -1150,12 +1199,12 @@ static int estimate_q(VP8_COMP *cpi, double section_err, int section_target_band - speed_correction = 1.25; - } - -- // Try and pick a Q that can encode the content at the given rate. -+ /* Try and pick a Q that can encode the content at the given rate. */ - for (Q = 0; Q < MAXQ; Q++) - { - int bits_per_mb_at_this_q; - -- // Error per MB based correction factor -+ /* Error per MB based correction factor */ - err_correction_factor = - calc_correction_factor(err_per_mb, 150.0, 0.40, 0.90, Q); - -@@ -1172,7 +1221,7 @@ static int estimate_q(VP8_COMP *cpi, double section_err, int section_target_band - return Q; - } - --// Estimate a worst case Q for a KF group -+/* Estimate a worst case Q for a KF group */ - static int estimate_kf_group_q(VP8_COMP *cpi, double section_err, int section_target_bandwitdh, double group_iiratio) - { - int Q; -@@ -1192,12 +1241,14 @@ static int estimate_kf_group_q(VP8_COMP *cpi, double section_err, int section_ta - - double combined_correction_factor; - -- // Trap special case where the target is <= 0 -+ /* Trap special case where the target is <= 0 */ - if (target_norm_bits_per_mb <= 0) - return MAXQ * 2; - -- // Calculate a corrective factor based on a rolling ratio of bits spent vs target bits -- // This is clamped to the range 0.1 to 10.0 -+ /* Calculate a corrective factor based on a rolling ratio of bits spent -+ * vs target bits -+ * This is clamped to the range 0.1 to 10.0 -+ */ - if (cpi->long_rolling_target_bits <= 0) - current_spend_ratio = 10.0; - else -@@ -1206,14 +1257,19 @@ static int estimate_kf_group_q(VP8_COMP *cpi, double section_err, int section_ta - current_spend_ratio = (current_spend_ratio > 10.0) ? 10.0 : (current_spend_ratio < 0.1) ? 0.1 : current_spend_ratio; - } - -- // Calculate a correction factor based on the quality of prediction in the sequence as indicated by intra_inter error score ratio (IIRatio) -- // The idea here is to favour subsampling in the hardest sections vs the easyest. -+ /* Calculate a correction factor based on the quality of prediction in -+ * the sequence as indicated by intra_inter error score ratio (IIRatio) -+ * The idea here is to favour subsampling in the hardest sections vs -+ * the easyest. -+ */ - iiratio_correction_factor = 1.0 - ((group_iiratio - 6.0) * 0.1); - - if (iiratio_correction_factor < 0.5) - iiratio_correction_factor = 0.5; - -- // Corrections for higher compression speed settings (reduced compression expected) -+ /* Corrections for higher compression speed settings -+ * (reduced compression expected) -+ */ - if ((cpi->compressor_speed == 3) || (cpi->compressor_speed == 1)) - { - if (cpi->oxcf.cpu_used <= 5) -@@ -1222,13 +1278,15 @@ static int estimate_kf_group_q(VP8_COMP *cpi, double section_err, int section_ta - speed_correction = 1.25; - } - -- // Combine the various factors calculated above -+ /* Combine the various factors calculated above */ - combined_correction_factor = speed_correction * iiratio_correction_factor * current_spend_ratio; - -- // Try and pick a Q that should be high enough to encode the content at the given rate. 
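/* Standalone sketch, not part of the patch above: the correction-factor
 * shape used by the estimate_*q() helpers in these hunks.  Error per MB is
 * normalised by a divisor, raised to a Q-dependent power, and clipped to
 * 0.05 .. 5.0, mirroring calc_correction_factor().  The call in main() uses
 * the (150.0, 0.40, 0.90) arguments that the max-Q estimate passes; the
 * error value itself is invented. */
#include <math.h>
#include <stdio.h>

static double correction_factor(double err_per_mb, double err_divisor,
                                double pt_low, double pt_high, int Q)
{
    double error_term = err_per_mb / err_divisor;
    double power_term = pt_low + (Q * 0.01);
    double factor;

    if (power_term > pt_high)
        power_term = pt_high;

    factor = pow(error_term, power_term);

    /* Clip to the same range as the original. */
    if (factor < 0.05)
        factor = 0.05;
    else if (factor > 5.0)
        factor = 5.0;

    return factor;
}

int main(void)
{
    printf("correction factor: %f\n",
           correction_factor(300.0, 150.0, 0.40, 0.90, 32));
    return 0;
}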
-+ /* Try and pick a Q that should be high enough to encode the content at -+ * the given rate. -+ */ - for (Q = 0; Q < MAXQ; Q++) - { -- // Error per MB based correction factor -+ /* Error per MB based correction factor */ - err_correction_factor = - calc_correction_factor(err_per_mb, 150.0, pow_lowq, pow_highq, Q); - -@@ -1241,7 +1299,9 @@ static int estimate_kf_group_q(VP8_COMP *cpi, double section_err, int section_ta - break; - } - -- // If we could not hit the target even at Max Q then estimate what Q would have bee required -+ /* If we could not hit the target even at Max Q then estimate what Q -+ * would have been required -+ */ - while ((bits_per_mb_at_this_q > target_norm_bits_per_mb) && (Q < (MAXQ * 2))) - { - -@@ -1280,30 +1340,34 @@ void vp8_init_second_pass(VP8_COMP *cpi) - cpi->twopass.total_stats = *cpi->twopass.stats_in_end; - cpi->twopass.total_left_stats = cpi->twopass.total_stats; - -- // each frame can have a different duration, as the frame rate in the source -- // isn't guaranteed to be constant. The frame rate prior to the first frame -- // encoded in the second pass is a guess. However the sum duration is not. -- // Its calculated based on the actual durations of all frames from the first -- // pass. -+ /* each frame can have a different duration, as the frame rate in the -+ * source isn't guaranteed to be constant. The frame rate prior to -+ * the first frame encoded in the second pass is a guess. However the -+ * sum duration is not. Its calculated based on the actual durations of -+ * all frames from the first pass. -+ */ - vp8_new_frame_rate(cpi, 10000000.0 * cpi->twopass.total_stats.count / cpi->twopass.total_stats.duration); - - cpi->output_frame_rate = cpi->frame_rate; - cpi->twopass.bits_left = (int64_t)(cpi->twopass.total_stats.duration * cpi->oxcf.target_bandwidth / 10000000.0) ; - cpi->twopass.bits_left -= (int64_t)(cpi->twopass.total_stats.duration * two_pass_min_rate / 10000000.0); - -- // Calculate a minimum intra value to be used in determining the IIratio -- // scores used in the second pass. We have this minimum to make sure -- // that clips that are static but "low complexity" in the intra domain -- // are still boosted appropriately for KF/GF/ARF -+ /* Calculate a minimum intra value to be used in determining the IIratio -+ * scores used in the second pass. 
We have this minimum to make sure -+ * that clips that are static but "low complexity" in the intra domain -+ * are still boosted appropriately for KF/GF/ARF -+ */ - cpi->twopass.kf_intra_err_min = KF_MB_INTRA_MIN * cpi->common.MBs; - cpi->twopass.gf_intra_err_min = GF_MB_INTRA_MIN * cpi->common.MBs; - -- // Scan the first pass file and calculate an average Intra / Inter error score ratio for the sequence -+ /* Scan the first pass file and calculate an average Intra / Inter error -+ * score ratio for the sequence -+ */ - { - double sum_iiratio = 0.0; - double IIRatio; - -- start_pos = cpi->twopass.stats_in; // Note starting "file" position -+ start_pos = cpi->twopass.stats_in; /* Note starting "file" position */ - - while (input_stats(cpi, &this_frame) != EOF) - { -@@ -1314,14 +1378,15 @@ void vp8_init_second_pass(VP8_COMP *cpi) - - cpi->twopass.avg_iiratio = sum_iiratio / DOUBLE_DIVIDE_CHECK((double)cpi->twopass.total_stats.count); - -- // Reset file position -+ /* Reset file position */ - reset_fpf_position(cpi, start_pos); - } - -- // Scan the first pass file and calculate a modified total error based upon the bias/power function -- // used to allocate bits -+ /* Scan the first pass file and calculate a modified total error based -+ * upon the bias/power function used to allocate bits -+ */ - { -- start_pos = cpi->twopass.stats_in; // Note starting "file" position -+ start_pos = cpi->twopass.stats_in; /* Note starting "file" position */ - - cpi->twopass.modified_error_total = 0.0; - cpi->twopass.modified_error_used = 0.0; -@@ -1332,7 +1397,7 @@ void vp8_init_second_pass(VP8_COMP *cpi) - } - cpi->twopass.modified_error_left = cpi->twopass.modified_error_total; - -- reset_fpf_position(cpi, start_pos); // Reset file position -+ reset_fpf_position(cpi, start_pos); /* Reset file position */ - - } - } -@@ -1341,23 +1406,24 @@ void vp8_end_second_pass(VP8_COMP *cpi) - { - } - --// This function gives and estimate of how badly we believe --// the prediction quality is decaying from frame to frame. -+/* This function gives and estimate of how badly we believe the prediction -+ * quality is decaying from frame to frame. -+ */ - static double get_prediction_decay_rate(VP8_COMP *cpi, FIRSTPASS_STATS *next_frame) - { - double prediction_decay_rate; - double motion_decay; - double motion_pct = next_frame->pcnt_motion; - -- // Initial basis is the % mbs inter coded -+ /* Initial basis is the % mbs inter coded */ - prediction_decay_rate = next_frame->pcnt_inter; - -- // High % motion -> somewhat higher decay rate -+ /* High % motion -> somewhat higher decay rate */ - motion_decay = (1.0 - (motion_pct / 20.0)); - if (motion_decay < prediction_decay_rate) - prediction_decay_rate = motion_decay; - -- // Adjustment to decay rate based on speed of motion -+ /* Adjustment to decay rate based on speed of motion */ - { - double this_mv_rabs; - double this_mv_cabs; -@@ -1377,9 +1443,10 @@ static double get_prediction_decay_rate(VP8_COMP *cpi, FIRSTPASS_STATS *next_fra - return prediction_decay_rate; - } - --// Function to test for a condition where a complex transition is followed --// by a static section. For example in slide shows where there is a fade --// between slides. This is to help with more optimal kf and gf positioning. -+/* Function to test for a condition where a complex transition is followed -+ * by a static section. For example in slide shows where there is a fade -+ * between slides. This is to help with more optimal kf and gf positioning. 
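/* Standalone sketch, not part of the patch above: the decay-rate idea from
 * the get_prediction_decay_rate() hunk.  The rate starts from the fraction
 * of inter-coded macroblocks and is pulled down further when a large share
 * of blocks carry motion; the speed-of-motion adjustment in the real
 * function is omitted here.  Input percentages are invented. */
#include <stdio.h>

static double prediction_decay_rate(double pcnt_inter, double pcnt_motion)
{
    double decay = pcnt_inter;                 /* % of MBs inter coded */
    double motion_decay = 1.0 - (pcnt_motion / 20.0);

    /* High % motion -> somewhat higher decay rate. */
    if (motion_decay < decay)
        decay = motion_decay;

    return decay;
}

int main(void)
{
    printf("decay rate: %f\n", prediction_decay_rate(0.97, 0.40));
    return 0;
}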
-+ */ - static int detect_transition_to_still( - VP8_COMP *cpi, - int frame_interval, -@@ -1389,9 +1456,10 @@ static int detect_transition_to_still( - { - int trans_to_still = 0; - -- // Break clause to detect very still sections after motion -- // For example a static image after a fade or other transition -- // instead of a clean scene cut. -+ /* Break clause to detect very still sections after motion -+ * For example a static image after a fade or other transition -+ * instead of a clean scene cut. -+ */ - if ( (frame_interval > MIN_GF_INTERVAL) && - (loop_decay_rate >= 0.999) && - (decay_accumulator < 0.9) ) -@@ -1401,8 +1469,7 @@ static int detect_transition_to_still( - FIRSTPASS_STATS tmp_next_frame; - double decay_rate; - -- // Look ahead a few frames to see if static condition -- // persists... -+ /* Look ahead a few frames to see if static condition persists... */ - for ( j = 0; j < still_interval; j++ ) - { - if (EOF == input_stats(cpi, &tmp_next_frame)) -@@ -1412,10 +1479,10 @@ static int detect_transition_to_still( - if ( decay_rate < 0.999 ) - break; - } -- // Reset file position -+ /* Reset file position */ - reset_fpf_position(cpi, position); - -- // Only if it does do we signal a transition to still -+ /* Only if it does do we signal a transition to still */ - if ( j == still_interval ) - trans_to_still = 1; - } -@@ -1423,24 +1490,26 @@ static int detect_transition_to_still( - return trans_to_still; - } - --// This function detects a flash through the high relative pcnt_second_ref --// score in the frame following a flash frame. The offset passed in should --// reflect this -+/* This function detects a flash through the high relative pcnt_second_ref -+ * score in the frame following a flash frame. The offset passed in should -+ * reflect this -+ */ - static int detect_flash( VP8_COMP *cpi, int offset ) - { - FIRSTPASS_STATS next_frame; - - int flash_detected = 0; - -- // Read the frame data. -- // The return is 0 (no flash detected) if not a valid frame -+ /* Read the frame data. */ -+ /* The return is 0 (no flash detected) if not a valid frame */ - if ( read_frame_stats(cpi, &next_frame, offset) != EOF ) - { -- // What we are looking for here is a situation where there is a -- // brief break in prediction (such as a flash) but subsequent frames -- // are reasonably well predicted by an earlier (pre flash) frame. -- // The recovery after a flash is indicated by a high pcnt_second_ref -- // comapred to pcnt_inter. -+ /* What we are looking for here is a situation where there is a -+ * brief break in prediction (such as a flash) but subsequent frames -+ * are reasonably well predicted by an earlier (pre flash) frame. -+ * The recovery after a flash is indicated by a high pcnt_second_ref -+ * comapred to pcnt_inter. -+ */ - if ( (next_frame.pcnt_second_ref > next_frame.pcnt_inter) && - (next_frame.pcnt_second_ref >= 0.5 ) ) - { -@@ -1461,7 +1530,7 @@ static int detect_flash( VP8_COMP *cpi, int offset ) - return flash_detected; - } - --// Update the motion related elements to the GF arf boost calculation -+/* Update the motion related elements to the GF arf boost calculation */ - static void accumulate_frame_motion_stats( - VP8_COMP *cpi, - FIRSTPASS_STATS * this_frame, -@@ -1470,22 +1539,22 @@ static void accumulate_frame_motion_stats( - double * abs_mv_in_out_accumulator, - double * mv_ratio_accumulator ) - { -- //double this_frame_mv_in_out; - double this_frame_mvr_ratio; - double this_frame_mvc_ratio; - double motion_pct; - -- // Accumulate motion stats. 
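/* Standalone sketch, not part of the patch above: the flash test described
 * in the detect_flash() hunk.  A flash is inferred when the frame after the
 * suspected flash is predicted much better from the older (second) reference
 * than from the immediately preceding frame.  The struct is a stand-in for
 * the two FIRSTPASS_STATS fields the check uses. */
#include <stdio.h>

struct frame_stats {
    double pcnt_inter;       /* % of MBs inter coded from the last frame */
    double pcnt_second_ref;  /* % of MBs better predicted from the older ref */
};

static int looks_like_flash(const struct frame_stats *next_frame)
{
    return (next_frame->pcnt_second_ref > next_frame->pcnt_inter) &&
           (next_frame->pcnt_second_ref >= 0.5);
}

int main(void)
{
    struct frame_stats after_flash = { 0.30, 0.85 };  /* invented sample */
    printf("flash detected: %d\n", looks_like_flash(&after_flash));
    return 0;
}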
-+ /* Accumulate motion stats. */ - motion_pct = this_frame->pcnt_motion; - -- // Accumulate Motion In/Out of frame stats -+ /* Accumulate Motion In/Out of frame stats */ - *this_frame_mv_in_out = this_frame->mv_in_out_count * motion_pct; - *mv_in_out_accumulator += this_frame->mv_in_out_count * motion_pct; - *abs_mv_in_out_accumulator += - fabs(this_frame->mv_in_out_count * motion_pct); - -- // Accumulate a measure of how uniform (or conversely how random) -- // the motion field is. (A ratio of absmv / mv) -+ /* Accumulate a measure of how uniform (or conversely how random) -+ * the motion field is. (A ratio of absmv / mv) -+ */ - if (motion_pct > 0.05) - { - this_frame_mvr_ratio = fabs(this_frame->mvr_abs) / -@@ -1507,7 +1576,7 @@ static void accumulate_frame_motion_stats( - } - } - --// Calculate a baseline boost number for the current frame. -+/* Calculate a baseline boost number for the current frame. */ - static double calc_frame_boost( - VP8_COMP *cpi, - FIRSTPASS_STATS * this_frame, -@@ -1515,7 +1584,7 @@ static double calc_frame_boost( - { - double frame_boost; - -- // Underlying boost factor is based on inter intra error ratio -+ /* Underlying boost factor is based on inter intra error ratio */ - if (this_frame->intra_error > cpi->twopass.gf_intra_err_min) - frame_boost = (IIFACTOR * this_frame->intra_error / - DOUBLE_DIVIDE_CHECK(this_frame->coded_error)); -@@ -1523,17 +1592,18 @@ static double calc_frame_boost( - frame_boost = (IIFACTOR * cpi->twopass.gf_intra_err_min / - DOUBLE_DIVIDE_CHECK(this_frame->coded_error)); - -- // Increase boost for frames where new data coming into frame -- // (eg zoom out). Slightly reduce boost if there is a net balance -- // of motion out of the frame (zoom in). -- // The range for this_frame_mv_in_out is -1.0 to +1.0 -+ /* Increase boost for frames where new data coming into frame -+ * (eg zoom out). Slightly reduce boost if there is a net balance -+ * of motion out of the frame (zoom in). -+ * The range for this_frame_mv_in_out is -1.0 to +1.0 -+ */ - if (this_frame_mv_in_out > 0.0) - frame_boost += frame_boost * (this_frame_mv_in_out * 2.0); -- // In extreme case boost is halved -+ /* In extreme case boost is halved */ - else - frame_boost += frame_boost * (this_frame_mv_in_out / 2.0); - -- // Clip to maximum -+ /* Clip to maximum */ - if (frame_boost > GF_RMAX) - frame_boost = GF_RMAX; - -@@ -1561,26 +1631,27 @@ static int calc_arf_boost( - double r; - int flash_detected = 0; - -- // Search forward from the proposed arf/next gf position -+ /* Search forward from the proposed arf/next gf position */ - for ( i = 0; i < f_frames; i++ ) - { - if ( read_frame_stats(cpi, &this_frame, (i+offset)) == EOF ) - break; - -- // Update the motion related elements to the boost calculation -+ /* Update the motion related elements to the boost calculation */ - accumulate_frame_motion_stats( cpi, &this_frame, - &this_frame_mv_in_out, &mv_in_out_accumulator, - &abs_mv_in_out_accumulator, &mv_ratio_accumulator ); - -- // Calculate the baseline boost number for this frame -+ /* Calculate the baseline boost number for this frame */ - r = calc_frame_boost( cpi, &this_frame, this_frame_mv_in_out ); - -- // We want to discount the the flash frame itself and the recovery -- // frame that follows as both will have poor scores. -+ /* We want to discount the the flash frame itself and the recovery -+ * frame that follows as both will have poor scores. 
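/* Standalone sketch, not part of the patch above: the per-frame boost
 * calculation outlined in the calc_frame_boost() hunk.  The boost grows with
 * the intra/inter error ratio, is nudged up when motion brings new content
 * into the frame (zoom out) and down when motion leaves it (zoom in), then
 * clipped.  IIFACTOR_GUESS and BOOST_MAX_GUESS are placeholders, not the
 * real libvpx constants, and the small epsilon stands in for
 * DOUBLE_DIVIDE_CHECK(). */
#include <stdio.h>

#define IIFACTOR_GUESS   12.5
#define BOOST_MAX_GUESS  128.0

static double frame_boost(double intra_error, double coded_error,
                          double intra_err_min, double mv_in_out)
{
    double ii = (intra_error > intra_err_min) ? intra_error : intra_err_min;
    double boost = IIFACTOR_GUESS * ii / (coded_error + 0.000001);

    if (mv_in_out > 0.0)
        boost += boost * (mv_in_out * 2.0);   /* new content entering frame */
    else
        boost += boost * (mv_in_out / 2.0);   /* content leaving the frame */

    if (boost > BOOST_MAX_GUESS)
        boost = BOOST_MAX_GUESS;

    return boost;
}

int main(void)
{
    printf("boost: %f\n", frame_boost(4000.0, 900.0, 1500.0, 0.1));
    return 0;
}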
-+ */ - flash_detected = detect_flash(cpi, (i+offset)) || - detect_flash(cpi, (i+offset+1)); - -- // Cumulative effect of prediction quality decay -+ /* Cumulative effect of prediction quality decay */ - if ( !flash_detected ) - { - decay_accumulator = -@@ -1591,7 +1662,7 @@ static int calc_arf_boost( - } - boost_score += (decay_accumulator * r); - -- // Break out conditions. -+ /* Break out conditions. */ - if ( (!flash_detected) && - ((mv_ratio_accumulator > 100.0) || - (abs_mv_in_out_accumulator > 3.0) || -@@ -1603,7 +1674,7 @@ static int calc_arf_boost( - - *f_boost = (int)(boost_score * 100.0) >> 4; - -- // Reset for backward looking loop -+ /* Reset for backward looking loop */ - boost_score = 0.0; - mv_ratio_accumulator = 0.0; - decay_accumulator = 1.0; -@@ -1611,26 +1682,27 @@ static int calc_arf_boost( - mv_in_out_accumulator = 0.0; - abs_mv_in_out_accumulator = 0.0; - -- // Search forward from the proposed arf/next gf position -+ /* Search forward from the proposed arf/next gf position */ - for ( i = -1; i >= -b_frames; i-- ) - { - if ( read_frame_stats(cpi, &this_frame, (i+offset)) == EOF ) - break; - -- // Update the motion related elements to the boost calculation -+ /* Update the motion related elements to the boost calculation */ - accumulate_frame_motion_stats( cpi, &this_frame, - &this_frame_mv_in_out, &mv_in_out_accumulator, - &abs_mv_in_out_accumulator, &mv_ratio_accumulator ); - -- // Calculate the baseline boost number for this frame -+ /* Calculate the baseline boost number for this frame */ - r = calc_frame_boost( cpi, &this_frame, this_frame_mv_in_out ); - -- // We want to discount the the flash frame itself and the recovery -- // frame that follows as both will have poor scores. -+ /* We want to discount the the flash frame itself and the recovery -+ * frame that follows as both will have poor scores. -+ */ - flash_detected = detect_flash(cpi, (i+offset)) || - detect_flash(cpi, (i+offset+1)); - -- // Cumulative effect of prediction quality decay -+ /* Cumulative effect of prediction quality decay */ - if ( !flash_detected ) - { - decay_accumulator = -@@ -1642,7 +1714,7 @@ static int calc_arf_boost( - - boost_score += (decay_accumulator * r); - -- // Break out conditions. -+ /* Break out conditions. */ - if ( (!flash_detected) && - ((mv_ratio_accumulator > 100.0) || - (abs_mv_in_out_accumulator > 3.0) || -@@ -1657,7 +1729,7 @@ static int calc_arf_boost( - } - #endif - --// Analyse and define a gf/arf group . -+/* Analyse and define a gf/arf group . 
*/ - static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - { - FIRSTPASS_STATS next_frame; -@@ -1673,14 +1745,14 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - double mv_ratio_accumulator = 0.0; - double decay_accumulator = 1.0; - -- double loop_decay_rate = 1.00; // Starting decay rate -+ double loop_decay_rate = 1.00; /* Starting decay rate */ - - double this_frame_mv_in_out = 0.0; - double mv_in_out_accumulator = 0.0; - double abs_mv_in_out_accumulator = 0.0; - double mod_err_per_mb_accumulator = 0.0; - -- int max_bits = frame_max_bits(cpi); // Max for a single frame -+ int max_bits = frame_max_bits(cpi); /* Max for a single frame */ - - unsigned int allow_alt_ref = - cpi->oxcf.play_alternate && cpi->oxcf.lag_in_frames; -@@ -1693,37 +1765,40 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - cpi->twopass.gf_group_bits = 0; - cpi->twopass.gf_decay_rate = 0; - -- vp8_clear_system_state(); //__asm emms; -+ vp8_clear_system_state(); - - start_pos = cpi->twopass.stats_in; - -- vpx_memset(&next_frame, 0, sizeof(next_frame)); // assure clean -+ vpx_memset(&next_frame, 0, sizeof(next_frame)); /* assure clean */ - -- // Load stats for the current frame. -+ /* Load stats for the current frame. */ - mod_frame_err = calculate_modified_err(cpi, this_frame); - -- // Note the error of the frame at the start of the group (this will be -- // the GF frame error if we code a normal gf -+ /* Note the error of the frame at the start of the group (this will be -+ * the GF frame error if we code a normal gf -+ */ - gf_first_frame_err = mod_frame_err; - -- // Special treatment if the current frame is a key frame (which is also -- // a gf). If it is then its error score (and hence bit allocation) need -- // to be subtracted out from the calculation for the GF group -+ /* Special treatment if the current frame is a key frame (which is also -+ * a gf). If it is then its error score (and hence bit allocation) need -+ * to be subtracted out from the calculation for the GF group -+ */ - if (cpi->common.frame_type == KEY_FRAME) - gf_group_err -= gf_first_frame_err; - -- // Scan forward to try and work out how many frames the next gf group -- // should contain and what level of boost is appropriate for the GF -- // or ARF that will be coded with the group -+ /* Scan forward to try and work out how many frames the next gf group -+ * should contain and what level of boost is appropriate for the GF -+ * or ARF that will be coded with the group -+ */ - i = 0; - - while (((i < cpi->twopass.static_scene_max_gf_interval) || - ((cpi->twopass.frames_to_key - i) < MIN_GF_INTERVAL)) && - (i < cpi->twopass.frames_to_key)) - { -- i++; // Increment the loop counter -+ i++; - -- // Accumulate error score of frames in this gf group -+ /* Accumulate error score of frames in this gf group */ - mod_frame_err = calculate_modified_err(cpi, this_frame); - - gf_group_err += mod_frame_err; -@@ -1734,19 +1809,20 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - if (EOF == input_stats(cpi, &next_frame)) - break; - -- // Test for the case where there is a brief flash but the prediction -- // quality back to an earlier frame is then restored. -+ /* Test for the case where there is a brief flash but the prediction -+ * quality back to an earlier frame is then restored. 
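/* Standalone sketch, not part of the patch above: the accumulation pattern
 * used when sizing a GF/ARF group in these hunks.  Each frame contributes
 * its boost scaled by the product of the prediction decay rates seen so far,
 * and frames flagged as flashes do not advance the decay.  The per-frame
 * arrays are invented sample data. */
#include <stdio.h>

int main(void)
{
    const double r[]     = { 20.0, 18.0, 15.0, 40.0, 14.0 };  /* frame boost */
    const double decay[] = { 0.99, 0.98, 0.97, 0.50, 0.96 };  /* decay rate */
    const int    flash[] = { 0,    0,    0,    1,    0    };  /* flash flag */
    const int n = 5;

    double decay_accumulator = 1.0;
    double boost_score = 0.0;
    int i;

    for (i = 0; i < n; i++)
    {
        if (!flash[i])
            decay_accumulator *= decay[i];   /* cumulative prediction decay */
        boost_score += decay_accumulator * r[i];
    }

    printf("boost_score = %f\n", boost_score);
    return 0;
}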
-+ */ - flash_detected = detect_flash(cpi, 0); - -- // Update the motion related elements to the boost calculation -+ /* Update the motion related elements to the boost calculation */ - accumulate_frame_motion_stats( cpi, &next_frame, - &this_frame_mv_in_out, &mv_in_out_accumulator, - &abs_mv_in_out_accumulator, &mv_ratio_accumulator ); - -- // Calculate a baseline boost number for this frame -+ /* Calculate a baseline boost number for this frame */ - r = calc_frame_boost( cpi, &next_frame, this_frame_mv_in_out ); - -- // Cumulative effect of prediction quality decay -+ /* Cumulative effect of prediction quality decay */ - if ( !flash_detected ) - { - loop_decay_rate = get_prediction_decay_rate(cpi, &next_frame); -@@ -1756,8 +1832,9 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - } - boost_score += (decay_accumulator * r); - -- // Break clause to detect very still sections after motion -- // For example a staic image after a fade or other transition. -+ /* Break clause to detect very still sections after motion -+ * For example a staic image after a fade or other transition. -+ */ - if ( detect_transition_to_still( cpi, i, 5, - loop_decay_rate, - decay_accumulator ) ) -@@ -1767,14 +1844,14 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - break; - } - -- // Break out conditions. -+ /* Break out conditions. */ - if ( -- // Break at cpi->max_gf_interval unless almost totally static -+ /* Break at cpi->max_gf_interval unless almost totally static */ - (i >= cpi->max_gf_interval && (decay_accumulator < 0.995)) || - ( -- // Dont break out with a very short interval -+ /* Dont break out with a very short interval */ - (i > MIN_GF_INTERVAL) && -- // Dont break out very close to a key frame -+ /* Dont break out very close to a key frame */ - ((cpi->twopass.frames_to_key - i) >= MIN_GF_INTERVAL) && - ((boost_score > 20.0) || (next_frame.pcnt_inter < 0.75)) && - (!flash_detected) && -@@ -1796,15 +1873,15 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - cpi->twopass.gf_decay_rate = - (i > 0) ? 
(int)(100.0 * (1.0 - decay_accumulator)) / i : 0; - -- // When using CBR apply additional buffer related upper limits -+ /* When using CBR apply additional buffer related upper limits */ - if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) - { - double max_boost; - -- // For cbr apply buffer related limits -+ /* For cbr apply buffer related limits */ - if (cpi->drop_frames_allowed) - { -- int df_buffer_level = cpi->oxcf.drop_frames_water_mark * -+ int64_t df_buffer_level = cpi->oxcf.drop_frames_water_mark * - (cpi->oxcf.optimal_buffer_level / 100); - - if (cpi->buffer_level > df_buffer_level) -@@ -1825,7 +1902,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - boost_score = max_boost; - } - -- // Dont allow conventional gf too near the next kf -+ /* Dont allow conventional gf too near the next kf */ - if ((cpi->twopass.frames_to_key - i) < MIN_GF_INTERVAL) - { - while (i < cpi->twopass.frames_to_key) -@@ -1846,14 +1923,14 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - cpi->gfu_boost = (int)(boost_score * 100.0) >> 4; - - #if NEW_BOOST -- // Alterrnative boost calculation for alt ref -+ /* Alterrnative boost calculation for alt ref */ - alt_boost = calc_arf_boost( cpi, 0, (i-1), (i-1), &f_boost, &b_boost ); - #endif - -- // Should we use the alternate refernce frame -+ /* Should we use the alternate refernce frame */ - if (allow_alt_ref && - (i >= MIN_GF_INTERVAL) && -- // dont use ARF very near next kf -+ /* dont use ARF very near next kf */ - (i <= (cpi->twopass.frames_to_key - MIN_GF_INTERVAL)) && - #if NEW_BOOST - ((next_frame.pcnt_inter > 0.75) || -@@ -1883,7 +1960,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - cpi->gfu_boost = alt_boost; - #endif - -- // Estimate the bits to be allocated to the group as a whole -+ /* Estimate the bits to be allocated to the group as a whole */ - if ((cpi->twopass.kf_group_bits > 0) && - (cpi->twopass.kf_group_error_left > 0)) - { -@@ -1893,7 +1970,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - else - group_bits = 0; - -- // Boost for arf frame -+ /* Boost for arf frame */ - #if NEW_BOOST - Boost = (alt_boost * GFQ_ADJUSTMENT) / 100; - #else -@@ -1901,7 +1978,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - #endif - Boost += (i * 50); - -- // Set max and minimum boost and hence minimum allocation -+ /* Set max and minimum boost and hence minimum allocation */ - if (Boost > ((cpi->baseline_gf_interval + 1) * 200)) - Boost = ((cpi->baseline_gf_interval + 1) * 200); - else if (Boost < 125) -@@ -1909,24 +1986,27 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - - allocation_chunks = (i * 100) + Boost; - -- // Normalize Altboost and allocations chunck down to prevent overflow -+ /* Normalize Altboost and allocations chunck down to prevent overflow */ - while (Boost > 1000) - { - Boost /= 2; - allocation_chunks /= 2; - } - -- // Calculate the number of bits to be spent on the arf based on the -- // boost number -+ /* Calculate the number of bits to be spent on the arf based on the -+ * boost number -+ */ - arf_frame_bits = (int)((double)Boost * (group_bits / - (double)allocation_chunks)); - -- // Estimate if there are enough bits available to make worthwhile use -- // of an arf. -+ /* Estimate if there are enough bits available to make worthwhile use -+ * of an arf. 
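/* Standalone sketch, not part of the patch above: the share-based allocation
 * from these hunks.  The group's bits are split into "allocation chunks"
 * (100 per frame plus the boost), both values are halved together while the
 * boost is large to avoid overflow, and the ARF then receives Boost chunks'
 * worth of bits.  All numbers are invented. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
    int interval = 12;                 /* frames in the gf/arf group */
    int Boost = 2400;                  /* boost after the usual clamping */
    int64_t group_bits = 900000;       /* bits available to the group */
    int allocation_chunks = (interval * 100) + Boost;
    int arf_bits;

    /* Normalise boost and chunks down together to prevent overflow. */
    while (Boost > 1000)
    {
        Boost /= 2;
        allocation_chunks /= 2;
    }

    arf_bits = (int)((double)Boost *
                     ((double)group_bits / (double)allocation_chunks));

    printf("arf gets %d of %lld bits\n", arf_bits, (long long)group_bits);
    return 0;
}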
-+ */ - tmp_q = estimate_q(cpi, mod_frame_err, (int)arf_frame_bits); - -- // Only use an arf if it is likely we will be able to code -- // it at a lower Q than the surrounding frames. -+ /* Only use an arf if it is likely we will be able to code -+ * it at a lower Q than the surrounding frames. -+ */ - if (tmp_q < cpi->worst_quality) - { - int half_gf_int; -@@ -1936,42 +2016,46 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - - cpi->source_alt_ref_pending = 1; - -- // For alt ref frames the error score for the end frame of the -- // group (the alt ref frame) should not contribute to the group -- // total and hence the number of bit allocated to the group. -- // Rather it forms part of the next group (it is the GF at the -- // start of the next group) -- // gf_group_err -= mod_frame_err; -- -- // For alt ref frames alt ref frame is technically part of the -- // GF frame for the next group but we always base the error -- // calculation and bit allocation on the current group of frames. -- -- // Set the interval till the next gf or arf. -- // For ARFs this is the number of frames to be coded before the -- // future frame that is coded as an ARF. -- // The future frame itself is part of the next group -+ /* -+ * For alt ref frames the error score for the end frame of the -+ * group (the alt ref frame) should not contribute to the group -+ * total and hence the number of bit allocated to the group. -+ * Rather it forms part of the next group (it is the GF at the -+ * start of the next group) -+ * gf_group_err -= mod_frame_err; -+ * -+ * For alt ref frames alt ref frame is technically part of the -+ * GF frame for the next group but we always base the error -+ * calculation and bit allocation on the current group of frames. -+ * -+ * Set the interval till the next gf or arf. -+ * For ARFs this is the number of frames to be coded before the -+ * future frame that is coded as an ARF. -+ * The future frame itself is part of the next group -+ */ - cpi->baseline_gf_interval = i; - -- // Define the arnr filter width for this group of frames: -- // We only filter frames that lie within a distance of half -- // the GF interval from the ARF frame. We also have to trap -- // cases where the filter extends beyond the end of clip. -- // Note: this_frame->frame has been updated in the loop -- // so it now points at the ARF frame. -+ /* -+ * Define the arnr filter width for this group of frames: -+ * We only filter frames that lie within a distance of half -+ * the GF interval from the ARF frame. We also have to trap -+ * cases where the filter extends beyond the end of clip. -+ * Note: this_frame->frame has been updated in the loop -+ * so it now points at the ARF frame. 
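/* Standalone sketch, not part of the patch above: the temporal-filter width
 * selection discussed here, for the centred (default) case.  The forward
 * half is capped by the frames actually left after the ARF and by half the
 * GF interval, and an even filter length puts the extra frame on the
 * backward side (len=6 ==> bbbAff, len=7 ==> bbbAfff).  The hunks do not
 * show how frames_fwd is initialised, so starting it at arnr_max_frames - 1
 * is an assumption made for this sketch; the input values are invented. */
#include <stdio.h>

int main(void)
{
    int arnr_max_frames = 6;
    int baseline_gf_interval = 10;
    int frames_after_arf = 30;         /* frames left in the clip after ARF */

    int half_gf_int = baseline_gf_interval >> 1;
    int frames_fwd = arnr_max_frames - 1;   /* assumed starting value */
    int frames_bwd;

    /* Centred filter: split the length either side of the ARF. */
    frames_fwd >>= 1;
    if (frames_fwd > frames_after_arf)
        frames_fwd = frames_after_arf;
    if (frames_fwd > half_gf_int)
        frames_fwd = half_gf_int;

    frames_bwd = frames_fwd;
    /* Even filter lengths get the extra frame on the backward side. */
    if (frames_bwd < half_gf_int)
        frames_bwd += (arnr_max_frames + 1) & 0x1;

    printf("filter: %d back, ARF, %d forward\n", frames_bwd, frames_fwd);
    return 0;
}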
-+ */ - half_gf_int = cpi->baseline_gf_interval >> 1; -- frames_after_arf = cpi->twopass.total_stats.count - -- this_frame->frame - 1; -+ frames_after_arf = (int)(cpi->twopass.total_stats.count - -+ this_frame->frame - 1); - - switch (cpi->oxcf.arnr_type) - { -- case 1: // Backward filter -+ case 1: /* Backward filter */ - frames_fwd = 0; - if (frames_bwd > half_gf_int) - frames_bwd = half_gf_int; - break; - -- case 2: // Forward filter -+ case 2: /* Forward filter */ - if (frames_fwd > half_gf_int) - frames_fwd = half_gf_int; - if (frames_fwd > frames_after_arf) -@@ -1979,7 +2063,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - frames_bwd = 0; - break; - -- case 3: // Centered filter -+ case 3: /* Centered filter */ - default: - frames_fwd >>= 1; - if (frames_fwd > frames_after_arf) -@@ -1989,8 +2073,9 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - - frames_bwd = frames_fwd; - -- // For even length filter there is one more frame backward -- // than forward: e.g. len=6 ==> bbbAff, len=7 ==> bbbAfff. -+ /* For even length filter there is one more frame backward -+ * than forward: e.g. len=6 ==> bbbAff, len=7 ==> bbbAfff. -+ */ - if (frames_bwd < half_gf_int) - frames_bwd += (cpi->oxcf.arnr_max_frames+1) & 0x1; - break; -@@ -2010,12 +2095,14 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - cpi->baseline_gf_interval = i; - } - -- // Now decide how many bits should be allocated to the GF group as a -- // proportion of those remaining in the kf group. -- // The final key frame group in the clip is treated as a special case -- // where cpi->twopass.kf_group_bits is tied to cpi->twopass.bits_left. -- // This is also important for short clips where there may only be one -- // key frame. -+ /* -+ * Now decide how many bits should be allocated to the GF group as a -+ * proportion of those remaining in the kf group. -+ * The final key frame group in the clip is treated as a special case -+ * where cpi->twopass.kf_group_bits is tied to cpi->twopass.bits_left. -+ * This is also important for short clips where there may only be one -+ * key frame. -+ */ - if (cpi->twopass.frames_to_key >= (int)(cpi->twopass.total_stats.count - - cpi->common.current_video_frame)) - { -@@ -2023,7 +2110,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - (cpi->twopass.bits_left > 0) ? cpi->twopass.bits_left : 0; - } - -- // Calculate the bits to be allocated to the group as a whole -+ /* Calculate the bits to be allocated to the group as a whole */ - if ((cpi->twopass.kf_group_bits > 0) && - (cpi->twopass.kf_group_error_left > 0)) - { -@@ -2034,31 +2121,32 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - else - cpi->twopass.gf_group_bits = 0; - -- cpi->twopass.gf_group_bits = -+ cpi->twopass.gf_group_bits = (int)( - (cpi->twopass.gf_group_bits < 0) - ? 0 - : (cpi->twopass.gf_group_bits > cpi->twopass.kf_group_bits) -- ? cpi->twopass.kf_group_bits : cpi->twopass.gf_group_bits; -+ ? 
cpi->twopass.kf_group_bits : cpi->twopass.gf_group_bits); - -- // Clip cpi->twopass.gf_group_bits based on user supplied data rate -- // variability limit (cpi->oxcf.two_pass_vbrmax_section) -+ /* Clip cpi->twopass.gf_group_bits based on user supplied data rate -+ * variability limit (cpi->oxcf.two_pass_vbrmax_section) -+ */ - if (cpi->twopass.gf_group_bits > max_bits * cpi->baseline_gf_interval) - cpi->twopass.gf_group_bits = max_bits * cpi->baseline_gf_interval; - -- // Reset the file position -+ /* Reset the file position */ - reset_fpf_position(cpi, start_pos); - -- // Update the record of error used so far (only done once per gf group) -+ /* Update the record of error used so far (only done once per gf group) */ - cpi->twopass.modified_error_used += gf_group_err; - -- // Assign bits to the arf or gf. -+ /* Assign bits to the arf or gf. */ - for (i = 0; i <= (cpi->source_alt_ref_pending && cpi->common.frame_type != KEY_FRAME); i++) { - int Boost; - int allocation_chunks; - int Q = (cpi->oxcf.fixed_q < 0) ? cpi->last_q[INTER_FRAME] : cpi->oxcf.fixed_q; - int gf_bits; - -- // For ARF frames -+ /* For ARF frames */ - if (cpi->source_alt_ref_pending && i == 0) - { - #if NEW_BOOST -@@ -2068,7 +2156,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - #endif - Boost += (cpi->baseline_gf_interval * 50); - -- // Set max and minimum boost and hence minimum allocation -+ /* Set max and minimum boost and hence minimum allocation */ - if (Boost > ((cpi->baseline_gf_interval + 1) * 200)) - Boost = ((cpi->baseline_gf_interval + 1) * 200); - else if (Boost < 125) -@@ -2077,13 +2165,13 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - allocation_chunks = - ((cpi->baseline_gf_interval + 1) * 100) + Boost; - } -- // Else for standard golden frames -+ /* Else for standard golden frames */ - else - { -- // boost based on inter / intra ratio of subsequent frames -+ /* boost based on inter / intra ratio of subsequent frames */ - Boost = (cpi->gfu_boost * GFQ_ADJUSTMENT) / 100; - -- // Set max and minimum boost and hence minimum allocation -+ /* Set max and minimum boost and hence minimum allocation */ - if (Boost > (cpi->baseline_gf_interval * 150)) - Boost = (cpi->baseline_gf_interval * 150); - else if (Boost < 125) -@@ -2093,22 +2181,24 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - (cpi->baseline_gf_interval * 100) + (Boost - 100); - } - -- // Normalize Altboost and allocations chunck down to prevent overflow -+ /* Normalize Altboost and allocations chunck down to prevent overflow */ - while (Boost > 1000) - { - Boost /= 2; - allocation_chunks /= 2; - } - -- // Calculate the number of bits to be spent on the gf or arf based on -- // the boost number -+ /* Calculate the number of bits to be spent on the gf or arf based on -+ * the boost number -+ */ - gf_bits = (int)((double)Boost * - (cpi->twopass.gf_group_bits / - (double)allocation_chunks)); - -- // If the frame that is to be boosted is simpler than the average for -- // the gf/arf group then use an alternative calculation -- // based on the error score of the frame itself -+ /* If the frame that is to be boosted is simpler than the average for -+ * the gf/arf group then use an alternative calculation -+ * based on the error score of the frame itself -+ */ - if (mod_frame_err < gf_group_err / (double)cpi->baseline_gf_interval) - { - double alt_gf_grp_bits; -@@ -2127,9 +2217,10 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - gf_bits = 
alt_gf_bits; - } - } -- // Else if it is harder than other frames in the group make sure it at -- // least receives an allocation in keeping with its relative error -- // score, otherwise it may be worse off than an "un-boosted" frame -+ /* Else if it is harder than other frames in the group make sure it at -+ * least receives an allocation in keeping with its relative error -+ * score, otherwise it may be worse off than an "un-boosted" frame -+ */ - else - { - int alt_gf_bits = -@@ -2143,18 +2234,19 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - } - } - -- // Apply an additional limit for CBR -+ /* Apply an additional limit for CBR */ - if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) - { -- if (cpi->twopass.gf_bits > (cpi->buffer_level >> 1)) -- cpi->twopass.gf_bits = cpi->buffer_level >> 1; -+ if (cpi->twopass.gf_bits > (int)(cpi->buffer_level >> 1)) -+ cpi->twopass.gf_bits = (int)(cpi->buffer_level >> 1); - } - -- // Dont allow a negative value for gf_bits -+ /* Dont allow a negative value for gf_bits */ - if (gf_bits < 0) - gf_bits = 0; - -- gf_bits += cpi->min_frame_bandwidth; // Add in minimum for a frame -+ /* Add in minimum for a frame */ -+ gf_bits += cpi->min_frame_bandwidth; - - if (i == 0) - { -@@ -2162,33 +2254,39 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - } - if (i == 1 || (!cpi->source_alt_ref_pending && (cpi->common.frame_type != KEY_FRAME))) - { -- cpi->per_frame_bandwidth = gf_bits; // Per frame bit target for this frame -+ /* Per frame bit target for this frame */ -+ cpi->per_frame_bandwidth = gf_bits; - } - } - - { -- // Adjust KF group bits and error remainin -- cpi->twopass.kf_group_error_left -= gf_group_err; -+ /* Adjust KF group bits and error remainin */ -+ cpi->twopass.kf_group_error_left -= (int64_t)gf_group_err; - cpi->twopass.kf_group_bits -= cpi->twopass.gf_group_bits; - - if (cpi->twopass.kf_group_bits < 0) - cpi->twopass.kf_group_bits = 0; - -- // Note the error score left in the remaining frames of the group. -- // For normal GFs we want to remove the error score for the first frame of the group (except in Key frame case where this has already happened) -+ /* Note the error score left in the remaining frames of the group. -+ * For normal GFs we want to remove the error score for the first -+ * frame of the group (except in Key frame case where this has -+ * already happened) -+ */ - if (!cpi->source_alt_ref_pending && cpi->common.frame_type != KEY_FRAME) -- cpi->twopass.gf_group_error_left = gf_group_err - gf_first_frame_err; -+ cpi->twopass.gf_group_error_left = (int)(gf_group_err - -+ gf_first_frame_err); - else -- cpi->twopass.gf_group_error_left = gf_group_err; -+ cpi->twopass.gf_group_error_left = (int) gf_group_err; - - cpi->twopass.gf_group_bits -= cpi->twopass.gf_bits - cpi->min_frame_bandwidth; - - if (cpi->twopass.gf_group_bits < 0) - cpi->twopass.gf_group_bits = 0; - -- // This condition could fail if there are two kfs very close together -- // despite (MIN_GF_INTERVAL) and would cause a devide by 0 in the -- // calculation of cpi->twopass.alt_extra_bits. -+ /* This condition could fail if there are two kfs very close together -+ * despite (MIN_GF_INTERVAL) and would cause a devide by 0 in the -+ * calculation of cpi->twopass.alt_extra_bits. 
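/* Standalone sketch, not part of the patch above: the final clamping applied
 * to a golden/ARF frame's bit target in these hunks.  Under CBR the target
 * is additionally capped at half of the current buffer level, it is never
 * allowed to go negative, and the per-frame minimum is then added back in.
 * The values passed in main() are invented. */
#include <stdio.h>
#include <stdint.h>

static int clamp_gf_bits(int gf_bits, int is_cbr, int64_t buffer_level,
                         int min_frame_bandwidth)
{
    if (is_cbr && gf_bits > (int)(buffer_level >> 1))
        gf_bits = (int)(buffer_level >> 1);   /* CBR buffer-related cap */

    if (gf_bits < 0)
        gf_bits = 0;

    /* Add in the minimum allocation every frame receives. */
    return gf_bits + min_frame_bandwidth;
}

int main(void)
{
    printf("gf target: %d bits\n", clamp_gf_bits(120000, 1, 160000, 2000));
    return 0;
}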
-+ */ - if ( cpi->baseline_gf_interval >= 3 ) - { - #if NEW_BOOST -@@ -2217,7 +2315,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - cpi->twopass.alt_extra_bits = 0; - } - -- // Adjustments based on a measure of complexity of the section -+ /* Adjustments based on a measure of complexity of the section */ - if (cpi->common.frame_type != KEY_FRAME) - { - FIRSTPASS_STATS sectionstats; -@@ -2234,47 +2332,45 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - - avg_stats(§ionstats); - -- cpi->twopass.section_intra_rating = -- sectionstats.intra_error / -- DOUBLE_DIVIDE_CHECK(sectionstats.coded_error); -+ cpi->twopass.section_intra_rating = (unsigned int) -+ (sectionstats.intra_error / -+ DOUBLE_DIVIDE_CHECK(sectionstats.coded_error)); - - Ratio = sectionstats.intra_error / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error); -- //if( (Ratio > 11) ) //&& (sectionstats.pcnt_second_ref < .20) ) -- //{ - cpi->twopass.section_max_qfactor = 1.0 - ((Ratio - 10.0) * 0.025); - - if (cpi->twopass.section_max_qfactor < 0.80) - cpi->twopass.section_max_qfactor = 0.80; - -- //} -- //else -- // cpi->twopass.section_max_qfactor = 1.0; -- - reset_fpf_position(cpi, start_pos); - } - } - --// Allocate bits to a normal frame that is neither a gf an arf or a key frame. -+/* Allocate bits to a normal frame that is neither a gf an arf or a key frame. */ - static void assign_std_frame_bits(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - { -- int target_frame_size; // gf_group_error_left -+ int target_frame_size; - - double modified_err; -- double err_fraction; // What portion of the remaining GF group error is used by this frame -+ double err_fraction; - -- int max_bits = frame_max_bits(cpi); // Max for a single frame -+ int max_bits = frame_max_bits(cpi); /* Max for a single frame */ - -- // Calculate modified prediction error used in bit allocation -+ /* Calculate modified prediction error used in bit allocation */ - modified_err = calculate_modified_err(cpi, this_frame); - -+ /* What portion of the remaining GF group error is used by this frame */ - if (cpi->twopass.gf_group_error_left > 0) -- err_fraction = modified_err / cpi->twopass.gf_group_error_left; // What portion of the remaining GF group error is used by this frame -+ err_fraction = modified_err / cpi->twopass.gf_group_error_left; - else - err_fraction = 0.0; - -- target_frame_size = (int)((double)cpi->twopass.gf_group_bits * err_fraction); // How many of those bits available for allocation should we give it? -+ /* How many of those bits available for allocation should we give it? */ -+ target_frame_size = (int)((double)cpi->twopass.gf_group_bits * err_fraction); - -- // Clip to target size to 0 - max_bits (or cpi->twopass.gf_group_bits) at the top end. -+ /* Clip to target size to 0 - max_bits (or cpi->twopass.gf_group_bits) -+ * at the top end. 
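/* Standalone sketch, not part of the patch above: the per-frame allocation
 * rule for ordinary frames described in the assign_std_frame_bits() hunks.
 * Each frame takes a share of the remaining GF-group bits in proportion to
 * its share of the remaining modified error, clipped to the per-frame
 * maximum, and the group's remaining error and bits are reduced accordingly.
 * All numbers are invented. */
#include <stdio.h>

int main(void)
{
    double modified_err = 1200.0;        /* this frame's modified error */
    double gf_group_error_left = 9600.0; /* error left in the group */
    int gf_group_bits = 480000;          /* bits left for the group */
    int max_bits = 90000;                /* per-frame maximum */
    int min_frame_bandwidth = 2000;

    double err_fraction =
        (gf_group_error_left > 0.0) ? modified_err / gf_group_error_left
                                    : 0.0;
    int target = (int)((double)gf_group_bits * err_fraction);

    /* Clip to 0 .. max_bits, and never more than the group has left. */
    if (target < 0) target = 0;
    if (target > max_bits) target = max_bits;
    if (target > gf_group_bits) target = gf_group_bits;

    gf_group_error_left -= modified_err;   /* adjust error remaining */
    gf_group_bits -= target;               /* adjust bits remaining */

    printf("frame target: %d bits\n", target + min_frame_bandwidth);
    printf("group has %d bits left\n", gf_group_bits);
    return 0;
}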
-+ */ - if (target_frame_size < 0) - target_frame_size = 0; - else -@@ -2286,22 +2382,25 @@ static void assign_std_frame_bits(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - target_frame_size = cpi->twopass.gf_group_bits; - } - -- cpi->twopass.gf_group_error_left -= modified_err; // Adjust error remaining -- cpi->twopass.gf_group_bits -= target_frame_size; // Adjust bits remaining -+ /* Adjust error and bits remaining */ -+ cpi->twopass.gf_group_error_left -= (int)modified_err; -+ cpi->twopass.gf_group_bits -= target_frame_size; - - if (cpi->twopass.gf_group_bits < 0) - cpi->twopass.gf_group_bits = 0; - -- target_frame_size += cpi->min_frame_bandwidth; // Add in the minimum number of bits that is set aside for every frame. -+ /* Add in the minimum number of bits that is set aside for every frame. */ -+ target_frame_size += cpi->min_frame_bandwidth; - -- // Every other frame gets a few extra bits -+ /* Every other frame gets a few extra bits */ - if ( (cpi->common.frames_since_golden & 0x01) && - (cpi->frames_till_gf_update_due > 0) ) - { - target_frame_size += cpi->twopass.alt_extra_bits; - } - -- cpi->per_frame_bandwidth = target_frame_size; // Per frame bit target for this frame -+ /* Per frame bit target for this frame */ -+ cpi->per_frame_bandwidth = target_frame_size; - } - - void vp8_second_pass(VP8_COMP *cpi) -@@ -2330,20 +2429,25 @@ void vp8_second_pass(VP8_COMP *cpi) - this_frame_intra_error = this_frame.intra_error; - this_frame_coded_error = this_frame.coded_error; - -- // keyframe and section processing ! -+ /* keyframe and section processing ! */ - if (cpi->twopass.frames_to_key == 0) - { -- // Define next KF group and assign bits to it -+ /* Define next KF group and assign bits to it */ - vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame)); - find_next_key_frame(cpi, &this_frame_copy); - -- // Special case: Error error_resilient_mode mode does not make much sense for two pass but with its current meaning but this code is designed to stop -- // outlandish behaviour if someone does set it when using two pass. It effectively disables GF groups. -- // This is temporary code till we decide what should really happen in this case. -+ /* Special case: Error error_resilient_mode mode does not make much -+ * sense for two pass but with its current meaning but this code is -+ * designed to stop outlandish behaviour if someone does set it when -+ * using two pass. It effectively disables GF groups. This is -+ * temporary code till we decide what should really happen in this -+ * case. 
-+ */ - if (cpi->oxcf.error_resilient_mode) - { -- cpi->twopass.gf_group_bits = cpi->twopass.kf_group_bits; -- cpi->twopass.gf_group_error_left = cpi->twopass.kf_group_error_left; -+ cpi->twopass.gf_group_bits = (int)cpi->twopass.kf_group_bits; -+ cpi->twopass.gf_group_error_left = -+ (int)cpi->twopass.kf_group_error_left; - cpi->baseline_gf_interval = cpi->twopass.frames_to_key; - cpi->frames_till_gf_update_due = cpi->baseline_gf_interval; - cpi->source_alt_ref_pending = 0; -@@ -2351,19 +2455,25 @@ void vp8_second_pass(VP8_COMP *cpi) - - } - -- // Is this a GF / ARF (Note that a KF is always also a GF) -+ /* Is this a GF / ARF (Note that a KF is always also a GF) */ - if (cpi->frames_till_gf_update_due == 0) - { -- // Define next gf group and assign bits to it -+ /* Define next gf group and assign bits to it */ - vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame)); - define_gf_group(cpi, &this_frame_copy); - -- // If we are going to code an altref frame at the end of the group and the current frame is not a key frame.... -- // If the previous group used an arf this frame has already benefited from that arf boost and it should not be given extra bits -- // If the previous group was NOT coded using arf we may want to apply some boost to this GF as well -+ /* If we are going to code an altref frame at the end of the group -+ * and the current frame is not a key frame.... If the previous -+ * group used an arf this frame has already benefited from that arf -+ * boost and it should not be given extra bits If the previous -+ * group was NOT coded using arf we may want to apply some boost to -+ * this GF as well -+ */ - if (cpi->source_alt_ref_pending && (cpi->common.frame_type != KEY_FRAME)) - { -- // Assign a standard frames worth of bits from those allocated to the GF group -+ /* Assign a standard frames worth of bits from those allocated -+ * to the GF group -+ */ - int bak = cpi->per_frame_bandwidth; - vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame)); - assign_std_frame_bits(cpi, &this_frame_copy); -@@ -2371,59 +2481,64 @@ void vp8_second_pass(VP8_COMP *cpi) - } - } - -- // Otherwise this is an ordinary frame -+ /* Otherwise this is an ordinary frame */ - else - { -- // Special case: Error error_resilient_mode mode does not make much sense for two pass but with its current meaning but this code is designed to stop -- // outlandish behaviour if someone does set it when using two pass. It effectively disables GF groups. -- // This is temporary code till we decide what should really happen in this case. -+ /* Special case: Error error_resilient_mode mode does not make much -+ * sense for two pass but with its current meaning but this code is -+ * designed to stop outlandish behaviour if someone does set it -+ * when using two pass. It effectively disables GF groups. This is -+ * temporary code till we decide what should really happen in this -+ * case. 
-+ */ - if (cpi->oxcf.error_resilient_mode) - { - cpi->frames_till_gf_update_due = cpi->twopass.frames_to_key; - - if (cpi->common.frame_type != KEY_FRAME) - { -- // Assign bits from those allocated to the GF group -+ /* Assign bits from those allocated to the GF group */ - vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame)); - assign_std_frame_bits(cpi, &this_frame_copy); - } - } - else - { -- // Assign bits from those allocated to the GF group -+ /* Assign bits from those allocated to the GF group */ - vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame)); - assign_std_frame_bits(cpi, &this_frame_copy); - } - } - -- // Keep a globally available copy of this and the next frame's iiratio. -- cpi->twopass.this_iiratio = this_frame_intra_error / -- DOUBLE_DIVIDE_CHECK(this_frame_coded_error); -+ /* Keep a globally available copy of this and the next frame's iiratio. */ -+ cpi->twopass.this_iiratio = (unsigned int)(this_frame_intra_error / -+ DOUBLE_DIVIDE_CHECK(this_frame_coded_error)); - { - FIRSTPASS_STATS next_frame; - if ( lookup_next_frame_stats(cpi, &next_frame) != EOF ) - { -- cpi->twopass.next_iiratio = next_frame.intra_error / -- DOUBLE_DIVIDE_CHECK(next_frame.coded_error); -+ cpi->twopass.next_iiratio = (unsigned int)(next_frame.intra_error / -+ DOUBLE_DIVIDE_CHECK(next_frame.coded_error)); - } - } - -- // Set nominal per second bandwidth for this frame -- cpi->target_bandwidth = cpi->per_frame_bandwidth * cpi->output_frame_rate; -+ /* Set nominal per second bandwidth for this frame */ -+ cpi->target_bandwidth = (int) -+ (cpi->per_frame_bandwidth * cpi->output_frame_rate); - if (cpi->target_bandwidth < 0) - cpi->target_bandwidth = 0; - - -- // Account for mv, mode and other overheads. -- overhead_bits = estimate_modemvcost( -+ /* Account for mv, mode and other overheads. */ -+ overhead_bits = (int)estimate_modemvcost( - cpi, &cpi->twopass.total_left_stats ); - -- // Special case code for first frame. -+ /* Special case code for first frame. */ - if (cpi->common.current_video_frame == 0) - { - cpi->twopass.est_max_qcorrection_factor = 1.0; - -- // Set a cq_level in constrained quality mode. -+ /* Set a cq_level in constrained quality mode. */ - if ( cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY ) - { - int est_cq; -@@ -2439,7 +2554,7 @@ void vp8_second_pass(VP8_COMP *cpi) - cpi->cq_target_quality = est_cq; - } - -- // guess at maxq needed in 2nd pass -+ /* guess at maxq needed in 2nd pass */ - cpi->twopass.maxq_max_limit = cpi->worst_quality; - cpi->twopass.maxq_min_limit = cpi->best_quality; - -@@ -2449,11 +2564,12 @@ void vp8_second_pass(VP8_COMP *cpi) - (int)(cpi->twopass.bits_left / frames_left), - overhead_bits ); - -- // Limit the maxq value returned subsequently. -- // This increases the risk of overspend or underspend if the initial -- // estimate for the clip is bad, but helps prevent excessive -- // variation in Q, especially near the end of a clip -- // where for example a small overspend may cause Q to crash -+ /* Limit the maxq value returned subsequently. -+ * This increases the risk of overspend or underspend if the initial -+ * estimate for the clip is bad, but helps prevent excessive -+ * variation in Q, especially near the end of a clip -+ * where for example a small overspend may cause Q to crash -+ */ - cpi->twopass.maxq_max_limit = ((tmp_q + 32) < cpi->worst_quality) - ? 
(tmp_q + 32) : cpi->worst_quality; - cpi->twopass.maxq_min_limit = ((tmp_q - 32) > cpi->best_quality) -@@ -2463,10 +2579,11 @@ void vp8_second_pass(VP8_COMP *cpi) - cpi->ni_av_qi = tmp_q; - } - -- // The last few frames of a clip almost always have to few or too many -- // bits and for the sake of over exact rate control we dont want to make -- // radical adjustments to the allowed quantizer range just to use up a -- // few surplus bits or get beneath the target rate. -+ /* The last few frames of a clip almost always have to few or too many -+ * bits and for the sake of over exact rate control we dont want to make -+ * radical adjustments to the allowed quantizer range just to use up a -+ * few surplus bits or get beneath the target rate. -+ */ - else if ( (cpi->common.current_video_frame < - (((unsigned int)cpi->twopass.total_stats.count * 255)>>8)) && - ((cpi->common.current_video_frame + cpi->baseline_gf_interval) < -@@ -2481,7 +2598,7 @@ void vp8_second_pass(VP8_COMP *cpi) - (int)(cpi->twopass.bits_left / frames_left), - overhead_bits ); - -- // Move active_worst_quality but in a damped way -+ /* Move active_worst_quality but in a damped way */ - if (tmp_q > cpi->active_worst_quality) - cpi->active_worst_quality ++; - else if (tmp_q < cpi->active_worst_quality) -@@ -2493,7 +2610,7 @@ void vp8_second_pass(VP8_COMP *cpi) - - cpi->twopass.frames_to_key --; - -- // Update the total stats remaining sturcture -+ /* Update the total stats remaining sturcture */ - subtract_stats(&cpi->twopass.total_left_stats, &this_frame ); - } - -@@ -2502,8 +2619,9 @@ static int test_candidate_kf(VP8_COMP *cpi, FIRSTPASS_STATS *last_frame, FIRSTP - { - int is_viable_kf = 0; - -- // Does the frame satisfy the primary criteria of a key frame -- // If so, then examine how well it predicts subsequent frames -+ /* Does the frame satisfy the primary criteria of a key frame -+ * If so, then examine how well it predicts subsequent frames -+ */ - if ((this_frame->pcnt_second_ref < 0.10) && - (next_frame->pcnt_second_ref < 0.10) && - ((this_frame->pcnt_inter < 0.05) || -@@ -2530,10 +2648,10 @@ static int test_candidate_kf(VP8_COMP *cpi, FIRSTPASS_STATS *last_frame, FIRSTP - - vpx_memcpy(&local_next_frame, next_frame, sizeof(*next_frame)); - -- // Note the starting file position so we can reset to it -+ /* Note the starting file position so we can reset to it */ - start_pos = cpi->twopass.stats_in; - -- // Examine how well the key frame predicts subsequent frames -+ /* Examine how well the key frame predicts subsequent frames */ - for (i = 0 ; i < 16; i++) - { - next_iiratio = (IIKFACTOR1 * local_next_frame.intra_error / DOUBLE_DIVIDE_CHECK(local_next_frame.coded_error)) ; -@@ -2541,18 +2659,16 @@ static int test_candidate_kf(VP8_COMP *cpi, FIRSTPASS_STATS *last_frame, FIRSTP - if (next_iiratio > RMAX) - next_iiratio = RMAX; - -- // Cumulative effect of decay in prediction quality -+ /* Cumulative effect of decay in prediction quality */ - if (local_next_frame.pcnt_inter > 0.85) - decay_accumulator = decay_accumulator * local_next_frame.pcnt_inter; - else - decay_accumulator = decay_accumulator * ((0.85 + local_next_frame.pcnt_inter) / 2.0); - -- //decay_accumulator = decay_accumulator * local_next_frame.pcnt_inter; -- -- // Keep a running total -+ /* Keep a running total */ - boost_score += (decay_accumulator * next_iiratio); - -- // Test various breakout clauses -+ /* Test various breakout clauses */ - if ((local_next_frame.pcnt_inter < 0.05) || - (next_iiratio < 1.5) || - (((local_next_frame.pcnt_inter - -@@ 
-2567,17 +2683,19 @@ static int test_candidate_kf(VP8_COMP *cpi, FIRSTPASS_STATS *last_frame, FIRSTP - - old_boost_score = boost_score; - -- // Get the next frame details -+ /* Get the next frame details */ - if (EOF == input_stats(cpi, &local_next_frame)) - break; - } - -- // If there is tolerable prediction for at least the next 3 frames then break out else discard this pottential key frame and move on -+ /* If there is tolerable prediction for at least the next 3 frames -+ * then break out else discard this pottential key frame and move on -+ */ - if (boost_score > 5.0 && (i > 3)) - is_viable_kf = 1; - else - { -- // Reset the file position -+ /* Reset the file position */ - reset_fpf_position(cpi, start_pos); - - is_viable_kf = 0; -@@ -2605,65 +2723,71 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - double kf_group_coded_err = 0.0; - double recent_loop_decay[8] = {1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0}; - -- vpx_memset(&next_frame, 0, sizeof(next_frame)); // assure clean -+ vpx_memset(&next_frame, 0, sizeof(next_frame)); - -- vp8_clear_system_state(); //__asm emms; -+ vp8_clear_system_state(); - start_position = cpi->twopass.stats_in; - - cpi->common.frame_type = KEY_FRAME; - -- // is this a forced key frame by interval -+ /* is this a forced key frame by interval */ - cpi->this_key_frame_forced = cpi->next_key_frame_forced; - -- // Clear the alt ref active flag as this can never be active on a key frame -+ /* Clear the alt ref active flag as this can never be active on a key -+ * frame -+ */ - cpi->source_alt_ref_active = 0; - -- // Kf is always a gf so clear frames till next gf counter -+ /* Kf is always a gf so clear frames till next gf counter */ - cpi->frames_till_gf_update_due = 0; - - cpi->twopass.frames_to_key = 1; - -- // Take a copy of the initial frame details -+ /* Take a copy of the initial frame details */ - vpx_memcpy(&first_frame, this_frame, sizeof(*this_frame)); - -- cpi->twopass.kf_group_bits = 0; // Total bits avaialable to kf group -- cpi->twopass.kf_group_error_left = 0; // Group modified error score. -+ cpi->twopass.kf_group_bits = 0; -+ cpi->twopass.kf_group_error_left = 0; - - kf_mod_err = calculate_modified_err(cpi, this_frame); - -- // find the next keyframe -+ /* find the next keyframe */ - i = 0; - while (cpi->twopass.stats_in < cpi->twopass.stats_in_end) - { -- // Accumulate kf group error -+ /* Accumulate kf group error */ - kf_group_err += calculate_modified_err(cpi, this_frame); - -- // These figures keep intra and coded error counts for all frames including key frames in the group. -- // The effect of the key frame itself can be subtracted out using the first_frame data collected above -+ /* These figures keep intra and coded error counts for all frames -+ * including key frames in the group. The effect of the key frame -+ * itself can be subtracted out using the first_frame data -+ * collected above -+ */ - kf_group_intra_err += this_frame->intra_error; - kf_group_coded_err += this_frame->coded_error; - -- // load a the next frame's stats -+ /* load a the next frame's stats */ - vpx_memcpy(&last_frame, this_frame, sizeof(*this_frame)); - input_stats(cpi, this_frame); - -- // Provided that we are not at the end of the file... -+ /* Provided that we are not at the end of the file... 
*/ - if (cpi->oxcf.auto_key - && lookup_next_frame_stats(cpi, &next_frame) != EOF) - { -- // Normal scene cut check -+ /* Normal scene cut check */ - if ( ( i >= MIN_GF_INTERVAL ) && - test_candidate_kf(cpi, &last_frame, this_frame, &next_frame) ) - { - break; - } - -- // How fast is prediction quality decaying -+ /* How fast is prediction quality decaying */ - loop_decay_rate = get_prediction_decay_rate(cpi, &next_frame); - -- // We want to know something about the recent past... rather than -- // as used elsewhere where we are concened with decay in prediction -- // quality since the last GF or KF. -+ /* We want to know something about the recent past... rather than -+ * as used elsewhere where we are concened with decay in prediction -+ * quality since the last GF or KF. -+ */ - recent_loop_decay[i%8] = loop_decay_rate; - decay_accumulator = 1.0; - for (j = 0; j < 8; j++) -@@ -2671,8 +2795,9 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - decay_accumulator = decay_accumulator * recent_loop_decay[j]; - } - -- // Special check for transition or high motion followed by a -- // to a static scene. -+ /* Special check for transition or high motion followed by a -+ * static scene. -+ */ - if ( detect_transition_to_still( cpi, i, - (cpi->key_frame_frequency-i), - loop_decay_rate, -@@ -2682,11 +2807,12 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - } - - -- // Step on to the next frame -+ /* Step on to the next frame */ - cpi->twopass.frames_to_key ++; - -- // If we don't have a real key frame within the next two -- // forcekeyframeevery intervals then break out of the loop. -+ /* If we don't have a real key frame within the next two -+ * forcekeyframeevery intervals then break out of the loop. -+ */ - if (cpi->twopass.frames_to_key >= 2 *(int)cpi->key_frame_frequency) - break; - } else -@@ -2695,10 +2821,11 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - i++; - } - -- // If there is a max kf interval set by the user we must obey it. -- // We already breakout of the loop above at 2x max. -- // This code centers the extra kf if the actual natural -- // interval is between 1x and 2x -+ /* If there is a max kf interval set by the user we must obey it. -+ * We already breakout of the loop above at 2x max. 
-+ * This code centers the extra kf if the actual natural -+ * interval is between 1x and 2x -+ */ - if (cpi->oxcf.auto_key - && cpi->twopass.frames_to_key > (int)cpi->key_frame_frequency ) - { -@@ -2707,29 +2834,29 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - - cpi->twopass.frames_to_key /= 2; - -- // Copy first frame details -+ /* Copy first frame details */ - vpx_memcpy(&tmp_frame, &first_frame, sizeof(first_frame)); - -- // Reset to the start of the group -+ /* Reset to the start of the group */ - reset_fpf_position(cpi, start_position); - - kf_group_err = 0; - kf_group_intra_err = 0; - kf_group_coded_err = 0; - -- // Rescan to get the correct error data for the forced kf group -+ /* Rescan to get the correct error data for the forced kf group */ - for( i = 0; i < cpi->twopass.frames_to_key; i++ ) - { -- // Accumulate kf group errors -+ /* Accumulate kf group errors */ - kf_group_err += calculate_modified_err(cpi, &tmp_frame); - kf_group_intra_err += tmp_frame.intra_error; - kf_group_coded_err += tmp_frame.coded_error; - -- // Load a the next frame's stats -+ /* Load a the next frame's stats */ - input_stats(cpi, &tmp_frame); - } - -- // Reset to the start of the group -+ /* Reset to the start of the group */ - reset_fpf_position(cpi, current_pos); - - cpi->next_key_frame_forced = 1; -@@ -2737,58 +2864,63 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - else - cpi->next_key_frame_forced = 0; - -- // Special case for the last frame of the file -+ /* Special case for the last frame of the file */ - if (cpi->twopass.stats_in >= cpi->twopass.stats_in_end) - { -- // Accumulate kf group error -+ /* Accumulate kf group error */ - kf_group_err += calculate_modified_err(cpi, this_frame); - -- // These figures keep intra and coded error counts for all frames including key frames in the group. -- // The effect of the key frame itself can be subtracted out using the first_frame data collected above -+ /* These figures keep intra and coded error counts for all frames -+ * including key frames in the group. The effect of the key frame -+ * itself can be subtracted out using the first_frame data -+ * collected above -+ */ - kf_group_intra_err += this_frame->intra_error; - kf_group_coded_err += this_frame->coded_error; - } - -- // Calculate the number of bits that should be assigned to the kf group. -+ /* Calculate the number of bits that should be assigned to the kf group. */ - if ((cpi->twopass.bits_left > 0) && (cpi->twopass.modified_error_left > 0.0)) - { -- // Max for a single normal frame (not key frame) -+ /* Max for a single normal frame (not key frame) */ - int max_bits = frame_max_bits(cpi); - -- // Maximum bits for the kf group -+ /* Maximum bits for the kf group */ - int64_t max_grp_bits; - -- // Default allocation based on bits left and relative -- // complexity of the section -+ /* Default allocation based on bits left and relative -+ * complexity of the section -+ */ - cpi->twopass.kf_group_bits = (int64_t)( cpi->twopass.bits_left * - ( kf_group_err / - cpi->twopass.modified_error_left )); - -- // Clip based on maximum per frame rate defined by the user. -+ /* Clip based on maximum per frame rate defined by the user. */ - max_grp_bits = (int64_t)max_bits * (int64_t)cpi->twopass.frames_to_key; - if (cpi->twopass.kf_group_bits > max_grp_bits) - cpi->twopass.kf_group_bits = max_grp_bits; - -- // Additional special case for CBR if buffer is getting full. -+ /* Additional special case for CBR if buffer is getting full. 
*/ - if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) - { -- int opt_buffer_lvl = cpi->oxcf.optimal_buffer_level; -- int buffer_lvl = cpi->buffer_level; -+ int64_t opt_buffer_lvl = cpi->oxcf.optimal_buffer_level; -+ int64_t buffer_lvl = cpi->buffer_level; - -- // If the buffer is near or above the optimal and this kf group is -- // not being allocated much then increase the allocation a bit. -+ /* If the buffer is near or above the optimal and this kf group is -+ * not being allocated much then increase the allocation a bit. -+ */ - if (buffer_lvl >= opt_buffer_lvl) - { -- int high_water_mark = (opt_buffer_lvl + -+ int64_t high_water_mark = (opt_buffer_lvl + - cpi->oxcf.maximum_buffer_size) >> 1; - - int64_t av_group_bits; - -- // Av bits per frame * number of frames -+ /* Av bits per frame * number of frames */ - av_group_bits = (int64_t)cpi->av_per_frame_bandwidth * - (int64_t)cpi->twopass.frames_to_key; - -- // We are at or above the maximum. -+ /* We are at or above the maximum. */ - if (cpi->buffer_level >= high_water_mark) - { - int64_t min_group_bits; -@@ -2800,7 +2932,7 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - if (cpi->twopass.kf_group_bits < min_group_bits) - cpi->twopass.kf_group_bits = min_group_bits; - } -- // We are above optimal but below the maximum -+ /* We are above optimal but below the maximum */ - else if (cpi->twopass.kf_group_bits < av_group_bits) - { - int64_t bits_below_av = av_group_bits - -@@ -2817,13 +2949,15 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - else - cpi->twopass.kf_group_bits = 0; - -- // Reset the first pass file position -+ /* Reset the first pass file position */ - reset_fpf_position(cpi, start_position); - -- // determine how big to make this keyframe based on how well the subsequent frames use inter blocks -+ /* determine how big to make this keyframe based on how well the -+ * subsequent frames use inter blocks -+ */ - decay_accumulator = 1.0; - boost_score = 0.0; -- loop_decay_rate = 1.00; // Starting decay rate -+ loop_decay_rate = 1.00; /* Starting decay rate */ - - for (i = 0 ; i < cpi->twopass.frames_to_key ; i++) - { -@@ -2842,7 +2976,7 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - if (r > RMAX) - r = RMAX; - -- // How fast is prediction quality decaying -+ /* How fast is prediction quality decaying */ - loop_decay_rate = get_prediction_decay_rate(cpi, &next_frame); - - decay_accumulator = decay_accumulator * loop_decay_rate; -@@ -2875,31 +3009,26 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - - avg_stats(§ionstats); - -- cpi->twopass.section_intra_rating = -- sectionstats.intra_error -- / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error); -+ cpi->twopass.section_intra_rating = (unsigned int) -+ (sectionstats.intra_error -+ / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error)); - - Ratio = sectionstats.intra_error / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error); -- // if( (Ratio > 11) ) //&& (sectionstats.pcnt_second_ref < .20) ) -- //{ - cpi->twopass.section_max_qfactor = 1.0 - ((Ratio - 10.0) * 0.025); - - if (cpi->twopass.section_max_qfactor < 0.80) - cpi->twopass.section_max_qfactor = 0.80; -- -- //} -- //else -- // cpi->twopass.section_max_qfactor = 1.0; - } - -- // When using CBR apply additional buffer fullness related upper limits -+ /* When using CBR apply additional buffer fullness related upper limits */ - if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) - { - double max_boost; - - if 
(cpi->drop_frames_allowed) - { -- int df_buffer_level = cpi->oxcf.drop_frames_water_mark * (cpi->oxcf.optimal_buffer_level / 100); -+ int df_buffer_level = (int)(cpi->oxcf.drop_frames_water_mark -+ * (cpi->oxcf.optimal_buffer_level / 100)); - - if (cpi->buffer_level > df_buffer_level) - max_boost = ((double)((cpi->buffer_level - df_buffer_level) * 2 / 3) * 16.0) / DOUBLE_DIVIDE_CHECK((double)cpi->av_per_frame_bandwidth); -@@ -2919,18 +3048,18 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - boost_score = max_boost; - } - -- // Reset the first pass file position -+ /* Reset the first pass file position */ - reset_fpf_position(cpi, start_position); - -- // Work out how many bits to allocate for the key frame itself -+ /* Work out how many bits to allocate for the key frame itself */ - if (1) - { -- int kf_boost = boost_score; -+ int kf_boost = (int)boost_score; - int allocation_chunks; - int Counter = cpi->twopass.frames_to_key; - int alt_kf_bits; - YV12_BUFFER_CONFIG *lst_yv12 = &cpi->common.yv12_fb[cpi->common.lst_fb_idx]; -- // Min boost based on kf interval -+ /* Min boost based on kf interval */ - #if 0 - - while ((kf_boost < 48) && (Counter > 0)) -@@ -2948,32 +3077,33 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - if (kf_boost > 48) kf_boost = 48; - } - -- // bigger frame sizes need larger kf boosts, smaller frames smaller boosts... -+ /* bigger frame sizes need larger kf boosts, smaller frames smaller -+ * boosts... -+ */ - if ((lst_yv12->y_width * lst_yv12->y_height) > (320 * 240)) - kf_boost += 2 * (lst_yv12->y_width * lst_yv12->y_height) / (320 * 240); - else if ((lst_yv12->y_width * lst_yv12->y_height) < (320 * 240)) - kf_boost -= 4 * (320 * 240) / (lst_yv12->y_width * lst_yv12->y_height); - -- kf_boost = (int)((double)kf_boost * 100.0) >> 4; // Scale 16 to 100 -- -- // Adjustment to boost based on recent average q -- //kf_boost = kf_boost * vp8_kf_boost_qadjustment[cpi->ni_av_qi] / 100; -- -- if (kf_boost < 250) // Min KF boost -+ /* Min KF boost */ -+ kf_boost = (int)((double)kf_boost * 100.0) >> 4; /* Scale 16 to 100 */ -+ if (kf_boost < 250) - kf_boost = 250; - -- // We do three calculations for kf size. -- // The first is based on the error score for the whole kf group. -- // The second (optionaly) on the key frames own error if this is -- // smaller than the average for the group. -- // The final one insures that the frame receives at least the -- // allocation it would have received based on its own error score vs -- // the error score remaining -- // Special case if the sequence appears almost totaly static -- // as measured by the decay accumulator. In this case we want to -- // spend almost all of the bits on the key frame. -- // cpi->twopass.frames_to_key-1 because key frame itself is taken -- // care of by kf_boost. -+ /* -+ * We do three calculations for kf size. -+ * The first is based on the error score for the whole kf group. -+ * The second (optionaly) on the key frames own error if this is -+ * smaller than the average for the group. -+ * The final one insures that the frame receives at least the -+ * allocation it would have received based on its own error score vs -+ * the error score remaining -+ * Special case if the sequence appears almost totaly static -+ * as measured by the decay accumulator. In this case we want to -+ * spend almost all of the bits on the key frame. -+ * cpi->twopass.frames_to_key-1 because key frame itself is taken -+ * care of by kf_boost. 
-+ */ - if ( decay_accumulator >= 0.99 ) - { - allocation_chunks = -@@ -2985,7 +3115,7 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - ((cpi->twopass.frames_to_key - 1) * 100) + kf_boost; - } - -- // Normalize Altboost and allocations chunck down to prevent overflow -+ /* Normalize Altboost and allocations chunck down to prevent overflow */ - while (kf_boost > 1000) - { - kf_boost /= 2; -@@ -2994,20 +3124,21 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - - cpi->twopass.kf_group_bits = (cpi->twopass.kf_group_bits < 0) ? 0 : cpi->twopass.kf_group_bits; - -- // Calculate the number of bits to be spent on the key frame -+ /* Calculate the number of bits to be spent on the key frame */ - cpi->twopass.kf_bits = (int)((double)kf_boost * ((double)cpi->twopass.kf_group_bits / (double)allocation_chunks)); - -- // Apply an additional limit for CBR -+ /* Apply an additional limit for CBR */ - if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) - { -- if (cpi->twopass.kf_bits > ((3 * cpi->buffer_level) >> 2)) -- cpi->twopass.kf_bits = (3 * cpi->buffer_level) >> 2; -+ if (cpi->twopass.kf_bits > (int)((3 * cpi->buffer_level) >> 2)) -+ cpi->twopass.kf_bits = (int)((3 * cpi->buffer_level) >> 2); - } - -- // If the key frame is actually easier than the average for the -- // kf group (which does sometimes happen... eg a blank intro frame) -- // Then use an alternate calculation based on the kf error score -- // which should give a smaller key frame. -+ /* If the key frame is actually easier than the average for the -+ * kf group (which does sometimes happen... eg a blank intro frame) -+ * Then use an alternate calculation based on the kf error score -+ * which should give a smaller key frame. -+ */ - if (kf_mod_err < kf_group_err / cpi->twopass.frames_to_key) - { - double alt_kf_grp_bits = -@@ -3023,9 +3154,10 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - cpi->twopass.kf_bits = alt_kf_bits; - } - } -- // Else if it is much harder than other frames in the group make sure -- // it at least receives an allocation in keeping with its relative -- // error score -+ /* Else if it is much harder than other frames in the group make sure -+ * it at least receives an allocation in keeping with its relative -+ * error score -+ */ - else - { - alt_kf_bits = -@@ -3040,17 +3172,23 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - } - - cpi->twopass.kf_group_bits -= cpi->twopass.kf_bits; -- cpi->twopass.kf_bits += cpi->min_frame_bandwidth; // Add in the minimum frame allowance -+ /* Add in the minimum frame allowance */ -+ cpi->twopass.kf_bits += cpi->min_frame_bandwidth; -+ -+ /* Peer frame bit target for this frame */ -+ cpi->per_frame_bandwidth = cpi->twopass.kf_bits; - -- cpi->per_frame_bandwidth = cpi->twopass.kf_bits; // Peer frame bit target for this frame -- cpi->target_bandwidth = cpi->twopass.kf_bits * cpi->output_frame_rate; // Convert to a per second bitrate -+ /* Convert to a per second bitrate */ -+ cpi->target_bandwidth = (int)(cpi->twopass.kf_bits * -+ cpi->output_frame_rate); - } - -- // Note the total error score of the kf group minus the key frame itself -+ /* Note the total error score of the kf group minus the key frame itself */ - cpi->twopass.kf_group_error_left = (int)(kf_group_err - kf_mod_err); - -- // Adjust the count of total modified error left. 
-- // The count of bits left is adjusted elsewhere based on real coded frame sizes -+ /* Adjust the count of total modified error left. The count of bits left -+ * is adjusted elsewhere based on real coded frame sizes -+ */ - cpi->twopass.modified_error_left -= kf_group_err; - - if (cpi->oxcf.allow_spatial_resampling) -@@ -3063,7 +3201,7 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - int new_width = cpi->oxcf.Width; - int new_height = cpi->oxcf.Height; - -- int projected_buffer_level = cpi->buffer_level; -+ int projected_buffer_level = (int)cpi->buffer_level; - int tmp_q; - - double projected_bits_perframe; -@@ -3076,40 +3214,47 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - if ((cpi->common.Width != cpi->oxcf.Width) || (cpi->common.Height != cpi->oxcf.Height)) - last_kf_resampled = 1; - -- // Set back to unscaled by defaults -+ /* Set back to unscaled by defaults */ - cpi->common.horiz_scale = NORMAL; - cpi->common.vert_scale = NORMAL; - -- // Calculate Average bits per frame. -- //av_bits_per_frame = cpi->twopass.bits_left/(double)(cpi->twopass.total_stats.count - cpi->common.current_video_frame); -+ /* Calculate Average bits per frame. */ - av_bits_per_frame = cpi->oxcf.target_bandwidth / DOUBLE_DIVIDE_CHECK((double)cpi->frame_rate); -- //if ( av_bits_per_frame < 0.0 ) -- // av_bits_per_frame = 0.0 - -- // CBR... Use the clip average as the target for deciding resample -+ /* CBR... Use the clip average as the target for deciding resample */ - if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) - { - bits_per_frame = av_bits_per_frame; - } - -- // In VBR we want to avoid downsampling in easy section unless we are under extreme pressure -- // So use the larger of target bitrate for this sectoion or average bitrate for sequence -+ /* In VBR we want to avoid downsampling in easy section unless we -+ * are under extreme pressure So use the larger of target bitrate -+ * for this section or average bitrate for sequence -+ */ - else - { -- bits_per_frame = cpi->twopass.kf_group_bits / cpi->twopass.frames_to_key; // This accounts for how hard the section is... -+ /* This accounts for how hard the section is... 
*/ -+ bits_per_frame = (double) -+ (cpi->twopass.kf_group_bits / cpi->twopass.frames_to_key); - -- if (bits_per_frame < av_bits_per_frame) // Dont turn to resampling in easy sections just because they have been assigned a small number of bits -+ /* Dont turn to resampling in easy sections just because they -+ * have been assigned a small number of bits -+ */ -+ if (bits_per_frame < av_bits_per_frame) - bits_per_frame = av_bits_per_frame; - } - -- // bits_per_frame should comply with our minimum -+ /* bits_per_frame should comply with our minimum */ - if (bits_per_frame < (cpi->oxcf.target_bandwidth * cpi->oxcf.two_pass_vbrmin_section / 100)) - bits_per_frame = (cpi->oxcf.target_bandwidth * cpi->oxcf.two_pass_vbrmin_section / 100); - -- // Work out if spatial resampling is necessary -- kf_q = estimate_kf_group_q(cpi, err_per_frame, bits_per_frame, group_iiratio); -+ /* Work out if spatial resampling is necessary */ -+ kf_q = estimate_kf_group_q(cpi, err_per_frame, -+ (int)bits_per_frame, group_iiratio); - -- // If we project a required Q higher than the maximum allowed Q then make a guess at the actual size of frames in this section -+ /* If we project a required Q higher than the maximum allowed Q then -+ * make a guess at the actual size of frames in this section -+ */ - projected_bits_perframe = bits_per_frame; - tmp_q = kf_q; - -@@ -3119,8 +3264,11 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - tmp_q--; - } - -- // Guess at buffer level at the end of the section -- projected_buffer_level = cpi->buffer_level - (int)((projected_bits_perframe - av_bits_per_frame) * cpi->twopass.frames_to_key); -+ /* Guess at buffer level at the end of the section */ -+ projected_buffer_level = (int) -+ (cpi->buffer_level - (int) -+ ((projected_bits_perframe - av_bits_per_frame) * -+ cpi->twopass.frames_to_key)); - - if (0) - { -@@ -3129,15 +3277,17 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - fclose(f); - } - -- // The trigger for spatial resampling depends on the various parameters such as whether we are streaming (CBR) or VBR. -+ /* The trigger for spatial resampling depends on the various -+ * parameters such as whether we are streaming (CBR) or VBR. 
-+ */ - if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) - { -- // Trigger resample if we are projected to fall below down sample level or -- // resampled last time and are projected to remain below the up sample level -+ /* Trigger resample if we are projected to fall below down -+ * sample level or resampled last time and are projected to -+ * remain below the up sample level -+ */ - if ((projected_buffer_level < (cpi->oxcf.resample_down_water_mark * cpi->oxcf.optimal_buffer_level / 100)) || - (last_kf_resampled && (projected_buffer_level < (cpi->oxcf.resample_up_water_mark * cpi->oxcf.optimal_buffer_level / 100)))) -- //( ((cpi->buffer_level < (cpi->oxcf.resample_down_water_mark * cpi->oxcf.optimal_buffer_level / 100))) && -- // ((projected_buffer_level < (cpi->oxcf.resample_up_water_mark * cpi->oxcf.optimal_buffer_level / 100))) )) - resample_trigger = 1; - else - resample_trigger = 0; -@@ -3147,9 +3297,15 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - int64_t clip_bits = (int64_t)(cpi->twopass.total_stats.count * cpi->oxcf.target_bandwidth / DOUBLE_DIVIDE_CHECK((double)cpi->frame_rate)); - int64_t over_spend = cpi->oxcf.starting_buffer_level - cpi->buffer_level; - -- if ((last_kf_resampled && (kf_q > cpi->worst_quality)) || // If triggered last time the threshold for triggering again is reduced -- ((kf_q > cpi->worst_quality) && // Projected Q higher than allowed and ... -- (over_spend > clip_bits / 20))) // ... Overspend > 5% of total bits -+ /* If triggered last time the threshold for triggering again is -+ * reduced: -+ * -+ * Projected Q higher than allowed and Overspend > 5% of total -+ * bits -+ */ -+ if ((last_kf_resampled && (kf_q > cpi->worst_quality)) || -+ ((kf_q > cpi->worst_quality) && -+ (over_spend > clip_bits / 20))) - resample_trigger = 1; - else - resample_trigger = 0; -@@ -3171,13 +3327,19 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) - new_width = ((hs - 1) + (cpi->oxcf.Width * hr)) / hs; - new_height = ((vs - 1) + (cpi->oxcf.Height * vr)) / vs; - -- // Reducing the area to 1/4 does not reduce the complexity (err_per_frame) to 1/4... -- // effective_sizeratio attempts to provide a crude correction for this -+ /* Reducing the area to 1/4 does not reduce the complexity -+ * (err_per_frame) to 1/4... effective_sizeratio attempts -+ * to provide a crude correction for this -+ */ - effective_size_ratio = (double)(new_width * new_height) / (double)(cpi->oxcf.Width * cpi->oxcf.Height); - effective_size_ratio = (1.0 + (3.0 * effective_size_ratio)) / 4.0; - -- // Now try again and see what Q we get with the smaller image size -- kf_q = estimate_kf_group_q(cpi, err_per_frame * effective_size_ratio, bits_per_frame, group_iiratio); -+ /* Now try again and see what Q we get with the smaller -+ * image size -+ */ -+ kf_q = estimate_kf_group_q(cpi, -+ err_per_frame * effective_size_ratio, -+ (int)bits_per_frame, group_iiratio); - - if (0) - { -diff --git a/vp8/encoder/lookahead.c b/vp8/encoder/lookahead.c -index 4c92281..ce2ce08 100644 ---- a/vp8/encoder/lookahead.c -+++ b/vp8/encoder/lookahead.c -@@ -118,10 +118,11 @@ vp8_lookahead_push(struct lookahead_ctx *ctx, - ctx->sz++; - buf = pop(ctx, &ctx->write_idx); - -- // Only do this partial copy if the following conditions are all met: -- // 1. Lookahead queue has has size of 1. -- // 2. Active map is provided. -- // 3. This is not a key frame, golden nor altref frame. -+ /* Only do this partial copy if the following conditions are all met: -+ * 1. 
Lookahead queue has has size of 1. -+ * 2. Active map is provided. -+ * 3. This is not a key frame, golden nor altref frame. -+ */ - if (ctx->max_sz == 1 && active_map && !flags) - { - for (row = 0; row < mb_rows; ++row) -@@ -130,18 +131,18 @@ vp8_lookahead_push(struct lookahead_ctx *ctx, - - while (1) - { -- // Find the first active macroblock in this row. -+ /* Find the first active macroblock in this row. */ - for (; col < mb_cols; ++col) - { - if (active_map[col]) - break; - } - -- // No more active macroblock in this row. -+ /* No more active macroblock in this row. */ - if (col == mb_cols) - break; - -- // Find the end of active region in this row. -+ /* Find the end of active region in this row. */ - active_end = col; - - for (; active_end < mb_cols; ++active_end) -@@ -150,13 +151,13 @@ vp8_lookahead_push(struct lookahead_ctx *ctx, - break; - } - -- // Only copy this active region. -+ /* Only copy this active region. */ - vp8_copy_and_extend_frame_with_rect(src, &buf->img, - row << 4, - col << 4, 16, - (active_end - col) << 4); - -- // Start again from the end of this active region. -+ /* Start again from the end of this active region. */ - col = active_end; - } - -diff --git a/vp8/encoder/mcomp.c b/vp8/encoder/mcomp.c -index 67e4f7e..b08c7a5 100644 ---- a/vp8/encoder/mcomp.c -+++ b/vp8/encoder/mcomp.c -@@ -25,26 +25,35 @@ static int mv_mode_cts [4] [2]; - - int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int Weight) - { -- // MV costing is based on the distribution of vectors in the previous frame and as such will tend to -- // over state the cost of vectors. In addition coding a new vector can have a knock on effect on the -- // cost of subsequent vectors and the quality of prediction from NEAR and NEAREST for subsequent blocks. -- // The "Weight" parameter allows, to a limited extent, for some account to be taken of these factors. -+ /* MV costing is based on the distribution of vectors in the previous -+ * frame and as such will tend to over state the cost of vectors. In -+ * addition coding a new vector can have a knock on effect on the cost -+ * of subsequent vectors and the quality of prediction from NEAR and -+ * NEAREST for subsequent blocks. The "Weight" parameter allows, to a -+ * limited extent, for some account to be taken of these factors. -+ */ - return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1] + mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1]) * Weight) >> 7; - } - - static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int error_per_bit) - { -- return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1] + -- mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1]) -- * error_per_bit + 128) >> 8; -+ /* Ignore mv costing if mvcost is NULL */ -+ if (mvcost) -+ return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1] + -+ mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1]) -+ * error_per_bit + 128) >> 8; -+ return 0; - } - - static int mvsad_err_cost(int_mv *mv, int_mv *ref, int *mvsadcost[2], int error_per_bit) - { - /* Calculate sad error cost on full pixel basis. 
*/ -- return ((mvsadcost[0][(mv->as_mv.row - ref->as_mv.row)] + -- mvsadcost[1][(mv->as_mv.col - ref->as_mv.col)]) -- * error_per_bit + 128) >> 8; -+ /* Ignore mv costing if mvsadcost is NULL */ -+ if (mvsadcost) -+ return ((mvsadcost[0][(mv->as_mv.row - ref->as_mv.row)] + -+ mvsadcost[1][(mv->as_mv.col - ref->as_mv.col)]) -+ * error_per_bit + 128) >> 8; -+ return 0; - } - - void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride) -@@ -53,7 +62,7 @@ void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride) - int search_site_count = 0; - - -- // Generate offsets for 4 search sites per step. -+ /* Generate offsets for 4 search sites per step. */ - Len = MAX_FIRST_STEP; - x->ss[search_site_count].mv.col = 0; - x->ss[search_site_count].mv.row = 0; -@@ -63,31 +72,31 @@ void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride) - while (Len > 0) - { - -- // Compute offsets for search sites. -+ /* Compute offsets for search sites. */ - x->ss[search_site_count].mv.col = 0; - x->ss[search_site_count].mv.row = -Len; - x->ss[search_site_count].offset = -Len * stride; - search_site_count++; - -- // Compute offsets for search sites. -+ /* Compute offsets for search sites. */ - x->ss[search_site_count].mv.col = 0; - x->ss[search_site_count].mv.row = Len; - x->ss[search_site_count].offset = Len * stride; - search_site_count++; - -- // Compute offsets for search sites. -+ /* Compute offsets for search sites. */ - x->ss[search_site_count].mv.col = -Len; - x->ss[search_site_count].mv.row = 0; - x->ss[search_site_count].offset = -Len; - search_site_count++; - -- // Compute offsets for search sites. -+ /* Compute offsets for search sites. */ - x->ss[search_site_count].mv.col = Len; - x->ss[search_site_count].mv.row = 0; - x->ss[search_site_count].offset = Len; - search_site_count++; - -- // Contract. -+ /* Contract. */ - Len /= 2; - } - -@@ -100,7 +109,7 @@ void vp8_init3smotion_compensation(MACROBLOCK *x, int stride) - int Len; - int search_site_count = 0; - -- // Generate offsets for 8 search sites per step. -+ /* Generate offsets for 8 search sites per step. */ - Len = MAX_FIRST_STEP; - x->ss[search_site_count].mv.col = 0; - x->ss[search_site_count].mv.row = 0; -@@ -110,56 +119,56 @@ void vp8_init3smotion_compensation(MACROBLOCK *x, int stride) - while (Len > 0) - { - -- // Compute offsets for search sites. -+ /* Compute offsets for search sites. */ - x->ss[search_site_count].mv.col = 0; - x->ss[search_site_count].mv.row = -Len; - x->ss[search_site_count].offset = -Len * stride; - search_site_count++; - -- // Compute offsets for search sites. -+ /* Compute offsets for search sites. */ - x->ss[search_site_count].mv.col = 0; - x->ss[search_site_count].mv.row = Len; - x->ss[search_site_count].offset = Len * stride; - search_site_count++; - -- // Compute offsets for search sites. -+ /* Compute offsets for search sites. */ - x->ss[search_site_count].mv.col = -Len; - x->ss[search_site_count].mv.row = 0; - x->ss[search_site_count].offset = -Len; - search_site_count++; - -- // Compute offsets for search sites. -+ /* Compute offsets for search sites. */ - x->ss[search_site_count].mv.col = Len; - x->ss[search_site_count].mv.row = 0; - x->ss[search_site_count].offset = Len; - search_site_count++; - -- // Compute offsets for search sites. -+ /* Compute offsets for search sites. */ - x->ss[search_site_count].mv.col = -Len; - x->ss[search_site_count].mv.row = -Len; - x->ss[search_site_count].offset = -Len * stride - Len; - search_site_count++; - -- // Compute offsets for search sites. 
-+ /* Compute offsets for search sites. */ - x->ss[search_site_count].mv.col = Len; - x->ss[search_site_count].mv.row = -Len; - x->ss[search_site_count].offset = -Len * stride + Len; - search_site_count++; - -- // Compute offsets for search sites. -+ /* Compute offsets for search sites. */ - x->ss[search_site_count].mv.col = -Len; - x->ss[search_site_count].mv.row = Len; - x->ss[search_site_count].offset = Len * stride - Len; - search_site_count++; - -- // Compute offsets for search sites. -+ /* Compute offsets for search sites. */ - x->ss[search_site_count].mv.col = Len; - x->ss[search_site_count].mv.row = Len; - x->ss[search_site_count].offset = Len * stride + Len; - search_site_count++; - - -- // Contract. -+ /* Contract. */ - Len /= 2; - } - -@@ -176,13 +185,20 @@ void vp8_init3smotion_compensation(MACROBLOCK *x, int stride) - * 32 cols area that is enough for 16x16 macroblock. Later, for SPLITMV, we - * could reduce the area. - */ --#define MVC(r,c) (((mvcost[0][(r)-rr] + mvcost[1][(c) - rc]) * error_per_bit + 128 )>>8 ) // estimated cost of a motion vector (r,c) --#define PRE(r,c) (y + (((r)>>2) * y_stride + ((c)>>2) -(offset))) // pointer to predictor base of a motionvector --#define SP(x) (((x)&3)<<1) // convert motion vector component to offset for svf calc --#define DIST(r,c) vfp->svf( PRE(r,c), y_stride, SP(c),SP(r), z,b->src_stride,&sse) // returns subpixel variance error function. -+ -+/* estimated cost of a motion vector (r,c) */ -+#define MVC(r,c) (mvcost ? ((mvcost[0][(r)-rr] + mvcost[1][(c) - rc]) * error_per_bit + 128 )>>8 : 0) -+/* pointer to predictor base of a motionvector */ -+#define PRE(r,c) (y + (((r)>>2) * y_stride + ((c)>>2) -(offset))) -+/* convert motion vector component to offset for svf calc */ -+#define SP(x) (((x)&3)<<1) -+/* returns subpixel variance error function. 
*/ -+#define DIST(r,c) vfp->svf( PRE(r,c), y_stride, SP(c),SP(r), z,b->src_stride,&sse) - #define IFMVCV(r,c,s,e) if ( c >= minc && c <= maxc && r >= minr && r <= maxr) s else e; --#define ERR(r,c) (MVC(r,c)+DIST(r,c)) // returns distortion + motion vector cost --#define CHECK_BETTER(v,r,c) IFMVCV(r,c,{thismse = DIST(r,c); if((v = (MVC(r,c)+thismse)) < besterr) { besterr = v; br=r; bc=c; *distortion = thismse; *sse1 = sse; }}, v=INT_MAX;)// checks if (r,c) has better score than previous best -+/* returns distortion + motion vector cost */ -+#define ERR(r,c) (MVC(r,c)+DIST(r,c)) -+/* checks if (r,c) has better score than previous best */ -+#define CHECK_BETTER(v,r,c) IFMVCV(r,c,{thismse = DIST(r,c); if((v = (MVC(r,c)+thismse)) < besterr) { besterr = v; br=r; bc=c; *distortion = thismse; *sse1 = sse; }}, v=UINT_MAX;) - - int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, - int_mv *bestmv, int_mv *ref_mv, -@@ -196,7 +212,7 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, - int rr = ref_mv->as_mv.row >> 1, rc = ref_mv->as_mv.col >> 1; - int br = bestmv->as_mv.row << 2, bc = bestmv->as_mv.col << 2; - int tr = br, tc = bc; -- unsigned int besterr = INT_MAX; -+ unsigned int besterr; - unsigned int left, right, up, down, diag; - unsigned int sse; - unsigned int whichdir; -@@ -221,7 +237,7 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, - unsigned char *y; - int buf_r1, buf_r2, buf_c1, buf_c2; - -- // Clamping to avoid out-of-range data access -+ /* Clamping to avoid out-of-range data access */ - buf_r1 = ((bestmv->as_mv.row - 3) < x->mv_row_min)?(bestmv->as_mv.row - x->mv_row_min):3; - buf_r2 = ((bestmv->as_mv.row + 3) > x->mv_row_max)?(x->mv_row_max - bestmv->as_mv.row):3; - buf_c1 = ((bestmv->as_mv.col - 3) < x->mv_col_min)?(bestmv->as_mv.col - x->mv_col_min):3; -@@ -238,19 +254,21 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, - - offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col; - -- // central mv -+ /* central mv */ - bestmv->as_mv.row <<= 3; - bestmv->as_mv.col <<= 3; - -- // calculate central point error -+ /* calculate central point error */ - besterr = vfp->vf(y, y_stride, z, b->src_stride, sse1); - *distortion = besterr; - besterr += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit); - -- // TODO: Each subsequent iteration checks at least one point in common with the last iteration could be 2 ( if diag selected) -+ /* TODO: Each subsequent iteration checks at least one point in common -+ * with the last iteration could be 2 ( if diag selected) -+ */ - while (--halfiters) - { -- // 1/2 pel -+ /* 1/2 pel */ - CHECK_BETTER(left, tr, tc - 2); - CHECK_BETTER(right, tr, tc + 2); - CHECK_BETTER(up, tr - 2, tc); -@@ -274,7 +292,7 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, - break; - } - -- // no reason to check the same one again. -+ /* no reason to check the same one again. 
*/ - if (tr == br && tc == bc) - break; - -@@ -282,8 +300,11 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, - tc = bc; - } - -- // TODO: Each subsequent iteration checks at least one point in common with the last iteration could be 2 ( if diag selected) -- // 1/4 pel -+ /* TODO: Each subsequent iteration checks at least one point in common -+ * with the last iteration could be 2 ( if diag selected) -+ */ -+ -+ /* 1/4 pel */ - while (--quarteriters) - { - CHECK_BETTER(left, tr, tc - 1); -@@ -309,7 +330,7 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, - break; - } - -- // no reason to check the same one again. -+ /* no reason to check the same one again. */ - if (tr == br && tc == bc) - break; - -@@ -367,17 +388,17 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, - y_stride = pre_stride; - #endif - -- // central mv -+ /* central mv */ - bestmv->as_mv.row <<= 3; - bestmv->as_mv.col <<= 3; - startmv = *bestmv; - -- // calculate central point error -+ /* calculate central point error */ - bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1); - *distortion = bestmse; - bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit); - -- // go left then right and check error -+ /* go left then right and check error */ - this_mv.as_mv.row = startmv.as_mv.row; - this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4); - thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, b->src_stride, &sse); -@@ -403,7 +424,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, - *sse1 = sse; - } - -- // go up then down and check error -+ /* go up then down and check error */ - this_mv.as_mv.col = startmv.as_mv.col; - this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4); - thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, b->src_stride, &sse); -@@ -430,10 +451,8 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, - } - - -- // now check 1 more diagonal -+ /* now check 1 more diagonal */ - whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); -- //for(whichdir =0;whichdir<4;whichdir++) -- //{ - this_mv = startmv; - - switch (whichdir) -@@ -471,10 +490,8 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, - *sse1 = sse; - } - --// } -- - -- // time to check quarter pels. -+ /* time to check quarter pels. */ - if (bestmv->as_mv.row < startmv.as_mv.row) - y -= y_stride; - -@@ -485,7 +502,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, - - - -- // go left then right and check error -+ /* go left then right and check error */ - this_mv.as_mv.row = startmv.as_mv.row; - - if (startmv.as_mv.col & 7) -@@ -521,7 +538,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, - *sse1 = sse; - } - -- // go up then down and check error -+ /* go up then down and check error */ - this_mv.as_mv.col = startmv.as_mv.col; - - if (startmv.as_mv.row & 7) -@@ -558,11 +575,9 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, - } - - -- // now check 1 more diagonal -+ /* now check 1 more diagonal */ - whichdir = (left < right ? 0 : 1) + (up < down ? 
0 : 2); - --// for(whichdir=0;whichdir<4;whichdir++) --// { - this_mv = startmv; - - switch (whichdir) -@@ -684,17 +699,17 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, - y_stride = pre_stride; - #endif - -- // central mv -+ /* central mv */ - bestmv->as_mv.row <<= 3; - bestmv->as_mv.col <<= 3; - startmv = *bestmv; - -- // calculate central point error -+ /* calculate central point error */ - bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1); - *distortion = bestmse; - bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit); - -- // go left then right and check error -+ /* go left then right and check error */ - this_mv.as_mv.row = startmv.as_mv.row; - this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4); - thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, b->src_stride, &sse); -@@ -720,7 +735,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, - *sse1 = sse; - } - -- // go up then down and check error -+ /* go up then down and check error */ - this_mv.as_mv.col = startmv.as_mv.col; - this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4); - thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, b->src_stride, &sse); -@@ -746,7 +761,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, - *sse1 = sse; - } - -- // now check 1 more diagonal - -+ /* now check 1 more diagonal - */ - whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); - this_mv = startmv; - -@@ -855,7 +870,7 @@ int vp8_hex_search - int in_what_stride = pre_stride; - int br, bc; - int_mv this_mv; -- unsigned int bestsad = 0x7fffffff; -+ unsigned int bestsad; - unsigned int thissad; - unsigned char *base_offset; - unsigned char *this_offset; -@@ -869,18 +884,17 @@ int vp8_hex_search - fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; - fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; - -- // adjust ref_mv to make sure it is within MV range -+ /* adjust ref_mv to make sure it is within MV range */ - vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); - br = ref_mv->as_mv.row; - bc = ref_mv->as_mv.col; - -- // Work out the start point for the search -+ /* Work out the start point for the search */ - base_offset = (unsigned char *)(base_pre + d->offset); - this_offset = base_offset + (br * (pre_stride)) + bc; - this_mv.as_mv.row = br; - this_mv.as_mv.col = bc; -- bestsad = vfp->sdf( what, what_stride, this_offset, -- in_what_stride, 0x7fffffff) -+ bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride, UINT_MAX) - + mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit); - - #if CONFIG_MULTI_RES_ENCODING -@@ -895,8 +909,7 @@ int vp8_hex_search - dia_range = 8; - #endif - -- // hex search -- //j=0 -+ /* hex search */ - CHECK_BOUNDS(2) - - if(all_in) -@@ -906,7 +919,7 @@ int vp8_hex_search - this_mv.as_mv.row = br + hex[i].row; - this_mv.as_mv.col = bc + hex[i].col; - this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col; -- thissad=vfp->sdf( what, what_stride, this_offset, in_what_stride, bestsad); -+ thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); - CHECK_BETTER - } - }else -@@ -917,7 +930,7 @@ int vp8_hex_search - this_mv.as_mv.col = bc + hex[i].col; - CHECK_POINT - this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col; -- thissad=vfp->sdf( what, what_stride, this_offset, in_what_stride, bestsad); -+ thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); - CHECK_BETTER - } - } -@@ -943,7 +956,7 
@@ int vp8_hex_search - this_mv.as_mv.row = br + next_chkpts[k][i].row; - this_mv.as_mv.col = bc + next_chkpts[k][i].col; - this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col; -- thissad = vfp->sdf( what, what_stride, this_offset, in_what_stride, bestsad); -+ thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); - CHECK_BETTER - } - }else -@@ -954,7 +967,7 @@ int vp8_hex_search - this_mv.as_mv.col = bc + next_chkpts[k][i].col; - CHECK_POINT - this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col; -- thissad = vfp->sdf( what, what_stride, this_offset, in_what_stride, bestsad); -+ thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); - CHECK_BETTER - } - } -@@ -971,7 +984,7 @@ int vp8_hex_search - } - } - -- // check 4 1-away neighbors -+ /* check 4 1-away neighbors */ - cal_neighbors: - for (j = 0; j < dia_range; j++) - { -@@ -985,7 +998,7 @@ cal_neighbors: - this_mv.as_mv.row = br + neighbors[i].row; - this_mv.as_mv.col = bc + neighbors[i].col; - this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col; -- thissad = vfp->sdf( what, what_stride, this_offset, in_what_stride, bestsad); -+ thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); - CHECK_BETTER - } - }else -@@ -996,7 +1009,7 @@ cal_neighbors: - this_mv.as_mv.col = bc + neighbors[i].col; - CHECK_POINT - this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col; -- thissad = vfp->sdf( what, what_stride, this_offset, in_what_stride, bestsad); -+ thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); - CHECK_BETTER - } - } -@@ -1047,7 +1060,8 @@ int vp8_diamond_search_sad_c - int tot_steps; - int_mv this_mv; - -- int bestsad = INT_MAX; -+ unsigned int bestsad; -+ unsigned int thissad; - int best_site = 0; - int last_site = 0; - -@@ -1058,10 +1072,12 @@ int vp8_diamond_search_sad_c - search_site *ss; - - unsigned char *check_here; -- int thissad; - -- int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]}; -+ int *mvsadcost[2]; - int_mv fcenter_mv; -+ -+ mvsadcost[0] = x->mvsadcost[0]; -+ mvsadcost[1] = x->mvsadcost[1]; - fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; - fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; - -@@ -1072,17 +1088,18 @@ int vp8_diamond_search_sad_c - best_mv->as_mv.row = ref_row; - best_mv->as_mv.col = ref_col; - -- // Work out the start point for the search -+ /* Work out the start point for the search */ - in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) + ref_col); - best_address = in_what; - -- // Check the starting position -- bestsad = fn_ptr->sdf(what, what_stride, in_what, -- in_what_stride, 0x7fffffff) -- + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); -+ /* Check the starting position */ -+ bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, UINT_MAX) -+ + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); - -- // search_param determines the length of the initial step and hence the number of iterations -- // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc. -+ /* search_param determines the length of the initial step and hence -+ * the number of iterations 0 = initial step (MAX_FIRST_STEP) pel : -+ * 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc. 
-+ */ - ss = &x->ss[search_param * x->searches_per_step]; - tot_steps = (x->ss_count / x->searches_per_step) - search_param; - -@@ -1092,7 +1109,7 @@ int vp8_diamond_search_sad_c - { - for (j = 0 ; j < x->searches_per_step ; j++) - { -- // Trap illegal vectors -+ /* Trap illegal vectors */ - this_row_offset = best_mv->as_mv.row + ss[i].mv.row; - this_col_offset = best_mv->as_mv.col + ss[i].mv.col; - -@@ -1101,14 +1118,14 @@ int vp8_diamond_search_sad_c - - { - check_here = ss[i].offset + best_address; -- thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad); -+ thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad); - - if (thissad < bestsad) - { - this_mv.as_mv.row = this_row_offset; - this_mv.as_mv.col = this_col_offset; - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, -- mvsadcost, sad_per_bit); -+ mvsadcost, sad_per_bit); - - if (thissad < bestsad) - { -@@ -1135,11 +1152,8 @@ int vp8_diamond_search_sad_c - this_mv.as_mv.row = best_mv->as_mv.row << 3; - this_mv.as_mv.col = best_mv->as_mv.col << 3; - -- if (bestsad == INT_MAX) -- return INT_MAX; -- -- return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad)) -- + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); -+ return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) -+ + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); - } - - int vp8_diamond_search_sadx4 -@@ -1170,7 +1184,8 @@ int vp8_diamond_search_sadx4 - int tot_steps; - int_mv this_mv; - -- unsigned int bestsad = UINT_MAX; -+ unsigned int bestsad; -+ unsigned int thissad; - int best_site = 0; - int last_site = 0; - -@@ -1181,10 +1196,12 @@ int vp8_diamond_search_sadx4 - search_site *ss; - - unsigned char *check_here; -- unsigned int thissad; - -- int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]}; -+ int *mvsadcost[2]; - int_mv fcenter_mv; -+ -+ mvsadcost[0] = x->mvsadcost[0]; -+ mvsadcost[1] = x->mvsadcost[1]; - fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; - fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; - -@@ -1195,17 +1212,18 @@ int vp8_diamond_search_sadx4 - best_mv->as_mv.row = ref_row; - best_mv->as_mv.col = ref_col; - -- // Work out the start point for the search -+ /* Work out the start point for the search */ - in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) + ref_col); - best_address = in_what; - -- // Check the starting position -- bestsad = fn_ptr->sdf(what, what_stride, -- in_what, in_what_stride, 0x7fffffff) -- + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); -+ /* Check the starting position */ -+ bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, UINT_MAX) -+ + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); - -- // search_param determines the length of the initial step and hence the number of iterations -- // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc. -+ /* search_param determines the length of the initial step and hence the -+ * number of iterations 0 = initial step (MAX_FIRST_STEP) pel : 1 = -+ * (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc. -+ */ - ss = &x->ss[search_param * x->searches_per_step]; - tot_steps = (x->ss_count / x->searches_per_step) - search_param; - -@@ -1215,8 +1233,10 @@ int vp8_diamond_search_sadx4 - { - int all_in = 1, t; - -- // To know if all neighbor points are within the bounds, 4 bounds checking are enough instead of -- // checking 4 bounds for each points. 
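
The search hunks in this region consistently replace the signed 0x7fffffff starting value with UINT_MAX, track bestsad/thissad as unsigned int, and pass the running best into the SAD routine so it can stop early once a candidate is already worse. Below is a minimal standalone C sketch of that pattern; the sad_fn_t typedef, pick_best_candidate helper and its parameters are illustrative names only, not the libvpx interface.

#include <limits.h>

/* Hypothetical SAD callback: compares a source block against a reference
 * block and may return early once the partial sum exceeds best_sad. */
typedef unsigned int (*sad_fn_t)(const unsigned char *src, int src_stride,
                                 const unsigned char *ref, int ref_stride,
                                 unsigned int best_sad);

/* Scan a list of candidate column offsets and keep the smallest SAD.
 * best_sad starts at UINT_MAX, the natural "worse than anything" sentinel
 * for an unsigned accumulator, instead of the signed 0x7fffffff. */
static unsigned int pick_best_candidate(sad_fn_t sdf,
                                        const unsigned char *src, int src_stride,
                                        const unsigned char *ref, int ref_stride,
                                        const int *col_offsets, int n,
                                        int *best_idx)
{
    unsigned int best_sad = UINT_MAX;
    int i;

    *best_idx = -1;
    for (i = 0; i < n; i++)
    {
        /* Passing best_sad lets the SAD routine bail out early. */
        unsigned int this_sad = sdf(src, src_stride,
                                    ref + col_offsets[i], ref_stride, best_sad);
        if (this_sad < best_sad)
        {
            best_sad = this_sad;
            *best_idx = i;
        }
    }
    return best_sad;
}
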
-+ /* To know if all neighbor points are within the bounds, 4 bounds -+ * checking are enough instead of checking 4 bounds for each -+ * points. -+ */ - all_in &= ((best_mv->as_mv.row + ss[i].mv.row)> x->mv_row_min); - all_in &= ((best_mv->as_mv.row + ss[i+1].mv.row) < x->mv_row_max); - all_in &= ((best_mv->as_mv.col + ss[i+2].mv.col) > x->mv_col_min); -@@ -1228,7 +1248,7 @@ int vp8_diamond_search_sadx4 - - for (j = 0 ; j < x->searches_per_step ; j += 4) - { -- unsigned char *block_offset[4]; -+ const unsigned char *block_offset[4]; - - for (t = 0; t < 4; t++) - block_offset[t] = ss[i+t].offset + best_address; -@@ -1257,7 +1277,7 @@ int vp8_diamond_search_sadx4 - { - for (j = 0 ; j < x->searches_per_step ; j++) - { -- // Trap illegal vectors -+ /* Trap illegal vectors */ - this_row_offset = best_mv->as_mv.row + ss[i].mv.row; - this_col_offset = best_mv->as_mv.col + ss[i].mv.col; - -@@ -1265,14 +1285,14 @@ int vp8_diamond_search_sadx4 - (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max)) - { - check_here = ss[i].offset + best_address; -- thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad); -+ thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad); - - if (thissad < bestsad) - { - this_mv.as_mv.row = this_row_offset; - this_mv.as_mv.col = this_col_offset; - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, -- mvsadcost, sad_per_bit); -+ mvsadcost, sad_per_bit); - - if (thissad < bestsad) - { -@@ -1299,11 +1319,8 @@ int vp8_diamond_search_sadx4 - this_mv.as_mv.row = best_mv->as_mv.row << 3; - this_mv.as_mv.col = best_mv->as_mv.col << 3; - -- if (bestsad == INT_MAX) -- return INT_MAX; -- -- return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad)) -- + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); -+ return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) -+ + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); - } - - int vp8_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, -@@ -1321,11 +1338,11 @@ int vp8_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, - unsigned char *bestaddress; - int_mv *best_mv = &d->bmi.mv; - int_mv this_mv; -- int bestsad = INT_MAX; -+ unsigned int bestsad; -+ unsigned int thissad; - int r, c; - - unsigned char *check_here; -- int thissad; - - int ref_row = ref_mv->as_mv.row; - int ref_col = ref_mv->as_mv.col; -@@ -1335,24 +1352,29 @@ int vp8_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, - int col_min = ref_col - distance; - int col_max = ref_col + distance; - -- int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]}; -+ int *mvsadcost[2]; - int_mv fcenter_mv; -+ -+ mvsadcost[0] = x->mvsadcost[0]; -+ mvsadcost[1] = x->mvsadcost[1]; - fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; - fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; - -- // Work out the mid point for the search -+ /* Work out the mid point for the search */ - in_what = base_pre + d->offset; - bestaddress = in_what + (ref_row * pre_stride) + ref_col; - - best_mv->as_mv.row = ref_row; - best_mv->as_mv.col = ref_col; - -- // Baseline value at the centre -+ /* Baseline value at the centre */ - bestsad = fn_ptr->sdf(what, what_stride, bestaddress, -- in_what_stride, 0x7fffffff) -- + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); -+ in_what_stride, UINT_MAX) -+ + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); - -- // Apply further limits to prevent us looking using 
vectors that stretch beyiond the UMV border -+ /* Apply further limits to prevent us looking using vectors that -+ * stretch beyiond the UMV border -+ */ - if (col_min < x->mv_col_min) - col_min = x->mv_col_min; - -@@ -1372,11 +1394,11 @@ int vp8_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, - - for (c = col_min; c < col_max; c++) - { -- thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad); -+ thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad); - - this_mv.as_mv.col = c; -- thissad += mvsad_err_cost(&this_mv, &fcenter_mv, -- mvsadcost, sad_per_bit); -+ thissad += mvsad_err_cost(&this_mv, &fcenter_mv, -+ mvsadcost, sad_per_bit); - - if (thissad < bestsad) - { -@@ -1393,11 +1415,8 @@ int vp8_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, - this_mv.as_mv.row = best_mv->as_mv.row << 3; - this_mv.as_mv.col = best_mv->as_mv.col << 3; - -- if (bestsad < INT_MAX) -- return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad)) -- + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); -- else -- return INT_MAX; -+ return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) -+ + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); - } - - int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, -@@ -1415,11 +1434,11 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, - unsigned char *bestaddress; - int_mv *best_mv = &d->bmi.mv; - int_mv this_mv; -- unsigned int bestsad = UINT_MAX; -+ unsigned int bestsad; -+ unsigned int thissad; - int r, c; - - unsigned char *check_here; -- unsigned int thissad; - - int ref_row = ref_mv->as_mv.row; - int ref_col = ref_mv->as_mv.col; -@@ -1431,24 +1450,29 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, - - unsigned int sad_array[3]; - -- int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]}; -+ int *mvsadcost[2]; - int_mv fcenter_mv; -+ -+ mvsadcost[0] = x->mvsadcost[0]; -+ mvsadcost[1] = x->mvsadcost[1]; - fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; - fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; - -- // Work out the mid point for the search -+ /* Work out the mid point for the search */ - in_what = base_pre + d->offset; - bestaddress = in_what + (ref_row * pre_stride) + ref_col; - - best_mv->as_mv.row = ref_row; - best_mv->as_mv.col = ref_col; - -- // Baseline value at the centre -- bestsad = fn_ptr->sdf(what, what_stride, -- bestaddress, in_what_stride, 0x7fffffff) -- + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); -+ /* Baseline value at the centre */ -+ bestsad = fn_ptr->sdf(what, what_stride, bestaddress, -+ in_what_stride, UINT_MAX) -+ + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); - -- // Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border -+ /* Apply further limits to prevent us looking using vectors that stretch -+ * beyond the UMV border -+ */ - if (col_min < x->mv_col_min) - col_min = x->mv_col_min; - -@@ -1471,7 +1495,7 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, - { - int i; - -- fn_ptr->sdx3f(what, what_stride, check_here , in_what_stride, sad_array); -+ fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array); - - for (i = 0; i < 3; i++) - { -@@ -1480,8 +1504,8 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, - if (thissad < bestsad) - { - 
this_mv.as_mv.col = c; -- thissad += mvsad_err_cost(&this_mv, &fcenter_mv, -- mvsadcost, sad_per_bit); -+ thissad += mvsad_err_cost(&this_mv, &fcenter_mv, -+ mvsadcost, sad_per_bit); - - if (thissad < bestsad) - { -@@ -1499,13 +1523,13 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, - - while (c < col_max) - { -- thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad); -+ thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad); - - if (thissad < bestsad) - { - this_mv.as_mv.col = c; -- thissad += mvsad_err_cost(&this_mv, &fcenter_mv, -- mvsadcost, sad_per_bit); -+ thissad += mvsad_err_cost(&this_mv, &fcenter_mv, -+ mvsadcost, sad_per_bit); - - if (thissad < bestsad) - { -@@ -1525,11 +1549,8 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, - this_mv.as_mv.row = best_mv->as_mv.row << 3; - this_mv.as_mv.col = best_mv->as_mv.col << 3; - -- if (bestsad < INT_MAX) -- return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad)) -- + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); -- else -- return INT_MAX; -+ return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) -+ + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); - } - - int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, -@@ -1547,11 +1568,11 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, - unsigned char *bestaddress; - int_mv *best_mv = &d->bmi.mv; - int_mv this_mv; -- unsigned int bestsad = UINT_MAX; -+ unsigned int bestsad; -+ unsigned int thissad; - int r, c; - - unsigned char *check_here; -- unsigned int thissad; - - int ref_row = ref_mv->as_mv.row; - int ref_col = ref_mv->as_mv.col; -@@ -1564,24 +1585,29 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, - DECLARE_ALIGNED_ARRAY(16, unsigned short, sad_array8, 8); - unsigned int sad_array[3]; - -- int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]}; -+ int *mvsadcost[2]; - int_mv fcenter_mv; -+ -+ mvsadcost[0] = x->mvsadcost[0]; -+ mvsadcost[1] = x->mvsadcost[1]; - fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; - fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; - -- // Work out the mid point for the search -+ /* Work out the mid point for the search */ - in_what = base_pre + d->offset; - bestaddress = in_what + (ref_row * pre_stride) + ref_col; - - best_mv->as_mv.row = ref_row; - best_mv->as_mv.col = ref_col; - -- // Baseline value at the centre -+ /* Baseline value at the centre */ - bestsad = fn_ptr->sdf(what, what_stride, -- bestaddress, in_what_stride, 0x7fffffff) -- + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); -+ bestaddress, in_what_stride, UINT_MAX) -+ + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); - -- // Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border -+ /* Apply further limits to prevent us looking using vectors that stretch -+ * beyond the UMV border -+ */ - if (col_min < x->mv_col_min) - col_min = x->mv_col_min; - -@@ -1604,17 +1630,17 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, - { - int i; - -- fn_ptr->sdx8f(what, what_stride, check_here , in_what_stride, sad_array8); -+ fn_ptr->sdx8f(what, what_stride, check_here, in_what_stride, sad_array8); - - for (i = 0; i < 8; i++) - { -- thissad = (unsigned int)sad_array8[i]; -+ thissad = sad_array8[i]; - - if (thissad < bestsad) - { - 
this_mv.as_mv.col = c; -- thissad += mvsad_err_cost(&this_mv, &fcenter_mv, -- mvsadcost, sad_per_bit); -+ thissad += mvsad_err_cost(&this_mv, &fcenter_mv, -+ mvsadcost, sad_per_bit); - - if (thissad < bestsad) - { -@@ -1687,11 +1713,8 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, - this_mv.as_mv.row = best_mv->as_mv.row << 3; - this_mv.as_mv.col = best_mv->as_mv.col << 3; - -- if (bestsad < INT_MAX) -- return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad)) -- + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); -- else -- return INT_MAX; -+ return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) -+ + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); - } - - int vp8_refining_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, -@@ -1711,17 +1734,21 @@ int vp8_refining_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv - unsigned char *best_address = (unsigned char *)(base_pre + d->offset + - (ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col); - unsigned char *check_here; -- unsigned int thissad; - int_mv this_mv; -- unsigned int bestsad = INT_MAX; -+ unsigned int bestsad; -+ unsigned int thissad; - -- int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]}; -+ int *mvsadcost[2]; - int_mv fcenter_mv; - -+ mvsadcost[0] = x->mvsadcost[0]; -+ mvsadcost[1] = x->mvsadcost[1]; - fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; - fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; - -- bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride, 0x7fffffff) + mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit); -+ bestsad = fn_ptr->sdf(what, what_stride, best_address, -+ in_what_stride, UINT_MAX) -+ + mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit); - - for (i=0; ias_mv.row << 3; - this_mv.as_mv.col = ref_mv->as_mv.col << 3; - -- if (bestsad < INT_MAX) -- return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad)) -- + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); -- else -- return INT_MAX; -+ return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) -+ + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); - } - - int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, -@@ -1790,17 +1814,21 @@ int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, - unsigned char *best_address = (unsigned char *)(base_pre + d->offset + - (ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col); - unsigned char *check_here; -- unsigned int thissad; - int_mv this_mv; -- unsigned int bestsad = INT_MAX; -+ unsigned int bestsad; -+ unsigned int thissad; - -- int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]}; -+ int *mvsadcost[2]; - int_mv fcenter_mv; - -+ mvsadcost[0] = x->mvsadcost[0]; -+ mvsadcost[1] = x->mvsadcost[1]; - fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; - fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; - -- bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride, 0x7fffffff) + mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit); -+ bestsad = fn_ptr->sdf(what, what_stride, best_address, -+ in_what_stride, UINT_MAX) -+ + mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit); - - for (i=0; ias_mv.row << 3; - this_mv.as_mv.col = ref_mv->as_mv.col << 3; - -- if (bestsad < INT_MAX) -- return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad)) -- + mv_err_cost(&this_mv, center_mv, 
mvcost, x->errorperbit); -- else -- return INT_MAX; -+ return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) -+ + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); - } - - #ifdef ENTROPY_STATS -@@ -1900,16 +1925,16 @@ void print_mode_context(void) - - for (j = 0; j < 6; j++) - { -- fprintf(f, " { // %d \n", j); -+ fprintf(f, " { /* %d */\n", j); - fprintf(f, " "); - - for (i = 0; i < 4; i++) - { - int overal_prob; - int this_prob; -- int count; // = mv_ref_ct[j][i][0]+mv_ref_ct[j][i][1]; -+ int count; - -- // Overall probs -+ /* Overall probs */ - count = mv_mode_cts[i][0] + mv_mode_cts[i][1]; - - if (count) -@@ -1920,7 +1945,7 @@ void print_mode_context(void) - if (overal_prob == 0) - overal_prob = 1; - -- // context probs -+ /* context probs */ - count = mv_ref_ct[j][i][0] + mv_ref_ct[j][i][1]; - - if (count) -@@ -1932,8 +1957,6 @@ void print_mode_context(void) - this_prob = 1; - - fprintf(f, "%5d, ", this_prob); -- //fprintf(f,"%5d, %5d, %8d,", this_prob, overal_prob, (this_prob << 10)/overal_prob); -- //fprintf(f,"%8d, ", (this_prob << 10)/overal_prob); - } - - fprintf(f, " },\n"); -diff --git a/vp8/encoder/mcomp.h b/vp8/encoder/mcomp.h -index cdb0cb6..890113f 100644 ---- a/vp8/encoder/mcomp.h -+++ b/vp8/encoder/mcomp.h -@@ -21,9 +21,16 @@ extern void accum_mv_refs(MB_PREDICTION_MODE, const int near_mv_ref_cts[4]); - #endif - - --#define MAX_MVSEARCH_STEPS 8 // The maximum number of steps in a step search given the largest allowed initial step --#define MAX_FULL_PEL_VAL ((1 << (MAX_MVSEARCH_STEPS)) - 1) // Max full pel mv specified in 1 pel units --#define MAX_FIRST_STEP (1 << (MAX_MVSEARCH_STEPS-1)) // Maximum size of the first step in full pel units -+/* The maximum number of steps in a step search given the largest allowed -+ * initial step -+ */ -+#define MAX_MVSEARCH_STEPS 8 -+ -+/* Max full pel mv specified in 1 pel units */ -+#define MAX_FULL_PEL_VAL ((1 << (MAX_MVSEARCH_STEPS)) - 1) -+ -+/* Maximum size of the first step in full pel units */ -+#define MAX_FIRST_STEP (1 << (MAX_MVSEARCH_STEPS-1)) - - extern void print_mode_context(void); - extern int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int Weight); -diff --git a/vp8/encoder/modecosts.c b/vp8/encoder/modecosts.c -index c636c48..c61563c 100644 ---- a/vp8/encoder/modecosts.c -+++ b/vp8/encoder/modecosts.c -@@ -18,6 +18,8 @@ - void vp8_init_mode_costs(VP8_COMP *c) - { - VP8_COMMON *x = &c->common; -+ struct rd_costs_struct *rd_costs = &c->rd_costs; -+ - { - const vp8_tree_p T = vp8_bmode_tree; - -@@ -29,19 +31,24 @@ void vp8_init_mode_costs(VP8_COMP *c) - - do - { -- vp8_cost_tokens((int *)c->mb.bmode_costs[i][j], x->kf_bmode_prob[i][j], T); -+ vp8_cost_tokens(rd_costs->bmode_costs[i][j], -+ vp8_kf_bmode_prob[i][j], T); - } - while (++j < VP8_BINTRAMODES); - } - while (++i < VP8_BINTRAMODES); - -- vp8_cost_tokens((int *)c->mb.inter_bmode_costs, x->fc.bmode_prob, T); -+ vp8_cost_tokens(rd_costs->inter_bmode_costs, x->fc.bmode_prob, T); - } -- vp8_cost_tokens((int *)c->mb.inter_bmode_costs, x->fc.sub_mv_ref_prob, vp8_sub_mv_ref_tree); -+ vp8_cost_tokens(rd_costs->inter_bmode_costs, x->fc.sub_mv_ref_prob, -+ vp8_sub_mv_ref_tree); - -- vp8_cost_tokens(c->mb.mbmode_cost[1], x->fc.ymode_prob, vp8_ymode_tree); -- vp8_cost_tokens(c->mb.mbmode_cost[0], x->kf_ymode_prob, vp8_kf_ymode_tree); -+ vp8_cost_tokens(rd_costs->mbmode_cost[1], x->fc.ymode_prob, vp8_ymode_tree); -+ vp8_cost_tokens(rd_costs->mbmode_cost[0], vp8_kf_ymode_prob, -+ vp8_kf_ymode_tree); - -- 
vp8_cost_tokens(c->mb.intra_uv_mode_cost[1], x->fc.uv_mode_prob, vp8_uv_mode_tree); -- vp8_cost_tokens(c->mb.intra_uv_mode_cost[0], x->kf_uv_mode_prob, vp8_uv_mode_tree); -+ vp8_cost_tokens(rd_costs->intra_uv_mode_cost[1], x->fc.uv_mode_prob, -+ vp8_uv_mode_tree); -+ vp8_cost_tokens(rd_costs->intra_uv_mode_cost[0], vp8_kf_uv_mode_prob, -+ vp8_uv_mode_tree); - } -diff --git a/vp8/encoder/mr_dissim.c b/vp8/encoder/mr_dissim.c -index 7a62a06..71218cc 100644 ---- a/vp8/encoder/mr_dissim.c -+++ b/vp8/encoder/mr_dissim.c -@@ -53,6 +53,7 @@ if(x->mbmi.ref_frame !=INTRA_FRAME) \ - void vp8_cal_dissimilarity(VP8_COMP *cpi) - { - VP8_COMMON *cm = &cpi->common; -+ int i; - - /* Note: The first row & first column in mip are outside the frame, which - * were initialized to all 0.(ref_frame, mode, mv...) -@@ -65,14 +66,25 @@ void vp8_cal_dissimilarity(VP8_COMP *cpi) - /* Store info for show/no-show frames for supporting alt_ref. - * If parent frame is alt_ref, child has one too. - */ -+ LOWER_RES_FRAME_INFO* store_info -+ = (LOWER_RES_FRAME_INFO*)cpi->oxcf.mr_low_res_mode_info; -+ -+ store_info->frame_type = cm->frame_type; -+ -+ if(cm->frame_type != KEY_FRAME) -+ { -+ store_info->is_frame_dropped = 0; -+ for (i = 1; i < MAX_REF_FRAMES; i++) -+ store_info->low_res_ref_frames[i] = cpi->current_ref_frames[i]; -+ } -+ - if(cm->frame_type != KEY_FRAME) - { - int mb_row; - int mb_col; - /* Point to beginning of allocated MODE_INFO arrays. */ - MODE_INFO *tmp = cm->mip + cm->mode_info_stride; -- LOWER_RES_INFO* store_mode_info -- = (LOWER_RES_INFO*)cpi->oxcf.mr_low_res_mode_info; -+ LOWER_RES_MB_INFO* store_mode_info = store_info->mb_info; - - for (mb_row = 0; mb_row < cm->mb_rows; mb_row ++) - { -@@ -199,3 +211,26 @@ void vp8_cal_dissimilarity(VP8_COMP *cpi) - } - } - } -+ -+/* This function is called only when this frame is dropped at current -+ resolution level. */ -+void vp8_store_drop_frame_info(VP8_COMP *cpi) -+{ -+ /* If the frame is dropped in lower-resolution encoding, this information -+ is passed to higher resolution level so that the encoder knows there -+ is no mode & motion info available. -+ */ -+ if (cpi->oxcf.mr_total_resolutions >1 -+ && cpi->oxcf.mr_encoder_id < (cpi->oxcf.mr_total_resolutions - 1)) -+ { -+ /* Store info for show/no-show frames for supporting alt_ref. -+ * If parent frame is alt_ref, child has one too. -+ */ -+ LOWER_RES_FRAME_INFO* store_info -+ = (LOWER_RES_FRAME_INFO*)cpi->oxcf.mr_low_res_mode_info; -+ -+ /* Set frame_type to be INTER_FRAME since we won't drop key frame. 
*/ -+ store_info->frame_type = INTER_FRAME; -+ store_info->is_frame_dropped = 1; -+ } -+} -diff --git a/vp8/encoder/mr_dissim.h b/vp8/encoder/mr_dissim.h -index 3d2c203..f8cb135 100644 ---- a/vp8/encoder/mr_dissim.h -+++ b/vp8/encoder/mr_dissim.h -@@ -15,5 +15,6 @@ - - extern void vp8_cal_low_res_mb_cols(VP8_COMP *cpi); - extern void vp8_cal_dissimilarity(VP8_COMP *cpi); -+extern void vp8_store_drop_frame_info(VP8_COMP *cpi); - - #endif -diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c -index cee62fa..4680f39 100644 ---- a/vp8/encoder/onyx_if.c -+++ b/vp8/encoder/onyx_if.c -@@ -11,6 +11,7 @@ - - #include "vpx_config.h" - #include "vp8/common/onyxc_int.h" -+#include "vp8/common/blockd.h" - #include "onyx_int.h" - #include "vp8/common/systemdependent.h" - #include "quantize.h" -@@ -55,12 +56,8 @@ extern void vp8_deblock_frame(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *po - extern void print_parms(VP8_CONFIG *ocf, char *filenam); - extern unsigned int vp8_get_processor_freq(); - extern void print_tree_update_probs(); --extern void vp8cx_create_encoder_threads(VP8_COMP *cpi); -+extern int vp8cx_create_encoder_threads(VP8_COMP *cpi); - extern void vp8cx_remove_encoder_threads(VP8_COMP *cpi); --#if HAVE_NEON --extern void vp8_yv12_copy_frame_func_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc); --extern void vp8_yv12_copy_src_frame_func_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc); --#endif - - int vp8_estimate_entropy_savings(VP8_COMP *cpi); - -@@ -143,7 +140,7 @@ extern const int qzbin_factors[129]; - extern void vp8cx_init_quantizer(VP8_COMP *cpi); - extern const int vp8cx_base_skip_false_prob[128]; - --// Tables relating active max Q to active min Q -+/* Tables relating active max Q to active min Q */ - static const unsigned char kf_low_motion_minq[QINDEX_RANGE] = - { - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -@@ -219,9 +216,8 @@ static void save_layer_context(VP8_COMP *cpi) - { - LAYER_CONTEXT *lc = &cpi->layer_context[cpi->current_layer]; - -- // Save layer dependent coding state -+ /* Save layer dependent coding state */ - lc->target_bandwidth = cpi->target_bandwidth; -- //lc->target_bandwidth = cpi->oxcf.target_bandwidth; - lc->starting_buffer_level = cpi->oxcf.starting_buffer_level; - lc->optimal_buffer_level = cpi->oxcf.optimal_buffer_level; - lc->maximum_buffer_size = cpi->oxcf.maximum_buffer_size; -@@ -242,7 +238,7 @@ static void save_layer_context(VP8_COMP *cpi) - lc->rate_correction_factor = cpi->rate_correction_factor; - lc->key_frame_rate_correction_factor = cpi->key_frame_rate_correction_factor; - lc->gf_rate_correction_factor = cpi->gf_rate_correction_factor; -- lc->zbin_over_quant = cpi->zbin_over_quant; -+ lc->zbin_over_quant = cpi->mb.zbin_over_quant; - lc->inter_frame_target = cpi->inter_frame_target; - lc->total_byte_count = cpi->total_byte_count; - lc->filter_level = cpi->common.filter_level; -@@ -250,15 +246,15 @@ static void save_layer_context(VP8_COMP *cpi) - lc->last_frame_percent_intra = cpi->last_frame_percent_intra; - - memcpy (lc->count_mb_ref_frame_usage, -- cpi->count_mb_ref_frame_usage, -- sizeof(cpi->count_mb_ref_frame_usage)); -+ cpi->mb.count_mb_ref_frame_usage, -+ sizeof(cpi->mb.count_mb_ref_frame_usage)); - } - - static void restore_layer_context(VP8_COMP *cpi, const int layer) - { - LAYER_CONTEXT *lc = &cpi->layer_context[layer]; - -- // Restore layer dependent coding state -+ /* Restore layer dependent coding state */ - cpi->current_layer = layer; - cpi->target_bandwidth = lc->target_bandwidth; - 
cpi->oxcf.target_bandwidth = lc->target_bandwidth; -@@ -271,9 +267,7 @@ static void restore_layer_context(VP8_COMP *cpi, const int layer) - cpi->buffer_level = lc->buffer_level; - cpi->bits_off_target = lc->bits_off_target; - cpi->total_actual_bits = lc->total_actual_bits; -- //cpi->worst_quality = lc->worst_quality; - cpi->active_worst_quality = lc->active_worst_quality; -- //cpi->best_quality = lc->best_quality; - cpi->active_best_quality = lc->active_best_quality; - cpi->ni_av_qi = lc->ni_av_qi; - cpi->ni_tot_qi = lc->ni_tot_qi; -@@ -282,26 +276,31 @@ static void restore_layer_context(VP8_COMP *cpi, const int layer) - cpi->rate_correction_factor = lc->rate_correction_factor; - cpi->key_frame_rate_correction_factor = lc->key_frame_rate_correction_factor; - cpi->gf_rate_correction_factor = lc->gf_rate_correction_factor; -- cpi->zbin_over_quant = lc->zbin_over_quant; -+ cpi->mb.zbin_over_quant = lc->zbin_over_quant; - cpi->inter_frame_target = lc->inter_frame_target; - cpi->total_byte_count = lc->total_byte_count; - cpi->common.filter_level = lc->filter_level; - - cpi->last_frame_percent_intra = lc->last_frame_percent_intra; - -- memcpy (cpi->count_mb_ref_frame_usage, -+ memcpy (cpi->mb.count_mb_ref_frame_usage, - lc->count_mb_ref_frame_usage, -- sizeof(cpi->count_mb_ref_frame_usage)); -+ sizeof(cpi->mb.count_mb_ref_frame_usage)); - } - - static void setup_features(VP8_COMP *cpi) - { -- // Set up default state for MB feature flags -- cpi->mb.e_mbd.segmentation_enabled = 0; -- cpi->mb.e_mbd.update_mb_segmentation_map = 0; -- cpi->mb.e_mbd.update_mb_segmentation_data = 0; -- vpx_memset(cpi->mb.e_mbd.mb_segment_tree_probs, 255, sizeof(cpi->mb.e_mbd.mb_segment_tree_probs)); -- vpx_memset(cpi->mb.e_mbd.segment_feature_data, 0, sizeof(cpi->mb.e_mbd.segment_feature_data)); -+ // If segmentation enabled set the update flags -+ if ( cpi->mb.e_mbd.segmentation_enabled ) -+ { -+ cpi->mb.e_mbd.update_mb_segmentation_map = 1; -+ cpi->mb.e_mbd.update_mb_segmentation_data = 1; -+ } -+ else -+ { -+ cpi->mb.e_mbd.update_mb_segmentation_map = 0; -+ cpi->mb.e_mbd.update_mb_segmentation_data = 0; -+ } - - cpi->mb.e_mbd.mode_ref_lf_delta_enabled = 0; - cpi->mb.e_mbd.mode_ref_lf_delta_update = 0; -@@ -323,7 +322,7 @@ static void dealloc_compressor_data(VP8_COMP *cpi) - vpx_free(cpi->tplist); - cpi->tplist = NULL; - -- // Delete last frame MV storage buffers -+ /* Delete last frame MV storage buffers */ - vpx_free(cpi->lfmv); - cpi->lfmv = 0; - -@@ -333,7 +332,7 @@ static void dealloc_compressor_data(VP8_COMP *cpi) - vpx_free(cpi->lf_ref_frame); - cpi->lf_ref_frame = 0; - -- // Delete sementation map -+ /* Delete sementation map */ - vpx_free(cpi->segmentation_map); - cpi->segmentation_map = 0; - -@@ -349,53 +348,61 @@ static void dealloc_compressor_data(VP8_COMP *cpi) - vpx_free(cpi->tok); - cpi->tok = 0; - -- // Structure used to monitor GF usage -+ /* Structure used to monitor GF usage */ - vpx_free(cpi->gf_active_flags); - cpi->gf_active_flags = 0; - -- // Activity mask based per mb zbin adjustments -+ /* Activity mask based per mb zbin adjustments */ - vpx_free(cpi->mb_activity_map); - cpi->mb_activity_map = 0; -- vpx_free(cpi->mb_norm_activity_map); -- cpi->mb_norm_activity_map = 0; - - vpx_free(cpi->mb.pip); - cpi->mb.pip = 0; -+ -+#if CONFIG_MULTITHREAD -+ vpx_free(cpi->mt_current_mb_col); -+ cpi->mt_current_mb_col = NULL; -+#endif - } - - static void enable_segmentation(VP8_COMP *cpi) - { -- // Set the appropriate feature bit -+ /* Set the appropriate feature bit */ - cpi->mb.e_mbd.segmentation_enabled 
= 1; - cpi->mb.e_mbd.update_mb_segmentation_map = 1; - cpi->mb.e_mbd.update_mb_segmentation_data = 1; - } - static void disable_segmentation(VP8_COMP *cpi) - { -- // Clear the appropriate feature bit -+ /* Clear the appropriate feature bit */ - cpi->mb.e_mbd.segmentation_enabled = 0; - } - --// Valid values for a segment are 0 to 3 --// Segmentation map is arrange as [Rows][Columns] -+/* Valid values for a segment are 0 to 3 -+ * Segmentation map is arrange as [Rows][Columns] -+ */ - static void set_segmentation_map(VP8_COMP *cpi, unsigned char *segmentation_map) - { -- // Copy in the new segmentation map -+ /* Copy in the new segmentation map */ - vpx_memcpy(cpi->segmentation_map, segmentation_map, (cpi->common.mb_rows * cpi->common.mb_cols)); - -- // Signal that the map should be updated. -+ /* Signal that the map should be updated. */ - cpi->mb.e_mbd.update_mb_segmentation_map = 1; - cpi->mb.e_mbd.update_mb_segmentation_data = 1; - } - --// The values given for each segment can be either deltas (from the default value chosen for the frame) or absolute values. --// --// Valid range for abs values is (0-127 for MB_LVL_ALT_Q) , (0-63 for SEGMENT_ALT_LF) --// Valid range for delta values are (+/-127 for MB_LVL_ALT_Q) , (+/-63 for SEGMENT_ALT_LF) --// --// abs_delta = SEGMENT_DELTADATA (deltas) abs_delta = SEGMENT_ABSDATA (use the absolute values given). --// --// -+/* The values given for each segment can be either deltas (from the default -+ * value chosen for the frame) or absolute values. -+ * -+ * Valid range for abs values is: -+ * (0-127 for MB_LVL_ALT_Q), (0-63 for SEGMENT_ALT_LF) -+ * Valid range for delta values are: -+ * (+/-127 for MB_LVL_ALT_Q), (+/-63 for SEGMENT_ALT_LF) -+ * -+ * abs_delta = SEGMENT_DELTADATA (deltas) -+ * abs_delta = SEGMENT_ABSDATA (use the absolute values given). -+ * -+ */ - static void set_segment_data(VP8_COMP *cpi, signed char *feature_data, unsigned char abs_delta) - { - cpi->mb.e_mbd.mb_segement_abs_delta = abs_delta; -@@ -411,26 +418,6 @@ static void segmentation_test_function(VP8_COMP *cpi) - // Create a temporary map for segmentation data. 
- CHECK_MEM_ERROR(seg_map, vpx_calloc(cpi->common.mb_rows * cpi->common.mb_cols, 1)); - -- // MB loop to set local segmentation map -- /*for ( i = 0; i < cpi->common.mb_rows; i++ ) -- { -- for ( j = 0; j < cpi->common.mb_cols; j++ ) -- { -- //seg_map[(i*cpi->common.mb_cols) + j] = (j % 2) + ((i%2)* 2); -- //if ( j < cpi->common.mb_cols/2 ) -- -- // Segment 1 around the edge else 0 -- if ( (i == 0) || (j == 0) || (i == (cpi->common.mb_rows-1)) || (j == (cpi->common.mb_cols-1)) ) -- seg_map[(i*cpi->common.mb_cols) + j] = 1; -- //else if ( (i < 2) || (j < 2) || (i > (cpi->common.mb_rows-3)) || (j > (cpi->common.mb_cols-3)) ) -- // seg_map[(i*cpi->common.mb_cols) + j] = 2; -- //else if ( (i < 5) || (j < 5) || (i > (cpi->common.mb_rows-6)) || (j > (cpi->common.mb_cols-6)) ) -- // seg_map[(i*cpi->common.mb_cols) + j] = 3; -- else -- seg_map[(i*cpi->common.mb_cols) + j] = 0; -- } -- }*/ -- - // Set the segmentation Map - set_segmentation_map(cpi, seg_map); - -@@ -453,103 +440,78 @@ static void segmentation_test_function(VP8_COMP *cpi) - set_segment_data(cpi, &feature_data[0][0], SEGMENT_DELTADATA); - - // Delete sementation map -- vpx_free(seg_map); -+ vpx_free(seg_map); - - seg_map = 0; -- - } - --// A simple function to cyclically refresh the background at a lower Q -+/* A simple function to cyclically refresh the background at a lower Q */ - static void cyclic_background_refresh(VP8_COMP *cpi, int Q, int lf_adjustment) - { -- unsigned char *seg_map; -+ unsigned char *seg_map = cpi->segmentation_map; - signed char feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS]; - int i; - int block_count = cpi->cyclic_refresh_mode_max_mbs_perframe; - int mbs_in_frame = cpi->common.mb_rows * cpi->common.mb_cols; - -- // Create a temporary map for segmentation data. -- CHECK_MEM_ERROR(seg_map, vpx_calloc(cpi->common.mb_rows * cpi->common.mb_cols, 1)); -+ cpi->cyclic_refresh_q = Q / 2; - -- cpi->cyclic_refresh_q = Q; -+ // Set every macroblock to be eligible for update. -+ // For key frame this will reset seg map to 0. -+ vpx_memset(cpi->segmentation_map, 0, mbs_in_frame); - -- for (i = Q; i > 0; i--) -- { -- if (vp8_bits_per_mb[cpi->common.frame_type][i] >= ((vp8_bits_per_mb[cpi->common.frame_type][Q]*(Q + 128)) / 64)) -- //if ( vp8_bits_per_mb[cpi->common.frame_type][i] >= ((vp8_bits_per_mb[cpi->common.frame_type][Q]*((2*Q)+96))/64) ) -- { -- break; -- } -- } -- -- cpi->cyclic_refresh_q = i; -- -- // Only update for inter frames - if (cpi->common.frame_type != KEY_FRAME) - { -- // Cycle through the macro_block rows -- // MB loop to set local segmentation map -- for (i = cpi->cyclic_refresh_mode_index; i < mbs_in_frame; i++) -+ /* Cycle through the macro_block rows */ -+ /* MB loop to set local segmentation map */ -+ i = cpi->cyclic_refresh_mode_index; -+ assert(i < mbs_in_frame); -+ do - { -- // If the MB is as a candidate for clean up then mark it for possible boost/refresh (segment 1) -- // The segment id may get reset to 0 later if the MB gets coded anything other than last frame 0,0 -- // as only (last frame 0,0) MBs are eligable for refresh : that is to say Mbs likely to be background blocks. -- if (cpi->cyclic_refresh_map[i] == 0) -- { -- seg_map[i] = 1; -- } -- else -- { -- seg_map[i] = 0; -- -- // Skip blocks that have been refreshed recently anyway. 
-- if (cpi->cyclic_refresh_map[i] < 0) -- //cpi->cyclic_refresh_map[i] = cpi->cyclic_refresh_map[i] / 16; -- cpi->cyclic_refresh_map[i]++; -- } -- -- -- if (block_count > 0) -- block_count--; -- else -- break; -+ /* If the MB is as a candidate for clean up then mark it for -+ * possible boost/refresh (segment 1) The segment id may get -+ * reset to 0 later if the MB gets coded anything other than -+ * last frame 0,0 as only (last frame 0,0) MBs are eligable for -+ * refresh : that is to say Mbs likely to be background blocks. -+ */ -+ if (cpi->cyclic_refresh_map[i] == 0) -+ { -+ seg_map[i] = 1; -+ block_count --; -+ } -+ else if (cpi->cyclic_refresh_map[i] < 0) -+ cpi->cyclic_refresh_map[i]++; -+ -+ i++; -+ if (i == mbs_in_frame) -+ i = 0; - - } -+ while(block_count && i != cpi->cyclic_refresh_mode_index); - -- // If we have gone through the frame reset to the start - cpi->cyclic_refresh_mode_index = i; -- -- if (cpi->cyclic_refresh_mode_index >= mbs_in_frame) -- cpi->cyclic_refresh_mode_index = 0; - } - -- // Set the segmentation Map -- set_segmentation_map(cpi, seg_map); -- -- // Activate segmentation. -+ /* Activate segmentation. */ -+ cpi->mb.e_mbd.update_mb_segmentation_map = 1; -+ cpi->mb.e_mbd.update_mb_segmentation_data = 1; - enable_segmentation(cpi); - -- // Set up the quant segment data -+ /* Set up the quant segment data */ - feature_data[MB_LVL_ALT_Q][0] = 0; - feature_data[MB_LVL_ALT_Q][1] = (cpi->cyclic_refresh_q - Q); - feature_data[MB_LVL_ALT_Q][2] = 0; - feature_data[MB_LVL_ALT_Q][3] = 0; - -- // Set up the loop segment data -+ /* Set up the loop segment data */ - feature_data[MB_LVL_ALT_LF][0] = 0; - feature_data[MB_LVL_ALT_LF][1] = lf_adjustment; - feature_data[MB_LVL_ALT_LF][2] = 0; - feature_data[MB_LVL_ALT_LF][3] = 0; - -- // Initialise the feature data structure -- // SEGMENT_DELTADATA 0, SEGMENT_ABSDATA 1 -+ /* Initialise the feature data structure */ - set_segment_data(cpi, &feature_data[0][0], SEGMENT_DELTADATA); - -- // Delete sementation map -- vpx_free(seg_map); -- -- seg_map = 0; -- - } - - static void set_default_lf_deltas(VP8_COMP *cpi) -@@ -560,16 +522,21 @@ static void set_default_lf_deltas(VP8_COMP *cpi) - vpx_memset(cpi->mb.e_mbd.ref_lf_deltas, 0, sizeof(cpi->mb.e_mbd.ref_lf_deltas)); - vpx_memset(cpi->mb.e_mbd.mode_lf_deltas, 0, sizeof(cpi->mb.e_mbd.mode_lf_deltas)); - -- // Test of ref frame deltas -+ /* Test of ref frame deltas */ - cpi->mb.e_mbd.ref_lf_deltas[INTRA_FRAME] = 2; - cpi->mb.e_mbd.ref_lf_deltas[LAST_FRAME] = 0; - cpi->mb.e_mbd.ref_lf_deltas[GOLDEN_FRAME] = -2; - cpi->mb.e_mbd.ref_lf_deltas[ALTREF_FRAME] = -2; - -- cpi->mb.e_mbd.mode_lf_deltas[0] = 4; // BPRED -- cpi->mb.e_mbd.mode_lf_deltas[1] = -2; // Zero -- cpi->mb.e_mbd.mode_lf_deltas[2] = 2; // New mv -- cpi->mb.e_mbd.mode_lf_deltas[3] = 4; // Split mv -+ cpi->mb.e_mbd.mode_lf_deltas[0] = 4; /* BPRED */ -+ -+ if(cpi->oxcf.Mode == MODE_REALTIME) -+ cpi->mb.e_mbd.mode_lf_deltas[1] = -12; /* Zero */ -+ else -+ cpi->mb.e_mbd.mode_lf_deltas[1] = -2; /* Zero */ -+ -+ cpi->mb.e_mbd.mode_lf_deltas[2] = 2; /* New mv */ -+ cpi->mb.e_mbd.mode_lf_deltas[3] = 4; /* Split mv */ - } - - /* Convenience macros for mapping speed and mode into a continuous -@@ -669,17 +636,16 @@ void vp8_set_speed_features(VP8_COMP *cpi) - int last_improved_quant = sf->improved_quant; - int ref_frames; - -- // Initialise default mode frequency sampling variables -+ /* Initialise default mode frequency sampling variables */ - for (i = 0; i < MAX_MODES; i ++) - { - cpi->mode_check_freq[i] = 0; -- 
cpi->mode_test_hit_counts[i] = 0; - cpi->mode_chosen_counts[i] = 0; - } - -- cpi->mbs_tested_so_far = 0; -+ cpi->mb.mbs_tested_so_far = 0; - -- // best quality defaults -+ /* best quality defaults */ - sf->RD = 1; - sf->search_method = NSTEP; - sf->improved_quant = 1; -@@ -697,17 +663,17 @@ void vp8_set_speed_features(VP8_COMP *cpi) - sf->max_step_search_steps = MAX_MVSEARCH_STEPS; - sf->improved_mv_pred = 1; - -- // default thresholds to 0 -+ /* default thresholds to 0 */ - for (i = 0; i < MAX_MODES; i++) - sf->thresh_mult[i] = 0; - - /* Count enabled references */ - ref_frames = 1; -- if (cpi->ref_frame_flags & VP8_LAST_FLAG) -+ if (cpi->ref_frame_flags & VP8_LAST_FRAME) - ref_frames++; -- if (cpi->ref_frame_flags & VP8_GOLD_FLAG) -+ if (cpi->ref_frame_flags & VP8_GOLD_FRAME) - ref_frames++; -- if (cpi->ref_frame_flags & VP8_ALT_FLAG) -+ if (cpi->ref_frame_flags & VP8_ALTR_FRAME) - ref_frames++; - - /* Convert speed to continuous range, with clamping */ -@@ -779,7 +745,7 @@ void vp8_set_speed_features(VP8_COMP *cpi) - switch (Mode) - { - #if !(CONFIG_REALTIME_ONLY) -- case 0: // best quality mode -+ case 0: /* best quality mode */ - sf->first_step = 0; - sf->max_step_search_steps = MAX_MVSEARCH_STEPS; - break; -@@ -800,8 +766,9 @@ void vp8_set_speed_features(VP8_COMP *cpi) - sf->improved_quant = 0; - sf->improved_dct = 0; - -- // Only do recode loop on key frames, golden frames and -- // alt ref frames -+ /* Only do recode loop on key frames, golden frames and -+ * alt ref frames -+ */ - sf->recode_loop = 2; - - } -@@ -809,14 +776,14 @@ void vp8_set_speed_features(VP8_COMP *cpi) - if (Speed > 3) - { - sf->auto_filter = 1; -- sf->recode_loop = 0; // recode loop off -- sf->RD = 0; // Turn rd off -+ sf->recode_loop = 0; /* recode loop off */ -+ sf->RD = 0; /* Turn rd off */ - - } - - if (Speed > 4) - { -- sf->auto_filter = 0; // Faster selection of loop filter -+ sf->auto_filter = 0; /* Faster selection of loop filter */ - } - - break; -@@ -839,7 +806,7 @@ void vp8_set_speed_features(VP8_COMP *cpi) - } - - if (Speed > 2) -- sf->auto_filter = 0; // Faster selection of loop filter -+ sf->auto_filter = 0; /* Faster selection of loop filter */ - - if (Speed > 3) - { -@@ -849,7 +816,7 @@ void vp8_set_speed_features(VP8_COMP *cpi) - - if (Speed > 4) - { -- sf->auto_filter = 0; // Faster selection of loop filter -+ sf->auto_filter = 0; /* Faster selection of loop filter */ - sf->search_method = HEX; - sf->iterative_sub_pixel = 0; - } -@@ -870,16 +837,16 @@ void vp8_set_speed_features(VP8_COMP *cpi) - - for (i = 0; i < min; i++) - { -- sum += cpi->error_bins[i]; -+ sum += cpi->mb.error_bins[i]; - } - - total_skip = sum; - sum = 0; - -- // i starts from 2 to make sure thresh started from 2048 -+ /* i starts from 2 to make sure thresh started from 2048 */ - for (; i < 1024; i++) - { -- sum += cpi->error_bins[i]; -+ sum += cpi->mb.error_bins[i]; - - if (10 * sum >= (unsigned int)(cpi->Speed - 6)*(total_mbs - total_skip)) - break; -@@ -930,16 +897,17 @@ void vp8_set_speed_features(VP8_COMP *cpi) - cm->filter_type = SIMPLE_LOOPFILTER; - } - -- // This has a big hit on quality. Last resort -+ /* This has a big hit on quality. Last resort */ - if (Speed >= 15) - sf->half_pixel_search = 0; - -- vpx_memset(cpi->error_bins, 0, sizeof(cpi->error_bins)); -+ vpx_memset(cpi->mb.error_bins, 0, sizeof(cpi->mb.error_bins)); - - }; /* switch */ - -- // Slow quant, dct and trellis not worthwhile for first pass -- // so make sure they are always turned off. 
-+ /* Slow quant, dct and trellis not worthwhile for first pass -+ * so make sure they are always turned off. -+ */ - if ( cpi->pass == 1 ) - { - sf->improved_quant = 0; -@@ -1107,27 +1075,46 @@ void vp8_alloc_compressor_data(VP8_COMP *cpi) - CHECK_MEM_ERROR(cpi->tok, vpx_calloc(tokens, sizeof(*cpi->tok))); - } - -- // Data used for real time vc mode to see if gf needs refreshing -- cpi->inter_zz_count = 0; -- cpi->gf_bad_count = 0; -- cpi->gf_update_recommended = 0; -+ /* Data used for real time vc mode to see if gf needs refreshing */ -+ cpi->zeromv_count = 0; - - -- // Structures used to minitor GF usage -+ /* Structures used to monitor GF usage */ - vpx_free(cpi->gf_active_flags); - CHECK_MEM_ERROR(cpi->gf_active_flags, -- vpx_calloc(1, cm->mb_rows * cm->mb_cols)); -+ vpx_calloc(sizeof(*cpi->gf_active_flags), -+ cm->mb_rows * cm->mb_cols)); - cpi->gf_active_count = cm->mb_rows * cm->mb_cols; - - vpx_free(cpi->mb_activity_map); - CHECK_MEM_ERROR(cpi->mb_activity_map, -- vpx_calloc(sizeof(unsigned int), -+ vpx_calloc(sizeof(*cpi->mb_activity_map), - cm->mb_rows * cm->mb_cols)); - -- vpx_free(cpi->mb_norm_activity_map); -- CHECK_MEM_ERROR(cpi->mb_norm_activity_map, -- vpx_calloc(sizeof(unsigned int), -- cm->mb_rows * cm->mb_cols)); -+ /* allocate memory for storing last frame's MVs for MV prediction. */ -+ vpx_free(cpi->lfmv); -+ CHECK_MEM_ERROR(cpi->lfmv, vpx_calloc((cm->mb_rows+2) * (cm->mb_cols+2), -+ sizeof(*cpi->lfmv))); -+ vpx_free(cpi->lf_ref_frame_sign_bias); -+ CHECK_MEM_ERROR(cpi->lf_ref_frame_sign_bias, -+ vpx_calloc((cm->mb_rows+2) * (cm->mb_cols+2), -+ sizeof(*cpi->lf_ref_frame_sign_bias))); -+ vpx_free(cpi->lf_ref_frame); -+ CHECK_MEM_ERROR(cpi->lf_ref_frame, -+ vpx_calloc((cm->mb_rows+2) * (cm->mb_cols+2), -+ sizeof(*cpi->lf_ref_frame))); -+ -+ /* Create the encoder segmentation map and set all entries to 0 */ -+ vpx_free(cpi->segmentation_map); -+ CHECK_MEM_ERROR(cpi->segmentation_map, -+ vpx_calloc(cm->mb_rows * cm->mb_cols, -+ sizeof(*cpi->segmentation_map))); -+ cpi->cyclic_refresh_mode_index = 0; -+ vpx_free(cpi->active_map); -+ CHECK_MEM_ERROR(cpi->active_map, -+ vpx_calloc(cm->mb_rows * cm->mb_cols, -+ sizeof(*cpi->active_map))); -+ vpx_memset(cpi->active_map , 1, (cm->mb_rows * cm->mb_cols)); - - #if CONFIG_MULTITHREAD - if (width < 640) -@@ -1138,15 +1125,22 @@ void vp8_alloc_compressor_data(VP8_COMP *cpi) - cpi->mt_sync_range = 8; - else - cpi->mt_sync_range = 16; -+ -+ if (cpi->oxcf.multi_threaded > 1) -+ { -+ vpx_free(cpi->mt_current_mb_col); -+ CHECK_MEM_ERROR(cpi->mt_current_mb_col, -+ vpx_malloc(sizeof(*cpi->mt_current_mb_col) * cm->mb_rows)); -+ } -+ - #endif - - vpx_free(cpi->tplist); -- -- CHECK_MEM_ERROR(cpi->tplist, vpx_malloc(sizeof(TOKENLIST) * cpi->common.mb_rows)); -+ CHECK_MEM_ERROR(cpi->tplist, vpx_malloc(sizeof(TOKENLIST) * cm->mb_rows)); - } - - --// Quant MOD -+/* Quant MOD */ - static const int q_trans[] = - { - 0, 1, 2, 3, 4, 5, 7, 8, -@@ -1168,7 +1162,7 @@ int vp8_reverse_trans(int x) - return i; - - return 63; --}; -+} - void vp8_new_frame_rate(VP8_COMP *cpi, double framerate) - { - if(framerate < .1) -@@ -1182,16 +1176,16 @@ void vp8_new_frame_rate(VP8_COMP *cpi, double framerate) - cpi->min_frame_bandwidth = (int)(cpi->av_per_frame_bandwidth * - cpi->oxcf.two_pass_vbrmin_section / 100); - -- // Set Maximum gf/arf interval -+ /* Set Maximum gf/arf interval */ - cpi->max_gf_interval = ((int)(cpi->output_frame_rate / 2.0) + 2); - - if(cpi->max_gf_interval < 12) - cpi->max_gf_interval = 12; - -- // Extended interval for genuinely static scenes 
-+ /* Extended interval for genuinely static scenes */ - cpi->twopass.static_scene_max_gf_interval = cpi->key_frame_frequency >> 1; - -- // Special conditions when altr ref frame enabled in lagged compress mode -+ /* Special conditions when altr ref frame enabled in lagged compress mode */ - if (cpi->oxcf.play_alternate && cpi->oxcf.lag_in_frames) - { - if (cpi->max_gf_interval > cpi->oxcf.lag_in_frames - 1) -@@ -1213,7 +1207,7 @@ rescale(int val, int num, int denom) - int64_t llden = denom; - int64_t llval = val; - -- return llval * llnum / llden; -+ return (int)(llval * llnum / llden); - } - - -@@ -1225,7 +1219,6 @@ static void init_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) - - cpi->auto_gold = 1; - cpi->auto_adjust_gold_quantizer = 1; -- cpi->goldfreq = 7; - - cm->version = oxcf->Version; - vp8_setup_version(cm); -@@ -1244,15 +1237,15 @@ static void init_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) - - cpi->ref_frame_rate = cpi->frame_rate; - -- // change includes all joint functionality -+ /* change includes all joint functionality */ - vp8_change_config(cpi, oxcf); - -- // Initialize active best and worst q and average q values. -+ /* Initialize active best and worst q and average q values. */ - cpi->active_worst_quality = cpi->oxcf.worst_allowed_q; - cpi->active_best_quality = cpi->oxcf.best_allowed_q; - cpi->avg_frame_qindex = cpi->oxcf.worst_allowed_q; - -- // Initialise the starting buffer levels -+ /* Initialise the starting buffer levels */ - cpi->buffer_level = cpi->oxcf.starting_buffer_level; - cpi->bits_off_target = cpi->oxcf.starting_buffer_level; - -@@ -1264,7 +1257,7 @@ static void init_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) - cpi->total_actual_bits = 0; - cpi->total_target_vs_actual = 0; - -- // Temporal scalabilty -+ /* Temporal scalabilty */ - if (cpi->oxcf.number_of_layers > 1) - { - unsigned int i; -@@ -1274,7 +1267,7 @@ static void init_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) - { - LAYER_CONTEXT *lc = &cpi->layer_context[i]; - -- // Layer configuration -+ /* Layer configuration */ - lc->frame_rate = - cpi->output_frame_rate / cpi->oxcf.rate_decimator[i]; - lc->target_bandwidth = cpi->oxcf.target_bitrate[i] * 1000; -@@ -1284,28 +1277,29 @@ static void init_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) - lc->maximum_buffer_size_in_ms = oxcf->maximum_buffer_size; - - lc->starting_buffer_level = -- rescale(oxcf->starting_buffer_level, -+ rescale((int)(oxcf->starting_buffer_level), - lc->target_bandwidth, 1000); - - if (oxcf->optimal_buffer_level == 0) - lc->optimal_buffer_level = lc->target_bandwidth / 8; - else - lc->optimal_buffer_level = -- rescale(oxcf->optimal_buffer_level, -+ rescale((int)(oxcf->optimal_buffer_level), - lc->target_bandwidth, 1000); - - if (oxcf->maximum_buffer_size == 0) - lc->maximum_buffer_size = lc->target_bandwidth / 8; - else - lc->maximum_buffer_size = -- rescale(oxcf->maximum_buffer_size, -+ rescale((int)oxcf->maximum_buffer_size, - lc->target_bandwidth, 1000); - -- // Work out the average size of a frame within this layer -+ /* Work out the average size of a frame within this layer */ - if (i > 0) -- lc->avg_frame_size_for_layer = (cpi->oxcf.target_bitrate[i] - -- cpi->oxcf.target_bitrate[i-1]) * 1000 / -- (lc->frame_rate - prev_layer_frame_rate); -+ lc->avg_frame_size_for_layer = -+ (int)((cpi->oxcf.target_bitrate[i] - -+ cpi->oxcf.target_bitrate[i-1]) * 1000 / -+ (lc->frame_rate - prev_layer_frame_rate)); - - lc->active_worst_quality = cpi->oxcf.worst_allowed_q; - lc->active_best_quality = cpi->oxcf.best_allowed_q; -@@ -1321,7 +1315,7 @@ static void 
init_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) - lc->rate_correction_factor = 1.0; - lc->key_frame_rate_correction_factor = 1.0; - lc->gf_rate_correction_factor = 1.0; -- lc->inter_frame_target = 0.0; -+ lc->inter_frame_target = 0; - - prev_layer_frame_rate = lc->frame_rate; - } -@@ -1358,32 +1352,29 @@ static void update_layer_contexts (VP8_COMP *cpi) - lc->target_bandwidth = oxcf->target_bitrate[i] * 1000; - - lc->starting_buffer_level = rescale( -- oxcf->starting_buffer_level_in_ms, -+ (int)oxcf->starting_buffer_level_in_ms, - lc->target_bandwidth, 1000); - - if (oxcf->optimal_buffer_level == 0) - lc->optimal_buffer_level = lc->target_bandwidth / 8; - else - lc->optimal_buffer_level = rescale( -- oxcf->optimal_buffer_level_in_ms, -+ (int)oxcf->optimal_buffer_level_in_ms, - lc->target_bandwidth, 1000); - - if (oxcf->maximum_buffer_size == 0) - lc->maximum_buffer_size = lc->target_bandwidth / 8; - else - lc->maximum_buffer_size = rescale( -- oxcf->maximum_buffer_size_in_ms, -+ (int)oxcf->maximum_buffer_size_in_ms, - lc->target_bandwidth, 1000); - -- // Work out the average size of a frame within this layer -+ /* Work out the average size of a frame within this layer */ - if (i > 0) -- lc->avg_frame_size_for_layer = (oxcf->target_bitrate[i] - -- oxcf->target_bitrate[i-1]) * 1000 / -- (lc->frame_rate - prev_layer_frame_rate); -- -- lc->active_worst_quality = oxcf->worst_allowed_q; -- lc->active_best_quality = oxcf->best_allowed_q; -- lc->avg_frame_qindex = oxcf->worst_allowed_q; -+ lc->avg_frame_size_for_layer = -+ (int)((oxcf->target_bitrate[i] - -+ oxcf->target_bitrate[i-1]) * 1000 / -+ (lc->frame_rate - prev_layer_frame_rate)); - - prev_layer_frame_rate = lc->frame_rate; - } -@@ -1514,10 +1505,8 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) - cpi->baseline_gf_interval = - cpi->oxcf.alt_freq ? cpi->oxcf.alt_freq : DEFAULT_GF_INTERVAL; - -- cpi->ref_frame_flags = VP8_ALT_FLAG | VP8_GOLD_FLAG | VP8_LAST_FLAG; -+ cpi->ref_frame_flags = VP8_ALTR_FRAME | VP8_GOLD_FRAME | VP8_LAST_FRAME; - -- //cpi->use_golden_frame_only = 0; -- //cpi->use_last_frame_only = 0; - cm->refresh_golden_frame = 0; - cm->refresh_last_frame = 1; - cm->refresh_entropy_probs = 1; -@@ -1539,11 +1528,11 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) - cpi->segment_encode_breakout[i] = cpi->oxcf.encode_breakout; - } - -- // At the moment the first order values may not be > MAXQ -+ /* At the moment the first order values may not be > MAXQ */ - if (cpi->oxcf.fixed_q > MAXQ) - cpi->oxcf.fixed_q = MAXQ; - -- // local file playback mode == really big buffer -+ /* local file playback mode == really big buffer */ - if (cpi->oxcf.end_usage == USAGE_LOCAL_FILE_PLAYBACK) - { - cpi->oxcf.starting_buffer_level = 60000; -@@ -1554,41 +1543,41 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) - cpi->oxcf.maximum_buffer_size_in_ms = 240000; - } - -- // Convert target bandwidth from Kbit/s to Bit/s -+ /* Convert target bandwidth from Kbit/s to Bit/s */ - cpi->oxcf.target_bandwidth *= 1000; - - cpi->oxcf.starting_buffer_level = -- rescale(cpi->oxcf.starting_buffer_level, -+ rescale((int)cpi->oxcf.starting_buffer_level, - cpi->oxcf.target_bandwidth, 1000); - -- // Set or reset optimal and maximum buffer levels. -+ /* Set or reset optimal and maximum buffer levels. 
*/ - if (cpi->oxcf.optimal_buffer_level == 0) - cpi->oxcf.optimal_buffer_level = cpi->oxcf.target_bandwidth / 8; - else - cpi->oxcf.optimal_buffer_level = -- rescale(cpi->oxcf.optimal_buffer_level, -+ rescale((int)cpi->oxcf.optimal_buffer_level, - cpi->oxcf.target_bandwidth, 1000); - - if (cpi->oxcf.maximum_buffer_size == 0) - cpi->oxcf.maximum_buffer_size = cpi->oxcf.target_bandwidth / 8; - else - cpi->oxcf.maximum_buffer_size = -- rescale(cpi->oxcf.maximum_buffer_size, -+ rescale((int)cpi->oxcf.maximum_buffer_size, - cpi->oxcf.target_bandwidth, 1000); - -- // Set up frame rate and related parameters rate control values. -+ /* Set up frame rate and related parameters rate control values. */ - vp8_new_frame_rate(cpi, cpi->frame_rate); - -- // Set absolute upper and lower quality limits -+ /* Set absolute upper and lower quality limits */ - cpi->worst_quality = cpi->oxcf.worst_allowed_q; - cpi->best_quality = cpi->oxcf.best_allowed_q; - -- // active values should only be modified if out of new range -+ /* active values should only be modified if out of new range */ - if (cpi->active_worst_quality > cpi->oxcf.worst_allowed_q) - { - cpi->active_worst_quality = cpi->oxcf.worst_allowed_q; - } -- // less likely -+ /* less likely */ - else if (cpi->active_worst_quality < cpi->oxcf.best_allowed_q) - { - cpi->active_worst_quality = cpi->oxcf.best_allowed_q; -@@ -1597,7 +1586,7 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) - { - cpi->active_best_quality = cpi->oxcf.best_allowed_q; - } -- // less likely -+ /* less likely */ - else if (cpi->active_best_quality > cpi->oxcf.worst_allowed_q) - { - cpi->active_best_quality = cpi->oxcf.worst_allowed_q; -@@ -1607,14 +1596,9 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) - - cpi->cq_target_quality = cpi->oxcf.cq_level; - -- // Only allow dropped frames in buffered mode -+ /* Only allow dropped frames in buffered mode */ - cpi->drop_frames_allowed = cpi->oxcf.allow_df && cpi->buffered_mode; - -- if (!cm->use_bilinear_mc_filter) -- cm->mcomp_filter_type = SIXTAP; -- else -- cm->mcomp_filter_type = BILINEAR; -- - cpi->target_bandwidth = cpi->oxcf.target_bandwidth; - - -@@ -1627,7 +1611,7 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) - * correct. 
- */ - -- // VP8 sharpness level mapping 0-7 (vs 0-10 in general VPx dialogs) -+ /* VP8 sharpness level mapping 0-7 (vs 0-10 in general VPx dialogs) */ - if (cpi->oxcf.Sharpness > 7) - cpi->oxcf.Sharpness = 7; - -@@ -1641,7 +1625,7 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) - Scale2Ratio(cm->horiz_scale, &hr, &hs); - Scale2Ratio(cm->vert_scale, &vr, &vs); - -- // always go to the next whole number -+ /* always go to the next whole number */ - cm->Width = (hs - 1 + cpi->oxcf.Width * hr) / hs; - cm->Height = (vs - 1 + cpi->oxcf.Height * vr) / vs; - } -@@ -1655,6 +1639,7 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) - cm->yv12_fb[cm->lst_fb_idx].y_height || - cm->yv12_fb[cm->lst_fb_idx].y_width == 0) - { -+ dealloc_raw_frame_buffers(cpi); - alloc_raw_frame_buffers(cpi); - vp8_alloc_compressor_data(cpi); - } -@@ -1667,16 +1652,16 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) - - cpi->Speed = cpi->oxcf.cpu_used; - -- // force to allowlag to 0 if lag_in_frames is 0; -+ /* force to allowlag to 0 if lag_in_frames is 0; */ - if (cpi->oxcf.lag_in_frames == 0) - { - cpi->oxcf.allow_lag = 0; - } -- // Limit on lag buffers as these are not currently dynamically allocated -+ /* Limit on lag buffers as these are not currently dynamically allocated */ - else if (cpi->oxcf.lag_in_frames > MAX_LAG_BUFFERS) - cpi->oxcf.lag_in_frames = MAX_LAG_BUFFERS; - -- // YX Temp -+ /* YX Temp */ - cpi->alt_ref_source = NULL; - cpi->is_src_frame_alt_ref = 0; - -@@ -1693,7 +1678,7 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) - #endif - - #if 0 -- // Experimental RD Code -+ /* Experimental RD Code */ - cpi->frame_distortion = 0; - cpi->last_frame_distortion = 0; - #endif -@@ -1728,7 +1713,7 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) - VP8_COMMON *cm; - - cpi = vpx_memalign(32, sizeof(VP8_COMP)); -- // Check that the CPI instance is valid -+ /* Check that the CPI instance is valid */ - if (!cpi) - return 0; - -@@ -1762,14 +1747,15 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) - cpi->prob_gf_coded = 128; - cpi->prob_intra_coded = 63; - -- // Prime the recent reference frame usage counters. -- // Hereafter they will be maintained as a sort of moving average -+ /* Prime the recent reference frame usage counters. -+ * Hereafter they will be maintained as a sort of moving average -+ */ - cpi->recent_ref_frame_usage[INTRA_FRAME] = 1; - cpi->recent_ref_frame_usage[LAST_FRAME] = 1; - cpi->recent_ref_frame_usage[GOLDEN_FRAME] = 1; - cpi->recent_ref_frame_usage[ALTREF_FRAME] = 1; - -- // Set reference frame sign bias for ALTREF frame to 1 (for now) -+ /* Set reference frame sign bias for ALTREF frame to 1 (for now) */ - cpi->common.ref_frame_sign_bias[ALTREF_FRAME] = 1; - - cpi->twopass.gf_decay_rate = 0; -@@ -1779,21 +1765,12 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) - cpi->alt_is_last = 0 ; - cpi->gold_is_alt = 0 ; - -- // allocate memory for storing last frame's MVs for MV prediction. 
-- CHECK_MEM_ERROR(cpi->lfmv, vpx_calloc((cpi->common.mb_rows+2) * (cpi->common.mb_cols+2), sizeof(int_mv))); -- CHECK_MEM_ERROR(cpi->lf_ref_frame_sign_bias, vpx_calloc((cpi->common.mb_rows+2) * (cpi->common.mb_cols+2), sizeof(int))); -- CHECK_MEM_ERROR(cpi->lf_ref_frame, vpx_calloc((cpi->common.mb_rows+2) * (cpi->common.mb_cols+2), sizeof(int))); -- -- // Create the encoder segmentation map and set all entries to 0 -- CHECK_MEM_ERROR(cpi->segmentation_map, vpx_calloc(cpi->common.mb_rows * cpi->common.mb_cols, 1)); -- CHECK_MEM_ERROR(cpi->active_map, vpx_calloc(cpi->common.mb_rows * cpi->common.mb_cols, 1)); -- vpx_memset(cpi->active_map , 1, (cpi->common.mb_rows * cpi->common.mb_cols)); - cpi->active_map_enabled = 0; - - #if 0 -- // Experimental code for lagged and one pass -- // Initialise one_pass GF frames stats -- // Update stats used for GF selection -+ /* Experimental code for lagged and one pass */ -+ /* Initialise one_pass GF frames stats */ -+ /* Update stats used for GF selection */ - if (cpi->pass == 0) - { - cpi->one_pass_frame_index = 0; -@@ -1813,10 +1790,11 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) - } - #endif - -- // Should we use the cyclic refresh method. -- // Currently this is tied to error resilliant mode -+ /* Should we use the cyclic refresh method. -+ * Currently this is tied to error resilliant mode -+ */ - cpi->cyclic_refresh_mode_enabled = cpi->oxcf.error_resilient_mode; -- cpi->cyclic_refresh_mode_max_mbs_perframe = (cpi->common.mb_rows * cpi->common.mb_cols) / 40; -+ cpi->cyclic_refresh_mode_max_mbs_perframe = (cpi->common.mb_rows * cpi->common.mb_cols) / 5; - cpi->cyclic_refresh_mode_index = 0; - cpi->cyclic_refresh_q = 32; - -@@ -1827,9 +1805,6 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) - else - cpi->cyclic_refresh_map = (signed char *) NULL; - -- // Test function for segmentation -- //segmentation_test_function( cpi); -- - #ifdef ENTROPY_STATS - init_context_counters(); - #endif -@@ -1837,7 +1812,8 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) - /*Initialize the feed-forward activity masking.*/ - cpi->activity_avg = 90<<12; - -- cpi->frames_since_key = 8; // Give a sensible default for the first frame. -+ /* Give a sensible default for the first frame. 
*/ -+ cpi->frames_since_key = 8; - cpi->key_frame_frequency = cpi->oxcf.key_freq; - cpi->this_key_frame_forced = 0; - cpi->next_key_frame_forced = 0; -@@ -1880,10 +1856,7 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) - - #endif - --#ifndef LLONG_MAX --#define LLONG_MAX 9223372036854775807LL --#endif -- cpi->first_time_stamp_ever = LLONG_MAX; -+ cpi->first_time_stamp_ever = 0x7FFFFFFF; - - cpi->frames_till_gf_update_due = 0; - cpi->key_frame_count = 1; -@@ -1894,22 +1867,12 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) - cpi->total_byte_count = 0; - - cpi->drop_frame = 0; -- cpi->drop_count = 0; -- cpi->max_drop_count = 0; -- cpi->max_consec_dropped_frames = 4; - - cpi->rate_correction_factor = 1.0; - cpi->key_frame_rate_correction_factor = 1.0; - cpi->gf_rate_correction_factor = 1.0; - cpi->twopass.est_max_qcorrection_factor = 1.0; - -- cpi->mb.mvcost[0] = &cpi->mb.mvcosts[0][mv_max+1]; -- cpi->mb.mvcost[1] = &cpi->mb.mvcosts[1][mv_max+1]; -- cpi->mb.mvsadcost[0] = &cpi->mb.mvsadcosts[0][mvfp_max+1]; -- cpi->mb.mvsadcost[1] = &cpi->mb.mvsadcosts[1][mvfp_max+1]; -- -- cal_mvsadcosts(cpi->mb.mvsadcost); -- - for (i = 0; i < KEY_FRAME_CONTEXT; i++) - { - cpi->prior_key_frame_distance[i] = (int)cpi->output_frame_rate; -@@ -1935,7 +1898,7 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) - else if (cpi->pass == 2) - { - size_t packet_sz = sizeof(FIRSTPASS_STATS); -- int packets = oxcf->two_pass_stats_in.sz / packet_sz; -+ int packets = (int)(oxcf->two_pass_stats_in.sz / packet_sz); - - cpi->twopass.stats_in_start = oxcf->two_pass_stats_in.buf; - cpi->twopass.stats_in = cpi->twopass.stats_in_start; -@@ -1948,17 +1911,16 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) - - if (cpi->compressor_speed == 2) - { -- cpi->cpu_freq = 0; //vp8_get_processor_freq(); - cpi->avg_encode_time = 0; - cpi->avg_pick_mode_time = 0; - } - - vp8_set_speed_features(cpi); - -- // Set starting values of RD threshold multipliers (128 = *1) -+ /* Set starting values of RD threshold multipliers (128 = *1) */ - for (i = 0; i < MAX_MODES; i++) - { -- cpi->rd_thresh_mult[i] = 128; -+ cpi->mb.rd_thresh_mult[i] = 128; - } - - #ifdef ENTROPY_STATS -@@ -1966,7 +1928,11 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) - #endif - - #if CONFIG_MULTITHREAD -- vp8cx_create_encoder_threads(cpi); -+ if(vp8cx_create_encoder_threads(cpi)) -+ { -+ vp8_remove_compressor(&cpi); -+ return 0; -+ } - #endif - - cpi->fn_ptr[BLOCK_16X16].sdf = vp8_sad16x16; -@@ -2031,11 +1997,14 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) - cpi->diamond_search_sad = vp8_diamond_search_sad; - cpi->refining_search_sad = vp8_refining_search_sad; - -- // make sure frame 1 is okay -- cpi->error_bins[0] = cpi->common.MBs; -+ /* make sure frame 1 is okay */ -+ cpi->mb.error_bins[0] = cpi->common.MBs; - -- //vp8cx_init_quantizer() is first called here. Add check in vp8cx_frame_init_quantizer() so that vp8cx_init_quantizer is only called later -- //when needed. This will avoid unnecessary calls of vp8cx_init_quantizer() for every frame. -+ /* vp8cx_init_quantizer() is first called here. Add check in -+ * vp8cx_frame_init_quantizer() so that vp8cx_init_quantizer is only -+ * called later when needed. This will avoid unnecessary calls of -+ * vp8cx_init_quantizer() for every frame. 
-+ */ - vp8cx_init_quantizer(cpi); - - vp8_loop_filter_init(cm); -@@ -2043,13 +2012,33 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) - cpi->common.error.setjmp = 0; - - #if CONFIG_MULTI_RES_ENCODING -+ - /* Calculate # of MBs in a row in lower-resolution level image. */ - if (cpi->oxcf.mr_encoder_id > 0) - vp8_cal_low_res_mb_cols(cpi); -+ - #endif - -- return cpi; -+ /* setup RD costs to MACROBLOCK struct */ -+ -+ cpi->mb.mvcost[0] = &cpi->rd_costs.mvcosts[0][mv_max+1]; -+ cpi->mb.mvcost[1] = &cpi->rd_costs.mvcosts[1][mv_max+1]; -+ cpi->mb.mvsadcost[0] = &cpi->rd_costs.mvsadcosts[0][mvfp_max+1]; -+ cpi->mb.mvsadcost[1] = &cpi->rd_costs.mvsadcosts[1][mvfp_max+1]; - -+ cal_mvsadcosts(cpi->mb.mvsadcost); -+ -+ cpi->mb.mbmode_cost = cpi->rd_costs.mbmode_cost; -+ cpi->mb.intra_uv_mode_cost = cpi->rd_costs.intra_uv_mode_cost; -+ cpi->mb.bmode_costs = cpi->rd_costs.bmode_costs; -+ cpi->mb.inter_bmode_costs = cpi->rd_costs.inter_bmode_costs; -+ cpi->mb.token_costs = cpi->rd_costs.token_costs; -+ -+ /* setup block ptrs & offsets */ -+ vp8_setup_block_ptrs(&cpi->mb); -+ vp8_setup_block_dptrs(&cpi->mb.e_mbd); -+ -+ return cpi; - } - - -@@ -2099,7 +2088,7 @@ void vp8_remove_compressor(VP8_COMP **ptr) - - fprintf(f, "Layer\tBitrate\tAVGPsnr\tGLBPsnr\tAVPsnrP\t" - "GLPsnrP\tVPXSSIM\t\n"); -- for (i=0; ioxcf.number_of_layers; i++) -+ for (i=0; i<(int)cpi->oxcf.number_of_layers; i++) - { - double dr = (double)cpi->bytes_in_layer[i] * - 8.0 / 1000.0 / time_encoded; -@@ -2150,7 +2139,7 @@ void vp8_remove_compressor(VP8_COMP **ptr) - - fprintf(f, "Layer\tBitRate\tSSIM_Y\tSSIM_U\tSSIM_V\tSSIM_A\t" - "Time(us)\n"); -- for (i=0; ioxcf.number_of_layers; i++) -+ for (i=0; i<(int)cpi->oxcf.number_of_layers; i++) - { - double dr = (double)cpi->bytes_in_layer[i] * - 8.0 / 1000.0 / time_encoded; -@@ -2204,7 +2193,6 @@ void vp8_remove_compressor(VP8_COMP **ptr) - fprintf(f, "%5d", frames_at_speed[i]); - - fprintf(f, "\n"); -- //fprintf(f, "%10d PM %10d %10d %10d EF %10d %10d %10d\n", cpi->Speed, cpi->avg_pick_mode_time, (tot_pm/cnt_pm), cnt_pm, cpi->avg_encode_time, 0, 0); - fclose(f); - } - -@@ -2266,7 +2254,7 @@ void vp8_remove_compressor(VP8_COMP **ptr) - for (i = 0; i < 10; i++) - { - -- fprintf(fmode, " { //Above Mode : %d\n", i); -+ fprintf(fmode, " { /* Above Mode : %d */\n", i); - - for (j = 0; j < 10; j++) - { -@@ -2281,7 +2269,7 @@ void vp8_remove_compressor(VP8_COMP **ptr) - fprintf(fmode, " %5d, ", intra_mode_stats[i][j][k]); - } - -- fprintf(fmode, "}, // left_mode %d\n", j); -+ fprintf(fmode, "}, /* left_mode %d */\n", j); - - } - -@@ -2459,7 +2447,7 @@ static void generate_psnr_packet(VP8_COMP *cpi) - - for (i = 0; i < 4; i++) - pkt.data.psnr.psnr[i] = vp8_mse2psnr(pkt.data.psnr.samples[i], 255.0, -- pkt.data.psnr.sse[i]); -+ (double)(pkt.data.psnr.sse[i])); - - vpx_codec_pkt_list_add(cpi->output_pkt_list, &pkt); - } -@@ -2482,28 +2470,28 @@ int vp8_update_reference(VP8_COMP *cpi, int ref_frame_flags) - cpi->common.refresh_alt_ref_frame = 0; - cpi->common.refresh_last_frame = 0; - -- if (ref_frame_flags & VP8_LAST_FLAG) -+ if (ref_frame_flags & VP8_LAST_FRAME) - cpi->common.refresh_last_frame = 1; - -- if (ref_frame_flags & VP8_GOLD_FLAG) -+ if (ref_frame_flags & VP8_GOLD_FRAME) - cpi->common.refresh_golden_frame = 1; - -- if (ref_frame_flags & VP8_ALT_FLAG) -+ if (ref_frame_flags & VP8_ALTR_FRAME) - cpi->common.refresh_alt_ref_frame = 1; - - return 0; - } - --int vp8_get_reference(VP8_COMP *cpi, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd) -+int vp8_get_reference(VP8_COMP *cpi, 
enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd) - { - VP8_COMMON *cm = &cpi->common; - int ref_fb_idx; - -- if (ref_frame_flag == VP8_LAST_FLAG) -+ if (ref_frame_flag == VP8_LAST_FRAME) - ref_fb_idx = cm->lst_fb_idx; -- else if (ref_frame_flag == VP8_GOLD_FLAG) -+ else if (ref_frame_flag == VP8_GOLD_FRAME) - ref_fb_idx = cm->gld_fb_idx; -- else if (ref_frame_flag == VP8_ALT_FLAG) -+ else if (ref_frame_flag == VP8_ALTR_FRAME) - ref_fb_idx = cm->alt_fb_idx; - else - return -1; -@@ -2512,17 +2500,17 @@ int vp8_get_reference(VP8_COMP *cpi, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CO - - return 0; - } --int vp8_set_reference(VP8_COMP *cpi, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd) -+int vp8_set_reference(VP8_COMP *cpi, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd) - { - VP8_COMMON *cm = &cpi->common; - - int ref_fb_idx; - -- if (ref_frame_flag == VP8_LAST_FLAG) -+ if (ref_frame_flag == VP8_LAST_FRAME) - ref_fb_idx = cm->lst_fb_idx; -- else if (ref_frame_flag == VP8_GOLD_FLAG) -+ else if (ref_frame_flag == VP8_GOLD_FRAME) - ref_fb_idx = cm->gld_fb_idx; -- else if (ref_frame_flag == VP8_ALT_FLAG) -+ else if (ref_frame_flag == VP8_ALTR_FRAME) - ref_fb_idx = cm->alt_fb_idx; - else - return -1; -@@ -2583,7 +2571,7 @@ static void scale_and_extend_source(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) - { - VP8_COMMON *cm = &cpi->common; - -- // are we resizing the image -+ /* are we resizing the image */ - if (cm->horiz_scale != 0 || cm->vert_scale != 0) - { - #if CONFIG_SPATIAL_RESAMPLING -@@ -2611,51 +2599,57 @@ static void scale_and_extend_source(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) - } - - --static void resize_key_frame(VP8_COMP *cpi) -+static int resize_key_frame(VP8_COMP *cpi) - { - #if CONFIG_SPATIAL_RESAMPLING - VP8_COMMON *cm = &cpi->common; - -- // Do we need to apply resampling for one pass cbr. -- // In one pass this is more limited than in two pass cbr -- // The test and any change is only made one per key frame sequence -+ /* Do we need to apply resampling for one pass cbr. -+ * In one pass this is more limited than in two pass cbr -+ * The test and any change is only made one per key frame sequence -+ */ - if (cpi->oxcf.allow_spatial_resampling && (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)) - { - int UNINITIALIZED_IS_SAFE(hr), UNINITIALIZED_IS_SAFE(hs); - int UNINITIALIZED_IS_SAFE(vr), UNINITIALIZED_IS_SAFE(vs); - int new_width, new_height; - -- // If we are below the resample DOWN watermark then scale down a notch. -+ /* If we are below the resample DOWN watermark then scale down a -+ * notch. -+ */ - if (cpi->buffer_level < (cpi->oxcf.resample_down_water_mark * cpi->oxcf.optimal_buffer_level / 100)) - { - cm->horiz_scale = (cm->horiz_scale < ONETWO) ? cm->horiz_scale + 1 : ONETWO; - cm->vert_scale = (cm->vert_scale < ONETWO) ? cm->vert_scale + 1 : ONETWO; - } -- // Should we now start scaling back up -+ /* Should we now start scaling back up */ - else if (cpi->buffer_level > (cpi->oxcf.resample_up_water_mark * cpi->oxcf.optimal_buffer_level / 100)) - { - cm->horiz_scale = (cm->horiz_scale > NORMAL) ? cm->horiz_scale - 1 : NORMAL; - cm->vert_scale = (cm->vert_scale > NORMAL) ? 
cm->vert_scale - 1 : NORMAL; - } - -- // Get the new hieght and width -+ /* Get the new hieght and width */ - Scale2Ratio(cm->horiz_scale, &hr, &hs); - Scale2Ratio(cm->vert_scale, &vr, &vs); - new_width = ((hs - 1) + (cpi->oxcf.Width * hr)) / hs; - new_height = ((vs - 1) + (cpi->oxcf.Height * vr)) / vs; - -- // If the image size has changed we need to reallocate the buffers -- // and resample the source image -+ /* If the image size has changed we need to reallocate the buffers -+ * and resample the source image -+ */ - if ((cm->Width != new_width) || (cm->Height != new_height)) - { - cm->Width = new_width; - cm->Height = new_height; - vp8_alloc_compressor_data(cpi); - scale_and_extend_source(cpi->un_scaled_source, cpi); -+ return 1; - } - } - - #endif -+ return 0; - } - - -@@ -2663,34 +2657,35 @@ static void update_alt_ref_frame_stats(VP8_COMP *cpi) - { - VP8_COMMON *cm = &cpi->common; - -- // Select an interval before next GF or altref -+ /* Select an interval before next GF or altref */ - if (!cpi->auto_gold) -- cpi->frames_till_gf_update_due = cpi->goldfreq; -+ cpi->frames_till_gf_update_due = DEFAULT_GF_INTERVAL; - - if ((cpi->pass != 2) && cpi->frames_till_gf_update_due) - { - cpi->current_gf_interval = cpi->frames_till_gf_update_due; - -- // Set the bits per frame that we should try and recover in subsequent inter frames -- // to account for the extra GF spend... note that his does not apply for GF updates -- // that occur coincident with a key frame as the extra cost of key frames is dealt -- // with elsewhere. -- -+ /* Set the bits per frame that we should try and recover in -+ * subsequent inter frames to account for the extra GF spend... -+ * note that his does not apply for GF updates that occur -+ * coincident with a key frame as the extra cost of key frames is -+ * dealt with elsewhere. -+ */ - cpi->gf_overspend_bits += cpi->projected_frame_size; - cpi->non_gf_bitrate_adjustment = cpi->gf_overspend_bits / cpi->frames_till_gf_update_due; - } - -- // Update data structure that monitors level of reference to last GF -+ /* Update data structure that monitors level of reference to last GF */ - vpx_memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols)); - cpi->gf_active_count = cm->mb_rows * cm->mb_cols; - -- // this frame refreshes means next frames don't unless specified by user -+ /* this frame refreshes means next frames don't unless specified by user */ - cpi->common.frames_since_golden = 0; - -- // Clear the alternate reference update pending flag. -+ /* Clear the alternate reference update pending flag. */ - cpi->source_alt_ref_pending = 0; - -- // Set the alternate refernce frame active flag -+ /* Set the alternate refernce frame active flag */ - cpi->source_alt_ref_active = 1; - - -@@ -2699,25 +2694,29 @@ static void update_golden_frame_stats(VP8_COMP *cpi) - { - VP8_COMMON *cm = &cpi->common; - -- // Update the Golden frame usage counts. -+ /* Update the Golden frame usage counts. */ - if (cm->refresh_golden_frame) - { -- // Select an interval before next GF -+ /* Select an interval before next GF */ - if (!cpi->auto_gold) -- cpi->frames_till_gf_update_due = cpi->goldfreq; -+ cpi->frames_till_gf_update_due = DEFAULT_GF_INTERVAL; - - if ((cpi->pass != 2) && (cpi->frames_till_gf_update_due > 0)) - { - cpi->current_gf_interval = cpi->frames_till_gf_update_due; - -- // Set the bits per frame that we should try and recover in subsequent inter frames -- // to account for the extra GF spend... 
note that his does not apply for GF updates -- // that occur coincident with a key frame as the extra cost of key frames is dealt -- // with elsewhere. -+ /* Set the bits per frame that we should try and recover in -+ * subsequent inter frames to account for the extra GF spend... -+ * note that his does not apply for GF updates that occur -+ * coincident with a key frame as the extra cost of key frames -+ * is dealt with elsewhere. -+ */ - if ((cm->frame_type != KEY_FRAME) && !cpi->source_alt_ref_active) - { -- // Calcluate GF bits to be recovered -- // Projected size - av frame bits available for inter frames for clip as a whole -+ /* Calcluate GF bits to be recovered -+ * Projected size - av frame bits available for inter -+ * frames for clip as a whole -+ */ - cpi->gf_overspend_bits += (cpi->projected_frame_size - cpi->inter_frame_target); - } - -@@ -2725,32 +2724,25 @@ static void update_golden_frame_stats(VP8_COMP *cpi) - - } - -- // Update data structure that monitors level of reference to last GF -+ /* Update data structure that monitors level of reference to last GF */ - vpx_memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols)); - cpi->gf_active_count = cm->mb_rows * cm->mb_cols; - -- // this frame refreshes means next frames don't unless specified by user -+ /* this frame refreshes means next frames don't unless specified by -+ * user -+ */ - cm->refresh_golden_frame = 0; - cpi->common.frames_since_golden = 0; - -- //if ( cm->frame_type == KEY_FRAME ) -- //{ - cpi->recent_ref_frame_usage[INTRA_FRAME] = 1; - cpi->recent_ref_frame_usage[LAST_FRAME] = 1; - cpi->recent_ref_frame_usage[GOLDEN_FRAME] = 1; - cpi->recent_ref_frame_usage[ALTREF_FRAME] = 1; -- //} -- //else -- //{ -- // // Carry a potrtion of count over to begining of next gf sequence -- // cpi->recent_ref_frame_usage[INTRA_FRAME] >>= 5; -- // cpi->recent_ref_frame_usage[LAST_FRAME] >>= 5; -- // cpi->recent_ref_frame_usage[GOLDEN_FRAME] >>= 5; -- // cpi->recent_ref_frame_usage[ALTREF_FRAME] >>= 5; -- //} -- -- // ******** Fixed Q test code only ************ -- // If we are going to use the ALT reference for the next group of frames set a flag to say so. -+ -+ /* ******** Fixed Q test code only ************ */ -+ /* If we are going to use the ALT reference for the next group of -+ * frames set a flag to say so. 
-+ */ - if (cpi->oxcf.fixed_q >= 0 && - cpi->oxcf.play_alternate && !cpi->common.refresh_alt_ref_frame) - { -@@ -2761,14 +2753,14 @@ static void update_golden_frame_stats(VP8_COMP *cpi) - if (!cpi->source_alt_ref_pending) - cpi->source_alt_ref_active = 0; - -- // Decrement count down till next gf -+ /* Decrement count down till next gf */ - if (cpi->frames_till_gf_update_due > 0) - cpi->frames_till_gf_update_due--; - - } - else if (!cpi->common.refresh_alt_ref_frame) - { -- // Decrement count down till next gf -+ /* Decrement count down till next gf */ - if (cpi->frames_till_gf_update_due > 0) - cpi->frames_till_gf_update_due--; - -@@ -2779,21 +2771,26 @@ static void update_golden_frame_stats(VP8_COMP *cpi) - - if (cpi->common.frames_since_golden > 1) - { -- cpi->recent_ref_frame_usage[INTRA_FRAME] += cpi->count_mb_ref_frame_usage[INTRA_FRAME]; -- cpi->recent_ref_frame_usage[LAST_FRAME] += cpi->count_mb_ref_frame_usage[LAST_FRAME]; -- cpi->recent_ref_frame_usage[GOLDEN_FRAME] += cpi->count_mb_ref_frame_usage[GOLDEN_FRAME]; -- cpi->recent_ref_frame_usage[ALTREF_FRAME] += cpi->count_mb_ref_frame_usage[ALTREF_FRAME]; -+ cpi->recent_ref_frame_usage[INTRA_FRAME] += -+ cpi->mb.count_mb_ref_frame_usage[INTRA_FRAME]; -+ cpi->recent_ref_frame_usage[LAST_FRAME] += -+ cpi->mb.count_mb_ref_frame_usage[LAST_FRAME]; -+ cpi->recent_ref_frame_usage[GOLDEN_FRAME] += -+ cpi->mb.count_mb_ref_frame_usage[GOLDEN_FRAME]; -+ cpi->recent_ref_frame_usage[ALTREF_FRAME] += -+ cpi->mb.count_mb_ref_frame_usage[ALTREF_FRAME]; - } - } - } - --// This function updates the reference frame probability estimates that --// will be used during mode selection -+/* This function updates the reference frame probability estimates that -+ * will be used during mode selection -+ */ - static void update_rd_ref_frame_probs(VP8_COMP *cpi) - { - VP8_COMMON *cm = &cpi->common; - -- const int *const rfct = cpi->count_mb_ref_frame_usage; -+ const int *const rfct = cpi->mb.count_mb_ref_frame_usage; - const int rf_intra = rfct[INTRA_FRAME]; - const int rf_inter = rfct[LAST_FRAME] + rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME]; - -@@ -2810,7 +2807,9 @@ static void update_rd_ref_frame_probs(VP8_COMP *cpi) - cpi->prob_gf_coded = 128; - } - -- // update reference frame costs since we can do better than what we got last frame. -+ /* update reference frame costs since we can do better than what we got -+ * last frame. 
-+ */ - if (cpi->oxcf.number_of_layers == 1) - { - if (cpi->common.refresh_alt_ref_frame) -@@ -2841,7 +2840,7 @@ static void update_rd_ref_frame_probs(VP8_COMP *cpi) - } - - --// 1 = key, 0 = inter -+/* 1 = key, 0 = inter */ - static int decide_key_frame(VP8_COMP *cpi) - { - VP8_COMMON *cm = &cpi->common; -@@ -2853,43 +2852,22 @@ static int decide_key_frame(VP8_COMP *cpi) - if (cpi->Speed > 11) - return 0; - -- // Clear down mmx registers -- vp8_clear_system_state(); //__asm emms; -+ /* Clear down mmx registers */ -+ vp8_clear_system_state(); - - if ((cpi->compressor_speed == 2) && (cpi->Speed >= 5) && (cpi->sf.RD == 0)) - { -- double change = 1.0 * abs((int)(cpi->intra_error - cpi->last_intra_error)) / (1 + cpi->last_intra_error); -- double change2 = 1.0 * abs((int)(cpi->prediction_error - cpi->last_prediction_error)) / (1 + cpi->last_prediction_error); -+ double change = 1.0 * abs((int)(cpi->mb.intra_error - -+ cpi->last_intra_error)) / (1 + cpi->last_intra_error); -+ double change2 = 1.0 * abs((int)(cpi->mb.prediction_error - -+ cpi->last_prediction_error)) / (1 + cpi->last_prediction_error); - double minerror = cm->MBs * 256; - --#if 0 -- -- if (10 * cpi->intra_error / (1 + cpi->prediction_error) < 15 -- && cpi->prediction_error > minerror -- && (change > .25 || change2 > .25)) -- { -- FILE *f = fopen("intra_inter.stt", "a"); -- -- if (cpi->prediction_error <= 0) -- cpi->prediction_error = 1; -- -- fprintf(f, "%d %d %d %d %14.4f\n", -- cm->current_video_frame, -- (int) cpi->prediction_error, -- (int) cpi->intra_error, -- (int)((10 * cpi->intra_error) / cpi->prediction_error), -- change); -- -- fclose(f); -- } -- --#endif -- -- cpi->last_intra_error = cpi->intra_error; -- cpi->last_prediction_error = cpi->prediction_error; -+ cpi->last_intra_error = cpi->mb.intra_error; -+ cpi->last_prediction_error = cpi->mb.prediction_error; - -- if (10 * cpi->intra_error / (1 + cpi->prediction_error) < 15 -- && cpi->prediction_error > minerror -+ if (10 * cpi->mb.intra_error / (1 + cpi->mb.prediction_error) < 15 -+ && cpi->mb.prediction_error > minerror - && (change > .25 || change2 > .25)) - { - /*(change > 1.4 || change < .75)&& cpi->this_frame_percent_intra > cpi->last_frame_percent_intra + 3*/ -@@ -2900,7 +2878,7 @@ static int decide_key_frame(VP8_COMP *cpi) - - } - -- // If the following are true we might as well code a key frame -+ /* If the following are true we might as well code a key frame */ - if (((cpi->this_frame_percent_intra == 100) && - (cpi->this_frame_percent_intra > (cpi->last_frame_percent_intra + 2))) || - ((cpi->this_frame_percent_intra > 95) && -@@ -2908,9 +2886,12 @@ static int decide_key_frame(VP8_COMP *cpi) - { - code_key_frame = 1; - } -- // in addition if the following are true and this is not a golden frame then code a key frame -- // Note that on golden frames there often seems to be a pop in intra useage anyway hence this -- // restriction is designed to prevent spurious key frames. The Intra pop needs to be investigated. -+ /* in addition if the following are true and this is not a golden frame -+ * then code a key frame Note that on golden frames there often seems -+ * to be a pop in intra useage anyway hence this restriction is -+ * designed to prevent spurious key frames. The Intra pop needs to be -+ * investigated. 
-+ */ - else if (((cpi->this_frame_percent_intra > 60) && - (cpi->this_frame_percent_intra > (cpi->last_frame_percent_intra * 2))) || - ((cpi->this_frame_percent_intra > 75) && -@@ -2942,7 +2923,7 @@ static void Pass1Encode(VP8_COMP *cpi, unsigned long *size, unsigned char *dest, - void write_cx_frame_to_file(YV12_BUFFER_CONFIG *frame, int this_frame) - { - -- // write the frame -+ /* write the frame */ - FILE *yframe; - int i; - char filename[255]; -@@ -2970,10 +2951,11 @@ void write_cx_frame_to_file(YV12_BUFFER_CONFIG *frame, int this_frame) - fclose(yframe); - } - #endif --// return of 0 means drop frame -+/* return of 0 means drop frame */ - --// Function to test for conditions that indeicate we should loop --// back and recode a frame. -+/* Function to test for conditions that indeicate we should loop -+ * back and recode a frame. -+ */ - static int recode_loop_test( VP8_COMP *cpi, - int high_limit, int low_limit, - int q, int maxq, int minq ) -@@ -2981,32 +2963,33 @@ static int recode_loop_test( VP8_COMP *cpi, - int force_recode = 0; - VP8_COMMON *cm = &cpi->common; - -- // Is frame recode allowed at all -- // Yes if either recode mode 1 is selected or mode two is selcted -- // and the frame is a key frame. golden frame or alt_ref_frame -+ /* Is frame recode allowed at all -+ * Yes if either recode mode 1 is selected or mode two is selcted -+ * and the frame is a key frame. golden frame or alt_ref_frame -+ */ - if ( (cpi->sf.recode_loop == 1) || - ( (cpi->sf.recode_loop == 2) && - ( (cm->frame_type == KEY_FRAME) || - cm->refresh_golden_frame || - cm->refresh_alt_ref_frame ) ) ) - { -- // General over and under shoot tests -+ /* General over and under shoot tests */ - if ( ((cpi->projected_frame_size > high_limit) && (q < maxq)) || - ((cpi->projected_frame_size < low_limit) && (q > minq)) ) - { - force_recode = 1; - } -- // Special Constrained quality tests -+ /* Special Constrained quality tests */ - else if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) - { -- // Undershoot and below auto cq level -+ /* Undershoot and below auto cq level */ - if ( (q > cpi->cq_target_quality) && - (cpi->projected_frame_size < - ((cpi->this_frame_target * 7) >> 3))) - { - force_recode = 1; - } -- // Severe undershoot and between auto and user cq level -+ /* Severe undershoot and between auto and user cq level */ - else if ( (q > cpi->oxcf.cq_level) && - (cpi->projected_frame_size < cpi->min_frame_bandwidth) && - (cpi->active_best_quality > cpi->oxcf.cq_level)) -@@ -3020,21 +3003,28 @@ static int recode_loop_test( VP8_COMP *cpi, - return force_recode; - } - --static void update_reference_frames(VP8_COMMON *cm) -+static void update_reference_frames(VP8_COMP *cpi) - { -+ VP8_COMMON *cm = &cpi->common; - YV12_BUFFER_CONFIG *yv12_fb = cm->yv12_fb; - -- // At this point the new frame has been encoded. -- // If any buffer copy / swapping is signaled it should be done here. -+ /* At this point the new frame has been encoded. -+ * If any buffer copy / swapping is signaled it should be done here. 
-+ */ - - if (cm->frame_type == KEY_FRAME) - { -- yv12_fb[cm->new_fb_idx].flags |= VP8_GOLD_FLAG | VP8_ALT_FLAG ; -+ yv12_fb[cm->new_fb_idx].flags |= VP8_GOLD_FRAME | VP8_ALTR_FRAME ; - -- yv12_fb[cm->gld_fb_idx].flags &= ~VP8_GOLD_FLAG; -- yv12_fb[cm->alt_fb_idx].flags &= ~VP8_ALT_FLAG; -+ yv12_fb[cm->gld_fb_idx].flags &= ~VP8_GOLD_FRAME; -+ yv12_fb[cm->alt_fb_idx].flags &= ~VP8_ALTR_FRAME; - - cm->alt_fb_idx = cm->gld_fb_idx = cm->new_fb_idx; -+ -+#if CONFIG_MULTI_RES_ENCODING -+ cpi->current_ref_frames[GOLDEN_FRAME] = cm->current_video_frame; -+ cpi->current_ref_frames[ALTREF_FRAME] = cm->current_video_frame; -+#endif - } - else /* For non key frames */ - { -@@ -3042,9 +3032,13 @@ static void update_reference_frames(VP8_COMMON *cm) - { - assert(!cm->copy_buffer_to_arf); - -- cm->yv12_fb[cm->new_fb_idx].flags |= VP8_ALT_FLAG; -- cm->yv12_fb[cm->alt_fb_idx].flags &= ~VP8_ALT_FLAG; -+ cm->yv12_fb[cm->new_fb_idx].flags |= VP8_ALTR_FRAME; -+ cm->yv12_fb[cm->alt_fb_idx].flags &= ~VP8_ALTR_FRAME; - cm->alt_fb_idx = cm->new_fb_idx; -+ -+#if CONFIG_MULTI_RES_ENCODING -+ cpi->current_ref_frames[ALTREF_FRAME] = cm->current_video_frame; -+#endif - } - else if (cm->copy_buffer_to_arf) - { -@@ -3054,18 +3048,28 @@ static void update_reference_frames(VP8_COMMON *cm) - { - if(cm->alt_fb_idx != cm->lst_fb_idx) - { -- yv12_fb[cm->lst_fb_idx].flags |= VP8_ALT_FLAG; -- yv12_fb[cm->alt_fb_idx].flags &= ~VP8_ALT_FLAG; -+ yv12_fb[cm->lst_fb_idx].flags |= VP8_ALTR_FRAME; -+ yv12_fb[cm->alt_fb_idx].flags &= ~VP8_ALTR_FRAME; - cm->alt_fb_idx = cm->lst_fb_idx; -+ -+#if CONFIG_MULTI_RES_ENCODING -+ cpi->current_ref_frames[ALTREF_FRAME] = -+ cpi->current_ref_frames[LAST_FRAME]; -+#endif - } - } - else /* if (cm->copy_buffer_to_arf == 2) */ - { - if(cm->alt_fb_idx != cm->gld_fb_idx) - { -- yv12_fb[cm->gld_fb_idx].flags |= VP8_ALT_FLAG; -- yv12_fb[cm->alt_fb_idx].flags &= ~VP8_ALT_FLAG; -+ yv12_fb[cm->gld_fb_idx].flags |= VP8_ALTR_FRAME; -+ yv12_fb[cm->alt_fb_idx].flags &= ~VP8_ALTR_FRAME; - cm->alt_fb_idx = cm->gld_fb_idx; -+ -+#if CONFIG_MULTI_RES_ENCODING -+ cpi->current_ref_frames[ALTREF_FRAME] = -+ cpi->current_ref_frames[GOLDEN_FRAME]; -+#endif - } - } - } -@@ -3074,9 +3078,13 @@ static void update_reference_frames(VP8_COMMON *cm) - { - assert(!cm->copy_buffer_to_gf); - -- cm->yv12_fb[cm->new_fb_idx].flags |= VP8_GOLD_FLAG; -- cm->yv12_fb[cm->gld_fb_idx].flags &= ~VP8_GOLD_FLAG; -+ cm->yv12_fb[cm->new_fb_idx].flags |= VP8_GOLD_FRAME; -+ cm->yv12_fb[cm->gld_fb_idx].flags &= ~VP8_GOLD_FRAME; - cm->gld_fb_idx = cm->new_fb_idx; -+ -+#if CONFIG_MULTI_RES_ENCODING -+ cpi->current_ref_frames[GOLDEN_FRAME] = cm->current_video_frame; -+#endif - } - else if (cm->copy_buffer_to_gf) - { -@@ -3086,18 +3094,28 @@ static void update_reference_frames(VP8_COMMON *cm) - { - if(cm->gld_fb_idx != cm->lst_fb_idx) - { -- yv12_fb[cm->lst_fb_idx].flags |= VP8_GOLD_FLAG; -- yv12_fb[cm->gld_fb_idx].flags &= ~VP8_GOLD_FLAG; -+ yv12_fb[cm->lst_fb_idx].flags |= VP8_GOLD_FRAME; -+ yv12_fb[cm->gld_fb_idx].flags &= ~VP8_GOLD_FRAME; - cm->gld_fb_idx = cm->lst_fb_idx; -+ -+#if CONFIG_MULTI_RES_ENCODING -+ cpi->current_ref_frames[GOLDEN_FRAME] = -+ cpi->current_ref_frames[LAST_FRAME]; -+#endif - } - } - else /* if (cm->copy_buffer_to_gf == 2) */ - { - if(cm->alt_fb_idx != cm->gld_fb_idx) - { -- yv12_fb[cm->alt_fb_idx].flags |= VP8_GOLD_FLAG; -- yv12_fb[cm->gld_fb_idx].flags &= ~VP8_GOLD_FLAG; -+ yv12_fb[cm->alt_fb_idx].flags |= VP8_GOLD_FRAME; -+ yv12_fb[cm->gld_fb_idx].flags &= ~VP8_GOLD_FRAME; - cm->gld_fb_idx = cm->alt_fb_idx; -+ -+#if 
CONFIG_MULTI_RES_ENCODING -+ cpi->current_ref_frames[GOLDEN_FRAME] = -+ cpi->current_ref_frames[ALTREF_FRAME]; -+#endif - } - } - } -@@ -3105,14 +3123,71 @@ static void update_reference_frames(VP8_COMMON *cm) - - if (cm->refresh_last_frame) - { -- cm->yv12_fb[cm->new_fb_idx].flags |= VP8_LAST_FLAG; -- cm->yv12_fb[cm->lst_fb_idx].flags &= ~VP8_LAST_FLAG; -+ cm->yv12_fb[cm->new_fb_idx].flags |= VP8_LAST_FRAME; -+ cm->yv12_fb[cm->lst_fb_idx].flags &= ~VP8_LAST_FRAME; - cm->lst_fb_idx = cm->new_fb_idx; -+ -+#if CONFIG_MULTI_RES_ENCODING -+ cpi->current_ref_frames[LAST_FRAME] = cm->current_video_frame; -+#endif - } -+ -+#if CONFIG_TEMPORAL_DENOISING -+ if (cpi->oxcf.noise_sensitivity) -+ { -+ /* we shouldn't have to keep multiple copies as we know in advance which -+ * buffer we should start - for now to get something up and running -+ * I've chosen to copy the buffers -+ */ -+ if (cm->frame_type == KEY_FRAME) -+ { -+ int i; -+ vp8_yv12_copy_frame( -+ cpi->Source, -+ &cpi->denoiser.yv12_running_avg[LAST_FRAME]); -+ -+ vp8_yv12_extend_frame_borders( -+ &cpi->denoiser.yv12_running_avg[LAST_FRAME]); -+ -+ for (i = 2; i < MAX_REF_FRAMES - 1; i++) -+ vp8_yv12_copy_frame( -+ &cpi->denoiser.yv12_running_avg[LAST_FRAME], -+ &cpi->denoiser.yv12_running_avg[i]); -+ } -+ else /* For non key frames */ -+ { -+ vp8_yv12_extend_frame_borders( -+ &cpi->denoiser.yv12_running_avg[INTRA_FRAME]); -+ -+ if (cm->refresh_alt_ref_frame || cm->copy_buffer_to_arf) -+ { -+ vp8_yv12_copy_frame( -+ &cpi->denoiser.yv12_running_avg[INTRA_FRAME], -+ &cpi->denoiser.yv12_running_avg[ALTREF_FRAME]); -+ } -+ if (cm->refresh_golden_frame || cm->copy_buffer_to_gf) -+ { -+ vp8_yv12_copy_frame( -+ &cpi->denoiser.yv12_running_avg[INTRA_FRAME], -+ &cpi->denoiser.yv12_running_avg[GOLDEN_FRAME]); -+ } -+ if(cm->refresh_last_frame) -+ { -+ vp8_yv12_copy_frame( -+ &cpi->denoiser.yv12_running_avg[INTRA_FRAME], -+ &cpi->denoiser.yv12_running_avg[LAST_FRAME]); -+ } -+ } -+ -+ } -+#endif -+ - } - - void vp8_loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm) - { -+ const FRAME_TYPE frame_type = cm->frame_type; -+ - if (cm->no_lpf) - { - cm->filter_level = 0; -@@ -3130,6 +3205,11 @@ void vp8_loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm) - else - vp8cx_pick_filter_level(cpi->Source, cpi); - -+ if (cm->filter_level > 0) -+ { -+ vp8cx_set_alt_lf_level(cpi, cm->filter_level); -+ } -+ - vpx_usec_timer_mark(&timer); - cpi->time_pick_lpf += vpx_usec_timer_elapsed(&timer); - } -@@ -3141,17 +3221,11 @@ void vp8_loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm) - - if (cm->filter_level > 0) - { -- vp8cx_set_alt_lf_level(cpi, cm->filter_level); -- vp8_loop_filter_frame(cm, &cpi->mb.e_mbd); -+ vp8_loop_filter_frame(cm, &cpi->mb.e_mbd, frame_type); - } - - vp8_yv12_extend_frame_borders(cm->frame_to_show); --#if CONFIG_TEMPORAL_DENOISING -- if (cpi->oxcf.noise_sensitivity) -- { -- vp8_yv12_extend_frame_borders(&cpi->denoiser.yv12_running_avg); -- } --#endif -+ - } - - static void encode_frame_to_data_rate -@@ -3184,13 +3258,14 @@ static void encode_frame_to_data_rate - int undershoot_seen = 0; - #endif - -- int drop_mark = cpi->oxcf.drop_frames_water_mark * cpi->oxcf.optimal_buffer_level / 100; -+ int drop_mark = (int)(cpi->oxcf.drop_frames_water_mark * -+ cpi->oxcf.optimal_buffer_level / 100); - int drop_mark75 = drop_mark * 2 / 3; - int drop_mark50 = drop_mark / 4; - int drop_mark25 = drop_mark / 8; - - -- // Clear down mmx registers to allow floating point in what follows -+ /* Clear down mmx registers to allow floating point in what follows */ - 
vp8_clear_system_state(); - - #if CONFIG_MULTITHREAD -@@ -3202,108 +3277,125 @@ static void encode_frame_to_data_rate - } - #endif - -- // Test code for segmentation of gf/arf (0,0) -- //segmentation_test_function( cpi); -- - if(cpi->force_next_frame_intra) - { - cm->frame_type = KEY_FRAME; /* delayed intra frame */ - cpi->force_next_frame_intra = 0; - } - -- // For an alt ref frame in 2 pass we skip the call to the second pass function that sets the target bandwidth -+ /* For an alt ref frame in 2 pass we skip the call to the second pass -+ * function that sets the target bandwidth -+ */ - #if !(CONFIG_REALTIME_ONLY) - - if (cpi->pass == 2) - { - if (cpi->common.refresh_alt_ref_frame) - { -- cpi->per_frame_bandwidth = cpi->twopass.gf_bits; // Per frame bit target for the alt ref frame -- cpi->target_bandwidth = cpi->twopass.gf_bits * cpi->output_frame_rate; // per second target bitrate -+ /* Per frame bit target for the alt ref frame */ -+ cpi->per_frame_bandwidth = cpi->twopass.gf_bits; -+ /* per second target bitrate */ -+ cpi->target_bandwidth = (int)(cpi->twopass.gf_bits * -+ cpi->output_frame_rate); - } - } - else - #endif - cpi->per_frame_bandwidth = (int)(cpi->target_bandwidth / cpi->output_frame_rate); - -- // Default turn off buffer to buffer copying -+ /* Default turn off buffer to buffer copying */ - cm->copy_buffer_to_gf = 0; - cm->copy_buffer_to_arf = 0; - -- // Clear zbin over-quant value and mode boost values. -- cpi->zbin_over_quant = 0; -- cpi->zbin_mode_boost = 0; -+ /* Clear zbin over-quant value and mode boost values. */ -+ cpi->mb.zbin_over_quant = 0; -+ cpi->mb.zbin_mode_boost = 0; - -- // Enable or disable mode based tweaking of the zbin -- // For 2 Pass Only used where GF/ARF prediction quality -- // is above a threshold -- cpi->zbin_mode_boost_enabled = 1; -+ /* Enable or disable mode based tweaking of the zbin -+ * For 2 Pass Only used where GF/ARF prediction quality -+ * is above a threshold -+ */ -+ cpi->mb.zbin_mode_boost_enabled = 1; - if (cpi->pass == 2) - { - if ( cpi->gfu_boost <= 400 ) - { -- cpi->zbin_mode_boost_enabled = 0; -+ cpi->mb.zbin_mode_boost_enabled = 0; - } - } - -- // Current default encoder behaviour for the altref sign bias -+ /* Current default encoder behaviour for the altref sign bias */ - if (cpi->source_alt_ref_active) - cpi->common.ref_frame_sign_bias[ALTREF_FRAME] = 1; - else - cpi->common.ref_frame_sign_bias[ALTREF_FRAME] = 0; - -- // Check to see if a key frame is signalled -- // For two pass with auto key frame enabled cm->frame_type may already be set, but not for one pass. -+ /* Check to see if a key frame is signalled -+ * For two pass with auto key frame enabled cm->frame_type may already -+ * be set, but not for one pass. -+ */ - if ((cm->current_video_frame == 0) || - (cm->frame_flags & FRAMEFLAGS_KEY) || - (cpi->oxcf.auto_key && (cpi->frames_since_key % cpi->key_frame_frequency == 0))) - { -- // Key frame from VFW/auto-keyframe/first frame -+ /* Key frame from VFW/auto-keyframe/first frame */ - cm->frame_type = KEY_FRAME; - } - -- // Set default state for segment and mode based loop filter update flags -- cpi->mb.e_mbd.update_mb_segmentation_map = 0; -- cpi->mb.e_mbd.update_mb_segmentation_data = 0; -- cpi->mb.e_mbd.mode_ref_lf_delta_update = 0; -+#if CONFIG_MULTI_RES_ENCODING -+ /* In multi-resolution encoding, frame_type is decided by lowest-resolution -+ * encoder. Same frame_type is adopted while encoding at other resolution. 
-+ */ -+ if (cpi->oxcf.mr_encoder_id) -+ { -+ LOWER_RES_FRAME_INFO* low_res_frame_info -+ = (LOWER_RES_FRAME_INFO*)cpi->oxcf.mr_low_res_mode_info; -+ -+ cm->frame_type = low_res_frame_info->frame_type; - -- // Set various flags etc to special state if it is a key frame -+ if(cm->frame_type != KEY_FRAME) -+ { -+ cpi->mr_low_res_mv_avail = 1; -+ cpi->mr_low_res_mv_avail &= !(low_res_frame_info->is_frame_dropped); -+ -+ if (cpi->ref_frame_flags & VP8_LAST_FRAME) -+ cpi->mr_low_res_mv_avail &= (cpi->current_ref_frames[LAST_FRAME] -+ == low_res_frame_info->low_res_ref_frames[LAST_FRAME]); -+ -+ if (cpi->ref_frame_flags & VP8_GOLD_FRAME) -+ cpi->mr_low_res_mv_avail &= (cpi->current_ref_frames[GOLDEN_FRAME] -+ == low_res_frame_info->low_res_ref_frames[GOLDEN_FRAME]); -+ -+ if (cpi->ref_frame_flags & VP8_ALTR_FRAME) -+ cpi->mr_low_res_mv_avail &= (cpi->current_ref_frames[ALTREF_FRAME] -+ == low_res_frame_info->low_res_ref_frames[ALTREF_FRAME]); -+ } -+ } -+#endif -+ -+ /* Set various flags etc to special state if it is a key frame */ - if (cm->frame_type == KEY_FRAME) - { - int i; - -- // Reset the loop filter deltas and segmentation map -+ // Set the loop filter deltas and segmentation map update - setup_features(cpi); - -- // If segmentation is enabled force a map update for key frames -- if (cpi->mb.e_mbd.segmentation_enabled) -- { -- cpi->mb.e_mbd.update_mb_segmentation_map = 1; -- cpi->mb.e_mbd.update_mb_segmentation_data = 1; -- } -- -- // The alternate reference frame cannot be active for a key frame -+ /* The alternate reference frame cannot be active for a key frame */ - cpi->source_alt_ref_active = 0; - -- // Reset the RD threshold multipliers to default of * 1 (128) -+ /* Reset the RD threshold multipliers to default of * 1 (128) */ - for (i = 0; i < MAX_MODES; i++) - { -- cpi->rd_thresh_mult[i] = 128; -+ cpi->mb.rd_thresh_mult[i] = 128; - } - } - -- // Test code for segmentation -- //if ( (cm->frame_type == KEY_FRAME) || ((cm->current_video_frame % 2) == 0)) -- //if ( (cm->current_video_frame % 2) == 0 ) -- // enable_segmentation(cpi); -- //else -- // disable_segmentation(cpi); -- - #if 0 -- // Experimental code for lagged compress and one pass -- // Initialise one_pass GF frames stats -- // Update stats used for GF selection -- //if ( cpi->pass == 0 ) -+ /* Experimental code for lagged compress and one pass -+ * Initialise one_pass GF frames stats -+ * Update stats used for GF selection -+ */ - { - cpi->one_pass_frame_index = cm->current_video_frame % MAX_LAG_BUFFERS; - -@@ -3323,8 +3415,9 @@ static void encode_frame_to_data_rate - - if (cpi->drop_frames_allowed) - { -- // The reset to decimation 0 is only done here for one pass. -- // Once it is set two pass leaves decimation on till the next kf. -+ /* The reset to decimation 0 is only done here for one pass. -+ * Once it is set two pass leaves decimation on till the next kf. -+ */ - if ((cpi->buffer_level > drop_mark) && (cpi->decimation_factor > 0)) - cpi->decimation_factor --; - -@@ -3343,14 +3436,17 @@ static void encode_frame_to_data_rate - { - cpi->decimation_factor = 1; - } -- //vpx_log("Encoder: Decimation Factor: %d \n",cpi->decimation_factor); - } - -- // The following decimates the frame rate according to a regular pattern (i.e. to 1/2 or 2/3 frame rate) -- // This can be used to help prevent buffer under-run in CBR mode. Alternatively it might be desirable in -- // some situations to drop frame rate but throw more bits at each frame. 
-- // -- // Note that dropping a key frame can be problematic if spatial resampling is also active -+ /* The following decimates the frame rate according to a regular -+ * pattern (i.e. to 1/2 or 2/3 frame rate) This can be used to help -+ * prevent buffer under-run in CBR mode. Alternatively it might be -+ * desirable in some situations to drop frame rate but throw more bits -+ * at each frame. -+ * -+ * Note that dropping a key frame can be problematic if spatial -+ * resampling is also active -+ */ - if (cpi->decimation_factor > 0) - { - switch (cpi->decimation_factor) -@@ -3366,8 +3462,10 @@ static void encode_frame_to_data_rate - break; - } - -- // Note that we should not throw out a key frame (especially when spatial resampling is enabled). -- if ((cm->frame_type == KEY_FRAME)) // && cpi->oxcf.allow_spatial_resampling ) -+ /* Note that we should not throw out a key frame (especially when -+ * spatial resampling is enabled). -+ */ -+ if ((cm->frame_type == KEY_FRAME)) - { - cpi->decimation_count = cpi->decimation_factor; - } -@@ -3379,6 +3477,10 @@ static void encode_frame_to_data_rate - if (cpi->bits_off_target > cpi->oxcf.maximum_buffer_size) - cpi->bits_off_target = cpi->oxcf.maximum_buffer_size; - -+#if CONFIG_MULTI_RES_ENCODING -+ vp8_store_drop_frame_info(cpi); -+#endif -+ - cm->current_video_frame++; - cpi->frames_since_key++; - -@@ -3392,7 +3494,9 @@ static void encode_frame_to_data_rate - { - unsigned int i; - -- // Propagate bits saved by dropping the frame to higher layers -+ /* Propagate bits saved by dropping the frame to higher -+ * layers -+ */ - for (i=cpi->current_layer+1; ioxcf.number_of_layers; i++) - { - LAYER_CONTEXT *lc = &cpi->layer_context[i]; -@@ -3408,24 +3512,32 @@ static void encode_frame_to_data_rate - else - cpi->decimation_count = cpi->decimation_factor; - } -+ else -+ cpi->decimation_count = 0; - -- // Decide how big to make the frame -+ /* Decide how big to make the frame */ - if (!vp8_pick_frame_size(cpi)) - { -+ /*TODO: 2 drop_frame and return code could be put together. */ -+#if CONFIG_MULTI_RES_ENCODING -+ vp8_store_drop_frame_info(cpi); -+#endif - cm->current_video_frame++; - cpi->frames_since_key++; - return; - } - -- // Reduce active_worst_allowed_q for CBR if our buffer is getting too full. -- // This has a knock on effect on active best quality as well. -- // For CBR if the buffer reaches its maximum level then we can no longer -- // save up bits for later frames so we might as well use them up -- // on the current frame. -+ /* Reduce active_worst_allowed_q for CBR if our buffer is getting too full. -+ * This has a knock on effect on active best quality as well. -+ * For CBR if the buffer reaches its maximum level then we can no longer -+ * save up bits for later frames so we might as well use them up -+ * on the current frame. 
-+ */ - if ((cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) && - (cpi->buffer_level >= cpi->oxcf.optimal_buffer_level) && cpi->buffered_mode) - { -- int Adjustment = cpi->active_worst_quality / 4; // Max adjustment is 1/4 -+ /* Max adjustment is 1/4 */ -+ int Adjustment = cpi->active_worst_quality / 4; - - if (Adjustment) - { -@@ -3433,10 +3545,16 @@ static void encode_frame_to_data_rate - - if (cpi->buffer_level < cpi->oxcf.maximum_buffer_size) - { -- buff_lvl_step = (cpi->oxcf.maximum_buffer_size - cpi->oxcf.optimal_buffer_level) / Adjustment; -+ buff_lvl_step = (int) -+ ((cpi->oxcf.maximum_buffer_size - -+ cpi->oxcf.optimal_buffer_level) / -+ Adjustment); - - if (buff_lvl_step) -- Adjustment = (cpi->buffer_level - cpi->oxcf.optimal_buffer_level) / buff_lvl_step; -+ Adjustment = (int) -+ ((cpi->buffer_level - -+ cpi->oxcf.optimal_buffer_level) / -+ buff_lvl_step); - else - Adjustment = 0; - } -@@ -3448,8 +3566,9 @@ static void encode_frame_to_data_rate - } - } - -- // Set an active best quality and if necessary active worst quality -- // There is some odd behavior for one pass here that needs attention. -+ /* Set an active best quality and if necessary active worst quality -+ * There is some odd behavior for one pass here that needs attention. -+ */ - if ( (cpi->pass == 2) || (cpi->ni_frames > 150)) - { - vp8_clear_system_state(); -@@ -3465,9 +3584,10 @@ static void encode_frame_to_data_rate - else - cpi->active_best_quality = kf_high_motion_minq[Q]; - -- // Special case for key frames forced because we have reached -- // the maximum key frame interval. Here force the Q to a range -- // based on the ambient Q to reduce the risk of popping -+ /* Special case for key frames forced because we have reached -+ * the maximum key frame interval. Here force the Q to a range -+ * based on the ambient Q to reduce the risk of popping -+ */ - if ( cpi->this_key_frame_forced ) - { - if ( cpi->active_best_quality > cpi->avg_frame_qindex * 7/8) -@@ -3476,7 +3596,7 @@ static void encode_frame_to_data_rate - cpi->active_best_quality = cpi->avg_frame_qindex >> 2; - } - } -- // One pass more conservative -+ /* One pass more conservative */ - else - cpi->active_best_quality = kf_high_motion_minq[Q]; - } -@@ -3484,16 +3604,17 @@ static void encode_frame_to_data_rate - else if (cpi->oxcf.number_of_layers==1 && - (cm->refresh_golden_frame || cpi->common.refresh_alt_ref_frame)) - { -- // Use the lower of cpi->active_worst_quality and recent -- // average Q as basis for GF/ARF Q limit unless last frame was -- // a key frame. -+ /* Use the lower of cpi->active_worst_quality and recent -+ * average Q as basis for GF/ARF Q limit unless last frame was -+ * a key frame. -+ */ - if ( (cpi->frames_since_key > 1) && - (cpi->avg_frame_qindex < cpi->active_worst_quality) ) - { - Q = cpi->avg_frame_qindex; - } - -- // For constrained quality dont allow Q less than the cq level -+ /* For constrained quality dont allow Q less than the cq level */ - if ( (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) && - (Q < cpi->cq_target_quality) ) - { -@@ -3509,14 +3630,14 @@ static void encode_frame_to_data_rate - else - cpi->active_best_quality = gf_mid_motion_minq[Q]; - -- // Constrained quality use slightly lower active best. -+ /* Constrained quality use slightly lower active best. 
*/ - if ( cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY ) - { - cpi->active_best_quality = - cpi->active_best_quality * 15/16; - } - } -- // One pass more conservative -+ /* One pass more conservative */ - else - cpi->active_best_quality = gf_high_motion_minq[Q]; - } -@@ -3524,14 +3645,16 @@ static void encode_frame_to_data_rate - { - cpi->active_best_quality = inter_minq[Q]; - -- // For the constant/constrained quality mode we dont want -- // q to fall below the cq level. -+ /* For the constant/constrained quality mode we dont want -+ * q to fall below the cq level. -+ */ - if ((cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) && - (cpi->active_best_quality < cpi->cq_target_quality) ) - { -- // If we are strongly undershooting the target rate in the last -- // frames then use the user passed in cq value not the auto -- // cq value. -+ /* If we are strongly undershooting the target rate in the last -+ * frames then use the user passed in cq value not the auto -+ * cq value. -+ */ - if ( cpi->rolling_actual_bits < cpi->min_frame_bandwidth ) - cpi->active_best_quality = cpi->oxcf.cq_level; - else -@@ -3539,26 +3662,33 @@ static void encode_frame_to_data_rate - } - } - -- // If CBR and the buffer is as full then it is reasonable to allow -- // higher quality on the frames to prevent bits just going to waste. -+ /* If CBR and the buffer is as full then it is reasonable to allow -+ * higher quality on the frames to prevent bits just going to waste. -+ */ - if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) - { -- // Note that the use of >= here elliminates the risk of a devide -- // by 0 error in the else if clause -+ /* Note that the use of >= here elliminates the risk of a devide -+ * by 0 error in the else if clause -+ */ - if (cpi->buffer_level >= cpi->oxcf.maximum_buffer_size) - cpi->active_best_quality = cpi->best_quality; - - else if (cpi->buffer_level > cpi->oxcf.optimal_buffer_level) - { -- int Fraction = ((cpi->buffer_level - cpi->oxcf.optimal_buffer_level) * 128) / (cpi->oxcf.maximum_buffer_size - cpi->oxcf.optimal_buffer_level); -- int min_qadjustment = ((cpi->active_best_quality - cpi->best_quality) * Fraction) / 128; -+ int Fraction = (int) -+ (((cpi->buffer_level - cpi->oxcf.optimal_buffer_level) * 128) -+ / (cpi->oxcf.maximum_buffer_size - -+ cpi->oxcf.optimal_buffer_level)); -+ int min_qadjustment = ((cpi->active_best_quality - -+ cpi->best_quality) * Fraction) / 128; - - cpi->active_best_quality -= min_qadjustment; - } - } - } -- // Make sure constrained quality mode limits are adhered to for the first -- // few frames of one pass encodes -+ /* Make sure constrained quality mode limits are adhered to for the first -+ * few frames of one pass encodes -+ */ - else if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) - { - if ( (cm->frame_type == KEY_FRAME) || -@@ -3572,7 +3702,7 @@ static void encode_frame_to_data_rate - } - } - -- // Clip the active best and worst quality values to limits -+ /* Clip the active best and worst quality values to limits */ - if (cpi->active_worst_quality > cpi->worst_quality) - cpi->active_worst_quality = cpi->worst_quality; - -@@ -3582,14 +3712,14 @@ static void encode_frame_to_data_rate - if ( cpi->active_worst_quality < cpi->active_best_quality ) - cpi->active_worst_quality = cpi->active_best_quality; - -- // Determine initial Q to try -+ /* Determine initial Q to try */ - Q = vp8_regulate_q(cpi, cpi->this_frame_target); - - #if !(CONFIG_REALTIME_ONLY) - -- // Set highest allowed value for Zbin over quant -+ /* Set highest allowed value 
for Zbin over quant */ - if (cm->frame_type == KEY_FRAME) -- zbin_oq_high = 0; //ZBIN_OQ_MAX/16 -+ zbin_oq_high = 0; - else if ((cpi->oxcf.number_of_layers == 1) && ((cm->refresh_alt_ref_frame || - (cm->refresh_golden_frame && !cpi->source_alt_ref_active)))) - { -@@ -3599,15 +3729,21 @@ static void encode_frame_to_data_rate - zbin_oq_high = ZBIN_OQ_MAX; - #endif - -- // Setup background Q adjustment for error resilient mode. -- // For multi-layer encodes only enable this for the base layer. -- if (cpi->cyclic_refresh_mode_enabled && (cpi->current_layer==0)) -+ /* Setup background Q adjustment for error resilient mode. -+ * For multi-layer encodes only enable this for the base layer. -+ */ -+ if (cpi->cyclic_refresh_mode_enabled) -+ { -+ if (cpi->current_layer==0) - cyclic_background_refresh(cpi, Q, 0); -+ else -+ disable_segmentation(cpi); -+ } - - vp8_compute_frame_size_bounds(cpi, &frame_under_shoot_limit, &frame_over_shoot_limit); - - #if !(CONFIG_REALTIME_ONLY) -- // Limit Q range for the adaptive loop. -+ /* Limit Q range for the adaptive loop. */ - bottom_index = cpi->active_best_quality; - top_index = cpi->active_worst_quality; - q_low = cpi->active_best_quality; -@@ -3652,11 +3788,11 @@ static void encode_frame_to_data_rate - - if (cm->frame_type == KEY_FRAME) - { -- vp8_de_noise(cpi->Source, cpi->Source, l , 1, 0); -+ vp8_de_noise(cm, cpi->Source, cpi->Source, l , 1, 0); - } - else - { -- vp8_de_noise(cpi->Source, cpi->Source, l , 1, 0); -+ vp8_de_noise(cm, cpi->Source, cpi->Source, l , 1, 0); - - src = cpi->Source->y_buffer; - -@@ -3675,16 +3811,11 @@ static void encode_frame_to_data_rate - - do - { -- vp8_clear_system_state(); //__asm emms; -- -- /* -- if(cpi->is_src_frame_alt_ref) -- Q = 127; -- */ -+ vp8_clear_system_state(); - - vp8_set_quantizer(cpi, Q); - -- // setup skip prob for costing in mode/mv decision -+ /* setup skip prob for costing in mode/mv decision */ - if (cpi->common.mb_no_coeff_skip) - { - cpi->prob_skip_false = cpi->base_skip_false_prob[Q]; -@@ -3728,7 +3859,9 @@ static void encode_frame_to_data_rate - */ - } - -- //as this is for cost estimate, let's make sure it does not go extreme eitehr way -+ /* as this is for cost estimate, let's make sure it does not -+ * go extreme eitehr way -+ */ - if (cpi->prob_skip_false < 5) - cpi->prob_skip_false = 5; - -@@ -3754,7 +3887,22 @@ static void encode_frame_to_data_rate - - if (cm->frame_type == KEY_FRAME) - { -- resize_key_frame(cpi); -+ if(resize_key_frame(cpi)) -+ { -+ /* If the frame size has changed, need to reset Q, quantizer, -+ * and background refresh. 
-+ */ -+ Q = vp8_regulate_q(cpi, cpi->this_frame_target); -+ if (cpi->cyclic_refresh_mode_enabled) -+ { -+ if (cpi->current_layer==0) -+ cyclic_background_refresh(cpi, Q, 0); -+ else -+ disable_segmentation(cpi); -+ } -+ vp8_set_quantizer(cpi, Q); -+ } -+ - vp8_setup_key_frame(cpi); - } - -@@ -3773,7 +3921,7 @@ static void encode_frame_to_data_rate - - if (cm->refresh_entropy_probs == 0) - { -- // save a copy for later refresh -+ /* save a copy for later refresh */ - vpx_memcpy(&cm->lfc, &cm->fc, sizeof(cm->fc)); - } - -@@ -3781,61 +3929,52 @@ static void encode_frame_to_data_rate - - vp8_update_coef_probs(cpi); - -- // transform / motion compensation build reconstruction frame -- // +pack coef partitions -+ /* transform / motion compensation build reconstruction frame -+ * +pack coef partitions -+ */ - vp8_encode_frame(cpi); - - /* cpi->projected_frame_size is not needed for RT mode */ - } - #else -- // transform / motion compensation build reconstruction frame -+ /* transform / motion compensation build reconstruction frame */ - vp8_encode_frame(cpi); - - cpi->projected_frame_size -= vp8_estimate_entropy_savings(cpi); - cpi->projected_frame_size = (cpi->projected_frame_size > 0) ? cpi->projected_frame_size : 0; - #endif -- vp8_clear_system_state(); //__asm emms; -+ vp8_clear_system_state(); - -- // Test to see if the stats generated for this frame indicate that we should have coded a key frame -- // (assuming that we didn't)! -- if (cpi->pass != 2 && cpi->oxcf.auto_key && cm->frame_type != KEY_FRAME) -- { -- int key_frame_decision = decide_key_frame(cpi); -+ /* Test to see if the stats generated for this frame indicate that -+ * we should have coded a key frame (assuming that we didn't)! -+ */ - -- if (cpi->compressor_speed == 2) -- { -- /* we don't do re-encoding in realtime mode -- * if key frame is decided then we force it on next frame */ -- cpi->force_next_frame_intra = key_frame_decision; -- } -+ if (cpi->pass != 2 && cpi->oxcf.auto_key && cm->frame_type != KEY_FRAME -+ && cpi->compressor_speed != 2) -+ { - #if !(CONFIG_REALTIME_ONLY) -- else if (key_frame_decision) -+ if (decide_key_frame(cpi)) - { -- // Reset all our sizing numbers and recode -+ /* Reset all our sizing numbers and recode */ - cm->frame_type = KEY_FRAME; - - vp8_pick_frame_size(cpi); - -- // Clear the Alt reference frame active flag when we have a key frame -+ /* Clear the Alt reference frame active flag when we have -+ * a key frame -+ */ - cpi->source_alt_ref_active = 0; - -- // Reset the loop filter deltas and segmentation map -+ // Set the loop filter deltas and segmentation map update - setup_features(cpi); - -- // If segmentation is enabled force a map update for key frames -- if (cpi->mb.e_mbd.segmentation_enabled) -- { -- cpi->mb.e_mbd.update_mb_segmentation_map = 1; -- cpi->mb.e_mbd.update_mb_segmentation_data = 1; -- } -- - vp8_restore_coding_context(cpi); - - Q = vp8_regulate_q(cpi, cpi->this_frame_target); - - vp8_compute_frame_size_bounds(cpi, &frame_under_shoot_limit, &frame_over_shoot_limit); - -- // Limit Q range for the adaptive loop. -+ /* Limit Q range for the adaptive loop. */ - bottom_index = cpi->active_best_quality; - top_index = cpi->active_worst_quality; - q_low = cpi->active_best_quality; -@@ -3854,7 +3993,7 @@ static void encode_frame_to_data_rate - if (frame_over_shoot_limit == 0) - frame_over_shoot_limit = 1; - -- // Are we are overshooting and up against the limit of active max Q. -+ /* Are we are overshooting and up against the limit of active max Q. 
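Context for the hunk that follows: when the encoder overshoots while already sitting at the active max Q, it relaxes that ceiling step by step on the assumption that one Q step buys roughly 4% of frame size. A minimal standalone sketch of that heuristic (illustrative names, not part of the patch):

    /* Walk the active worst quality upward while the projected frame size
     * still overshoots; each step is assumed to be worth ~4% of frame size. */
    static int relax_active_worst(int active_worst, int worst_limit,
                                  int projected_size, int over_shoot_limit)
    {
        int over_size_percent =
            ((projected_size - over_shoot_limit) * 100) / over_shoot_limit;

        while ((active_worst < worst_limit) && (over_size_percent > 0))
        {
            active_worst++;
            over_size_percent = (int)(over_size_percent * 0.96);
        }
        return active_worst;
    }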
*/ - if (((cpi->pass != 2) || (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)) && - (Q == cpi->active_worst_quality) && - (cpi->active_worst_quality < cpi->worst_quality) && -@@ -3862,50 +4001,52 @@ static void encode_frame_to_data_rate - { - int over_size_percent = ((cpi->projected_frame_size - frame_over_shoot_limit) * 100) / frame_over_shoot_limit; - -- // If so is there any scope for relaxing it -+ /* If so is there any scope for relaxing it */ - while ((cpi->active_worst_quality < cpi->worst_quality) && (over_size_percent > 0)) - { - cpi->active_worst_quality++; -- -- over_size_percent = (int)(over_size_percent * 0.96); // Assume 1 qstep = about 4% on frame size. -+ /* Assume 1 qstep = about 4% on frame size. */ -+ over_size_percent = (int)(over_size_percent * 0.96); - } - #if !(CONFIG_REALTIME_ONLY) - top_index = cpi->active_worst_quality; - #endif -- // If we have updated the active max Q do not call vp8_update_rate_correction_factors() this loop. -+ /* If we have updated the active max Q do not call -+ * vp8_update_rate_correction_factors() this loop. -+ */ - active_worst_qchanged = 1; - } - else - active_worst_qchanged = 0; - - #if !(CONFIG_REALTIME_ONLY) -- // Special case handling for forced key frames -+ /* Special case handling for forced key frames */ - if ( (cm->frame_type == KEY_FRAME) && cpi->this_key_frame_forced ) - { - int last_q = Q; - int kf_err = vp8_calc_ss_err(cpi->Source, - &cm->yv12_fb[cm->new_fb_idx]); - -- // The key frame is not good enough -+ /* The key frame is not good enough */ - if ( kf_err > ((cpi->ambient_err * 7) >> 3) ) - { -- // Lower q_high -+ /* Lower q_high */ - q_high = (Q > q_low) ? (Q - 1) : q_low; - -- // Adjust Q -+ /* Adjust Q */ - Q = (q_high + q_low) >> 1; - } -- // The key frame is much better than the previous frame -+ /* The key frame is much better than the previous frame */ - else if ( kf_err < (cpi->ambient_err >> 1) ) - { -- // Raise q_low -+ /* Raise q_low */ - q_low = (Q < q_high) ? (Q + 1) : q_high; - -- // Adjust Q -+ /* Adjust Q */ - Q = (q_high + q_low + 1) >> 1; - } - -- // Clamp Q to upper and lower limits: -+ /* Clamp Q to upper and lower limits: */ - if (Q > q_high) - Q = q_high; - else if (Q < q_low) -@@ -3914,7 +4055,9 @@ static void encode_frame_to_data_rate - Loop = Q != last_q; - } - -- // Is the projected frame size out of range and are we allowed to attempt to recode. -+ /* Is the projected frame size out of range and are we allowed -+ * to attempt to recode. -+ */ - else if ( recode_loop_test( cpi, - frame_over_shoot_limit, frame_under_shoot_limit, - Q, top_index, bottom_index ) ) -@@ -3922,45 +4065,57 @@ static void encode_frame_to_data_rate - int last_q = Q; - int Retries = 0; - -- // Frame size out of permitted range: -- // Update correction factor & compute new Q to try... -+ /* Frame size out of permitted range. Update correction factor -+ * & compute new Q to try... -+ */ - -- // Frame is too large -+ /* Frame is too large */ - if (cpi->projected_frame_size > cpi->this_frame_target) - { -- //if ( cpi->zbin_over_quant == 0 ) -- q_low = (Q < q_high) ? (Q + 1) : q_high; // Raise Qlow as to at least the current value -+ /* Raise Qlow as to at least the current value */ -+ q_low = (Q < q_high) ? (Q + 1) : q_high; - -- if (cpi->zbin_over_quant > 0) // If we are using over quant do the same for zbin_oq_low -- zbin_oq_low = (cpi->zbin_over_quant < zbin_oq_high) ? 
(cpi->zbin_over_quant + 1) : zbin_oq_high; -+ /* If we are using over quant do the same for zbin_oq_low */ -+ if (cpi->mb.zbin_over_quant > 0) -+ zbin_oq_low = (cpi->mb.zbin_over_quant < zbin_oq_high) ? -+ (cpi->mb.zbin_over_quant + 1) : zbin_oq_high; - -- //if ( undershoot_seen || (Q == MAXQ) ) - if (undershoot_seen) - { -- // Update rate_correction_factor unless cpi->active_worst_quality has changed. -+ /* Update rate_correction_factor unless -+ * cpi->active_worst_quality has changed. -+ */ - if (!active_worst_qchanged) - vp8_update_rate_correction_factors(cpi, 1); - - Q = (q_high + q_low + 1) / 2; - -- // Adjust cpi->zbin_over_quant (only allowed when Q is max) -+ /* Adjust cpi->zbin_over_quant (only allowed when Q -+ * is max) -+ */ - if (Q < MAXQ) -- cpi->zbin_over_quant = 0; -+ cpi->mb.zbin_over_quant = 0; - else - { -- zbin_oq_low = (cpi->zbin_over_quant < zbin_oq_high) ? (cpi->zbin_over_quant + 1) : zbin_oq_high; -- cpi->zbin_over_quant = (zbin_oq_high + zbin_oq_low) / 2; -+ zbin_oq_low = (cpi->mb.zbin_over_quant < zbin_oq_high) ? -+ (cpi->mb.zbin_over_quant + 1) : zbin_oq_high; -+ cpi->mb.zbin_over_quant = -+ (zbin_oq_high + zbin_oq_low) / 2; - } - } - else - { -- // Update rate_correction_factor unless cpi->active_worst_quality has changed. -+ /* Update rate_correction_factor unless -+ * cpi->active_worst_quality has changed. -+ */ - if (!active_worst_qchanged) - vp8_update_rate_correction_factors(cpi, 0); - - Q = vp8_regulate_q(cpi, cpi->this_frame_target); - -- while (((Q < q_low) || (cpi->zbin_over_quant < zbin_oq_low)) && (Retries < 10)) -+ while (((Q < q_low) || -+ (cpi->mb.zbin_over_quant < zbin_oq_low)) && -+ (Retries < 10)) - { - vp8_update_rate_correction_factors(cpi, 0); - Q = vp8_regulate_q(cpi, cpi->this_frame_target); -@@ -3970,47 +4125,60 @@ static void encode_frame_to_data_rate - - overshoot_seen = 1; - } -- // Frame is too small -+ /* Frame is too small */ - else - { -- if (cpi->zbin_over_quant == 0) -- q_high = (Q > q_low) ? (Q - 1) : q_low; // Lower q_high if not using over quant -- else // else lower zbin_oq_high -- zbin_oq_high = (cpi->zbin_over_quant > zbin_oq_low) ? (cpi->zbin_over_quant - 1) : zbin_oq_low; -+ if (cpi->mb.zbin_over_quant == 0) -+ /* Lower q_high if not using over quant */ -+ q_high = (Q > q_low) ? (Q - 1) : q_low; -+ else -+ /* else lower zbin_oq_high */ -+ zbin_oq_high = (cpi->mb.zbin_over_quant > zbin_oq_low) ? -+ (cpi->mb.zbin_over_quant - 1) : zbin_oq_low; - - if (overshoot_seen) - { -- // Update rate_correction_factor unless cpi->active_worst_quality has changed. -+ /* Update rate_correction_factor unless -+ * cpi->active_worst_quality has changed. -+ */ - if (!active_worst_qchanged) - vp8_update_rate_correction_factors(cpi, 1); - - Q = (q_high + q_low) / 2; - -- // Adjust cpi->zbin_over_quant (only allowed when Q is max) -+ /* Adjust cpi->zbin_over_quant (only allowed when Q -+ * is max) -+ */ - if (Q < MAXQ) -- cpi->zbin_over_quant = 0; -+ cpi->mb.zbin_over_quant = 0; - else -- cpi->zbin_over_quant = (zbin_oq_high + zbin_oq_low) / 2; -+ cpi->mb.zbin_over_quant = -+ (zbin_oq_high + zbin_oq_low) / 2; - } - else - { -- // Update rate_correction_factor unless cpi->active_worst_quality has changed. -+ /* Update rate_correction_factor unless -+ * cpi->active_worst_quality has changed. -+ */ - if (!active_worst_qchanged) - vp8_update_rate_correction_factors(cpi, 0); - - Q = vp8_regulate_q(cpi, cpi->this_frame_target); - -- // Special case reset for qlow for constrained quality. 
-- // This should only trigger where there is very substantial -- // undershoot on a frame and the auto cq level is above -- // the user passsed in value. -+ /* Special case reset for qlow for constrained quality. -+ * This should only trigger where there is very substantial -+ * undershoot on a frame and the auto cq level is above -+ * the user passsed in value. -+ */ - if ( (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) && - (Q < q_low) ) - { - q_low = Q; - } - -- while (((Q > q_high) || (cpi->zbin_over_quant > zbin_oq_high)) && (Retries < 10)) -+ while (((Q > q_high) || -+ (cpi->mb.zbin_over_quant > zbin_oq_high)) && -+ (Retries < 10)) - { - vp8_update_rate_correction_factors(cpi, 0); - Q = vp8_regulate_q(cpi, cpi->this_frame_target); -@@ -4021,14 +4189,16 @@ static void encode_frame_to_data_rate - undershoot_seen = 1; - } - -- // Clamp Q to upper and lower limits: -+ /* Clamp Q to upper and lower limits: */ - if (Q > q_high) - Q = q_high; - else if (Q < q_low) - Q = q_low; - -- // Clamp cpi->zbin_over_quant -- cpi->zbin_over_quant = (cpi->zbin_over_quant < zbin_oq_low) ? zbin_oq_low : (cpi->zbin_over_quant > zbin_oq_high) ? zbin_oq_high : cpi->zbin_over_quant; -+ /* Clamp cpi->zbin_over_quant */ -+ cpi->mb.zbin_over_quant = (cpi->mb.zbin_over_quant < zbin_oq_low) ? -+ zbin_oq_low : (cpi->mb.zbin_over_quant > zbin_oq_high) ? -+ zbin_oq_high : cpi->mb.zbin_over_quant; - - Loop = Q != last_q; - } -@@ -4051,30 +4221,20 @@ static void encode_frame_to_data_rate - while (Loop == 1); - - #if 0 -- // Experimental code for lagged and one pass -- // Update stats used for one pass GF selection -- { -- /* -- int frames_so_far; -- double frame_intra_error; -- double frame_coded_error; -- double frame_pcnt_inter; -- double frame_pcnt_motion; -- double frame_mvr; -- double frame_mvr_abs; -- double frame_mvc; -- double frame_mvc_abs; -- */ -- -+ /* Experimental code for lagged and one pass -+ * Update stats used for one pass GF selection -+ */ -+ { - cpi->one_pass_frame_stats[cpi->one_pass_frame_index].frame_coded_error = (double)cpi->prediction_error; - cpi->one_pass_frame_stats[cpi->one_pass_frame_index].frame_intra_error = (double)cpi->intra_error; - cpi->one_pass_frame_stats[cpi->one_pass_frame_index].frame_pcnt_inter = (double)(100 - cpi->this_frame_percent_intra) / 100.0; - } - #endif - -- // Special case code to reduce pulsing when key frames are forced at a -- // fixed interval. Note the reconstruction error if it is the frame before -- // the force key frame -+ /* Special case code to reduce pulsing when key frames are forced at a -+ * fixed interval. Note the reconstruction error if it is the frame before -+ * the force key frame -+ */ - if ( cpi->next_key_frame_forced && (cpi->twopass.frames_to_key == 0) ) - { - cpi->ambient_err = vp8_calc_ss_err(cpi->Source, -@@ -4113,13 +4273,38 @@ static void encode_frame_to_data_rate - } - } - -+ /* Count last ref frame 0,0 usage on current encoded frame. */ -+ { -+ int mb_row; -+ int mb_col; -+ /* Point to beginning of MODE_INFO arrays. */ -+ MODE_INFO *tmp = cm->mi; -+ -+ cpi->zeromv_count = 0; -+ -+ if(cm->frame_type != KEY_FRAME) -+ { -+ for (mb_row = 0; mb_row < cm->mb_rows; mb_row ++) -+ { -+ for (mb_col = 0; mb_col < cm->mb_cols; mb_col ++) -+ { -+ if(tmp->mbmi.mode == ZEROMV) -+ cpi->zeromv_count++; -+ tmp++; -+ } -+ tmp++; -+ } -+ } -+ } -+ - #if CONFIG_MULTI_RES_ENCODING - vp8_cal_dissimilarity(cpi); - #endif - -- // Update the GF useage maps. -- // This is done after completing the compression of a frame when all -- // modes etc. 
are finalized but before loop filter -+ /* Update the GF useage maps. -+ * This is done after completing the compression of a frame when all -+ * modes etc. are finalized but before loop filter -+ */ - if (cpi->oxcf.number_of_layers == 1) - vp8_update_gf_useage_maps(cpi, cm, &cpi->mb); - -@@ -4134,9 +4319,10 @@ static void encode_frame_to_data_rate - } - #endif - -- // For inter frames the current default behavior is that when -- // cm->refresh_golden_frame is set we copy the old GF over to the ARF buffer -- // This is purely an encoder decision at present. -+ /* For inter frames the current default behavior is that when -+ * cm->refresh_golden_frame is set we copy the old GF over to the ARF buffer -+ * This is purely an encoder decision at present. -+ */ - if (!cpi->oxcf.error_resilient_mode && cm->refresh_golden_frame) - cm->copy_buffer_to_arf = 2; - else -@@ -4147,7 +4333,8 @@ static void encode_frame_to_data_rate - #if CONFIG_MULTITHREAD - if (cpi->b_multi_threaded) - { -- sem_post(&cpi->h_event_start_lpf); /* start loopfilter in separate thread */ -+ /* start loopfilter in separate thread */ -+ sem_post(&cpi->h_event_start_lpf); - cpi->b_lpf_running = 1; - } - else -@@ -4156,7 +4343,7 @@ static void encode_frame_to_data_rate - vp8_loopfilter_frame(cpi, cm); - } - -- update_reference_frames(cm); -+ update_reference_frames(cpi); - - #if !(CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING) - if (cpi->oxcf.error_resilient_mode) -@@ -4171,7 +4358,7 @@ static void encode_frame_to_data_rate - sem_wait(&cpi->h_event_end_lpf); - #endif - -- // build the bitstream -+ /* build the bitstream */ - vp8_pack_bitstream(cpi, dest, dest_end, size); - - #if CONFIG_MULTITHREAD -@@ -4187,7 +4374,7 @@ static void encode_frame_to_data_rate - * needed in motion search besides loopfilter */ - cm->last_frame_type = cm->frame_type; - -- // Update rate control heuristics -+ /* Update rate control heuristics */ - cpi->total_byte_count += (*size); - cpi->projected_frame_size = (*size) << 3; - -@@ -4208,18 +4395,21 @@ static void encode_frame_to_data_rate - vp8_adjust_key_frame_context(cpi); - } - -- // Keep a record of ambient average Q. -+ /* Keep a record of ambient average Q. */ - if (cm->frame_type != KEY_FRAME) - cpi->avg_frame_qindex = (2 + 3 * cpi->avg_frame_qindex + cm->base_qindex) >> 2; - -- // Keep a record from which we can calculate the average Q excluding GF updates and key frames -+ /* Keep a record from which we can calculate the average Q excluding -+ * GF updates and key frames -+ */ - if ((cm->frame_type != KEY_FRAME) && ((cpi->oxcf.number_of_layers > 1) || - (!cm->refresh_golden_frame && !cm->refresh_alt_ref_frame))) - { - cpi->ni_frames++; - -- // Calculate the average Q for normal inter frames (not key or GFU -- // frames). -+ /* Calculate the average Q for normal inter frames (not key or GFU -+ * frames). -+ */ - if ( cpi->pass == 2 ) - { - cpi->ni_tot_qi += Q; -@@ -4227,81 +4417,62 @@ static void encode_frame_to_data_rate - } - else - { -- // Damp value for first few frames -+ /* Damp value for first few frames */ - if (cpi->ni_frames > 150 ) - { - cpi->ni_tot_qi += Q; - cpi->ni_av_qi = (cpi->ni_tot_qi / cpi->ni_frames); - } -- // For one pass, early in the clip ... average the current frame Q -- // value with the worstq entered by the user as a dampening measure -+ /* For one pass, early in the clip ... 
average the current frame Q -+ * value with the worstq entered by the user as a dampening measure -+ */ - else - { - cpi->ni_tot_qi += Q; - cpi->ni_av_qi = ((cpi->ni_tot_qi / cpi->ni_frames) + cpi->worst_quality + 1) / 2; - } - -- // If the average Q is higher than what was used in the last frame -- // (after going through the recode loop to keep the frame size within range) -- // then use the last frame value - 1. -- // The -1 is designed to stop Q and hence the data rate, from progressively -- // falling away during difficult sections, but at the same time reduce the number of -- // itterations around the recode loop. -+ /* If the average Q is higher than what was used in the last -+ * frame (after going through the recode loop to keep the frame -+ * size within range) then use the last frame value - 1. The -1 -+ * is designed to stop Q and hence the data rate, from -+ * progressively falling away during difficult sections, but at -+ * the same time reduce the number of itterations around the -+ * recode loop. -+ */ - if (Q > cpi->ni_av_qi) - cpi->ni_av_qi = Q - 1; - } - } - --#if 0 -- -- // If the frame was massively oversize and we are below optimal buffer level drop next frame -- if ((cpi->drop_frames_allowed) && -- (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) && -- (cpi->buffer_level < cpi->oxcf.drop_frames_water_mark * cpi->oxcf.optimal_buffer_level / 100) && -- (cpi->projected_frame_size > (4 * cpi->this_frame_target))) -- { -- cpi->drop_frame = 1; -- } -- --#endif -- -- // Set the count for maximum consecutive dropped frames based upon the ratio of -- // this frame size to the target average per frame bandwidth. -- // (cpi->av_per_frame_bandwidth > 0) is just a sanity check to prevent / 0. -- if (cpi->drop_frames_allowed && (cpi->av_per_frame_bandwidth > 0)) -- { -- cpi->max_drop_count = cpi->projected_frame_size / cpi->av_per_frame_bandwidth; -- -- if (cpi->max_drop_count > cpi->max_consec_dropped_frames) -- cpi->max_drop_count = cpi->max_consec_dropped_frames; -- } -- -- // Update the buffer level variable. -- // Non-viewable frames are a special case and are treated as pure overhead. -+ /* Update the buffer level variable. */ -+ /* Non-viewable frames are a special case and are treated as pure overhead. */ - if ( !cm->show_frame ) - cpi->bits_off_target -= cpi->projected_frame_size; - else - cpi->bits_off_target += cpi->av_per_frame_bandwidth - cpi->projected_frame_size; - -- // Clip the buffer level to the maximum specified buffer size -+ /* Clip the buffer level to the maximum specified buffer size */ - if (cpi->bits_off_target > cpi->oxcf.maximum_buffer_size) - cpi->bits_off_target = cpi->oxcf.maximum_buffer_size; - -- // Rolling monitors of whether we are over or underspending used to help regulate min and Max Q in two pass. -+ /* Rolling monitors of whether we are over or underspending used to -+ * help regulate min and Max Q in two pass. 
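The rolling monitors referenced here are plain weighted moving averages, 3/4 for the short window and 31/32 for the long one, with rounding. A self-contained sketch with illustrative names:

    /* Short- and long-window rolling averages of per-frame bits, used to
     * judge sustained over- or under-spend. */
    static void update_rolling_monitors(int *rolling, int *long_rolling,
                                        int this_frame_bits)
    {
        *rolling      = ((*rolling * 3)       + this_frame_bits + 2)  / 4;
        *long_rolling = ((*long_rolling * 31) + this_frame_bits + 16) / 32;
    }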
-+ */ - cpi->rolling_target_bits = ((cpi->rolling_target_bits * 3) + cpi->this_frame_target + 2) / 4; - cpi->rolling_actual_bits = ((cpi->rolling_actual_bits * 3) + cpi->projected_frame_size + 2) / 4; - cpi->long_rolling_target_bits = ((cpi->long_rolling_target_bits * 31) + cpi->this_frame_target + 16) / 32; - cpi->long_rolling_actual_bits = ((cpi->long_rolling_actual_bits * 31) + cpi->projected_frame_size + 16) / 32; - -- // Actual bits spent -+ /* Actual bits spent */ - cpi->total_actual_bits += cpi->projected_frame_size; - -- // Debug stats -+ /* Debug stats */ - cpi->total_target_vs_actual += (cpi->this_frame_target - cpi->projected_frame_size); - - cpi->buffer_level = cpi->bits_off_target; - -- // Propagate values to higher temporal layers -+ /* Propagate values to higher temporal layers */ - if (cpi->oxcf.number_of_layers > 1) - { - unsigned int i; -@@ -4309,12 +4480,13 @@ static void encode_frame_to_data_rate - for (i=cpi->current_layer+1; ioxcf.number_of_layers; i++) - { - LAYER_CONTEXT *lc = &cpi->layer_context[i]; -- int bits_off_for_this_layer = lc->target_bandwidth / lc->frame_rate -- - cpi->projected_frame_size; -+ int bits_off_for_this_layer = -+ (int)(lc->target_bandwidth / lc->frame_rate - -+ cpi->projected_frame_size); - - lc->bits_off_target += bits_off_for_this_layer; - -- // Clip buffer level to maximum buffer size for the layer -+ /* Clip buffer level to maximum buffer size for the layer */ - if (lc->bits_off_target > lc->maximum_buffer_size) - lc->bits_off_target = lc->maximum_buffer_size; - -@@ -4324,7 +4496,9 @@ static void encode_frame_to_data_rate - } - } - -- // Update bits left to the kf and gf groups to account for overshoot or undershoot on these frames -+ /* Update bits left to the kf and gf groups to account for overshoot -+ * or undershoot on these frames -+ */ - if (cm->frame_type == KEY_FRAME) - { - cpi->twopass.kf_group_bits += cpi->this_frame_target - cpi->projected_frame_size; -@@ -4357,7 +4531,7 @@ static void encode_frame_to_data_rate - cpi->last_skip_false_probs[0] = cpi->prob_skip_false; - cpi->last_skip_probs_q[0] = cm->base_qindex; - -- //update the baseline -+ /* update the baseline */ - cpi->base_skip_false_prob[cm->base_qindex] = cpi->prob_skip_false; - - } -@@ -4367,7 +4541,7 @@ static void encode_frame_to_data_rate - { - FILE *f = fopen("tmp.stt", "a"); - -- vp8_clear_system_state(); //__asm emms; -+ vp8_clear_system_state(); - - if (cpi->twopass.total_left_stats.coded_error != 0.0) - fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d %10d %6d %6d" -@@ -4383,7 +4557,6 @@ static void encode_frame_to_data_rate - cpi->active_best_quality, cpi->active_worst_quality, - cpi->ni_av_qi, cpi->cq_target_quality, - cpi->zbin_over_quant, -- //cpi->avg_frame_qindex, cpi->zbin_over_quant, - cm->refresh_golden_frame, cm->refresh_alt_ref_frame, - cm->frame_type, cpi->gfu_boost, - cpi->twopass.est_max_qcorrection_factor, -@@ -4406,7 +4579,6 @@ static void encode_frame_to_data_rate - cpi->active_best_quality, cpi->active_worst_quality, - cpi->ni_av_qi, cpi->cq_target_quality, - cpi->zbin_over_quant, -- //cpi->avg_frame_qindex, cpi->zbin_over_quant, - cm->refresh_golden_frame, cm->refresh_alt_ref_frame, - cm->frame_type, cpi->gfu_boost, - cpi->twopass.est_max_qcorrection_factor, -@@ -4436,10 +4608,6 @@ static void encode_frame_to_data_rate - - #endif - -- // If this was a kf or Gf note the Q -- if ((cm->frame_type == KEY_FRAME) || cm->refresh_golden_frame || cm->refresh_alt_ref_frame) -- cm->last_kf_gf_q = cm->base_qindex; -- - if (cm->refresh_golden_frame == 
1) - cm->frame_flags = cm->frame_flags | FRAMEFLAGS_GOLDEN; - else -@@ -4451,49 +4619,55 @@ static void encode_frame_to_data_rate - cm->frame_flags = cm->frame_flags&~FRAMEFLAGS_ALTREF; - - -- if (cm->refresh_last_frame & cm->refresh_golden_frame) // both refreshed -+ if (cm->refresh_last_frame & cm->refresh_golden_frame) -+ /* both refreshed */ - cpi->gold_is_last = 1; -- else if (cm->refresh_last_frame ^ cm->refresh_golden_frame) // 1 refreshed but not the other -+ else if (cm->refresh_last_frame ^ cm->refresh_golden_frame) -+ /* 1 refreshed but not the other */ - cpi->gold_is_last = 0; - -- if (cm->refresh_last_frame & cm->refresh_alt_ref_frame) // both refreshed -+ if (cm->refresh_last_frame & cm->refresh_alt_ref_frame) -+ /* both refreshed */ - cpi->alt_is_last = 1; -- else if (cm->refresh_last_frame ^ cm->refresh_alt_ref_frame) // 1 refreshed but not the other -+ else if (cm->refresh_last_frame ^ cm->refresh_alt_ref_frame) -+ /* 1 refreshed but not the other */ - cpi->alt_is_last = 0; - -- if (cm->refresh_alt_ref_frame & cm->refresh_golden_frame) // both refreshed -+ if (cm->refresh_alt_ref_frame & cm->refresh_golden_frame) -+ /* both refreshed */ - cpi->gold_is_alt = 1; -- else if (cm->refresh_alt_ref_frame ^ cm->refresh_golden_frame) // 1 refreshed but not the other -+ else if (cm->refresh_alt_ref_frame ^ cm->refresh_golden_frame) -+ /* 1 refreshed but not the other */ - cpi->gold_is_alt = 0; - -- cpi->ref_frame_flags = VP8_ALT_FLAG | VP8_GOLD_FLAG | VP8_LAST_FLAG; -+ cpi->ref_frame_flags = VP8_ALTR_FRAME | VP8_GOLD_FRAME | VP8_LAST_FRAME; - - if (cpi->gold_is_last) -- cpi->ref_frame_flags &= ~VP8_GOLD_FLAG; -+ cpi->ref_frame_flags &= ~VP8_GOLD_FRAME; - - if (cpi->alt_is_last) -- cpi->ref_frame_flags &= ~VP8_ALT_FLAG; -+ cpi->ref_frame_flags &= ~VP8_ALTR_FRAME; - - if (cpi->gold_is_alt) -- cpi->ref_frame_flags &= ~VP8_ALT_FLAG; -+ cpi->ref_frame_flags &= ~VP8_ALTR_FRAME; - - - if (!cpi->oxcf.error_resilient_mode) - { - if (cpi->oxcf.play_alternate && cm->refresh_alt_ref_frame && (cm->frame_type != KEY_FRAME)) -- // Update the alternate reference frame stats as appropriate. -+ /* Update the alternate reference frame stats as appropriate. */ - update_alt_ref_frame_stats(cpi); - else -- // Update the Golden frame stats as appropriate. -+ /* Update the Golden frame stats as appropriate. */ - update_golden_frame_stats(cpi); - } - - if (cm->frame_type == KEY_FRAME) - { -- // Tell the caller that the frame was coded as a key frame -+ /* Tell the caller that the frame was coded as a key frame */ - *frame_flags = cm->frame_flags | FRAMEFLAGS_KEY; - -- // As this frame is a key frame the next defaults to an inter frame. -+ /* As this frame is a key frame the next defaults to an inter frame. */ - cm->frame_type = INTER_FRAME; - - cpi->last_frame_percent_intra = 100; -@@ -4505,20 +4679,24 @@ static void encode_frame_to_data_rate - cpi->last_frame_percent_intra = cpi->this_frame_percent_intra; - } - -- // Clear the one shot update flags for segmentation map and mode/ref loop filter deltas. -+ /* Clear the one shot update flags for segmentation map and mode/ref -+ * loop filter deltas. 
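The gold_is_last/alt_is_last/gold_is_alt bookkeeping above exists so the mode search can skip reference buffers that currently hold identical frames. A reduced sketch of the masking idea; the flag names are illustrative stand-ins, not the VP8_*_FRAME constants used in the hunk:

    #define REF_LAST 0x1
    #define REF_GOLD 0x2
    #define REF_ALT  0x4

    /* Drop duplicated references from the search mask so identical buffers
     * are not searched twice. */
    static int usable_ref_mask(int gold_is_last, int alt_is_last, int gold_is_alt)
    {
        int mask = REF_LAST | REF_GOLD | REF_ALT;

        if (gold_is_last) mask &= ~REF_GOLD;
        if (alt_is_last)  mask &= ~REF_ALT;
        if (gold_is_alt)  mask &= ~REF_ALT;
        return mask;
    }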
-+ */ - cpi->mb.e_mbd.update_mb_segmentation_map = 0; - cpi->mb.e_mbd.update_mb_segmentation_data = 0; - cpi->mb.e_mbd.mode_ref_lf_delta_update = 0; - - -- // Dont increment frame counters if this was an altref buffer update not a real frame -+ /* Dont increment frame counters if this was an altref buffer update -+ * not a real frame -+ */ - if (cm->show_frame) - { - cm->current_video_frame++; - cpi->frames_since_key++; - } - -- // reset to normal state now that we are done. -+ /* reset to normal state now that we are done. */ - - - -@@ -4534,67 +4712,11 @@ static void encode_frame_to_data_rate - } - #endif - -- // DEBUG -- //vp8_write_yuv_frame("encoder_recon.yuv", cm->frame_to_show); -+ /* DEBUG */ -+ /* vp8_write_yuv_frame("encoder_recon.yuv", cm->frame_to_show); */ - - - } -- -- --static void check_gf_quality(VP8_COMP *cpi) --{ -- VP8_COMMON *cm = &cpi->common; -- int gf_active_pct = (100 * cpi->gf_active_count) / (cm->mb_rows * cm->mb_cols); -- int gf_ref_usage_pct = (cpi->count_mb_ref_frame_usage[GOLDEN_FRAME] * 100) / (cm->mb_rows * cm->mb_cols); -- int last_ref_zz_useage = (cpi->inter_zz_count * 100) / (cm->mb_rows * cm->mb_cols); -- -- // Gf refresh is not currently being signalled -- if (cpi->gf_update_recommended == 0) -- { -- if (cpi->common.frames_since_golden > 7) -- { -- // Low use of gf -- if ((gf_active_pct < 10) || ((gf_active_pct + gf_ref_usage_pct) < 15)) -- { -- // ...but last frame zero zero usage is reasonbable so a new gf might be appropriate -- if (last_ref_zz_useage >= 25) -- { -- cpi->gf_bad_count ++; -- -- if (cpi->gf_bad_count >= 8) // Check that the condition is stable -- { -- cpi->gf_update_recommended = 1; -- cpi->gf_bad_count = 0; -- } -- } -- else -- cpi->gf_bad_count = 0; // Restart count as the background is not stable enough -- } -- else -- cpi->gf_bad_count = 0; // Gf useage has picked up so reset count -- } -- } -- // If the signal is set but has not been read should we cancel it. -- else if (last_ref_zz_useage < 15) -- { -- cpi->gf_update_recommended = 0; -- cpi->gf_bad_count = 0; -- } -- --#if 0 -- { -- FILE *f = fopen("gfneeded.stt", "a"); -- fprintf(f, "%10d %10d %10d %10d %10ld \n", -- cm->current_video_frame, -- cpi->common.frames_since_golden, -- gf_active_pct, gf_ref_usage_pct, -- cpi->gf_update_recommended); -- fclose(f); -- } -- --#endif --} -- - #if !(CONFIG_REALTIME_ONLY) - static void Pass2Encode(VP8_COMP *cpi, unsigned long *size, unsigned char *dest, unsigned char * dest_end, unsigned int *frame_flags) - { -@@ -4614,7 +4736,7 @@ static void Pass2Encode(VP8_COMP *cpi, unsigned long *size, unsigned char *dest, - } - #endif - --//For ARM NEON, d8-d15 are callee-saved registers, and need to be saved by us. -+/* For ARM NEON, d8-d15 are callee-saved registers, and need to be saved. */ - #if HAVE_NEON - extern void vp8_push_neon(int64_t *store); - extern void vp8_pop_neon(int64_t *store); -@@ -4721,7 +4843,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l - cpi->source = NULL; - - #if !(CONFIG_REALTIME_ONLY) -- // Should we code an alternate reference frame -+ /* Should we code an alternate reference frame */ - if (cpi->oxcf.error_resilient_mode == 0 && - cpi->oxcf.play_alternate && - cpi->source_alt_ref_pending) -@@ -4742,7 +4864,8 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l - cm->refresh_golden_frame = 0; - cm->refresh_last_frame = 0; - cm->show_frame = 0; -- cpi->source_alt_ref_pending = 0; // Clear Pending alt Ref flag. -+ /* Clear Pending alt Ref flag. 
*/ -+ cpi->source_alt_ref_pending = 0; - cpi->is_src_frame_alt_ref = 0; - } - } -@@ -4814,7 +4937,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l - cpi->last_end_time_stamp_seen = cpi->source->ts_start; - } - -- // adjust frame rates based on timestamps given -+ /* adjust frame rates based on timestamps given */ - if (cm->show_frame) - { - int64_t this_duration; -@@ -4832,9 +4955,10 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l - this_duration = cpi->source->ts_end - cpi->last_end_time_stamp_seen; - last_duration = cpi->last_end_time_stamp_seen - - cpi->last_time_stamp_seen; -- // do a step update if the duration changes by 10% -+ /* do a step update if the duration changes by 10% */ - if (last_duration) -- step = ((this_duration - last_duration) * 10 / last_duration); -+ step = (int)(((this_duration - last_duration) * -+ 10 / last_duration)); - } - - if (this_duration) -@@ -4849,7 +4973,8 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l - * frame rate. If we haven't seen 1 second yet, then average - * over the whole interval seen. - */ -- interval = cpi->source->ts_end - cpi->first_time_stamp_ever; -+ interval = (double)(cpi->source->ts_end - -+ cpi->first_time_stamp_ever); - if(interval > 10000000.0) - interval = 10000000; - -@@ -4862,9 +4987,9 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l - - if (cpi->oxcf.number_of_layers > 1) - { -- int i; -+ unsigned int i; - -- // Update frame rates for each layer -+ /* Update frame rates for each layer */ - for (i=0; ioxcf.number_of_layers; i++) - { - LAYER_CONTEXT *lc = &cpi->layer_context[i]; -@@ -4886,7 +5011,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l - - update_layer_contexts (cpi); - -- // Restore layer specific context & set frame rate -+ /* Restore layer specific context & set frame rate */ - layer = cpi->oxcf.layer_id[ - cm->current_video_frame % cpi->oxcf.periodicity]; - restore_layer_context (cpi, layer); -@@ -4895,12 +5020,12 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l - - if (cpi->compressor_speed == 2) - { -- if (cpi->oxcf.number_of_layers == 1) -- check_gf_quality(cpi); - vpx_usec_timer_start(&tsctimer); - vpx_usec_timer_start(&ticktimer); - } - -+ cpi->lf_zeromv_pct = (cpi->zeromv_count * 100)/cm->MBs; -+ - #if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING - { - int i; -@@ -4924,11 +5049,11 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l - } - #endif - -- // start with a 0 size frame -+ /* start with a 0 size frame */ - *size = 0; - -- // Clear down mmx registers -- vp8_clear_system_state(); //__asm emms; -+ /* Clear down mmx registers */ -+ vp8_clear_system_state(); - - cm->frame_type = INTER_FRAME; - cm->frame_flags = *frame_flags; -@@ -4937,7 +5062,6 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l - - if (cm->refresh_alt_ref_frame) - { -- //cm->refresh_golden_frame = 1; - cm->refresh_golden_frame = 0; - cm->refresh_last_frame = 0; - } -@@ -4982,7 +5106,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l - vpx_usec_timer_mark(&tsctimer); - vpx_usec_timer_mark(&ticktimer); - -- duration = vpx_usec_timer_elapsed(&ticktimer); -+ duration = (int)(vpx_usec_timer_elapsed(&ticktimer)); - duration2 = (unsigned int)((double)duration / 2); - - if (cm->frame_type != KEY_FRAME) -@@ -4995,7 +5119,6 @@ int 
vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l - - if (duration2) - { -- //if(*frame_flags!=1) - { - - if (cpi->avg_pick_mode_time == 0) -@@ -5012,8 +5135,8 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l - vpx_memcpy(&cm->fc, &cm->lfc, sizeof(cm->fc)); - } - -- // Save the contexts separately for alt ref, gold and last. -- // (TODO jbb -> Optimize this with pointers to avoid extra copies. ) -+ /* Save the contexts separately for alt ref, gold and last. */ -+ /* (TODO jbb -> Optimize this with pointers to avoid extra copies. ) */ - if(cm->refresh_alt_ref_frame) - vpx_memcpy(&cpi->lfc_a, &cm->fc, sizeof(cm->fc)); - -@@ -5023,12 +5146,12 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l - if(cm->refresh_last_frame) - vpx_memcpy(&cpi->lfc_n, &cm->fc, sizeof(cm->fc)); - -- // if its a dropped frame honor the requests on subsequent frames -+ /* if its a dropped frame honor the requests on subsequent frames */ - if (*size > 0) - { - cpi->droppable = !frame_is_reference(cpi); - -- // return to normal state -+ /* return to normal state */ - cm->refresh_entropy_probs = 1; - cm->refresh_alt_ref_frame = 0; - cm->refresh_golden_frame = 0; -@@ -5037,7 +5160,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l - - } - -- // Save layer specific state -+ /* Save layer specific state */ - if (cpi->oxcf.number_of_layers > 1) - save_layer_context (cpi); - -@@ -5062,14 +5185,14 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l - - if (cpi->b_calculate_psnr) - { -- double ye,ue,ve; -+ uint64_t ye,ue,ve; - double frame_psnr; - YV12_BUFFER_CONFIG *orig = cpi->Source; - YV12_BUFFER_CONFIG *recon = cpi->common.frame_to_show; - int y_samples = orig->y_height * orig->y_width ; - int uv_samples = orig->uv_height * orig->uv_width ; - int t_samples = y_samples + 2 * uv_samples; -- int64_t sq_error, sq_error2; -+ double sq_error, sq_error2; - - ye = calc_plane_error(orig->y_buffer, orig->y_stride, - recon->y_buffer, recon->y_stride, orig->y_width, orig->y_height); -@@ -5080,13 +5203,13 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l - ve = calc_plane_error(orig->v_buffer, orig->uv_stride, - recon->v_buffer, recon->uv_stride, orig->uv_width, orig->uv_height); - -- sq_error = ye + ue + ve; -+ sq_error = (double)(ye + ue + ve); - - frame_psnr = vp8_mse2psnr(t_samples, 255.0, sq_error); - -- cpi->total_y += vp8_mse2psnr(y_samples, 255.0, ye); -- cpi->total_u += vp8_mse2psnr(uv_samples, 255.0, ue); -- cpi->total_v += vp8_mse2psnr(uv_samples, 255.0, ve); -+ cpi->total_y += vp8_mse2psnr(y_samples, 255.0, (double)ye); -+ cpi->total_u += vp8_mse2psnr(uv_samples, 255.0, (double)ue); -+ cpi->total_v += vp8_mse2psnr(uv_samples, 255.0, (double)ve); - cpi->total_sq_error += sq_error; - cpi->total += frame_psnr; - #if CONFIG_POSTPROC -@@ -5095,7 +5218,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l - double frame_psnr2, frame_ssim2 = 0; - double weight = 0; - -- vp8_deblock(cm->frame_to_show, &cm->post_proc_buffer, cm->filter_level * 10 / 6, 1, 0); -+ vp8_deblock(cm, cm->frame_to_show, &cm->post_proc_buffer, cm->filter_level * 10 / 6, 1, 0); - vp8_clear_system_state(); - - ye = calc_plane_error(orig->y_buffer, orig->y_stride, -@@ -5107,13 +5230,16 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l - ve = calc_plane_error(orig->v_buffer, orig->uv_stride, - 
pp->v_buffer, pp->uv_stride, orig->uv_width, orig->uv_height); - -- sq_error2 = ye + ue + ve; -+ sq_error2 = (double)(ye + ue + ve); - - frame_psnr2 = vp8_mse2psnr(t_samples, 255.0, sq_error2); - -- cpi->totalp_y += vp8_mse2psnr(y_samples, 255.0, ye); -- cpi->totalp_u += vp8_mse2psnr(uv_samples, 255.0, ue); -- cpi->totalp_v += vp8_mse2psnr(uv_samples, 255.0, ve); -+ cpi->totalp_y += vp8_mse2psnr(y_samples, -+ 255.0, (double)ye); -+ cpi->totalp_u += vp8_mse2psnr(uv_samples, -+ 255.0, (double)ue); -+ cpi->totalp_v += vp8_mse2psnr(uv_samples, -+ 255.0, (double)ve); - cpi->total_sq_error2 += sq_error2; - cpi->totalp += frame_psnr2; - -@@ -5125,7 +5251,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l - - if (cpi->oxcf.number_of_layers > 1) - { -- int i; -+ unsigned int i; - - for (i=cpi->current_layer; - ioxcf.number_of_layers; i++) -@@ -5153,7 +5279,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l - - if (cpi->oxcf.number_of_layers > 1) - { -- int i; -+ unsigned int i; - - for (i=cpi->current_layer; - ioxcf.number_of_layers; i++) -@@ -5251,7 +5377,7 @@ int vp8_get_preview_raw_frame(VP8_COMP *cpi, YV12_BUFFER_CONFIG *dest, vp8_ppfla - ret = -1; - } - --#endif //!CONFIG_POSTPROC -+#endif - vp8_clear_system_state(); - return ret; - } -@@ -5260,29 +5386,53 @@ int vp8_get_preview_raw_frame(VP8_COMP *cpi, YV12_BUFFER_CONFIG *dest, vp8_ppfla - int vp8_set_roimap(VP8_COMP *cpi, unsigned char *map, unsigned int rows, unsigned int cols, int delta_q[4], int delta_lf[4], unsigned int threshold[4]) - { - signed char feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS]; -+ int internal_delta_q[MAX_MB_SEGMENTS]; -+ const int range = 63; -+ int i; - -+ // This method is currently incompatible with the cyclic refresh method -+ if ( cpi->cyclic_refresh_mode_enabled ) -+ return -1; -+ -+ // Check number of rows and columns match - if (cpi->common.mb_rows != rows || cpi->common.mb_cols != cols) - return -1; - -+ // Range check the delta Q values and convert the external Q range values -+ // to internal ones. -+ if ( (abs(delta_q[0]) > range) || (abs(delta_q[1]) > range) || -+ (abs(delta_q[2]) > range) || (abs(delta_q[3]) > range) ) -+ return -1; -+ -+ // Range check the delta lf values -+ if ( (abs(delta_lf[0]) > range) || (abs(delta_lf[1]) > range) || -+ (abs(delta_lf[2]) > range) || (abs(delta_lf[3]) > range) ) -+ return -1; -+ - if (!map) - { - disable_segmentation(cpi); - return 0; - } - -- // Set the segmentation Map -+ // Translate the external delta q values to internal values. -+ for ( i = 0; i < MAX_MB_SEGMENTS; i++ ) -+ internal_delta_q[i] = -+ ( delta_q[i] >= 0 ) ? q_trans[delta_q[i]] : -q_trans[-delta_q[i]]; -+ -+ /* Set the segmentation Map */ - set_segmentation_map(cpi, map); - -- // Activate segmentation. -+ /* Activate segmentation. 
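For orientation, the ROI path above is normally exercised from the application through the public encoder control rather than by calling vp8_set_roimap() directly. A sketch of that usage, assuming the vpx_roi_map_t layout from vpx/vp8cx.h and omitting most error handling:

    #include <stdlib.h>
    #include <string.h>
    #include "vpx/vpx_encoder.h"
    #include "vpx/vp8cx.h"

    /* Mark every macroblock as segment 0 except the ones the caller flags,
     * then hand the map to the encoder via VP8E_SET_ROI_MAP. */
    static vpx_codec_err_t set_simple_roi(vpx_codec_ctx_t *codec,
                                          unsigned int mb_rows,
                                          unsigned int mb_cols)
    {
        vpx_roi_map_t roi;
        vpx_codec_err_t res;

        memset(&roi, 0, sizeof(roi));
        roi.rows = mb_rows;
        roi.cols = mb_cols;
        roi.roi_map = calloc(mb_rows * mb_cols, 1);
        if (!roi.roi_map)
            return VPX_CODEC_MEM_ERROR;

        roi.delta_q[1] = -10;        /* segment 1: better quality, within the
                                      * +/-63 range checked in the hunk above */
        roi.delta_lf[1] = 0;
        roi.static_threshold[1] = 0;
        /* ... set roi.roi_map[mb] = 1 for the macroblocks of interest ... */

        res = vpx_codec_control(codec, VP8E_SET_ROI_MAP, &roi);
        free(roi.roi_map);
        return res;
    }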
*/ - enable_segmentation(cpi); - -- // Set up the quant segment data -- feature_data[MB_LVL_ALT_Q][0] = delta_q[0]; -- feature_data[MB_LVL_ALT_Q][1] = delta_q[1]; -- feature_data[MB_LVL_ALT_Q][2] = delta_q[2]; -- feature_data[MB_LVL_ALT_Q][3] = delta_q[3]; -+ /* Set up the quant segment data */ -+ feature_data[MB_LVL_ALT_Q][0] = internal_delta_q[0]; -+ feature_data[MB_LVL_ALT_Q][1] = internal_delta_q[1]; -+ feature_data[MB_LVL_ALT_Q][2] = internal_delta_q[2]; -+ feature_data[MB_LVL_ALT_Q][3] = internal_delta_q[3]; - -- // Set up the loop segment data s -+ /* Set up the loop segment data s */ - feature_data[MB_LVL_ALT_LF][0] = delta_lf[0]; - feature_data[MB_LVL_ALT_LF][1] = delta_lf[1]; - feature_data[MB_LVL_ALT_LF][2] = delta_lf[2]; -@@ -5293,8 +5443,7 @@ int vp8_set_roimap(VP8_COMP *cpi, unsigned char *map, unsigned int rows, unsigne - cpi->segment_encode_breakout[2] = threshold[2]; - cpi->segment_encode_breakout[3] = threshold[3]; - -- // Initialise the feature data structure -- // SEGMENT_DELTADATA 0, SEGMENT_ABSDATA 1 -+ /* Initialise the feature data structure */ - set_segment_data(cpi, &feature_data[0][0], SEGMENT_DELTADATA); - - return 0; -@@ -5316,7 +5465,6 @@ int vp8_set_active_map(VP8_COMP *cpi, unsigned char *map, unsigned int rows, uns - } - else - { -- //cpi->active_map_enabled = 0; - return -1 ; - } - } -@@ -5346,7 +5494,9 @@ int vp8_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest) - unsigned char *src = source->y_buffer; - unsigned char *dst = dest->y_buffer; - -- // Loop through the Y plane raw and reconstruction data summing (square differences) -+ /* Loop through the Y plane raw and reconstruction data summing -+ * (square differences) -+ */ - for (i = 0; i < source->y_height; i += 16) - { - for (j = 0; j < source->y_width; j += 16) -diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h -index 900141b..fb8ad35 100644 ---- a/vp8/encoder/onyx_int.h -+++ b/vp8/encoder/onyx_int.h -@@ -25,6 +25,7 @@ - #include "vp8/common/threading.h" - #include "vpx_ports/mem.h" - #include "vpx/internal/vpx_codec_internal.h" -+#include "vpx/vp8.h" - #include "mcomp.h" - #include "vp8/common/findnearmv.h" - #include "lookahead.h" -@@ -32,7 +33,6 @@ - #include "vp8/encoder/denoising.h" - #endif - --//#define SPEEDSTATS 1 - #define MIN_GF_INTERVAL 4 - #define DEFAULT_GF_INTERVAL 7 - -@@ -43,7 +43,7 @@ - #define AF_THRESH 25 - #define AF_THRESH2 100 - #define ARF_DECAY_THRESH 12 --#define MAX_MODES 20 -+ - - #define MIN_THRESHMULT 32 - #define MAX_THRESHMULT 512 -@@ -73,7 +73,6 @@ typedef struct - int mvcosts[2][MVvals+1]; - - #ifdef MODE_STATS -- // Stats - int y_modes[5]; - int uv_modes[4]; - int b_modes[10]; -@@ -232,22 +231,22 @@ enum - - typedef struct - { -- // Layer configuration -+ /* Layer configuration */ - double frame_rate; - int target_bandwidth; - -- // Layer specific coding parameters -- int starting_buffer_level; -- int optimal_buffer_level; -- int maximum_buffer_size; -- int starting_buffer_level_in_ms; -- int optimal_buffer_level_in_ms; -- int maximum_buffer_size_in_ms; -+ /* Layer specific coding parameters */ -+ int64_t starting_buffer_level; -+ int64_t optimal_buffer_level; -+ int64_t maximum_buffer_size; -+ int64_t starting_buffer_level_in_ms; -+ int64_t optimal_buffer_level_in_ms; -+ int64_t maximum_buffer_size_in_ms; - - int avg_frame_size_for_layer; - -- int buffer_level; -- int bits_off_target; -+ int64_t buffer_level; -+ int64_t bits_off_target; - - int64_t total_actual_bits; - int total_target_vs_actual; -@@ -307,7 +306,7 @@ typedef struct 
VP8_COMP - - MACROBLOCK mb; - VP8_COMMON common; -- vp8_writer bc[9]; // one boolcoder for each partition -+ vp8_writer bc[9]; /* one boolcoder for each partition */ - - VP8_CONFIG oxcf; - -@@ -321,16 +320,20 @@ typedef struct VP8_COMP - YV12_BUFFER_CONFIG scaled_source; - YV12_BUFFER_CONFIG *last_frame_unscaled_source; - -- int source_alt_ref_pending; // frame in src_buffers has been identified to be encoded as an alt ref -- int source_alt_ref_active; // an alt ref frame has been encoded and is usable -- -- int is_src_frame_alt_ref; // source of frame to encode is an exact copy of an alt ref frame -+ /* frame in src_buffers has been identified to be encoded as an alt ref */ -+ int source_alt_ref_pending; -+ /* an alt ref frame has been encoded and is usable */ -+ int source_alt_ref_active; -+ /* source of frame to encode is an exact copy of an alt ref frame */ -+ int is_src_frame_alt_ref; - -- int gold_is_last; // golden frame same as last frame ( short circuit gold searches) -- int alt_is_last; // Alt reference frame same as last ( short circuit altref search) -- int gold_is_alt; // don't do both alt and gold search ( just do gold). -+ /* golden frame same as last frame ( short circuit gold searches) */ -+ int gold_is_last; -+ /* Alt reference frame same as last ( short circuit altref search) */ -+ int alt_is_last; -+ /* don't do both alt and gold search ( just do gold). */ -+ int gold_is_alt; - -- //int refresh_alt_ref_frame; - YV12_BUFFER_CONFIG pick_lf_lvl_frame; - - TOKENEXTRA *tok; -@@ -342,55 +345,62 @@ typedef struct VP8_COMP - unsigned int this_key_frame_forced; - unsigned int next_key_frame_forced; - -- // Ambient reconstruction err target for force key frames -+ /* Ambient reconstruction err target for force key frames */ - int ambient_err; - - unsigned int mode_check_freq[MAX_MODES]; -- unsigned int mode_test_hit_counts[MAX_MODES]; - unsigned int mode_chosen_counts[MAX_MODES]; -- unsigned int mbs_tested_so_far; - -- int rd_thresh_mult[MAX_MODES]; - int rd_baseline_thresh[MAX_MODES]; -- int rd_threshes[MAX_MODES]; - - int RDMULT; - int RDDIV ; - - CODING_CONTEXT coding_context; - -- // Rate targetting variables -- int64_t prediction_error; -+ /* Rate targetting variables */ - int64_t last_prediction_error; -- int64_t intra_error; - int64_t last_intra_error; - - int this_frame_target; - int projected_frame_size; -- int last_q[2]; // Separate values for Intra/Inter -+ int last_q[2]; /* Separate values for Intra/Inter */ - - double rate_correction_factor; - double key_frame_rate_correction_factor; - double gf_rate_correction_factor; - -- int frames_till_gf_update_due; // Count down till next GF -- int current_gf_interval; // GF interval chosen when we coded the last GF -+ /* Count down till next GF */ -+ int frames_till_gf_update_due; -+ -+ /* GF interval chosen when we coded the last GF */ -+ int current_gf_interval; - -- int gf_overspend_bits; // Total bits overspent becasue of GF boost (cumulative) -+ /* Total bits overspent becasue of GF boost (cumulative) */ -+ int gf_overspend_bits; - -- int non_gf_bitrate_adjustment; // Used in the few frames following a GF to recover the extra bits spent in that GF -+ /* Used in the few frames following a GF to recover the extra bits -+ * spent in that GF -+ */ -+ int non_gf_bitrate_adjustment; - -- int kf_overspend_bits; // Extra bits spent on key frames that need to be recovered on inter frames -- int kf_bitrate_adjustment; // Current number of bit s to try and recover on each inter frame. 
-+ /* Extra bits spent on key frames that need to be recovered */ -+ int kf_overspend_bits; -+ -+ /* Current number of bit s to try and recover on each inter frame. */ -+ int kf_bitrate_adjustment; - int max_gf_interval; - int baseline_gf_interval; -- int active_arnr_frames; // <= cpi->oxcf.arnr_max_frames -+ int active_arnr_frames; - - int64_t key_frame_count; - int prior_key_frame_distance[KEY_FRAME_CONTEXT]; -- int per_frame_bandwidth; // Current section per frame bandwidth target -- int av_per_frame_bandwidth; // Average frame size target for clip -- int min_frame_bandwidth; // Minimum allocation that should be used for any frame -+ /* Current section per frame bandwidth target */ -+ int per_frame_bandwidth; -+ /* Average frame size target for clip */ -+ int av_per_frame_bandwidth; -+ /* Minimum allocation that should be used for any frame */ -+ int min_frame_bandwidth; - int inter_frame_target; - double output_frame_rate; - int64_t last_time_stamp_seen; -@@ -402,12 +412,6 @@ typedef struct VP8_COMP - int ni_frames; - int avg_frame_qindex; - -- int zbin_over_quant; -- int zbin_mode_boost; -- int zbin_mode_boost_enabled; -- int last_zbin_over_quant; -- int last_zbin_mode_boost; -- - int64_t total_byte_count; - - int buffered_mode; -@@ -415,7 +419,7 @@ typedef struct VP8_COMP - double frame_rate; - double ref_frame_rate; - int64_t buffer_level; -- int bits_off_target; -+ int64_t bits_off_target; - - int rolling_target_bits; - int rolling_actual_bits; -@@ -424,7 +428,7 @@ typedef struct VP8_COMP - int long_rolling_actual_bits; - - int64_t total_actual_bits; -- int total_target_vs_actual; // debug stats -+ int total_target_vs_actual; /* debug stats */ - - int worst_quality; - int active_worst_quality; -@@ -433,22 +437,9 @@ typedef struct VP8_COMP - - int cq_target_quality; - -- int drop_frames_allowed; // Are we permitted to drop frames? -- int drop_frame; // Drop this frame? -- int drop_count; // How many frames have we dropped? -- int max_drop_count; // How many frames should we drop? -- int max_consec_dropped_frames; // Limit number of consecutive frames that can be dropped. -- -- -- int ymode_count [VP8_YMODES]; /* intra MB type cts this frame */ -- int uv_mode_count[VP8_UV_MODES]; /* intra MB type cts this frame */ -+ int drop_frames_allowed; /* Are we permitted to drop frames? */ -+ int drop_frame; /* Drop this frame? 
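A note on the int widening above: the buffer-level fields are configured in milliseconds and converted to bits against the target bandwidth, and that product can exceed 32 bits at high bitrates. The motivation is inferred from the type change, not stated in the patch. A minimal sketch of the conversion, with illustrative names:

    #include <stdint.h>

    /* Convert a buffer duration in milliseconds to bits; the intermediate
     * product needs 64 bits once target bitrates get large. */
    static int64_t buffer_ms_to_bits(int64_t buffer_ms, int target_bandwidth_bps)
    {
        return (buffer_ms * target_bandwidth_bps) / 1000;
    }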
*/ - -- unsigned int MVcount [2] [MVvals]; /* (row,col) MV cts this frame */ -- -- unsigned int coef_counts [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; /* for this frame */ -- -- //DECLARE_ALIGNED(16, int, coef_counts_backup [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]); //not used any more -- //save vp8_tree_probs_from_distribution result for each frame to avoid repeat calculation - vp8_prob frame_coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; - char update_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; - -@@ -462,7 +453,7 @@ typedef struct VP8_COMP - struct vpx_codec_pkt_list *output_pkt_list; - - #if 0 -- // Experimental code for lagged and one pass -+ /* Experimental code for lagged and one pass */ - ONEPASS_FRAMESTATS one_pass_frame_stats[MAX_LAG_BUFFERS]; - int one_pass_frame_index; - #endif -@@ -470,17 +461,14 @@ typedef struct VP8_COMP - int decimation_factor; - int decimation_count; - -- // for real time encoding -- int avg_encode_time; //microsecond -- int avg_pick_mode_time; //microsecond -+ /* for real time encoding */ -+ int avg_encode_time; /* microsecond */ -+ int avg_pick_mode_time; /* microsecond */ - int Speed; -- unsigned int cpu_freq; //Mhz - int compressor_speed; - -- int interquantizer; - int auto_gold; - int auto_adjust_gold_quantizer; -- int goldfreq; - int auto_worst_q; - int cpu_used; - int pass; -@@ -494,29 +482,28 @@ typedef struct VP8_COMP - int last_skip_probs_q[3]; - int recent_ref_frame_usage[MAX_REF_FRAMES]; - -- int count_mb_ref_frame_usage[MAX_REF_FRAMES]; - int this_frame_percent_intra; - int last_frame_percent_intra; - - int ref_frame_flags; - - SPEED_FEATURES sf; -- int error_bins[1024]; - -- // Data used for real time conferencing mode to help determine if it would be good to update the gf -- int inter_zz_count; -- int gf_bad_count; -- int gf_update_recommended; -- int skip_true_count; -+ /* Count ZEROMV on all reference frames. */ -+ int zeromv_count; -+ int lf_zeromv_pct; - - unsigned char *segmentation_map; -- signed char segment_feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS]; // Segment data (can be deltas or absolute values) -- int segment_encode_breakout[MAX_MB_SEGMENTS]; // segment threashold for encode breakout -+ signed char segment_feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS]; -+ int segment_encode_breakout[MAX_MB_SEGMENTS]; - - unsigned char *active_map; - unsigned int active_map_enabled; -- // Video conferencing cyclic refresh mode flags etc -- // This is a mode designed to clean up the background over time in live encoding scenarious. It uses segmentation -+ -+ /* Video conferencing cyclic refresh mode flags. This is a mode -+ * designed to clean up the background over time in live encoding -+ * scenarious. It uses segmentation. 
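The cyclic refresh mode described here works by forcing a bounded, rotating set of macroblocks into a dedicated segment each frame so the background is gradually re-coded. A heavily simplified sketch of that marking pass; the real cyclic_background_refresh() additionally manages the segment's Q delta and previously marked blocks:

    /* Advance a rotating index through the macroblock map, placing up to
     * max_mbs_per_frame blocks into the "refresh" segment (id 1). */
    static void mark_refresh_mbs(signed char *seg_map, int mbs_in_frame,
                                 int *refresh_index, int max_mbs_per_frame)
    {
        int marked;

        for (marked = 0; marked < max_mbs_per_frame; marked++)
        {
            seg_map[*refresh_index] = 1;
            *refresh_index = (*refresh_index + 1) % mbs_in_frame;
        }
    }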
-+ */ - int cyclic_refresh_mode_enabled; - int cyclic_refresh_mode_max_mbs_perframe; - int cyclic_refresh_mode_index; -@@ -524,7 +511,7 @@ typedef struct VP8_COMP - signed char *cyclic_refresh_map; - - #if CONFIG_MULTITHREAD -- // multithread data -+ /* multithread data */ - int * mt_current_mb_col; - int mt_sync_range; - int b_multi_threaded; -@@ -538,7 +525,7 @@ typedef struct VP8_COMP - ENCODETHREAD_DATA *en_thread_data; - LPFTHREAD_DATA lpf_thread_data; - -- //events -+ /* events */ - sem_t *h_event_start_encoding; - sem_t h_event_end_encoding; - sem_t h_event_start_lpf; -@@ -549,7 +536,6 @@ typedef struct VP8_COMP - unsigned int partition_sz[MAX_PARTITIONS]; - unsigned char *partition_d[MAX_PARTITIONS]; - unsigned char *partition_d_end[MAX_PARTITIONS]; -- // end of multithread data - - - fractional_mv_step_fp *find_fractional_mv_step; -@@ -557,10 +543,10 @@ typedef struct VP8_COMP - vp8_refining_search_fn_t refining_search_sad; - vp8_diamond_search_fn_t diamond_search_sad; - vp8_variance_fn_ptr_t fn_ptr[BLOCK_MAX_SEGMENTS]; -- unsigned int time_receive_data; -- unsigned int time_compress_data; -- unsigned int time_pick_lpf; -- unsigned int time_encode_mb_row; -+ uint64_t time_receive_data; -+ uint64_t time_compress_data; -+ uint64_t time_pick_lpf; -+ uint64_t time_encode_mb_row; - - int base_skip_false_prob[128]; - -@@ -594,16 +580,16 @@ typedef struct VP8_COMP - int gf_decay_rate; - int static_scene_max_gf_interval; - int kf_bits; -- int gf_group_error_left; // Remaining error from uncoded frames in a gf group. Two pass use only -- -- // Projected total bits available for a key frame group of frames -+ /* Remaining error from uncoded frames in a gf group. */ -+ int gf_group_error_left; -+ /* Projected total bits available for a key frame group of frames */ - int64_t kf_group_bits; -- -- // Error score of frames still to be coded in kf group -+ /* Error score of frames still to be coded in kf group */ - int64_t kf_group_error_left; -- -- int gf_group_bits; // Projected Bits available for a group of frames including 1 GF or ARF -- int gf_bits; // Bits for the golden frame or ARF - 2 pass only -+ /* Projected Bits available for a group including 1 GF or ARF */ -+ int gf_group_bits; -+ /* Bits for the golden frame or ARF */ -+ int gf_bits; - int alt_extra_bits; - double est_max_qcorrection_factor; - } twopass; -@@ -641,24 +627,25 @@ typedef struct VP8_COMP - #endif - int b_calculate_psnr; - -- // Per MB activity measurement -+ /* Per MB activity measurement */ - unsigned int activity_avg; - unsigned int * mb_activity_map; -- int * mb_norm_activity_map; - -- // Record of which MBs still refer to last golden frame either -- // directly or through 0,0 -+ /* Record of which MBs still refer to last golden frame either -+ * directly or through 0,0 -+ */ - unsigned char *gf_active_flags; - int gf_active_count; - - int output_partition; - -- //Store last frame's MV info for next frame MV prediction -+ /* Store last frame's MV info for next frame MV prediction */ - int_mv *lfmv; - int *lf_ref_frame_sign_bias; - int *lf_ref_frame; - -- int force_next_frame_intra; /* force next frame to intra when kf_auto says so */ -+ /* force next frame to intra when kf_auto says so */ -+ int force_next_frame_intra; - - int droppable; - -@@ -666,7 +653,7 @@ typedef struct VP8_COMP - VP8_DENOISER denoiser; - #endif - -- // Coding layer state variables -+ /* Coding layer state variables */ - unsigned int current_layer; - LAYER_CONTEXT layer_context[VPX_TS_MAX_LAYERS]; - -@@ -687,17 +674,29 @@ typedef struct 
VP8_COMP - #if CONFIG_MULTI_RES_ENCODING - /* Number of MBs per row at lower-resolution level */ - int mr_low_res_mb_cols; -+ /* Indicate if lower-res mv info is available */ -+ unsigned char mr_low_res_mv_avail; -+ /* The frame number of each reference frames */ -+ unsigned int current_ref_frames[MAX_REF_FRAMES]; - #endif - -+ struct rd_costs_struct -+ { -+ int mvcosts[2][MVvals+1]; -+ int mvsadcosts[2][MVfpvals+1]; -+ int mbmode_cost[2][MB_MODE_COUNT]; -+ int intra_uv_mode_cost[2][MB_MODE_COUNT]; -+ int bmode_costs[10][10][10]; -+ int inter_bmode_costs[B_MODE_COUNT]; -+ int token_costs[BLOCK_TYPES][COEF_BANDS] -+ [PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS]; -+ } rd_costs; - } VP8_COMP; - --void control_data_rate(VP8_COMP *cpi); -- --void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char *dest_end, unsigned long *size); -- --int rd_cost_intra_mb(MACROBLOCKD *x); -+void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, -+ unsigned char *dest_end, unsigned long *size); - --void vp8_tokenize_mb(VP8_COMP *, MACROBLOCKD *, TOKENEXTRA **); -+void vp8_tokenize_mb(VP8_COMP *, MACROBLOCK *, TOKENEXTRA **); - - void vp8_set_speed_features(VP8_COMP *cpi); - -diff --git a/vp8/encoder/pickinter.c b/vp8/encoder/pickinter.c -index dafb645..673de2b 100644 ---- a/vp8/encoder/pickinter.c -+++ b/vp8/encoder/pickinter.c -@@ -61,7 +61,7 @@ int vp8_skip_fractional_mv_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, - } - - --static int get_inter_mbpred_error(MACROBLOCK *mb, -+int vp8_get_inter_mbpred_error(MACROBLOCK *mb, - const vp8_variance_fn_ptr_t *vfp, - unsigned int *sse, - int_mv this_mv) -@@ -132,7 +132,7 @@ static int pick_intra4x4block( - MACROBLOCK *x, - int ib, - B_PREDICTION_MODE *best_mode, -- unsigned int *mode_costs, -+ const int *mode_costs, - - int *bestrate, - int *bestdistortion) -@@ -141,20 +141,24 @@ static int pick_intra4x4block( - BLOCKD *b = &x->e_mbd.block[ib]; - BLOCK *be = &x->block[ib]; - int dst_stride = x->e_mbd.dst.y_stride; -- unsigned char *base_dst = x->e_mbd.dst.y_buffer; -+ unsigned char *dst = x->e_mbd.dst.y_buffer + b->offset; - B_PREDICTION_MODE mode; -- int best_rd = INT_MAX; // 1<<30 -+ int best_rd = INT_MAX; - int rate; - int distortion; - -- for (mode = B_DC_PRED; mode <= B_HE_PRED /*B_HU_PRED*/; mode++) -+ unsigned char *Above = dst - dst_stride; -+ unsigned char *yleft = dst - 1; -+ unsigned char top_left = Above[-1]; -+ -+ for (mode = B_DC_PRED; mode <= B_HE_PRED; mode++) - { - int this_rd; - - rate = mode_costs[mode]; -- vp8_intra4x4_predict -- (base_dst + b->offset, dst_stride, -- mode, b->predictor, 16); -+ -+ vp8_intra4x4_predict(Above, yleft, dst_stride, mode, -+ b->predictor, 16, top_left); - distortion = get_prediction_error(be, b); - this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion); - -@@ -167,7 +171,7 @@ static int pick_intra4x4block( - } - } - -- b->bmi.as_mode = (B_PREDICTION_MODE)(*best_mode); -+ b->bmi.as_mode = *best_mode; - vp8_encode_intra4x4block(x, ib); - return best_rd; - } -@@ -185,7 +189,7 @@ static int pick_intra4x4mby_modes - int cost = mb->mbmode_cost [xd->frame_type] [B_PRED]; - int error; - int distortion = 0; -- unsigned int *bmode_costs; -+ const int *bmode_costs; - - intra_prediction_down_copy(xd, xd->dst.y_buffer - xd->dst.y_stride + 16); - -@@ -214,8 +218,9 @@ static int pick_intra4x4mby_modes - distortion += d; - mic->bmi[i].as_mode = best_mode; - -- // Break out case where we have already exceeded best so far value -- // that was passed in -+ /* Break out case where we have already exceeded best so far value -+ 
* that was passed in -+ */ - if (distortion > *best_dist) - break; - } -@@ -384,15 +389,16 @@ static void pick_intra_mbuv_mode(MACROBLOCK *mb) - - } - --static void update_mvcount(VP8_COMP *cpi, MACROBLOCKD *xd, int_mv *best_ref_mv) -+static void update_mvcount(VP8_COMP *cpi, MACROBLOCK *x, int_mv *best_ref_mv) - { -+ MACROBLOCKD *xd = &x->e_mbd; - /* Split MV modes currently not supported when RD is nopt enabled, - * therefore, only need to modify MVcount in NEWMV mode. */ - if (xd->mode_info_context->mbmi.mode == NEWMV) - { -- cpi->MVcount[0][mv_max+((xd->mode_info_context->mbmi.mv.as_mv.row - -+ x->MVcount[0][mv_max+((xd->mode_info_context->mbmi.mv.as_mv.row - - best_ref_mv->as_mv.row) >> 1)]++; -- cpi->MVcount[1][mv_max+((xd->mode_info_context->mbmi.mv.as_mv.col - -+ x->MVcount[1][mv_max+((xd->mode_info_context->mbmi.mv.as_mv.col - - best_ref_mv->as_mv.col) >> 1)]++; - } - } -@@ -405,10 +411,9 @@ void get_lower_res_motion_info(VP8_COMP *cpi, MACROBLOCKD *xd, int *dissim, - MB_PREDICTION_MODE *parent_mode, - int_mv *parent_ref_mv, int mb_row, int mb_col) - { -- LOWER_RES_INFO* store_mode_info -- = (LOWER_RES_INFO*)cpi->oxcf.mr_low_res_mode_info; -+ LOWER_RES_MB_INFO* store_mode_info -+ = ((LOWER_RES_FRAME_INFO*)cpi->oxcf.mr_low_res_mode_info)->mb_info; - unsigned int parent_mb_index; -- //unsigned int parent_mb_index = map_640x480_to_320x240[mb_row][mb_col]; - - /* Consider different down_sampling_factor. */ - { -@@ -440,7 +445,6 @@ void get_lower_res_motion_info(VP8_COMP *cpi, MACROBLOCKD *xd, int *dissim, - /* Consider different down_sampling_factor. - * The result can be rounded to be more precise, but it takes more time. - */ -- //int round = cpi->oxcf.mr_down_sampling_factor.den/2; - (*parent_ref_mv).as_mv.row = store_mode_info[parent_mb_index].mv.as_mv.row - *cpi->oxcf.mr_down_sampling_factor.num - /cpi->oxcf.mr_down_sampling_factor.den; -@@ -455,10 +459,18 @@ void get_lower_res_motion_info(VP8_COMP *cpi, MACROBLOCKD *xd, int *dissim, - - static void check_for_encode_breakout(unsigned int sse, MACROBLOCK* x) - { -- if (sse < x->encode_breakout) -+ MACROBLOCKD *xd = &x->e_mbd; -+ -+ unsigned int threshold = (xd->block[0].dequant[1] -+ * xd->block[0].dequant[1] >>4); -+ -+ if(threshold < x->encode_breakout) -+ threshold = x->encode_breakout; -+ -+ if (sse < threshold ) - { -- // Check u and v to make sure skip is ok -- int sse2 = 0; -+ /* Check u and v to make sure skip is ok */ -+ unsigned int sse2 = 0; - - sse2 = VP8_UVSSE(x); - -@@ -469,7 +481,8 @@ static void check_for_encode_breakout(unsigned int sse, MACROBLOCK* x) - } - } - --static int evaluate_inter_mode(unsigned int* sse, int rate2, int* distortion2, VP8_COMP *cpi, MACROBLOCK *x) -+static int evaluate_inter_mode(unsigned int* sse, int rate2, int* distortion2, -+ VP8_COMP *cpi, MACROBLOCK *x, int rd_adj) - { - MB_PREDICTION_MODE this_mode = x->e_mbd.mode_info_context->mbmi.mode; - int_mv mv = x->e_mbd.mode_info_context->mbmi.mv; -@@ -486,16 +499,70 @@ static int evaluate_inter_mode(unsigned int* sse, int rate2, int* distortion2, V - - if((this_mode != NEWMV) || - !(cpi->sf.half_pixel_search) || cpi->common.full_pixel==1) -- *distortion2 = get_inter_mbpred_error(x, -+ *distortion2 = vp8_get_inter_mbpred_error(x, - &cpi->fn_ptr[BLOCK_16X16], - sse, mv); - - this_rd = RDCOST(x->rdmult, x->rddiv, rate2, *distortion2); - -+ /* Adjust rd to bias to ZEROMV */ -+ if(this_mode == ZEROMV) -+ { -+ /* Bias to ZEROMV on LAST_FRAME reference when it is available. 
*/ -+ if ((cpi->ref_frame_flags & VP8_LAST_FRAME & -+ cpi->common.refresh_last_frame) -+ && x->e_mbd.mode_info_context->mbmi.ref_frame != LAST_FRAME) -+ rd_adj = 100; -+ -+ // rd_adj <= 100 -+ this_rd = ((int64_t)this_rd) * rd_adj / 100; -+ } -+ - check_for_encode_breakout(*sse, x); - return this_rd; - } - -+static void calculate_zeromv_rd_adjustment(VP8_COMP *cpi, MACROBLOCK *x, -+ int *rd_adjustment) -+{ -+ MODE_INFO *mic = x->e_mbd.mode_info_context; -+ int_mv mv_l, mv_a, mv_al; -+ int local_motion_check = 0; -+ -+ if (cpi->lf_zeromv_pct > 40) -+ { -+ /* left mb */ -+ mic -= 1; -+ mv_l = mic->mbmi.mv; -+ -+ if (mic->mbmi.ref_frame != INTRA_FRAME) -+ if( abs(mv_l.as_mv.row) < 8 && abs(mv_l.as_mv.col) < 8) -+ local_motion_check++; -+ -+ /* above-left mb */ -+ mic -= x->e_mbd.mode_info_stride; -+ mv_al = mic->mbmi.mv; -+ -+ if (mic->mbmi.ref_frame != INTRA_FRAME) -+ if( abs(mv_al.as_mv.row) < 8 && abs(mv_al.as_mv.col) < 8) -+ local_motion_check++; -+ -+ /* above mb */ -+ mic += 1; -+ mv_a = mic->mbmi.mv; -+ -+ if (mic->mbmi.ref_frame != INTRA_FRAME) -+ if( abs(mv_a.as_mv.row) < 8 && abs(mv_a.as_mv.col) < 8) -+ local_motion_check++; -+ -+ if (((!x->e_mbd.mb_to_top_edge || !x->e_mbd.mb_to_left_edge) -+ && local_motion_check >0) || local_motion_check >2 ) -+ *rd_adjustment = 80; -+ else if (local_motion_check > 0) -+ *rd_adjustment = 90; -+ } -+} -+ - void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - int recon_uvoffset, int *returnrate, - int *returndistortion, int *returnintra, int mb_row, -@@ -513,7 +580,8 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - MB_PREDICTION_MODE this_mode; - int num00; - int mdcounts[4]; -- int best_rd = INT_MAX; // 1 << 30; -+ int best_rd = INT_MAX; -+ int rd_adjustment = 100; - int best_intra_rd = INT_MAX; - int mode_index; - int rate; -@@ -523,14 +591,15 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - int best_mode_index = 0; - unsigned int sse = INT_MAX, best_rd_sse = INT_MAX; - #if CONFIG_TEMPORAL_DENOISING -- unsigned int zero_mv_sse = 0, best_sse = INT_MAX; -+ unsigned int zero_mv_sse = INT_MAX, best_sse = INT_MAX; - #endif - - int_mv mvp; - - int near_sadidx[8] = {0, 1, 2, 3, 4, 5, 6, 7}; - int saddone=0; -- int sr=0; //search range got from mv_pred(). It uses step_param levels. (0-7) -+ /* search range got from mv_pred(). It uses step_param levels. (0-7) */ -+ int sr=0; - - unsigned char *plane[4][3]; - int ref_frame_map[4]; -@@ -539,12 +608,39 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - #if CONFIG_MULTI_RES_ENCODING - int dissim = INT_MAX; - int parent_ref_frame = 0; -+ int parent_ref_valid = cpi->oxcf.mr_encoder_id && cpi->mr_low_res_mv_avail; - int_mv parent_ref_mv; - MB_PREDICTION_MODE parent_mode = 0; - -- if (cpi->oxcf.mr_encoder_id) -+ if (parent_ref_valid) -+ { -+ int parent_ref_flag; -+ - get_lower_res_motion_info(cpi, xd, &dissim, &parent_ref_frame, - &parent_mode, &parent_ref_mv, mb_row, mb_col); -+ -+ /* TODO(jkoleszar): The references available (ref_frame_flags) to the -+ * lower res encoder should match those available to this encoder, but -+ * there seems to be a situation where this mismatch can happen in the -+ * case of frame dropping and temporal layers. For example, -+ * GOLD being disallowed in ref_frame_flags, but being returned as -+ * parent_ref_frame. -+ * -+ * In this event, take the conservative approach of disabling the -+ * lower res info for this MB. 
-+ */ -+ parent_ref_flag = 0; -+ if (parent_ref_frame == LAST_FRAME) -+ parent_ref_flag = (cpi->ref_frame_flags & VP8_LAST_FRAME); -+ else if (parent_ref_frame == GOLDEN_FRAME) -+ parent_ref_flag = (cpi->ref_frame_flags & VP8_GOLD_FRAME); -+ else if (parent_ref_frame == ALTREF_FRAME) -+ parent_ref_flag = (cpi->ref_frame_flags & VP8_ALTR_FRAME); -+ -+ //assert(!parent_ref_frame || parent_ref_flag); -+ if (parent_ref_frame && !parent_ref_flag) -+ parent_ref_valid = 0; -+ } - #endif - - mode_mv = mode_mv_sb[sign_bias]; -@@ -553,6 +649,15 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - vpx_memset(&best_mbmode, 0, sizeof(best_mbmode)); - - /* Setup search priorities */ -+#if CONFIG_MULTI_RES_ENCODING -+ if (parent_ref_valid && parent_ref_frame && dissim < 8) -+ { -+ ref_frame_map[0] = -1; -+ ref_frame_map[1] = parent_ref_frame; -+ ref_frame_map[2] = -1; -+ ref_frame_map[3] = -1; -+ } else -+#endif - get_reference_search_order(cpi, ref_frame_map); - - /* Check to see if there is at least 1 valid reference frame that we need -@@ -574,22 +679,29 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - - get_predictor_pointers(cpi, plane, recon_yoffset, recon_uvoffset); - -- cpi->mbs_tested_so_far++; // Count of the number of MBs tested so far this frame -+ /* Count of the number of MBs tested so far this frame */ -+ x->mbs_tested_so_far++; - - *returnintra = INT_MAX; - x->skip = 0; - - x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME; - -- // if we encode a new mv this is important -- // find the best new motion vector -+ /* If the frame has big static background and current MB is in low -+ * motion area, its mode decision is biased to ZEROMV mode. -+ */ -+ calculate_zeromv_rd_adjustment(cpi, x, &rd_adjustment); -+ -+ /* if we encode a new mv this is important -+ * find the best new motion vector -+ */ - for (mode_index = 0; mode_index < MAX_MODES; mode_index++) - { - int frame_cost; - int this_rd = INT_MAX; - int this_ref_frame = ref_frame_map[vp8_ref_frame_order[mode_index]]; - -- if (best_rd <= cpi->rd_threshes[mode_index]) -+ if (best_rd <= x->rd_threshes[mode_index]) - continue; - - if (this_ref_frame < 0) -@@ -597,23 +709,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - - x->e_mbd.mode_info_context->mbmi.ref_frame = this_ref_frame; - --#if CONFIG_MULTI_RES_ENCODING -- if (cpi->oxcf.mr_encoder_id) -- { -- /* If parent MB is intra, child MB is intra. */ -- if (!parent_ref_frame && this_ref_frame) -- continue; -- -- /* If parent MB is inter, and it is unlikely there are multiple -- * objects in parent MB, we use parent ref frame as child MB's -- * ref frame. 
*/ -- if (parent_ref_frame && dissim < 8 -- && parent_ref_frame != this_ref_frame) -- continue; -- } --#endif -- -- // everything but intra -+ /* everything but intra */ - if (x->e_mbd.mode_info_context->mbmi.ref_frame) - { - x->e_mbd.pre.y_buffer = plane[this_ref_frame][0]; -@@ -628,7 +724,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - } - - #if CONFIG_MULTI_RES_ENCODING -- if (cpi->oxcf.mr_encoder_id) -+ if (parent_ref_valid) - { - if (vp8_mode_order[mode_index] == NEARESTMV && - mode_mv[NEARESTMV].as_int ==0) -@@ -638,7 +734,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - continue; - - if (vp8_mode_order[mode_index] == NEWMV && parent_mode == ZEROMV -- && best_ref_mv.as_int==0) //&& dissim==0 -+ && best_ref_mv.as_int==0) - continue; - else if(vp8_mode_order[mode_index] == NEWMV && dissim==0 - && best_ref_mv.as_int==parent_ref_mv.as_int) -@@ -650,22 +746,22 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - /* Check to see if the testing frequency for this mode is at its max - * If so then prevent it from being tested and increase the threshold - * for its testing */ -- if (cpi->mode_test_hit_counts[mode_index] && -+ if (x->mode_test_hit_counts[mode_index] && - (cpi->mode_check_freq[mode_index] > 1)) - { -- if (cpi->mbs_tested_so_far <= (cpi->mode_check_freq[mode_index] * -- cpi->mode_test_hit_counts[mode_index])) -+ if (x->mbs_tested_so_far <= (cpi->mode_check_freq[mode_index] * -+ x->mode_test_hit_counts[mode_index])) - { - /* Increase the threshold for coding this mode to make it less - * likely to be chosen */ -- cpi->rd_thresh_mult[mode_index] += 4; -+ x->rd_thresh_mult[mode_index] += 4; - -- if (cpi->rd_thresh_mult[mode_index] > MAX_THRESHMULT) -- cpi->rd_thresh_mult[mode_index] = MAX_THRESHMULT; -+ if (x->rd_thresh_mult[mode_index] > MAX_THRESHMULT) -+ x->rd_thresh_mult[mode_index] = MAX_THRESHMULT; - -- cpi->rd_threshes[mode_index] = -+ x->rd_threshes[mode_index] = - (cpi->rd_baseline_thresh[mode_index] >> 7) * -- cpi->rd_thresh_mult[mode_index]; -+ x->rd_thresh_mult[mode_index]; - continue; - } - } -@@ -673,7 +769,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - /* We have now reached the point where we are going to test the current - * mode so increment the counter for the number of times it has been - * tested */ -- cpi->mode_test_hit_counts[mode_index] ++; -+ x->mode_test_hit_counts[mode_index] ++; - - rate2 = 0; - distortion2 = 0; -@@ -728,7 +824,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - - case SPLITMV: - -- // Split MV modes currently not supported when RD is nopt enabled. -+ /* Split MV modes currently not supported when RD is not enabled. */ - break; - - case DC_PRED: -@@ -777,13 +873,22 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - - int speed_adjust = (cpi->Speed > 5) ? ((cpi->Speed >= 8)? 3 : 2) : 1; - -- // Further step/diamond searches as necessary -+ /* Further step/diamond searches as necessary */ - step_param = cpi->sf.first_step + speed_adjust; - - #if CONFIG_MULTI_RES_ENCODING -- if (cpi->oxcf.mr_encoder_id) -+ /* If lower-res drops this frame, then higher-res encoder does -+ motion search without any previous knowledge. Also, since -+ last frame motion info is not stored, then we can not -+ use improved_mv_pred. 
*/ -+ if (cpi->oxcf.mr_encoder_id && !parent_ref_valid) -+ cpi->sf.improved_mv_pred = 0; -+ -+ if (parent_ref_valid && parent_ref_frame) - { -- // Use parent MV as predictor. Adjust search range accordingly. -+ /* Use parent MV as predictor. Adjust search range -+ * accordingly. -+ */ - mvp.as_int = parent_ref_mv.as_int; - mvp_full.as_mv.col = parent_ref_mv.as_mv.col>>3; - mvp_full.as_mv.row = parent_ref_mv.as_mv.row>>3; -@@ -808,7 +913,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - &near_sadidx[0]); - - sr += speed_adjust; -- //adjust search range according to sr from mv prediction -+ /* adjust search range according to sr from mv prediction */ - if(sr > step_param) - step_param = sr; - -@@ -823,7 +928,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - } - - #if CONFIG_MULTI_RES_ENCODING -- if (cpi->oxcf.mr_encoder_id && dissim <= 2 && -+ if (parent_ref_valid && parent_ref_frame && dissim <= 2 && - MAX(abs(best_ref_mv.as_mv.row - parent_ref_mv.as_mv.row), - abs(best_ref_mv.as_mv.col - parent_ref_mv.as_mv.col)) <= 4) - { -@@ -860,7 +965,10 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - * change the behavior in lowest-resolution encoder. - * Will improve it later. - */ -- if (!cpi->oxcf.mr_encoder_id) -+ /* Set step_param to 0 to ensure large-range motion search -+ when encoder drops this frame at lower-resolution. -+ */ -+ if (!parent_ref_valid) - step_param = 0; - #endif - bestsme = vp8_hex_search(x, b, d, &mvp_full, &d->bmi.mv, -@@ -877,10 +985,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - x->mvcost, &best_ref_mv); - mode_mv[NEWMV].as_int = d->bmi.mv.as_int; - -- // Further step/diamond searches as necessary -- n = 0; -- //further_steps = (cpi->sf.max_step_search_steps - 1) - step_param; -- -+ /* Further step/diamond searches as necessary */ - n = num00; - num00 = 0; - -@@ -927,7 +1032,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - - mode_mv[NEWMV].as_int = d->bmi.mv.as_int; - -- // mv cost; -+ /* mv cost; */ - rate2 += vp8_mv_bit_cost(&mode_mv[NEWMV], &best_ref_mv, - cpi->mb.mvcost, 128); - } -@@ -954,7 +1059,8 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - rate2 += vp8_cost_mv_ref(this_mode, mdcounts); - x->e_mbd.mode_info_context->mbmi.mv.as_int = - mode_mv[this_mode].as_int; -- this_rd = evaluate_inter_mode(&sse, rate2, &distortion2, cpi, x); -+ this_rd = evaluate_inter_mode(&sse, rate2, &distortion2, cpi, x, -+ rd_adjustment); - - break; - default: -@@ -964,31 +1070,33 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - #if CONFIG_TEMPORAL_DENOISING - if (cpi->oxcf.noise_sensitivity) - { -- // Store for later use by denoiser. -- if (this_mode == ZEROMV && -- x->e_mbd.mode_info_context->mbmi.ref_frame == LAST_FRAME) -- { -- zero_mv_sse = sse; -- } -- -- // Store the best NEWMV in x for later use in the denoiser. -- // We are restricted to the LAST_FRAME since the denoiser only keeps -- // one filter state. -- if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV && -- x->e_mbd.mode_info_context->mbmi.ref_frame == LAST_FRAME) -- { -- best_sse = sse; -- x->e_mbd.best_sse_inter_mode = NEWMV; -- x->e_mbd.best_sse_mv = x->e_mbd.mode_info_context->mbmi.mv; -- x->e_mbd.need_to_clamp_best_mvs = -- x->e_mbd.mode_info_context->mbmi.need_to_clamp_mvs; -- } -+ -+ /* Store for later use by denoiser. 
*/ -+ if (this_mode == ZEROMV && sse < zero_mv_sse ) -+ { -+ zero_mv_sse = sse; -+ x->best_zeromv_reference_frame = -+ x->e_mbd.mode_info_context->mbmi.ref_frame; -+ } -+ -+ /* Store the best NEWMV in x for later use in the denoiser. */ -+ if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV && -+ sse < best_sse) -+ { -+ best_sse = sse; -+ x->best_sse_inter_mode = NEWMV; -+ x->best_sse_mv = x->e_mbd.mode_info_context->mbmi.mv; -+ x->need_to_clamp_best_mvs = -+ x->e_mbd.mode_info_context->mbmi.need_to_clamp_mvs; -+ x->best_reference_frame = -+ x->e_mbd.mode_info_context->mbmi.ref_frame; -+ } - } - #endif - - if (this_rd < best_rd || x->skip) - { -- // Note index of best mode -+ /* Note index of best mode */ - best_mode_index = mode_index; - - *returnrate = rate2; -@@ -1001,12 +1109,12 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - /* Testing this mode gave rise to an improvement in best error - * score. Lower threshold a bit for next time - */ -- cpi->rd_thresh_mult[mode_index] = -- (cpi->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) ? -- cpi->rd_thresh_mult[mode_index] - 2 : MIN_THRESHMULT; -- cpi->rd_threshes[mode_index] = -+ x->rd_thresh_mult[mode_index] = -+ (x->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) ? -+ x->rd_thresh_mult[mode_index] - 2 : MIN_THRESHMULT; -+ x->rd_threshes[mode_index] = - (cpi->rd_baseline_thresh[mode_index] >> 7) * -- cpi->rd_thresh_mult[mode_index]; -+ x->rd_thresh_mult[mode_index]; - } - - /* If the mode did not help improve the best error case then raise the -@@ -1014,33 +1122,33 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - */ - else - { -- cpi->rd_thresh_mult[mode_index] += 4; -+ x->rd_thresh_mult[mode_index] += 4; - -- if (cpi->rd_thresh_mult[mode_index] > MAX_THRESHMULT) -- cpi->rd_thresh_mult[mode_index] = MAX_THRESHMULT; -+ if (x->rd_thresh_mult[mode_index] > MAX_THRESHMULT) -+ x->rd_thresh_mult[mode_index] = MAX_THRESHMULT; - -- cpi->rd_threshes[mode_index] = -+ x->rd_threshes[mode_index] = - (cpi->rd_baseline_thresh[mode_index] >> 7) * -- cpi->rd_thresh_mult[mode_index]; -+ x->rd_thresh_mult[mode_index]; - } - - if (x->skip) - break; - } - -- // Reduce the activation RD thresholds for the best choice mode -+ /* Reduce the activation RD thresholds for the best choice mode */ - if ((cpi->rd_baseline_thresh[best_mode_index] > 0) && (cpi->rd_baseline_thresh[best_mode_index] < (INT_MAX >> 2))) - { -- int best_adjustment = (cpi->rd_thresh_mult[best_mode_index] >> 3); -+ int best_adjustment = (x->rd_thresh_mult[best_mode_index] >> 3); - -- cpi->rd_thresh_mult[best_mode_index] = -- (cpi->rd_thresh_mult[best_mode_index] -+ x->rd_thresh_mult[best_mode_index] = -+ (x->rd_thresh_mult[best_mode_index] - >= (MIN_THRESHMULT + best_adjustment)) ? -- cpi->rd_thresh_mult[best_mode_index] - best_adjustment : -+ x->rd_thresh_mult[best_mode_index] - best_adjustment : - MIN_THRESHMULT; -- cpi->rd_threshes[best_mode_index] = -+ x->rd_threshes[best_mode_index] = - (cpi->rd_baseline_thresh[best_mode_index] >> 7) * -- cpi->rd_thresh_mult[best_mode_index]; -+ x->rd_thresh_mult[best_mode_index]; - } - - -@@ -1052,43 +1160,54 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - this_rdbin = 1023; - } - -- cpi->error_bins[this_rdbin] ++; -+ x->error_bins[this_rdbin] ++; - } - - #if CONFIG_TEMPORAL_DENOISING - if (cpi->oxcf.noise_sensitivity) - { -- if (x->e_mbd.best_sse_inter_mode == DC_PRED) { -- // No best MV found. 
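/*
 * [Editor's illustrative aside -- not part of the patch above.]
 * The hunk here biases VP8's fast mode decision toward ZEROMV when the
 * frame is largely static: an adjustment of 80/90/100 percent, derived
 * from how many neighbouring macroblocks have near-zero motion, scales
 * the rate-distortion cost of the ZEROMV candidate.  The stand-alone
 * sketch below mirrors that idea with simplified, hypothetical names;
 * rd_cost() is only a stand-in for the RDCOST macro used in the diff.
 */
#include <stdint.h>

/* Simplified RD cost: rate weighted by the rate multiplier, plus distortion. */
static int64_t rd_cost(int rate, int distortion, int rdmult, int rddiv)
{
    return ((int64_t)rate * rdmult) / 128 + (int64_t)distortion * rddiv;
}

/* The more neighbours with near-zero motion, the stronger the ZEROMV bias
 * (100 = no bias, 80 = strongest), loosely following the hunk above. */
static int zeromv_rd_adjustment(int static_neighbours)
{
    if (static_neighbours > 2) return 80;
    if (static_neighbours > 0) return 90;
    return 100;
}

/* Apply the bias only to the ZEROMV candidate, as in "this_rd * rd_adj / 100". */
static int64_t biased_rd(int is_zeromv, int64_t rd, int adjustment)
{
    return is_zeromv ? rd * adjustment / 100 : rd;
}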
-- x->e_mbd.best_sse_inter_mode = best_mbmode.mode; -- x->e_mbd.best_sse_mv = best_mbmode.mv; -- x->e_mbd.need_to_clamp_best_mvs = best_mbmode.need_to_clamp_mvs; -- best_sse = best_rd_sse; -- } -- vp8_denoiser_denoise_mb(&cpi->denoiser, x, best_sse, zero_mv_sse, -- recon_yoffset, recon_uvoffset); -- -- // Reevaluate ZEROMV after denoising. -- if (best_mbmode.ref_frame == INTRA_FRAME) -- { -- int this_rd = 0; -- rate2 = 0; -- distortion2 = 0; -- x->e_mbd.mode_info_context->mbmi.ref_frame = LAST_FRAME; -- rate2 += x->ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame]; -- this_mode = ZEROMV; -- rate2 += vp8_cost_mv_ref(this_mode, mdcounts); -- x->e_mbd.mode_info_context->mbmi.mode = this_mode; -- x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED; -- x->e_mbd.mode_info_context->mbmi.mv.as_int = 0; -- this_rd = evaluate_inter_mode(&sse, rate2, &distortion2, cpi, x); -+ if (x->best_sse_inter_mode == DC_PRED) -+ { -+ /* No best MV found. */ -+ x->best_sse_inter_mode = best_mbmode.mode; -+ x->best_sse_mv = best_mbmode.mv; -+ x->need_to_clamp_best_mvs = best_mbmode.need_to_clamp_mvs; -+ x->best_reference_frame = best_mbmode.ref_frame; -+ best_sse = best_rd_sse; -+ } -+ vp8_denoiser_denoise_mb(&cpi->denoiser, x, best_sse, zero_mv_sse, -+ recon_yoffset, recon_uvoffset); - -- if (this_rd < best_rd || x->skip) -+ -+ /* Reevaluate ZEROMV after denoising. */ -+ if (best_mbmode.ref_frame == INTRA_FRAME && -+ x->best_zeromv_reference_frame != INTRA_FRAME) - { -- vpx_memcpy(&best_mbmode, &x->e_mbd.mode_info_context->mbmi, -- sizeof(MB_MODE_INFO)); -+ int this_rd = 0; -+ int this_ref_frame = x->best_zeromv_reference_frame; -+ rate2 = x->ref_frame_cost[this_ref_frame] + -+ vp8_cost_mv_ref(ZEROMV, mdcounts); -+ distortion2 = 0; -+ -+ /* set up the proper prediction buffers for the frame */ -+ x->e_mbd.mode_info_context->mbmi.ref_frame = this_ref_frame; -+ x->e_mbd.pre.y_buffer = plane[this_ref_frame][0]; -+ x->e_mbd.pre.u_buffer = plane[this_ref_frame][1]; -+ x->e_mbd.pre.v_buffer = plane[this_ref_frame][2]; -+ -+ x->e_mbd.mode_info_context->mbmi.mode = ZEROMV; -+ x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED; -+ x->e_mbd.mode_info_context->mbmi.mv.as_int = 0; -+ this_rd = evaluate_inter_mode(&sse, rate2, &distortion2, cpi, x, -+ rd_adjustment); -+ -+ if (this_rd < best_rd) -+ { -+ vpx_memcpy(&best_mbmode, &x->e_mbd.mode_info_context->mbmi, -+ sizeof(MB_MODE_INFO)); -+ } - } -- } -+ - } - #endif - -@@ -1122,11 +1241,11 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - != cpi->common.ref_frame_sign_bias[xd->mode_info_context->mbmi.ref_frame]) - best_ref_mv.as_int = best_ref_mv_sb[!sign_bias].as_int; - -- update_mvcount(cpi, &x->e_mbd, &best_ref_mv); -+ update_mvcount(cpi, x, &best_ref_mv); - } - - --void vp8_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate_) -+void vp8_pick_intra_mode(MACROBLOCK *x, int *rate_) - { - int error4x4, error16x16 = INT_MAX; - int rate, best_rate = 0, distortion, best_sse; -diff --git a/vp8/encoder/pickinter.h b/vp8/encoder/pickinter.h -index 3d83782..35011ca 100644 ---- a/vp8/encoder/pickinter.h -+++ b/vp8/encoder/pickinter.h -@@ -18,6 +18,10 @@ extern void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - int recon_uvoffset, int *returnrate, - int *returndistortion, int *returnintra, - int mb_row, int mb_col); --extern void vp8_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate); -+extern void vp8_pick_intra_mode(MACROBLOCK *x, int *rate); - -+extern int vp8_get_inter_mbpred_error(MACROBLOCK *mb, -+ const 
vp8_variance_fn_ptr_t *vfp, -+ unsigned int *sse, -+ int_mv this_mv); - #endif -diff --git a/vp8/encoder/picklpf.c b/vp8/encoder/picklpf.c -index 21af45a..4121349 100644 ---- a/vp8/encoder/picklpf.c -+++ b/vp8/encoder/picklpf.c -@@ -74,7 +74,9 @@ static int calc_partial_ssl_err(YV12_BUFFER_CONFIG *source, - src += srcoffset; - dst += dstoffset; - -- // Loop through the Y plane raw and reconstruction data summing (square differences) -+ /* Loop through the Y plane raw and reconstruction data summing -+ * (square differences) -+ */ - for (i = 0; i < linestocopy; i += 16) - { - for (j = 0; j < source->y_width; j += 16) -@@ -92,7 +94,7 @@ static int calc_partial_ssl_err(YV12_BUFFER_CONFIG *source, - return Total; - } - --// Enforce a minimum filter level based upon baseline Q -+/* Enforce a minimum filter level based upon baseline Q */ - static int get_min_filter_level(VP8_COMP *cpi, int base_qindex) - { - int min_filter_level; -@@ -113,14 +115,15 @@ static int get_min_filter_level(VP8_COMP *cpi, int base_qindex) - return min_filter_level; - } - --// Enforce a maximum filter level based upon baseline Q -+/* Enforce a maximum filter level based upon baseline Q */ - static int get_max_filter_level(VP8_COMP *cpi, int base_qindex) - { -- // PGW August 2006: Highest filter values almost always a bad idea -+ /* PGW August 2006: Highest filter values almost always a bad idea */ - -- // jbb chg: 20100118 - not so any more with this overquant stuff allow high values -- // with lots of intra coming in. -- int max_filter_level = MAX_LOOP_FILTER ;//* 3 / 4; -+ /* jbb chg: 20100118 - not so any more with this overquant stuff allow -+ * high values with lots of intra coming in. -+ */ -+ int max_filter_level = MAX_LOOP_FILTER; - (void)base_qindex; - - if (cpi->twopass.section_intra_rating > 8) -@@ -155,7 +158,9 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) - cm->last_sharpness_level = cm->sharpness_level; - } - -- // Start the search at the previous frame filter level unless it is now out of range. -+ /* Start the search at the previous frame filter level unless it is -+ * now out of range. -+ */ - if (cm->filter_level < min_filter_level) - cm->filter_level = min_filter_level; - else if (cm->filter_level > max_filter_level) -@@ -164,7 +169,7 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) - filt_val = cm->filter_level; - best_filt_val = filt_val; - -- // Get the err using the previous frame's filter value. -+ /* Get the err using the previous frame's filter value. */ - - /* Copy the unfiltered / processed recon buffer to the new buffer */ - vp8_yv12_copy_partial_frame(saved_frame, cm->frame_to_show); -@@ -174,17 +179,17 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) - - filt_val -= 1 + (filt_val > 10); - -- // Search lower filter levels -+ /* Search lower filter levels */ - while (filt_val >= min_filter_level) - { -- // Apply the loop filter -+ /* Apply the loop filter */ - vp8_yv12_copy_partial_frame(saved_frame, cm->frame_to_show); - vp8_loop_filter_partial_frame(cm, &cpi->mb.e_mbd, filt_val); - -- // Get the err for filtered frame -+ /* Get the err for filtered frame */ - filt_err = calc_partial_ssl_err(sd, cm->frame_to_show); - -- // Update the best case record or exit loop. -+ /* Update the best case record or exit loop. 
*/ - if (filt_err < best_err) - { - best_err = filt_err; -@@ -193,32 +198,34 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) - else - break; - -- // Adjust filter level -+ /* Adjust filter level */ - filt_val -= 1 + (filt_val > 10); - } - -- // Search up (note that we have already done filt_val = cm->filter_level) -+ /* Search up (note that we have already done filt_val = cm->filter_level) */ - filt_val = cm->filter_level + 1 + (filt_val > 10); - - if (best_filt_val == cm->filter_level) - { -- // Resist raising filter level for very small gains -+ /* Resist raising filter level for very small gains */ - best_err -= (best_err >> 10); - - while (filt_val < max_filter_level) - { -- // Apply the loop filter -+ /* Apply the loop filter */ - vp8_yv12_copy_partial_frame(saved_frame, cm->frame_to_show); - - vp8_loop_filter_partial_frame(cm, &cpi->mb.e_mbd, filt_val); - -- // Get the err for filtered frame -+ /* Get the err for filtered frame */ - filt_err = calc_partial_ssl_err(sd, cm->frame_to_show); - -- // Update the best case record or exit loop. -+ /* Update the best case record or exit loop. */ - if (filt_err < best_err) - { -- // Do not raise filter level if improvement is < 1 part in 4096 -+ /* Do not raise filter level if improvement is < 1 part -+ * in 4096 -+ */ - best_err = filt_err - (filt_err >> 10); - - best_filt_val = filt_val; -@@ -226,7 +233,7 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) - else - break; - -- // Adjust filter level -+ /* Adjust filter level */ - filt_val += 1 + (filt_val > 10); - } - } -@@ -243,7 +250,7 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) - cm->frame_to_show = saved_frame; - } - --// Stub function for now Alt LF not used -+/* Stub function for now Alt LF not used */ - void vp8cx_set_alt_lf_level(VP8_COMP *cpi, int filt_val) - { - MACROBLOCKD *mbd = &cpi->mb.e_mbd; -@@ -266,12 +273,14 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) - - int filter_step; - int filt_high = 0; -- int filt_mid = cm->filter_level; // Start search at previous frame filter level -+ /* Start search at previous frame filter level */ -+ int filt_mid = cm->filter_level; - int filt_low = 0; - int filt_best; - int filt_direction = 0; - -- int Bias = 0; // Bias against raising loop filter and in favor of lowering it -+ /* Bias against raising loop filter and in favor of lowering it */ -+ int Bias = 0; - - int ss_err[MAX_LOOP_FILTER + 1]; - -@@ -287,7 +296,9 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) - else - cm->sharpness_level = cpi->oxcf.Sharpness; - -- // Start the search at the previous frame filter level unless it is now out of range. -+ /* Start the search at the previous frame filter level unless it is -+ * now out of range. -+ */ - filt_mid = cm->filter_level; - - if (filt_mid < min_filter_level) -@@ -295,10 +306,10 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) - else if (filt_mid > max_filter_level) - filt_mid = max_filter_level; - -- // Define the initial step size -+ /* Define the initial step size */ - filter_step = (filt_mid < 16) ? 
4 : filt_mid / 4; - -- // Get baseline error score -+ /* Get baseline error score */ - - /* Copy the unfiltered / processed recon buffer to the new buffer */ - vp8_yv12_copy_y(saved_frame, cm->frame_to_show); -@@ -314,9 +325,8 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) - - while (filter_step > 0) - { -- Bias = (best_err >> (15 - (filt_mid / 8))) * filter_step; //PGW change 12/12/06 for small images -+ Bias = (best_err >> (15 - (filt_mid / 8))) * filter_step; - -- // jbb chg: 20100118 - in sections with lots of new material coming in don't bias as much to a low filter value - if (cpi->twopass.section_intra_rating < 20) - Bias = Bias * cpi->twopass.section_intra_rating / 20; - -@@ -327,7 +337,7 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) - { - if(ss_err[filt_low] == 0) - { -- // Get Low filter error score -+ /* Get Low filter error score */ - vp8_yv12_copy_y(saved_frame, cm->frame_to_show); - vp8cx_set_alt_lf_level(cpi, filt_low); - vp8_loop_filter_frame_yonly(cm, &cpi->mb.e_mbd, filt_low); -@@ -338,10 +348,12 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) - else - filt_err = ss_err[filt_low]; - -- // If value is close to the best so far then bias towards a lower loop filter value. -+ /* If value is close to the best so far then bias towards a -+ * lower loop filter value. -+ */ - if ((filt_err - Bias) < best_err) - { -- // Was it actually better than the previous best? -+ /* Was it actually better than the previous best? */ - if (filt_err < best_err) - best_err = filt_err; - -@@ -349,7 +361,7 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) - } - } - -- // Now look at filt_high -+ /* Now look at filt_high */ - if ((filt_direction >= 0) && (filt_high != filt_mid)) - { - if(ss_err[filt_high] == 0) -@@ -364,7 +376,7 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) - else - filt_err = ss_err[filt_high]; - -- // Was it better than the previous best? -+ /* Was it better than the previous best? 
*/ - if (filt_err < (best_err - Bias)) - { - best_err = filt_err; -@@ -372,7 +384,9 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) - } - } - -- // Half the step distance if the best filter value was the same as last time -+ /* Half the step distance if the best filter value was the same -+ * as last time -+ */ - if (filt_best == filt_mid) - { - filter_step = filter_step / 2; -diff --git a/vp8/encoder/psnr.c b/vp8/encoder/psnr.c -index 5119bb8..5bb49ad 100644 ---- a/vp8/encoder/psnr.c -+++ b/vp8/encoder/psnr.c -@@ -22,7 +22,7 @@ double vp8_mse2psnr(double Samples, double Peak, double Mse) - if ((double)Mse > 0.0) - psnr = 10.0 * log10(Peak * Peak * Samples / Mse); - else -- psnr = MAX_PSNR; // Limit to prevent / 0 -+ psnr = MAX_PSNR; /* Limit to prevent / 0 */ - - if (psnr > MAX_PSNR) - psnr = MAX_PSNR; -diff --git a/vp8/encoder/quantize.c b/vp8/encoder/quantize.c -index 766d2b2..33c8ef0 100644 ---- a/vp8/encoder/quantize.c -+++ b/vp8/encoder/quantize.c -@@ -44,21 +44,21 @@ void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d) - z = coeff_ptr[rc]; - zbin = zbin_ptr[rc] ; - -- sz = (z >> 31); // sign of z -- x = (z ^ sz) - sz; // x = abs(z) -+ sz = (z >> 31); /* sign of z */ -+ x = (z ^ sz) - sz; /* x = abs(z) */ - - if (x >= zbin) - { - x += round_ptr[rc]; - y = (((x * quant_ptr[rc]) >> 16) + x) -- >> quant_shift_ptr[rc]; // quantize (x) -- x = (y ^ sz) - sz; // get the sign back -- qcoeff_ptr[rc] = x; // write to destination -- dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value -+ >> quant_shift_ptr[rc]; /* quantize (x) */ -+ x = (y ^ sz) - sz; /* get the sign back */ -+ qcoeff_ptr[rc] = x; /* write to destination */ -+ dqcoeff_ptr[rc] = x * dequant_ptr[rc]; /* dequantized value */ - - if (y) - { -- eob = i; // last nonzero coeffs -+ eob = i; /* last nonzero coeffs */ - } - } - } -@@ -84,17 +84,17 @@ void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d) - rc = vp8_default_zig_zag1d[i]; - z = coeff_ptr[rc]; - -- sz = (z >> 31); // sign of z -- x = (z ^ sz) - sz; // x = abs(z) -+ sz = (z >> 31); /* sign of z */ -+ x = (z ^ sz) - sz; /* x = abs(z) */ - -- y = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; // quantize (x) -- x = (y ^ sz) - sz; // get the sign back -- qcoeff_ptr[rc] = x; // write to destination -- dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value -+ y = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; /* quantize (x) */ -+ x = (y ^ sz) - sz; /* get the sign back */ -+ qcoeff_ptr[rc] = x; /* write to destination */ -+ dqcoeff_ptr[rc] = x * dequant_ptr[rc]; /* dequantized value */ - - if (y) - { -- eob = i; // last nonzero coeffs -+ eob = i; /* last nonzero coeffs */ - } - } - *d->eob = (char)(eob + 1); -@@ -132,22 +132,22 @@ void vp8_regular_quantize_b_c(BLOCK *b, BLOCKD *d) - zbin = zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value; - - zbin_boost_ptr ++; -- sz = (z >> 31); // sign of z -- x = (z ^ sz) - sz; // x = abs(z) -+ sz = (z >> 31); /* sign of z */ -+ x = (z ^ sz) - sz; /* x = abs(z) */ - - if (x >= zbin) - { - x += round_ptr[rc]; - y = (((x * quant_ptr[rc]) >> 16) + x) -- >> quant_shift_ptr[rc]; // quantize (x) -- x = (y ^ sz) - sz; // get the sign back -- qcoeff_ptr[rc] = x; // write to destination -- dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value -+ >> quant_shift_ptr[rc]; /* quantize (x) */ -+ x = (y ^ sz) - sz; /* get the sign back */ -+ qcoeff_ptr[rc] = x; /* write to destination */ -+ dqcoeff_ptr[rc] = x * dequant_ptr[rc]; /* dequantized value */ - - if (y) - { -- eob = i; // last nonzero coeffs -- zbin_boost_ptr = 
b->zrun_zbin_boost; // reset zero runlength -+ eob = i; /* last nonzero coeffs */ -+ zbin_boost_ptr = b->zrun_zbin_boost; /* reset zero runlength */ - } - } - } -@@ -240,26 +240,23 @@ void vp8_regular_quantize_b_c(BLOCK *b, BLOCKD *d) - rc = vp8_default_zig_zag1d[i]; - z = coeff_ptr[rc]; - -- //if ( i == 0 ) -- // zbin = zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value/2; -- //else - zbin = zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value; - - zbin_boost_ptr ++; -- sz = (z >> 31); // sign of z -- x = (z ^ sz) - sz; // x = abs(z) -+ sz = (z >> 31); /* sign of z */ -+ x = (z ^ sz) - sz; /* x = abs(z) */ - - if (x >= zbin) - { -- y = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; // quantize (x) -- x = (y ^ sz) - sz; // get the sign back -- qcoeff_ptr[rc] = x; // write to destination -- dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value -+ y = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; /* quantize (x) */ -+ x = (y ^ sz) - sz; /* get the sign back */ -+ qcoeff_ptr[rc] = x; /* write to destination */ -+ dqcoeff_ptr[rc] = x * dequant_ptr[rc]; /* dequantized value */ - - if (y) - { -- eob = i; // last nonzero coeffs -- zbin_boost_ptr = &b->zrun_zbin_boost[0]; // reset zero runlength -+ eob = i; /* last nonzero coeffs */ -+ zbin_boost_ptr = &b->zrun_zbin_boost[0]; /* reset zrl */ - } - } - } -@@ -441,7 +438,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi) - - for (Q = 0; Q < QINDEX_RANGE; Q++) - { -- // dc values -+ /* dc values */ - quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q); - cpi->Y1quant_fast[Q][0] = (1 << 16) / quant_val; - invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + 0, -@@ -469,7 +466,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi) - cpi->common.UVdequant[Q][0] = quant_val; - cpi->zrun_zbin_boost_uv[Q][0] = (quant_val * zbin_boost[0]) >> 7; - -- // all the ac values = ; -+ /* all the ac values = ; */ - quant_val = vp8_ac_yquant(Q); - cpi->Y1quant_fast[Q][1] = (1 << 16) / quant_val; - invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + 1, -@@ -536,7 +533,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi) - - for (Q = 0; Q < QINDEX_RANGE; Q++) - { -- // dc values -+ /* dc values */ - quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q); - cpi->Y1quant[Q][0] = (1 << 16) / quant_val; - cpi->Y1zbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7; -@@ -558,7 +555,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi) - cpi->common.UVdequant[Q][0] = quant_val; - cpi->zrun_zbin_boost_uv[Q][0] = (quant_val * zbin_boost[0]) >> 7; - -- // all the ac values = ; -+ /* all the ac values = ; */ - for (i = 1; i < 16; i++) - { - int rc = vp8_default_zig_zag1d[i]; -@@ -590,20 +587,20 @@ void vp8cx_init_quantizer(VP8_COMP *cpi) - - #define ZBIN_EXTRA_Y \ - (( cpi->common.Y1dequant[QIndex][1] * \ -- ( cpi->zbin_over_quant + \ -- cpi->zbin_mode_boost + \ -+ ( x->zbin_over_quant + \ -+ x->zbin_mode_boost + \ - x->act_zbin_adj ) ) >> 7) - - #define ZBIN_EXTRA_UV \ - (( cpi->common.UVdequant[QIndex][1] * \ -- ( cpi->zbin_over_quant + \ -- cpi->zbin_mode_boost + \ -+ ( x->zbin_over_quant + \ -+ x->zbin_mode_boost + \ - x->act_zbin_adj ) ) >> 7) - - #define ZBIN_EXTRA_Y2 \ - (( cpi->common.Y2dequant[QIndex][1] * \ -- ( (cpi->zbin_over_quant / 2) + \ -- cpi->zbin_mode_boost + \ -+ ( (x->zbin_over_quant / 2) + \ -+ x->zbin_mode_boost + \ - x->act_zbin_adj ) ) >> 7) - - void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip) -@@ -613,18 +610,18 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip) - MACROBLOCKD *xd = &x->e_mbd; - int zbin_extra; - -- // Select 
the baseline MB Q index. -+ /* Select the baseline MB Q index. */ - if (xd->segmentation_enabled) - { -- // Abs Value -+ /* Abs Value */ - if (xd->mb_segement_abs_delta == SEGMENT_ABSDATA) -- - QIndex = xd->segment_feature_data[MB_LVL_ALT_Q][xd->mode_info_context->mbmi.segment_id]; -- // Delta Value -+ /* Delta Value */ - else - { - QIndex = cpi->common.base_qindex + xd->segment_feature_data[MB_LVL_ALT_Q][xd->mode_info_context->mbmi.segment_id]; -- QIndex = (QIndex >= 0) ? ((QIndex <= MAXQ) ? QIndex : MAXQ) : 0; // Clamp to valid range -+ /* Clamp to valid range */ -+ QIndex = (QIndex >= 0) ? ((QIndex <= MAXQ) ? QIndex : MAXQ) : 0; - } - } - else -@@ -657,13 +654,13 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip) - * This will also require modifications to the x86 and neon assembly. - * */ - for (i = 0; i < 16; i++) -- x->e_mbd.block[i].dequant = xd->dequant_y1; //cpi->common.Y1dequant[QIndex]; -+ x->e_mbd.block[i].dequant = xd->dequant_y1; - for (i = 16; i < 24; i++) -- x->e_mbd.block[i].dequant = xd->dequant_uv; //cpi->common.UVdequant[QIndex]; -- x->e_mbd.block[24].dequant = xd->dequant_y2; //cpi->common.Y2dequant[QIndex]; -+ x->e_mbd.block[i].dequant = xd->dequant_uv; -+ x->e_mbd.block[24].dequant = xd->dequant_y2; - #endif - -- // Y -+ /* Y */ - zbin_extra = ZBIN_EXTRA_Y; - - for (i = 0; i < 16; i++) -@@ -677,7 +674,7 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip) - x->block[i].zbin_extra = (short)zbin_extra; - } - -- // UV -+ /* UV */ - zbin_extra = ZBIN_EXTRA_UV; - - for (i = 16; i < 24; i++) -@@ -691,7 +688,7 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip) - x->block[i].zbin_extra = (short)zbin_extra; - } - -- // Y2 -+ /* Y2 */ - zbin_extra = ZBIN_EXTRA_Y2; - - x->block[24].quant_fast = cpi->Y2quant_fast[QIndex]; -@@ -705,35 +702,35 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip) - /* save this macroblock QIndex for vp8_update_zbin_extra() */ - x->q_index = QIndex; - -- cpi->last_zbin_over_quant = cpi->zbin_over_quant; -- cpi->last_zbin_mode_boost = cpi->zbin_mode_boost; -+ x->last_zbin_over_quant = x->zbin_over_quant; -+ x->last_zbin_mode_boost = x->zbin_mode_boost; - x->last_act_zbin_adj = x->act_zbin_adj; - - - - } -- else if(cpi->last_zbin_over_quant != cpi->zbin_over_quant -- || cpi->last_zbin_mode_boost != cpi->zbin_mode_boost -+ else if(x->last_zbin_over_quant != x->zbin_over_quant -+ || x->last_zbin_mode_boost != x->zbin_mode_boost - || x->last_act_zbin_adj != x->act_zbin_adj) - { -- // Y -+ /* Y */ - zbin_extra = ZBIN_EXTRA_Y; - - for (i = 0; i < 16; i++) - x->block[i].zbin_extra = (short)zbin_extra; - -- // UV -+ /* UV */ - zbin_extra = ZBIN_EXTRA_UV; - - for (i = 16; i < 24; i++) - x->block[i].zbin_extra = (short)zbin_extra; - -- // Y2 -+ /* Y2 */ - zbin_extra = ZBIN_EXTRA_Y2; - x->block[24].zbin_extra = (short)zbin_extra; - -- cpi->last_zbin_over_quant = cpi->zbin_over_quant; -- cpi->last_zbin_mode_boost = cpi->zbin_mode_boost; -+ x->last_zbin_over_quant = x->zbin_over_quant; -+ x->last_zbin_mode_boost = x->zbin_mode_boost; - x->last_act_zbin_adj = x->act_zbin_adj; - } - } -@@ -744,19 +741,19 @@ void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x) - int QIndex = x->q_index; - int zbin_extra; - -- // Y -+ /* Y */ - zbin_extra = ZBIN_EXTRA_Y; - - for (i = 0; i < 16; i++) - x->block[i].zbin_extra = (short)zbin_extra; - -- // UV -+ /* UV */ - zbin_extra = ZBIN_EXTRA_UV; - - for (i = 16; i < 24; i++) - x->block[i].zbin_extra = (short)zbin_extra; - -- 
// Y2 -+ /* Y2 */ - zbin_extra = ZBIN_EXTRA_Y2; - x->block[24].zbin_extra = (short)zbin_extra; - } -@@ -766,10 +763,10 @@ void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x) - - void vp8cx_frame_init_quantizer(VP8_COMP *cpi) - { -- // Clear Zbin mode boost for default case -- cpi->zbin_mode_boost = 0; -+ /* Clear Zbin mode boost for default case */ -+ cpi->mb.zbin_mode_boost = 0; - -- // MB level quantizer setup -+ /* MB level quantizer setup */ - vp8cx_mb_init_quantizer(cpi, &cpi->mb, 0); - } - -@@ -801,7 +798,7 @@ void vp8_set_quantizer(struct VP8_COMP *cpi, int Q) - cm->y2dc_delta_q = new_delta_q; - - -- // Set Segment specific quatizers -+ /* Set Segment specific quatizers */ - mbd->segment_feature_data[MB_LVL_ALT_Q][0] = cpi->segment_feature_data[MB_LVL_ALT_Q][0]; - mbd->segment_feature_data[MB_LVL_ALT_Q][1] = cpi->segment_feature_data[MB_LVL_ALT_Q][1]; - mbd->segment_feature_data[MB_LVL_ALT_Q][2] = cpi->segment_feature_data[MB_LVL_ALT_Q][2]; -diff --git a/vp8/encoder/ratectrl.c b/vp8/encoder/ratectrl.c -index 472e85f..a399a38 100644 ---- a/vp8/encoder/ratectrl.c -+++ b/vp8/encoder/ratectrl.c -@@ -41,15 +41,16 @@ extern int inter_uv_modes[4]; - extern int inter_b_modes[10]; - #endif - --// Bits Per MB at different Q (Multiplied by 512) -+/* Bits Per MB at different Q (Multiplied by 512) */ - #define BPER_MB_NORMBITS 9 - --// Work in progress recalibration of baseline rate tables based on --// the assumption that bits per mb is inversely proportional to the --// quantizer value. -+/* Work in progress recalibration of baseline rate tables based on -+ * the assumption that bits per mb is inversely proportional to the -+ * quantizer value. -+ */ - const int vp8_bits_per_mb[2][QINDEX_RANGE] = - { -- // Intra case 450000/Qintra -+ /* Intra case 450000/Qintra */ - { - 1125000,900000, 750000, 642857, 562500, 500000, 450000, 450000, - 409090, 375000, 346153, 321428, 300000, 281250, 264705, 264705, -@@ -68,7 +69,7 @@ const int vp8_bits_per_mb[2][QINDEX_RANGE] = - 36885, 36290, 35714, 35156, 34615, 34090, 33582, 33088, - 32608, 32142, 31468, 31034, 30405, 29801, 29220, 28662, - }, -- // Inter case 285000/Qinter -+ /* Inter case 285000/Qinter */ - { - 712500, 570000, 475000, 407142, 356250, 316666, 285000, 259090, - 237500, 219230, 203571, 190000, 178125, 167647, 158333, 150000, -@@ -109,7 +110,7 @@ static const int kf_boost_qadjustment[QINDEX_RANGE] = - 220, 220, 220, 220, 220, 220, 220, 220, - }; - --//#define GFQ_ADJUSTMENT (Q+100) -+/* #define GFQ_ADJUSTMENT (Q+100) */ - #define GFQ_ADJUSTMENT vp8_gf_boost_qadjustment[Q] - const int vp8_gf_boost_qadjustment[QINDEX_RANGE] = - { -@@ -173,7 +174,7 @@ static const int kf_gf_boost_qlimits[QINDEX_RANGE] = - 600, 600, 600, 600, 600, 600, 600, 600, - }; - --// % adjustment to target kf size based on seperation from previous frame -+/* % adjustment to target kf size based on seperation from previous frame */ - static const int kf_boost_seperation_adjustment[16] = - { - 30, 40, 50, 55, 60, 65, 70, 75, -@@ -224,10 +225,11 @@ void vp8_save_coding_context(VP8_COMP *cpi) - { - CODING_CONTEXT *const cc = & cpi->coding_context; - -- // Stores a snapshot of key state variables which can subsequently be -- // restored with a call to vp8_restore_coding_context. These functions are -- // intended for use in a re-code loop in vp8_compress_frame where the -- // quantizer value is adjusted between loop iterations. -+ /* Stores a snapshot of key state variables which can subsequently be -+ * restored with a call to vp8_restore_coding_context. 
These functions are -+ * intended for use in a re-code loop in vp8_compress_frame where the -+ * quantizer value is adjusted between loop iterations. -+ */ - - cc->frames_since_key = cpi->frames_since_key; - cc->filter_level = cpi->common.filter_level; -@@ -235,18 +237,16 @@ void vp8_save_coding_context(VP8_COMP *cpi) - cc->frames_since_golden = cpi->common.frames_since_golden; - - vp8_copy(cc->mvc, cpi->common.fc.mvc); -- vp8_copy(cc->mvcosts, cpi->mb.mvcosts); -+ vp8_copy(cc->mvcosts, cpi->rd_costs.mvcosts); - -- vp8_copy(cc->kf_ymode_prob, cpi->common.kf_ymode_prob); - vp8_copy(cc->ymode_prob, cpi->common.fc.ymode_prob); -- vp8_copy(cc->kf_uv_mode_prob, cpi->common.kf_uv_mode_prob); - vp8_copy(cc->uv_mode_prob, cpi->common.fc.uv_mode_prob); - -- vp8_copy(cc->ymode_count, cpi->ymode_count); -- vp8_copy(cc->uv_mode_count, cpi->uv_mode_count); -+ vp8_copy(cc->ymode_count, cpi->mb.ymode_count); -+ vp8_copy(cc->uv_mode_count, cpi->mb.uv_mode_count); - - -- // Stats -+ /* Stats */ - #ifdef MODE_STATS - vp8_copy(cc->y_modes, y_modes); - vp8_copy(cc->uv_modes, uv_modes); -@@ -264,8 +264,9 @@ void vp8_restore_coding_context(VP8_COMP *cpi) - { - CODING_CONTEXT *const cc = & cpi->coding_context; - -- // Restore key state variables to the snapshot state stored in the -- // previous call to vp8_save_coding_context. -+ /* Restore key state variables to the snapshot state stored in the -+ * previous call to vp8_save_coding_context. -+ */ - - cpi->frames_since_key = cc->frames_since_key; - cpi->common.filter_level = cc->filter_level; -@@ -274,17 +275,15 @@ void vp8_restore_coding_context(VP8_COMP *cpi) - - vp8_copy(cpi->common.fc.mvc, cc->mvc); - -- vp8_copy(cpi->mb.mvcosts, cc->mvcosts); -+ vp8_copy(cpi->rd_costs.mvcosts, cc->mvcosts); - -- vp8_copy(cpi->common.kf_ymode_prob, cc->kf_ymode_prob); - vp8_copy(cpi->common.fc.ymode_prob, cc->ymode_prob); -- vp8_copy(cpi->common.kf_uv_mode_prob, cc->kf_uv_mode_prob); - vp8_copy(cpi->common.fc.uv_mode_prob, cc->uv_mode_prob); - -- vp8_copy(cpi->ymode_count, cc->ymode_count); -- vp8_copy(cpi->uv_mode_count, cc->uv_mode_count); -+ vp8_copy(cpi->mb.ymode_count, cc->ymode_count); -+ vp8_copy(cpi->mb.uv_mode_count, cc->uv_mode_count); - -- // Stats -+ /* Stats */ - #ifdef MODE_STATS - vp8_copy(y_modes, cc->y_modes); - vp8_copy(uv_modes, cc->uv_modes); -@@ -301,36 +300,30 @@ void vp8_restore_coding_context(VP8_COMP *cpi) - - void vp8_setup_key_frame(VP8_COMP *cpi) - { -- // Setup for Key frame: -+ /* Setup for Key frame: */ - - vp8_default_coef_probs(& cpi->common); - -- -- vp8_kf_default_bmode_probs(cpi->common.kf_bmode_prob); -- - vpx_memcpy(cpi->common.fc.mvc, vp8_default_mv_context, sizeof(vp8_default_mv_context)); - { - int flag[2] = {1, 1}; - vp8_build_component_cost_table(cpi->mb.mvcost, (const MV_CONTEXT *) cpi->common.fc.mvc, flag); - } - -- vpx_memset(cpi->common.fc.pre_mvc, 0, sizeof(cpi->common.fc.pre_mvc)); //initialize pre_mvc to all zero. -- -- // Make sure we initialize separate contexts for altref,gold, and normal. -- // TODO shouldn't need 3 different copies of structure to do this! -+ /* Make sure we initialize separate contexts for altref,gold, and normal. -+ * TODO shouldn't need 3 different copies of structure to do this! -+ */ - vpx_memcpy(&cpi->lfc_a, &cpi->common.fc, sizeof(cpi->common.fc)); - vpx_memcpy(&cpi->lfc_g, &cpi->common.fc, sizeof(cpi->common.fc)); - vpx_memcpy(&cpi->lfc_n, &cpi->common.fc, sizeof(cpi->common.fc)); - -- //cpi->common.filter_level = 0; // Reset every key frame. 
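/*
 * [Editor's illustrative aside -- not part of the patch above.]
 * vp8_save_coding_context() / vp8_restore_coding_context() snapshot
 * entropy and rate-control state so the encoder can re-code a frame at a
 * different quantizer and roll the state back between attempts.  Below is
 * a minimal sketch of that snapshot/re-code pattern using hypothetical
 * types and names, not the libvpx structures themselves.
 */
#include <string.h>

typedef struct
{
    int mv_costs[2][64];    /* placeholder for per-frame adaptive state */
    int mode_probs[10];
} coding_state;

static void save_state(coding_state *snap, const coding_state *live)
{
    memcpy(snap, live, sizeof(*snap));
}

static void restore_state(coding_state *live, const coding_state *snap)
{
    memcpy(live, snap, sizeof(*live));
}

/* Re-code loop: raise the quantizer and retry until the frame fits. */
static int encode_with_recode(coding_state *live, int target_size,
                              int (*encode_frame)(coding_state *, int q))
{
    coding_state snap;
    int q = 40;           /* arbitrary starting quantizer for the sketch */
    int size;

    for (;;)
    {
        save_state(&snap, live);           /* snapshot before the attempt */
        size = encode_frame(live, q);
        if (size <= target_size || q >= 63)
            break;
        restore_state(live, &snap);        /* roll back side effects */
        q++;                               /* coarser quantizer, fewer bits */
    }
    return size;
}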
- cpi->common.filter_level = cpi->common.base_qindex * 3 / 8 ; - -- // Provisional interval before next GF -+ /* Provisional interval before next GF */ - if (cpi->auto_gold) -- //cpi->frames_till_gf_update_due = DEFAULT_GF_INTERVAL; - cpi->frames_till_gf_update_due = cpi->baseline_gf_interval; - else -- cpi->frames_till_gf_update_due = cpi->goldfreq; -+ cpi->frames_till_gf_update_due = DEFAULT_GF_INTERVAL; - - cpi->common.refresh_golden_frame = 1; - cpi->common.refresh_alt_ref_frame = 1; -@@ -355,12 +348,12 @@ static int estimate_bits_at_q(int frame_kind, int Q, int MBs, - - static void calc_iframe_target_size(VP8_COMP *cpi) - { -- // boost defaults to half second -+ /* boost defaults to half second */ - int kf_boost; -- int target; -+ uint64_t target; - -- // Clear down mmx registers to allow floating point in what follows -- vp8_clear_system_state(); //__asm emms; -+ /* Clear down mmx registers to allow floating point in what follows */ -+ vp8_clear_system_state(); - - if (cpi->oxcf.fixed_q >= 0) - { -@@ -371,10 +364,10 @@ static void calc_iframe_target_size(VP8_COMP *cpi) - } - else if (cpi->pass == 2) - { -- // New Two pass RC -+ /* New Two pass RC */ - target = cpi->per_frame_bandwidth; - } -- // First Frame is a special case -+ /* First Frame is a special case */ - else if (cpi->common.current_video_frame == 0) - { - /* 1 Pass there is no information on which to base size so use -@@ -388,29 +381,29 @@ static void calc_iframe_target_size(VP8_COMP *cpi) - } - else - { -- // if this keyframe was forced, use a more recent Q estimate -+ /* if this keyframe was forced, use a more recent Q estimate */ - int Q = (cpi->common.frame_flags & FRAMEFLAGS_KEY) - ? cpi->avg_frame_qindex : cpi->ni_av_qi; - -- int initial_boost = 24; // Corresponds to: |2.5 * per_frame_bandwidth| -- // Boost depends somewhat on frame rate: only used for 1 layer case. -+ int initial_boost = 32; /* |3.0 * per_frame_bandwidth| */ -+ /* Boost depends somewhat on frame rate: only used for 1 layer case. */ - if (cpi->oxcf.number_of_layers == 1) { - kf_boost = MAX(initial_boost, (int)(2 * cpi->output_frame_rate - 16)); - } - else { -- // Initial factor: set target size to: |2.5 * per_frame_bandwidth|. -+ /* Initial factor: set target size to: |3.0 * per_frame_bandwidth|. */ - kf_boost = initial_boost; - } - -- // adjustment up based on q: this factor ranges from ~1.2 to 2.2. -+ /* adjustment up based on q: this factor ranges from ~1.2 to 2.2. */ - kf_boost = kf_boost * kf_boost_qadjustment[Q] / 100; - -- // frame separation adjustment ( down) -+ /* frame separation adjustment ( down) */ - if (cpi->frames_since_key < cpi->output_frame_rate / 2) - kf_boost = (int)(kf_boost - * cpi->frames_since_key / (cpi->output_frame_rate / 2)); - -- // Minimal target size is |2* per_frame_bandwidth|. -+ /* Minimal target size is |2* per_frame_bandwidth|. */ - if (kf_boost < 16) - kf_boost = 16; - -@@ -427,10 +420,11 @@ static void calc_iframe_target_size(VP8_COMP *cpi) - target = max_rate; - } - -- cpi->this_frame_target = target; -+ cpi->this_frame_target = (int)target; - -- // TODO: if we separate rate targeting from Q targetting, move this. -- // Reset the active worst quality to the baseline value for key frames. -+ /* TODO: if we separate rate targeting from Q targetting, move this. -+ * Reset the active worst quality to the baseline value for key frames. 
-+ */ - if (cpi->pass != 2) - cpi->active_worst_quality = cpi->worst_quality; - -@@ -439,9 +433,6 @@ static void calc_iframe_target_size(VP8_COMP *cpi) - FILE *f; - - f = fopen("kf_boost.stt", "a"); -- //fprintf(f, " %8d %10d %10d %10d %10d %10d %10d\n", -- // cpi->common.current_video_frame, cpi->target_bandwidth, cpi->frames_to_key, kf_boost_qadjustment[cpi->ni_av_qi], cpi->kf_boost, (cpi->this_frame_target *100 / cpi->per_frame_bandwidth), cpi->this_frame_target ); -- - fprintf(f, " %8u %10d %10d %10d\n", - cpi->common.current_video_frame, cpi->gfu_boost, cpi->baseline_gf_interval, cpi->source_alt_ref_pending); - -@@ -451,14 +442,15 @@ static void calc_iframe_target_size(VP8_COMP *cpi) - } - - --// Do the best we can to define the parameters for the next GF based on what --// information we have available. -+/* Do the best we can to define the parameters for the next GF based on what -+ * information we have available. -+ */ - static void calc_gf_params(VP8_COMP *cpi) - { - int Q = (cpi->oxcf.fixed_q < 0) ? cpi->last_q[INTER_FRAME] : cpi->oxcf.fixed_q; - int Boost = 0; - -- int gf_frame_useage = 0; // Golden frame useage since last GF -+ int gf_frame_useage = 0; /* Golden frame useage since last GF */ - int tot_mbs = cpi->recent_ref_frame_usage[INTRA_FRAME] + - cpi->recent_ref_frame_usage[LAST_FRAME] + - cpi->recent_ref_frame_usage[GOLDEN_FRAME] + -@@ -466,33 +458,30 @@ static void calc_gf_params(VP8_COMP *cpi) - - int pct_gf_active = (100 * cpi->gf_active_count) / (cpi->common.mb_rows * cpi->common.mb_cols); - -- // Reset the last boost indicator -- //cpi->last_boost = 100; -- - if (tot_mbs) - gf_frame_useage = (cpi->recent_ref_frame_usage[GOLDEN_FRAME] + cpi->recent_ref_frame_usage[ALTREF_FRAME]) * 100 / tot_mbs; - - if (pct_gf_active > gf_frame_useage) - gf_frame_useage = pct_gf_active; - -- // Not two pass -+ /* Not two pass */ - if (cpi->pass != 2) - { -- // Single Pass lagged mode: TBD -+ /* Single Pass lagged mode: TBD */ - if (0) - { - } - -- // Single Pass compression: Has to use current and historical data -+ /* Single Pass compression: Has to use current and historical data */ - else - { - #if 0 -- // Experimental code -+ /* Experimental code */ - int index = cpi->one_pass_frame_index; - int frames_to_scan = (cpi->max_gf_interval <= MAX_LAG_BUFFERS) ? cpi->max_gf_interval : MAX_LAG_BUFFERS; - -+ /* ************** Experimental code - incomplete */ - /* -- // *************** Experimental code - incomplete - double decay_val = 1.0; - double IIAccumulator = 0.0; - double last_iiaccumulator = 0.0; -@@ -535,48 +524,51 @@ static void calc_gf_params(VP8_COMP *cpi) - #else - - /*************************************************************/ -- // OLD code -+ /* OLD code */ - -- // Adjust boost based upon ambient Q -+ /* Adjust boost based upon ambient Q */ - Boost = GFQ_ADJUSTMENT; - -- // Adjust based upon most recently measure intra useage -+ /* Adjust based upon most recently measure intra useage */ - Boost = Boost * gf_intra_usage_adjustment[(cpi->this_frame_percent_intra < 15) ? cpi->this_frame_percent_intra : 14] / 100; - -- // Adjust gf boost based upon GF usage since last GF -+ /* Adjust gf boost based upon GF usage since last GF */ - Boost = Boost * gf_adjust_table[gf_frame_useage] / 100; - #endif - } - -- // golden frame boost without recode loop often goes awry. be safe by keeping numbers down. -+ /* golden frame boost without recode loop often goes awry. be -+ * safe by keeping numbers down. 
-+ */ - if (!cpi->sf.recode_loop) - { - if (cpi->compressor_speed == 2) - Boost = Boost / 2; - } - -- // Apply an upper limit based on Q for 1 pass encodes -+ /* Apply an upper limit based on Q for 1 pass encodes */ - if (Boost > kf_gf_boost_qlimits[Q] && (cpi->pass == 0)) - Boost = kf_gf_boost_qlimits[Q]; - -- // Apply lower limits to boost. -+ /* Apply lower limits to boost. */ - else if (Boost < 110) - Boost = 110; - -- // Note the boost used -+ /* Note the boost used */ - cpi->last_boost = Boost; - - } - -- // Estimate next interval -- // This is updated once the real frame size/boost is known. -+ /* Estimate next interval -+ * This is updated once the real frame size/boost is known. -+ */ - if (cpi->oxcf.fixed_q == -1) - { -- if (cpi->pass == 2) // 2 Pass -+ if (cpi->pass == 2) /* 2 Pass */ - { - cpi->frames_till_gf_update_due = cpi->baseline_gf_interval; - } -- else // 1 Pass -+ else /* 1 Pass */ - { - cpi->frames_till_gf_update_due = cpi->baseline_gf_interval; - -@@ -602,10 +594,10 @@ static void calc_gf_params(VP8_COMP *cpi) - else - cpi->frames_till_gf_update_due = cpi->baseline_gf_interval; - -- // ARF on or off -+ /* ARF on or off */ - if (cpi->pass != 2) - { -- // For now Alt ref is not allowed except in 2 pass modes. -+ /* For now Alt ref is not allowed except in 2 pass modes. */ - cpi->source_alt_ref_pending = 0; - - /*if ( cpi->oxcf.fixed_q == -1) -@@ -642,89 +634,34 @@ static void calc_pframe_target_size(VP8_COMP *cpi) - min_frame_target = cpi->per_frame_bandwidth / 4; - - -- // Special alt reference frame case -+ /* Special alt reference frame case */ - if((cpi->common.refresh_alt_ref_frame) && (cpi->oxcf.number_of_layers == 1)) - { - if (cpi->pass == 2) - { -- cpi->per_frame_bandwidth = cpi->twopass.gf_bits; // Per frame bit target for the alt ref frame -+ /* Per frame bit target for the alt ref frame */ -+ cpi->per_frame_bandwidth = cpi->twopass.gf_bits; - cpi->this_frame_target = cpi->per_frame_bandwidth; - } - - /* One Pass ??? TBD */ -- /*else -- { -- int frames_in_section; -- int allocation_chunks; -- int Q = (cpi->oxcf.fixed_q < 0) ? cpi->last_q[INTER_FRAME] : cpi->oxcf.fixed_q; -- int alt_boost; -- int max_arf_rate; -- -- alt_boost = (cpi->gfu_boost * 3 * GFQ_ADJUSTMENT) / (2 * 100); -- alt_boost += (cpi->frames_till_gf_update_due * 50); -- -- // If alt ref is not currently active then we have a pottential double hit with GF and ARF so reduce the boost a bit. -- // A similar thing is done on GFs that preceed a arf update. -- if ( !cpi->source_alt_ref_active ) -- alt_boost = alt_boost * 3 / 4; -- -- frames_in_section = cpi->frames_till_gf_update_due+1; // Standard frames + GF -- allocation_chunks = (frames_in_section * 100) + alt_boost; -- -- // Normalize Altboost and allocations chunck down to prevent overflow -- while ( alt_boost > 1000 ) -- { -- alt_boost /= 2; -- allocation_chunks /= 2; -- } -- -- else -- { -- int bits_in_section; -- -- if ( cpi->kf_overspend_bits > 0 ) -- { -- Adjustment = (cpi->kf_bitrate_adjustment <= cpi->kf_overspend_bits) ? cpi->kf_bitrate_adjustment : cpi->kf_overspend_bits; -- -- if ( Adjustment > (cpi->per_frame_bandwidth - min_frame_target) ) -- Adjustment = (cpi->per_frame_bandwidth - min_frame_target); -- -- cpi->kf_overspend_bits -= Adjustment; -- -- // Calculate an inter frame bandwidth target for the next few frames designed to recover -- // any extra bits spent on the key frame. 
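/*
 * [Editor's illustrative aside -- not part of the patch above.]
 * After an expensive key frame the rate control carries an "overspend"
 * debt and claws it back by trimming a bounded adjustment off the
 * per-frame bandwidth of the following inter frames, never dropping a
 * frame below its minimum target.  A stand-alone sketch of that recovery
 * step, with hypothetical names rather than the cpi->... fields:
 */
static int recover_kf_overspend(int per_frame_bandwidth, int min_frame_target,
                                int per_frame_payback, int *overspend_bits)
{
    int target = per_frame_bandwidth;

    if (*overspend_bits > 0)
    {
        /* Pay back at most the configured per-frame amount... */
        int adjustment = per_frame_payback < *overspend_bits
                             ? per_frame_payback : *overspend_bits;

        /* ...and never push the frame below its minimum size. */
        if (adjustment > per_frame_bandwidth - min_frame_target)
            adjustment = per_frame_bandwidth - min_frame_target;

        *overspend_bits -= adjustment;
        target = per_frame_bandwidth - adjustment;

        if (target < min_frame_target)
            target = min_frame_target;
    }
    return target;
}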
-- cpi->inter_frame_target = cpi->per_frame_bandwidth - Adjustment; -- if ( cpi->inter_frame_target < min_frame_target ) -- cpi->inter_frame_target = min_frame_target; -- } -- else -- cpi->inter_frame_target = cpi->per_frame_bandwidth; -- -- bits_in_section = cpi->inter_frame_target * frames_in_section; -- -- // Avoid loss of precision but avoid overflow -- if ( (bits_in_section>>7) > allocation_chunks ) -- cpi->this_frame_target = alt_boost * (bits_in_section / allocation_chunks); -- else -- cpi->this_frame_target = (alt_boost * bits_in_section) / allocation_chunks; -- } -- } -- */ - } - -- // Normal frames (gf,and inter) -+ /* Normal frames (gf,and inter) */ - else - { -- // 2 pass -+ /* 2 pass */ - if (cpi->pass == 2) - { - cpi->this_frame_target = cpi->per_frame_bandwidth; - } -- // 1 pass -+ /* 1 pass */ - else - { -- // Make rate adjustment to recover bits spent in key frame -- // Test to see if the key frame inter data rate correction should still be in force -+ /* Make rate adjustment to recover bits spent in key frame -+ * Test to see if the key frame inter data rate correction -+ * should still be in force -+ */ - if (cpi->kf_overspend_bits > 0) - { - Adjustment = (cpi->kf_bitrate_adjustment <= cpi->kf_overspend_bits) ? cpi->kf_bitrate_adjustment : cpi->kf_overspend_bits; -@@ -734,8 +671,10 @@ static void calc_pframe_target_size(VP8_COMP *cpi) - - cpi->kf_overspend_bits -= Adjustment; - -- // Calculate an inter frame bandwidth target for the next few frames designed to recover -- // any extra bits spent on the key frame. -+ /* Calculate an inter frame bandwidth target for the next -+ * few frames designed to recover any extra bits spent on -+ * the key frame. -+ */ - cpi->this_frame_target = cpi->per_frame_bandwidth - Adjustment; - - if (cpi->this_frame_target < min_frame_target) -@@ -744,7 +683,9 @@ static void calc_pframe_target_size(VP8_COMP *cpi) - else - cpi->this_frame_target = cpi->per_frame_bandwidth; - -- // If appropriate make an adjustment to recover bits spent on a recent GF -+ /* If appropriate make an adjustment to recover bits spent on a -+ * recent GF -+ */ - if ((cpi->gf_overspend_bits > 0) && (cpi->this_frame_target > min_frame_target)) - { - int Adjustment = (cpi->non_gf_bitrate_adjustment <= cpi->gf_overspend_bits) ? cpi->non_gf_bitrate_adjustment : cpi->gf_overspend_bits; -@@ -756,11 +697,11 @@ static void calc_pframe_target_size(VP8_COMP *cpi) - cpi->this_frame_target -= Adjustment; - } - -- // Apply small + and - boosts for non gf frames -+ /* Apply small + and - boosts for non gf frames */ - if ((cpi->last_boost > 150) && (cpi->frames_till_gf_update_due > 0) && - (cpi->current_gf_interval >= (MIN_GF_INTERVAL << 1))) - { -- // % Adjustment limited to the range 1% to 10% -+ /* % Adjustment limited to the range 1% to 10% */ - Adjustment = (cpi->last_boost - 100) >> 5; - - if (Adjustment < 1) -@@ -768,7 +709,7 @@ static void calc_pframe_target_size(VP8_COMP *cpi) - else if (Adjustment > 10) - Adjustment = 10; - -- // Convert to bits -+ /* Convert to bits */ - Adjustment = (cpi->this_frame_target * Adjustment) / 100; - - if (Adjustment > (cpi->this_frame_target - min_frame_target)) -@@ -782,47 +723,53 @@ static void calc_pframe_target_size(VP8_COMP *cpi) - } - } - -- // Sanity check that the total sum of adjustments is not above the maximum allowed -- // That is that having allowed for KF and GF penalties we have not pushed the -- // current interframe target to low. 
If the adjustment we apply here is not capable of recovering -- // all the extra bits we have spent in the KF or GF then the remainder will have to be recovered over -- // a longer time span via other buffer / rate control mechanisms. -+ /* Sanity check that the total sum of adjustments is not above the -+ * maximum allowed That is that having allowed for KF and GF penalties -+ * we have not pushed the current interframe target to low. If the -+ * adjustment we apply here is not capable of recovering all the extra -+ * bits we have spent in the KF or GF then the remainder will have to -+ * be recovered over a longer time span via other buffer / rate control -+ * mechanisms. -+ */ - if (cpi->this_frame_target < min_frame_target) - cpi->this_frame_target = min_frame_target; - - if (!cpi->common.refresh_alt_ref_frame) -- // Note the baseline target data rate for this inter frame. -+ /* Note the baseline target data rate for this inter frame. */ - cpi->inter_frame_target = cpi->this_frame_target; - -- // One Pass specific code -+ /* One Pass specific code */ - if (cpi->pass == 0) - { -- // Adapt target frame size with respect to any buffering constraints: -+ /* Adapt target frame size with respect to any buffering constraints: */ - if (cpi->buffered_mode) - { -- int one_percent_bits = 1 + cpi->oxcf.optimal_buffer_level / 100; -+ int one_percent_bits = (int) -+ (1 + cpi->oxcf.optimal_buffer_level / 100); - - if ((cpi->buffer_level < cpi->oxcf.optimal_buffer_level) || - (cpi->bits_off_target < cpi->oxcf.optimal_buffer_level)) - { - int percent_low = 0; - -- // Decide whether or not we need to adjust the frame data rate target. -- // -- // If we are are below the optimal buffer fullness level and adherence -- // to buffering constraints is important to the end usage then adjust -- // the per frame target. -+ /* Decide whether or not we need to adjust the frame data -+ * rate target. -+ * -+ * If we are are below the optimal buffer fullness level -+ * and adherence to buffering constraints is important to -+ * the end usage then adjust the per frame target. -+ */ - if ((cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) && - (cpi->buffer_level < cpi->oxcf.optimal_buffer_level)) - { -- percent_low = -- (cpi->oxcf.optimal_buffer_level - cpi->buffer_level) / -- one_percent_bits; -+ percent_low = (int) -+ ((cpi->oxcf.optimal_buffer_level - cpi->buffer_level) / -+ one_percent_bits); - } -- // Are we overshooting the long term clip data rate... -+ /* Are we overshooting the long term clip data rate... */ - else if (cpi->bits_off_target < 0) - { -- // Adjust per frame data target downwards to compensate. -+ /* Adjust per frame data target downwards to compensate. */ - percent_low = (int)(100 * -cpi->bits_off_target / - (cpi->total_byte_count * 8)); - } -@@ -832,40 +779,46 @@ static void calc_pframe_target_size(VP8_COMP *cpi) - else if (percent_low < 0) - percent_low = 0; - -- // lower the target bandwidth for this frame. -+ /* lower the target bandwidth for this frame. */ - cpi->this_frame_target -= - (cpi->this_frame_target * percent_low) / 200; - -- // Are we using allowing control of active_worst_allowed_q -- // according to buffer level. -+ /* Are we using allowing control of active_worst_allowed_q -+ * according to buffer level. -+ */ - if (cpi->auto_worst_q && cpi->ni_frames > 150) - { -- int critical_buffer_level; -- -- // For streaming applications the most important factor is -- // cpi->buffer_level as this takes into account the -- // specified short term buffering constraints. 
However, -- // hitting the long term clip data rate target is also -- // important. -+ int64_t critical_buffer_level; -+ -+ /* For streaming applications the most important factor is -+ * cpi->buffer_level as this takes into account the -+ * specified short term buffering constraints. However, -+ * hitting the long term clip data rate target is also -+ * important. -+ */ - if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) - { -- // Take the smaller of cpi->buffer_level and -- // cpi->bits_off_target -+ /* Take the smaller of cpi->buffer_level and -+ * cpi->bits_off_target -+ */ - critical_buffer_level = - (cpi->buffer_level < cpi->bits_off_target) - ? cpi->buffer_level : cpi->bits_off_target; - } -- // For local file playback short term buffering constraints -- // are less of an issue -+ /* For local file playback short term buffering constraints -+ * are less of an issue -+ */ - else - { -- // Consider only how we are doing for the clip as a -- // whole -+ /* Consider only how we are doing for the clip as a -+ * whole -+ */ - critical_buffer_level = cpi->bits_off_target; - } - -- // Set the active worst quality based upon the selected -- // buffer fullness number. -+ /* Set the active worst quality based upon the selected -+ * buffer fullness number. -+ */ - if (critical_buffer_level < cpi->oxcf.optimal_buffer_level) - { - if ( critical_buffer_level > -@@ -877,15 +830,16 @@ static void calc_pframe_target_size(VP8_COMP *cpi) - (critical_buffer_level - - (cpi->oxcf.optimal_buffer_level >> 2)); - -- // Step active worst quality down from -- // cpi->ni_av_qi when (critical_buffer_level == -- // cpi->optimal_buffer_level) to -- // cpi->worst_quality when -- // (critical_buffer_level == -- // cpi->optimal_buffer_level >> 2) -+ /* Step active worst quality down from -+ * cpi->ni_av_qi when (critical_buffer_level == -+ * cpi->optimal_buffer_level) to -+ * cpi->worst_quality when -+ * (critical_buffer_level == -+ * cpi->optimal_buffer_level >> 2) -+ */ - cpi->active_worst_quality = - cpi->worst_quality - -- ((qadjustment_range * above_base) / -+ (int)((qadjustment_range * above_base) / - (cpi->oxcf.optimal_buffer_level*3>>2)); - } - else -@@ -910,9 +864,9 @@ static void calc_pframe_target_size(VP8_COMP *cpi) - if ((cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) - && (cpi->buffer_level > cpi->oxcf.optimal_buffer_level)) - { -- percent_high = (cpi->buffer_level -+ percent_high = (int)((cpi->buffer_level - - cpi->oxcf.optimal_buffer_level) -- / one_percent_bits; -+ / one_percent_bits); - } - else if (cpi->bits_off_target > cpi->oxcf.optimal_buffer_level) - { -@@ -928,11 +882,14 @@ static void calc_pframe_target_size(VP8_COMP *cpi) - cpi->this_frame_target += (cpi->this_frame_target * - percent_high) / 200; - -- // Are we allowing control of active_worst_allowed_q according -- // to buffer level. -+ /* Are we allowing control of active_worst_allowed_q according -+ * to buffer level. 
-+ */ - if (cpi->auto_worst_q && cpi->ni_frames > 150) - { -- // When using the relaxed buffer model stick to the user specified value -+ /* When using the relaxed buffer model stick to the -+ * user specified value -+ */ - cpi->active_worst_quality = cpi->ni_av_qi; - } - else -@@ -941,26 +898,27 @@ static void calc_pframe_target_size(VP8_COMP *cpi) - } - } - -- // Set active_best_quality to prevent quality rising too high -+ /* Set active_best_quality to prevent quality rising too high */ - cpi->active_best_quality = cpi->best_quality; - -- // Worst quality obviously must not be better than best quality -+ /* Worst quality obviously must not be better than best quality */ - if (cpi->active_worst_quality <= cpi->active_best_quality) - cpi->active_worst_quality = cpi->active_best_quality + 1; - - if(cpi->active_worst_quality > 127) - cpi->active_worst_quality = 127; - } -- // Unbuffered mode (eg. video conferencing) -+ /* Unbuffered mode (eg. video conferencing) */ - else - { -- // Set the active worst quality -+ /* Set the active worst quality */ - cpi->active_worst_quality = cpi->worst_quality; - } - -- // Special trap for constrained quality mode -- // "active_worst_quality" may never drop below cq level -- // for any frame type. -+ /* Special trap for constrained quality mode -+ * "active_worst_quality" may never drop below cq level -+ * for any frame type. -+ */ - if ( cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY && - cpi->active_worst_quality < cpi->cq_target_quality) - { -@@ -968,16 +926,19 @@ static void calc_pframe_target_size(VP8_COMP *cpi) - } - } - -- // Test to see if we have to drop a frame -- // The auto-drop frame code is only used in buffered mode. -- // In unbufferd mode (eg vide conferencing) the descision to -- // code or drop a frame is made outside the codec in response to real -- // world comms or buffer considerations. -- if (cpi->drop_frames_allowed && cpi->buffered_mode && -+ /* Test to see if we have to drop a frame -+ * The auto-drop frame code is only used in buffered mode. -+ * In unbufferd mode (eg vide conferencing) the descision to -+ * code or drop a frame is made outside the codec in response to real -+ * world comms or buffer considerations. -+ */ -+ if (cpi->drop_frames_allowed && - (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) && -- ((cpi->common.frame_type != KEY_FRAME))) //|| !cpi->oxcf.allow_spatial_resampling) ) -+ ((cpi->common.frame_type != KEY_FRAME))) - { -- // Check for a buffer underun-crisis in which case we have to drop a frame -+ /* Check for a buffer underun-crisis in which case we have to drop -+ * a frame -+ */ - if ((cpi->buffer_level < 0)) - { - #if 0 -@@ -988,41 +949,23 @@ static void calc_pframe_target_size(VP8_COMP *cpi) - (cpi->buffer_level * 100) / cpi->oxcf.optimal_buffer_level); - fclose(f); - #endif -- //vpx_log("Decoder: Drop frame due to bandwidth: %d \n",cpi->buffer_level, cpi->av_per_frame_bandwidth); -- -- cpi->drop_frame = 1; -- } -- --#if 0 -- // Check for other drop frame crtieria (Note 2 pass cbr uses decimation on whole KF sections) -- else if ((cpi->buffer_level < cpi->oxcf.drop_frames_water_mark * cpi->oxcf.optimal_buffer_level / 100) && -- (cpi->drop_count < cpi->max_drop_count) && (cpi->pass == 0)) -- { - cpi->drop_frame = 1; -- } -- --#endif - -- if (cpi->drop_frame) -- { -- // Update the buffer level variable. -+ /* Update the buffer level variable. 
*/ - cpi->bits_off_target += cpi->av_per_frame_bandwidth; - if (cpi->bits_off_target > cpi->oxcf.maximum_buffer_size) -- cpi->bits_off_target = cpi->oxcf.maximum_buffer_size; -+ cpi->bits_off_target = (int)cpi->oxcf.maximum_buffer_size; - cpi->buffer_level = cpi->bits_off_target; - } -- else -- cpi->drop_count = 0; - } - -- // Adjust target frame size for Golden Frames: -+ /* Adjust target frame size for Golden Frames: */ - if (cpi->oxcf.error_resilient_mode == 0 && - (cpi->frames_till_gf_update_due == 0) && !cpi->drop_frame) - { -- //int Boost = 0; - int Q = (cpi->oxcf.fixed_q < 0) ? cpi->last_q[INTER_FRAME] : cpi->oxcf.fixed_q; - -- int gf_frame_useage = 0; // Golden frame useage since last GF -+ int gf_frame_useage = 0; /* Golden frame useage since last GF */ - int tot_mbs = cpi->recent_ref_frame_usage[INTRA_FRAME] + - cpi->recent_ref_frame_usage[LAST_FRAME] + - cpi->recent_ref_frame_usage[GOLDEN_FRAME] + -@@ -1030,30 +973,29 @@ static void calc_pframe_target_size(VP8_COMP *cpi) - - int pct_gf_active = (100 * cpi->gf_active_count) / (cpi->common.mb_rows * cpi->common.mb_cols); - -- // Reset the last boost indicator -- //cpi->last_boost = 100; -- - if (tot_mbs) - gf_frame_useage = (cpi->recent_ref_frame_usage[GOLDEN_FRAME] + cpi->recent_ref_frame_usage[ALTREF_FRAME]) * 100 / tot_mbs; - - if (pct_gf_active > gf_frame_useage) - gf_frame_useage = pct_gf_active; - -- // Is a fixed manual GF frequency being used -+ /* Is a fixed manual GF frequency being used */ - if (cpi->auto_gold) - { -- // For one pass throw a GF if recent frame intra useage is low or the GF useage is high -+ /* For one pass throw a GF if recent frame intra useage is -+ * low or the GF useage is high -+ */ - if ((cpi->pass == 0) && (cpi->this_frame_percent_intra < 15 || gf_frame_useage >= 5)) - cpi->common.refresh_golden_frame = 1; - -- // Two pass GF descision -+ /* Two pass GF descision */ - else if (cpi->pass == 2) - cpi->common.refresh_golden_frame = 1; - } - - #if 0 - -- // Debug stats -+ /* Debug stats */ - if (0) - { - FILE *f; -@@ -1070,7 +1012,7 @@ static void calc_pframe_target_size(VP8_COMP *cpi) - { - #if 0 - -- if (0) // p_gw -+ if (0) - { - FILE *f; - -@@ -1086,16 +1028,20 @@ static void calc_pframe_target_size(VP8_COMP *cpi) - calc_gf_params(cpi); - } - -- // If we are using alternate ref instead of gf then do not apply the boost -- // It will instead be applied to the altref update -- // Jims modified boost -+ /* If we are using alternate ref instead of gf then do not apply the -+ * boost It will instead be applied to the altref update Jims -+ * modified boost -+ */ - if (!cpi->source_alt_ref_active) - { - if (cpi->oxcf.fixed_q < 0) - { - if (cpi->pass == 2) - { -- cpi->this_frame_target = cpi->per_frame_bandwidth; // The spend on the GF is defined in the two pass code for two pass encodes -+ /* The spend on the GF is defined in the two pass -+ * code for two pass encodes -+ */ -+ cpi->this_frame_target = cpi->per_frame_bandwidth; - } - else - { -@@ -1104,14 +1050,16 @@ static void calc_pframe_target_size(VP8_COMP *cpi) - int allocation_chunks = (frames_in_section * 100) + (Boost - 100); - int bits_in_section = cpi->inter_frame_target * frames_in_section; - -- // Normalize Altboost and allocations chunck down to prevent overflow -+ /* Normalize Altboost and allocations chunck down to -+ * prevent overflow -+ */ - while (Boost > 1000) - { - Boost /= 2; - allocation_chunks /= 2; - } - -- // Avoid loss of precision but avoid overflow -+ /* Avoid loss of precision but avoid overflow */ - if ((bits_in_section 
>> 7) > allocation_chunks) - cpi->this_frame_target = Boost * (bits_in_section / allocation_chunks); - else -@@ -1124,10 +1072,11 @@ static void calc_pframe_target_size(VP8_COMP *cpi) - * cpi->last_boost) / 100; - - } -- // If there is an active ARF at this location use the minimum -- // bits on this frame even if it is a contructed arf. -- // The active maximum quantizer insures that an appropriate -- // number of bits will be spent if needed for contstructed ARFs. -+ /* If there is an active ARF at this location use the minimum -+ * bits on this frame even if it is a contructed arf. -+ * The active maximum quantizer insures that an appropriate -+ * number of bits will be spent if needed for contstructed ARFs. -+ */ - else - { - cpi->this_frame_target = 0; -@@ -1151,8 +1100,8 @@ void vp8_update_rate_correction_factors(VP8_COMP *cpi, int damp_var) - - int projected_size_based_on_q = 0; - -- // Clear down mmx registers to allow floating point in what follows -- vp8_clear_system_state(); //__asm emms; -+ /* Clear down mmx registers to allow floating point in what follows */ -+ vp8_clear_system_state(); - - if (cpi->common.frame_type == KEY_FRAME) - { -@@ -1160,23 +1109,26 @@ void vp8_update_rate_correction_factors(VP8_COMP *cpi, int damp_var) - } - else - { -- if (cpi->common.refresh_alt_ref_frame || cpi->common.refresh_golden_frame) -+ if (cpi->oxcf.number_of_layers == 1 && -+ (cpi->common.refresh_alt_ref_frame || -+ cpi->common.refresh_golden_frame)) - rate_correction_factor = cpi->gf_rate_correction_factor; - else - rate_correction_factor = cpi->rate_correction_factor; - } - -- // Work out how big we would have expected the frame to be at this Q given the current correction factor. -- // Stay in double to avoid int overflow when values are large -- //projected_size_based_on_q = ((int)(.5 + rate_correction_factor * vp8_bits_per_mb[cpi->common.frame_type][Q]) * cpi->common.MBs) >> BPER_MB_NORMBITS; -+ /* Work out how big we would have expected the frame to be at this Q -+ * given the current correction factor. Stay in double to avoid int -+ * overflow when values are large -+ */ - projected_size_based_on_q = (int)(((.5 + rate_correction_factor * vp8_bits_per_mb[cpi->common.frame_type][Q]) * cpi->common.MBs) / (1 << BPER_MB_NORMBITS)); - -- // Make some allowance for cpi->zbin_over_quant -- if (cpi->zbin_over_quant > 0) -+ /* Make some allowance for cpi->zbin_over_quant */ -+ if (cpi->mb.zbin_over_quant > 0) - { -- int Z = cpi->zbin_over_quant; -+ int Z = cpi->mb.zbin_over_quant; - double Factor = 0.99; -- double factor_adjustment = 0.01 / 256.0; //(double)ZBIN_OQ_MAX; -+ double factor_adjustment = 0.01 / 256.0; - - while (Z > 0) - { -@@ -1190,13 +1142,13 @@ void vp8_update_rate_correction_factors(VP8_COMP *cpi, int damp_var) - } - } - -- // Work out a size correction factor. -- //if ( cpi->this_frame_target > 0 ) -- // correction_factor = (100 * cpi->projected_frame_size) / cpi->this_frame_target; -+ /* Work out a size correction factor. 
*/ - if (projected_size_based_on_q > 0) - correction_factor = (100 * cpi->projected_frame_size) / projected_size_based_on_q; - -- // More heavily damped adjustment used if we have been oscillating either side of target -+ /* More heavily damped adjustment used if we have been oscillating -+ * either side of target -+ */ - switch (damp_var) - { - case 0: -@@ -1211,25 +1163,23 @@ void vp8_update_rate_correction_factors(VP8_COMP *cpi, int damp_var) - break; - } - -- //if ( (correction_factor > 102) && (Q < cpi->active_worst_quality) ) - if (correction_factor > 102) - { -- // We are not already at the worst allowable quality -+ /* We are not already at the worst allowable quality */ - correction_factor = (int)(100.5 + ((correction_factor - 100) * adjustment_limit)); - rate_correction_factor = ((rate_correction_factor * correction_factor) / 100); - -- // Keep rate_correction_factor within limits -+ /* Keep rate_correction_factor within limits */ - if (rate_correction_factor > MAX_BPB_FACTOR) - rate_correction_factor = MAX_BPB_FACTOR; - } -- //else if ( (correction_factor < 99) && (Q > cpi->active_best_quality) ) - else if (correction_factor < 99) - { -- // We are not already at the best allowable quality -+ /* We are not already at the best allowable quality */ - correction_factor = (int)(100.5 - ((100 - correction_factor) * adjustment_limit)); - rate_correction_factor = ((rate_correction_factor * correction_factor) / 100); - -- // Keep rate_correction_factor within limits -+ /* Keep rate_correction_factor within limits */ - if (rate_correction_factor < MIN_BPB_FACTOR) - rate_correction_factor = MIN_BPB_FACTOR; - } -@@ -1238,7 +1188,9 @@ void vp8_update_rate_correction_factors(VP8_COMP *cpi, int damp_var) - cpi->key_frame_rate_correction_factor = rate_correction_factor; - else - { -- if (cpi->common.refresh_alt_ref_frame || cpi->common.refresh_golden_frame) -+ if (cpi->oxcf.number_of_layers == 1 && -+ (cpi->common.refresh_alt_ref_frame || -+ cpi->common.refresh_golden_frame)) - cpi->gf_rate_correction_factor = rate_correction_factor; - else - cpi->rate_correction_factor = rate_correction_factor; -@@ -1250,8 +1202,8 @@ int vp8_regulate_q(VP8_COMP *cpi, int target_bits_per_frame) - { - int Q = cpi->active_worst_quality; - -- // Reset Zbin OQ value -- cpi->zbin_over_quant = 0; -+ /* Reset Zbin OQ value */ -+ cpi->mb.zbin_over_quant = 0; - - if (cpi->oxcf.fixed_q >= 0) - { -@@ -1261,11 +1213,13 @@ int vp8_regulate_q(VP8_COMP *cpi, int target_bits_per_frame) - { - Q = cpi->oxcf.key_q; - } -- else if (cpi->common.refresh_alt_ref_frame) -+ else if (cpi->oxcf.number_of_layers == 1 && -+ cpi->common.refresh_alt_ref_frame) - { - Q = cpi->oxcf.alt_q; - } -- else if (cpi->common.refresh_golden_frame) -+ else if (cpi->oxcf.number_of_layers == 1 && -+ cpi->common.refresh_golden_frame) - { - Q = cpi->oxcf.gold_q; - } -@@ -1279,20 +1233,25 @@ int vp8_regulate_q(VP8_COMP *cpi, int target_bits_per_frame) - int bits_per_mb_at_this_q; - double correction_factor; - -- // Select the appropriate correction factor based upon type of frame. -+ /* Select the appropriate correction factor based upon type of frame. 
*/ - if (cpi->common.frame_type == KEY_FRAME) - correction_factor = cpi->key_frame_rate_correction_factor; - else - { -- if (cpi->common.refresh_alt_ref_frame || cpi->common.refresh_golden_frame) -+ if (cpi->oxcf.number_of_layers == 1 && -+ (cpi->common.refresh_alt_ref_frame || -+ cpi->common.refresh_golden_frame)) - correction_factor = cpi->gf_rate_correction_factor; - else - correction_factor = cpi->rate_correction_factor; - } - -- // Calculate required scaling factor based on target frame size and size of frame produced using previous Q -+ /* Calculate required scaling factor based on target frame size and -+ * size of frame produced using previous Q -+ */ - if (target_bits_per_frame >= (INT_MAX >> BPER_MB_NORMBITS)) -- target_bits_per_mb = (target_bits_per_frame / cpi->common.MBs) << BPER_MB_NORMBITS; // Case where we would overflow int -+ /* Case where we would overflow int */ -+ target_bits_per_mb = (target_bits_per_frame / cpi->common.MBs) << BPER_MB_NORMBITS; - else - target_bits_per_mb = (target_bits_per_frame << BPER_MB_NORMBITS) / cpi->common.MBs; - -@@ -1317,18 +1276,23 @@ int vp8_regulate_q(VP8_COMP *cpi, int target_bits_per_frame) - while (++i <= cpi->active_worst_quality); - - -- // If we are at MAXQ then enable Q over-run which seeks to claw back additional bits through things like -- // the RD multiplier and zero bin size. -+ /* If we are at MAXQ then enable Q over-run which seeks to claw -+ * back additional bits through things like the RD multiplier -+ * and zero bin size. -+ */ - if (Q >= MAXQ) - { - int zbin_oqmax; - - double Factor = 0.99; -- double factor_adjustment = 0.01 / 256.0; //(double)ZBIN_OQ_MAX; -+ double factor_adjustment = 0.01 / 256.0; - - if (cpi->common.frame_type == KEY_FRAME) -- zbin_oqmax = 0; //ZBIN_OQ_MAX/16 -- else if (cpi->common.refresh_alt_ref_frame || (cpi->common.refresh_golden_frame && !cpi->source_alt_ref_active)) -+ zbin_oqmax = 0; -+ else if (cpi->oxcf.number_of_layers == 1 && -+ (cpi->common.refresh_alt_ref_frame || -+ (cpi->common.refresh_golden_frame && -+ !cpi->source_alt_ref_active))) - zbin_oqmax = 16; - else - zbin_oqmax = ZBIN_OQ_MAX; -@@ -1347,25 +1311,29 @@ int vp8_regulate_q(VP8_COMP *cpi, int target_bits_per_frame) - cpi->zbin_over_quant = (int)Oq; - }*/ - -- // Each incrment in the zbin is assumed to have a fixed effect on bitrate. This is not of course true. -- // The effect will be highly clip dependent and may well have sudden steps. -- // The idea here is to acheive higher effective quantizers than the normal maximum by expanding the zero -- // bin and hence decreasing the number of low magnitude non zero coefficients. -- while (cpi->zbin_over_quant < zbin_oqmax) -+ /* Each incrment in the zbin is assumed to have a fixed effect -+ * on bitrate. This is not of course true. The effect will be -+ * highly clip dependent and may well have sudden steps. The -+ * idea here is to acheive higher effective quantizers than the -+ * normal maximum by expanding the zero bin and hence -+ * decreasing the number of low magnitude non zero coefficients. 
-+ */ -+ while (cpi->mb.zbin_over_quant < zbin_oqmax) - { -- cpi->zbin_over_quant ++; -+ cpi->mb.zbin_over_quant ++; - -- if (cpi->zbin_over_quant > zbin_oqmax) -- cpi->zbin_over_quant = zbin_oqmax; -+ if (cpi->mb.zbin_over_quant > zbin_oqmax) -+ cpi->mb.zbin_over_quant = zbin_oqmax; - -- // Adjust bits_per_mb_at_this_q estimate -+ /* Adjust bits_per_mb_at_this_q estimate */ - bits_per_mb_at_this_q = (int)(Factor * bits_per_mb_at_this_q); - Factor += factor_adjustment; - - if (Factor >= 0.999) - Factor = 0.999; - -- if (bits_per_mb_at_this_q <= target_bits_per_mb) // Break out if we get down to the target rate -+ /* Break out if we get down to the target rate */ -+ if (bits_per_mb_at_this_q <= target_bits_per_mb) - break; - } - -@@ -1380,7 +1348,7 @@ static int estimate_keyframe_frequency(VP8_COMP *cpi) - { - int i; - -- // Average key frame frequency -+ /* Average key frame frequency */ - int av_key_frame_frequency = 0; - - /* First key frame at start of sequence is a special case. We have no -@@ -1431,11 +1399,11 @@ static int estimate_keyframe_frequency(VP8_COMP *cpi) - - void vp8_adjust_key_frame_context(VP8_COMP *cpi) - { -- // Clear down mmx registers to allow floating point in what follows -+ /* Clear down mmx registers to allow floating point in what follows */ - vp8_clear_system_state(); - -- // Do we have any key frame overspend to recover? -- // Two-pass overspend handled elsewhere. -+ /* Do we have any key frame overspend to recover? */ -+ /* Two-pass overspend handled elsewhere. */ - if ((cpi->pass != 2) - && (cpi->projected_frame_size > cpi->per_frame_bandwidth)) - { -@@ -1469,10 +1437,12 @@ void vp8_adjust_key_frame_context(VP8_COMP *cpi) - - void vp8_compute_frame_size_bounds(VP8_COMP *cpi, int *frame_under_shoot_limit, int *frame_over_shoot_limit) - { -- // Set-up bounds on acceptable frame size: -+ /* Set-up bounds on acceptable frame size: */ - if (cpi->oxcf.fixed_q >= 0) - { -- // Fixed Q scenario: frame size never outranges target (there is no target!) -+ /* Fixed Q scenario: frame size never outranges target -+ * (there is no target!) 
-+ */ - *frame_under_shoot_limit = 0; - *frame_over_shoot_limit = INT_MAX; - } -@@ -1494,18 +1464,22 @@ void vp8_compute_frame_size_bounds(VP8_COMP *cpi, int *frame_under_shoot_limit, - } - else - { -- // For CBR take buffer fullness into account -+ /* For CBR take buffer fullness into account */ - if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) - { - if (cpi->buffer_level >= ((cpi->oxcf.optimal_buffer_level + cpi->oxcf.maximum_buffer_size) >> 1)) - { -- // Buffer is too full so relax overshoot and tighten undershoot -+ /* Buffer is too full so relax overshoot and tighten -+ * undershoot -+ */ - *frame_over_shoot_limit = cpi->this_frame_target * 12 / 8; - *frame_under_shoot_limit = cpi->this_frame_target * 6 / 8; - } - else if (cpi->buffer_level <= (cpi->oxcf.optimal_buffer_level >> 1)) - { -- // Buffer is too low so relax undershoot and tighten overshoot -+ /* Buffer is too low so relax undershoot and tighten -+ * overshoot -+ */ - *frame_over_shoot_limit = cpi->this_frame_target * 10 / 8; - *frame_under_shoot_limit = cpi->this_frame_target * 4 / 8; - } -@@ -1515,11 +1489,13 @@ void vp8_compute_frame_size_bounds(VP8_COMP *cpi, int *frame_under_shoot_limit, - *frame_under_shoot_limit = cpi->this_frame_target * 5 / 8; - } - } -- // VBR and CQ mode -- // Note that tighter restrictions here can help quality but hurt encode speed -+ /* VBR and CQ mode */ -+ /* Note that tighter restrictions here can help quality -+ * but hurt encode speed -+ */ - else - { -- // Stron overshoot limit for constrained quality -+ /* Stron overshoot limit for constrained quality */ - if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) - { - *frame_over_shoot_limit = cpi->this_frame_target * 11 / 8; -@@ -1534,9 +1510,10 @@ void vp8_compute_frame_size_bounds(VP8_COMP *cpi, int *frame_under_shoot_limit, - } - } - -- // For very small rate targets where the fractional adjustment -- // (eg * 7/8) may be tiny make sure there is at least a minimum -- // range. -+ /* For very small rate targets where the fractional adjustment -+ * (eg * 7/8) may be tiny make sure there is at least a minimum -+ * range. 
-+ */ - *frame_over_shoot_limit += 200; - *frame_under_shoot_limit -= 200; - if ( *frame_under_shoot_limit < 0 ) -@@ -1546,7 +1523,7 @@ void vp8_compute_frame_size_bounds(VP8_COMP *cpi, int *frame_under_shoot_limit, - } - - --// return of 0 means drop frame -+/* return of 0 means drop frame */ - int vp8_pick_frame_size(VP8_COMP *cpi) - { - VP8_COMMON *cm = &cpi->common; -@@ -1557,11 +1534,10 @@ int vp8_pick_frame_size(VP8_COMP *cpi) - { - calc_pframe_target_size(cpi); - -- // Check if we're dropping the frame: -+ /* Check if we're dropping the frame: */ - if (cpi->drop_frame) - { - cpi->drop_frame = 0; -- cpi->drop_count++; - return 0; - } - } -diff --git a/vp8/encoder/ratectrl.h b/vp8/encoder/ratectrl.h -index d4f7796..c43f08d 100644 ---- a/vp8/encoder/ratectrl.h -+++ b/vp8/encoder/ratectrl.h -@@ -22,7 +22,7 @@ extern int vp8_regulate_q(VP8_COMP *cpi, int target_bits_per_frame); - extern void vp8_adjust_key_frame_context(VP8_COMP *cpi); - extern void vp8_compute_frame_size_bounds(VP8_COMP *cpi, int *frame_under_shoot_limit, int *frame_over_shoot_limit); - --// return of 0 means drop frame -+/* return of 0 means drop frame */ - extern int vp8_pick_frame_size(VP8_COMP *cpi); - - #endif -diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c -index 2b706ba..ceb817c 100644 ---- a/vp8/encoder/rdopt.c -+++ b/vp8/encoder/rdopt.c -@@ -21,6 +21,7 @@ - #include "onyx_int.h" - #include "modecosts.h" - #include "encodeintra.h" -+#include "pickinter.h" - #include "vp8/common/entropymode.h" - #include "vp8/common/reconinter.h" - #include "vp8/common/reconintra4x4.h" -@@ -36,7 +37,6 @@ - #if CONFIG_TEMPORAL_DENOISING - #include "denoising.h" - #endif -- - extern void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x); - - #define MAXF(a,b) (((a) > (b)) ? (a) : (b)) -@@ -149,8 +149,8 @@ const int vp8_ref_frame_order[MAX_MODES] = - }; - - static void fill_token_costs( -- unsigned int c [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS], -- const vp8_prob p [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES] -+ int c[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS], -+ const vp8_prob p[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES] - ) - { - int i, j, k; -@@ -159,21 +159,26 @@ static void fill_token_costs( - for (i = 0; i < BLOCK_TYPES; i++) - for (j = 0; j < COEF_BANDS; j++) - for (k = 0; k < PREV_COEF_CONTEXTS; k++) -- // check for pt=0 and band > 1 if block type 0 and 0 if blocktype 1 -- if(k==0 && j>(i==0) ) -- vp8_cost_tokens2((int *)(c [i][j][k]), p [i][j][k], vp8_coef_tree,2); -+ -+ /* check for pt=0 and band > 1 if block type 0 -+ * and 0 if blocktype 1 -+ */ -+ if (k == 0 && j > (i == 0)) -+ vp8_cost_tokens2(c[i][j][k], p [i][j][k], vp8_coef_tree, 2); - else -- vp8_cost_tokens((int *)(c [i][j][k]), p [i][j][k], vp8_coef_tree); -+ vp8_cost_tokens(c[i][j][k], p [i][j][k], vp8_coef_tree); - } - --static int rd_iifactor [ 32 ] = { 4, 4, 3, 2, 1, 0, 0, 0, -- 0, 0, 0, 0, 0, 0, 0, 0, -- 0, 0, 0, 0, 0, 0, 0, 0, -- 0, 0, 0, 0, 0, 0, 0, 0, -- }; -+static const int rd_iifactor[32] = -+{ -+ 4, 4, 3, 2, 1, 0, 0, 0, -+ 0, 0, 0, 0, 0, 0, 0, 0, -+ 0, 0, 0, 0, 0, 0, 0, 0, -+ 0, 0, 0, 0, 0, 0, 0, 0 -+}; - - /* values are now correlated to quantizer */ --static int sad_per_bit16lut[QINDEX_RANGE] = -+static const int sad_per_bit16lut[QINDEX_RANGE] = - { - 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, -@@ -192,7 +197,7 @@ static int sad_per_bit16lut[QINDEX_RANGE] = - 11, 11, 11, 11, 12, 12, 12, 12, - 12, 12, 13, 13, 13, 13, 14, 14 - }; --static int 
sad_per_bit4lut[QINDEX_RANGE] = -+static const int sad_per_bit4lut[QINDEX_RANGE] = - { - 2, 2, 2, 2, 2, 2, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, -@@ -218,30 +223,30 @@ void vp8cx_initialize_me_consts(VP8_COMP *cpi, int QIndex) - cpi->mb.sadperbit4 = sad_per_bit4lut[QIndex]; - } - --void vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue) -+void vp8_initialize_rd_consts(VP8_COMP *cpi, MACROBLOCK *x, int Qvalue) - { - int q; - int i; - double capped_q = (Qvalue < 160) ? (double)Qvalue : 160.0; - double rdconst = 2.80; - -- vp8_clear_system_state(); //__asm emms; -+ vp8_clear_system_state(); - -- // Further tests required to see if optimum is different -- // for key frames, golden frames and arf frames. -- // if (cpi->common.refresh_golden_frame || -- // cpi->common.refresh_alt_ref_frame) -+ /* Further tests required to see if optimum is different -+ * for key frames, golden frames and arf frames. -+ */ - cpi->RDMULT = (int)(rdconst * (capped_q * capped_q)); - -- // Extend rate multiplier along side quantizer zbin increases -- if (cpi->zbin_over_quant > 0) -+ /* Extend rate multiplier along side quantizer zbin increases */ -+ if (cpi->mb.zbin_over_quant > 0) - { - double oq_factor; - double modq; - -- // Experimental code using the same basic equation as used for Q above -- // The units of cpi->zbin_over_quant are 1/128 of Q bin size -- oq_factor = 1.0 + ((double)0.0015625 * cpi->zbin_over_quant); -+ /* Experimental code using the same basic equation as used for Q above -+ * The units of cpi->mb.zbin_over_quant are 1/128 of Q bin size -+ */ -+ oq_factor = 1.0 + ((double)0.0015625 * cpi->mb.zbin_over_quant); - modq = (int)((double)capped_q * oq_factor); - cpi->RDMULT = (int)(rdconst * (modq * modq)); - } -@@ -260,6 +265,11 @@ void vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue) - - vp8_set_speed_features(cpi); - -+ for (i = 0; i < MAX_MODES; i++) -+ { -+ x->mode_test_hit_counts[i] = 0; -+ } -+ - q = (int)pow(Qvalue, 1.25); - - if (q < 8) -@@ -274,14 +284,14 @@ void vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue) - { - if (cpi->sf.thresh_mult[i] < INT_MAX) - { -- cpi->rd_threshes[i] = cpi->sf.thresh_mult[i] * q / 100; -+ x->rd_threshes[i] = cpi->sf.thresh_mult[i] * q / 100; - } - else - { -- cpi->rd_threshes[i] = INT_MAX; -+ x->rd_threshes[i] = INT_MAX; - } - -- cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i]; -+ cpi->rd_baseline_thresh[i] = x->rd_threshes[i]; - } - } - else -@@ -292,19 +302,19 @@ void vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue) - { - if (cpi->sf.thresh_mult[i] < (INT_MAX / q)) - { -- cpi->rd_threshes[i] = cpi->sf.thresh_mult[i] * q; -+ x->rd_threshes[i] = cpi->sf.thresh_mult[i] * q; - } - else - { -- cpi->rd_threshes[i] = INT_MAX; -+ x->rd_threshes[i] = INT_MAX; - } - -- cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i]; -+ cpi->rd_baseline_thresh[i] = x->rd_threshes[i]; - } - } - - { -- // build token cost array for the type of frame we have now -+ /* build token cost array for the type of frame we have now */ - FRAME_CONTEXT *l = &cpi->lfc_n; - - if(cpi->common.refresh_alt_ref_frame) -@@ -323,12 +333,8 @@ void vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue) - */ - - -- // TODO make these mode costs depend on last,alt or gold too. (jbb) -+ /* TODO make these mode costs depend on last,alt or gold too. 
(jbb) */ - vp8_init_mode_costs(cpi); -- -- // TODO figure onnnnuut why making mv cost frame type dependent didn't help (jbb) -- //vp8_build_component_cost_table(cpi->mb.mvcost, (const MV_CONTEXT *) l->mvc, flags); -- - } - - } -@@ -353,14 +359,6 @@ void vp8_auto_select_speed(VP8_COMP *cpi) - - #endif - -- /* -- // this is done during parameter valid check -- if( cpi->oxcf.cpu_used > 16) -- cpi->oxcf.cpu_used = 16; -- if( cpi->oxcf.cpu_used < -16) -- cpi->oxcf.cpu_used = -16; -- */ -- - if (cpi->avg_pick_mode_time < milliseconds_for_compress && (cpi->avg_encode_time - cpi->avg_pick_mode_time) < milliseconds_for_compress) - { - if (cpi->avg_pick_mode_time == 0) -@@ -387,10 +385,10 @@ void vp8_auto_select_speed(VP8_COMP *cpi) - cpi->avg_pick_mode_time = 0; - cpi->avg_encode_time = 0; - -- // In real-time mode, cpi->speed is in [4, 16]. -- if (cpi->Speed < 4) //if ( cpi->Speed < 0 ) -+ /* In real-time mode, cpi->speed is in [4, 16]. */ -+ if (cpi->Speed < 4) - { -- cpi->Speed = 4; //cpi->Speed = 0; -+ cpi->Speed = 4; - } - } - } -@@ -546,7 +544,7 @@ static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, int type, ENTROPY_CONTEXT *a, - if (c < 16) - cost += mb->token_costs [type] [vp8_coef_bands[c]] [pt] [DCT_EOB_TOKEN]; - -- pt = (c != !type); // is eob first coefficient; -+ pt = (c != !type); /* is eob first coefficient; */ - *a = *l = pt; - - return cost; -@@ -592,7 +590,7 @@ static void macro_block_yrd( MACROBLOCK *mb, - vp8_subtract_mby( mb->src_diff, *(mb->block[0].base_src), - mb->block[0].src_stride, mb->e_mbd.predictor, 16); - -- // Fdct and building the 2nd order block -+ /* Fdct and building the 2nd order block */ - for (beptr = mb->block; beptr < mb->block + 16; beptr += 2) - { - mb->short_fdct8x4(beptr->src_diff, beptr->coeff, 32); -@@ -600,25 +598,25 @@ static void macro_block_yrd( MACROBLOCK *mb, - *Y2DCPtr++ = beptr->coeff[16]; - } - -- // 2nd order fdct -+ /* 2nd order fdct */ - mb->short_walsh4x4(mb_y2->src_diff, mb_y2->coeff, 8); - -- // Quantization -+ /* Quantization */ - for (b = 0; b < 16; b++) - { - mb->quantize_b(&mb->block[b], &mb->e_mbd.block[b]); - } - -- // DC predication and Quantization of 2nd Order block -+ /* DC predication and Quantization of 2nd Order block */ - mb->quantize_b(mb_y2, x_y2); - -- // Distortion -+ /* Distortion */ - d = vp8_mbblock_error(mb, 1) << 2; - d += vp8_block_error(mb_y2->coeff, x_y2->dqcoeff); - - *Distortion = (d >> 4); - -- // rate -+ /* rate */ - *Rate = vp8_rdcost_mby(mb); - } - -@@ -632,12 +630,11 @@ static void copy_predictor(unsigned char *dst, const unsigned char *predictor) - d[12] = p[12]; - } - static int rd_pick_intra4x4block( -- VP8_COMP *cpi, - MACROBLOCK *x, - BLOCK *be, - BLOCKD *b, - B_PREDICTION_MODE *best_mode, -- unsigned int *bmode_costs, -+ const int *bmode_costs, - ENTROPY_CONTEXT *a, - ENTROPY_CONTEXT *l, - -@@ -660,7 +657,11 @@ static int rd_pick_intra4x4block( - DECLARE_ALIGNED_ARRAY(16, unsigned char, best_predictor, 16*4); - DECLARE_ALIGNED_ARRAY(16, short, best_dqcoeff, 16); - int dst_stride = x->e_mbd.dst.y_stride; -- unsigned char *base_dst = x->e_mbd.dst.y_buffer; -+ unsigned char *dst = x->e_mbd.dst.y_buffer + b->offset; -+ -+ unsigned char *Above = dst - dst_stride; -+ unsigned char *yleft = dst - 1; -+ unsigned char top_left = Above[-1]; - - for (mode = B_DC_PRED; mode <= B_HU_PRED; mode++) - { -@@ -669,8 +670,8 @@ static int rd_pick_intra4x4block( - - rate = bmode_costs[mode]; - -- vp8_intra4x4_predict(base_dst + b->offset, dst_stride, mode, -- b->predictor, 16); -+ vp8_intra4x4_predict(Above, yleft, 
dst_stride, mode, -+ b->predictor, 16, top_left); - vp8_subtract_b(be, b, 16); - x->short_fdct4x4(be->src_diff, be->coeff, 32); - x->quantize_b(be, b); -@@ -697,15 +698,14 @@ static int rd_pick_intra4x4block( - vpx_memcpy(best_dqcoeff, b->dqcoeff, 32); - } - } -- b->bmi.as_mode = (B_PREDICTION_MODE)(*best_mode); -+ b->bmi.as_mode = *best_mode; - -- vp8_short_idct4x4llm(best_dqcoeff, best_predictor, 16, base_dst + b->offset, -- dst_stride); -+ vp8_short_idct4x4llm(best_dqcoeff, best_predictor, 16, dst, dst_stride); - - return best_rd; - } - --static int rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate, -+static int rd_pick_intra4x4mby_modes(MACROBLOCK *mb, int *Rate, - int *rate_y, int *Distortion, int best_rd) - { - MACROBLOCKD *const xd = &mb->e_mbd; -@@ -717,7 +717,7 @@ static int rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate, - ENTROPY_CONTEXT_PLANES t_above, t_left; - ENTROPY_CONTEXT *ta; - ENTROPY_CONTEXT *tl; -- unsigned int *bmode_costs; -+ const int *bmode_costs; - - vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); -@@ -745,7 +745,7 @@ static int rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate, - } - - total_rd += rd_pick_intra4x4block( -- cpi, mb, mb->block + i, xd->block + i, &best_mode, bmode_costs, -+ mb, mb->block + i, xd->block + i, &best_mode, bmode_costs, - ta + vp8_block2above[i], - tl + vp8_block2left[i], &r, &ry, &d); - -@@ -770,8 +770,7 @@ static int rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate, - } - - --static int rd_pick_intra16x16mby_mode(VP8_COMP *cpi, -- MACROBLOCK *x, -+static int rd_pick_intra16x16mby_mode(MACROBLOCK *x, - int *Rate, - int *rate_y, - int *Distortion) -@@ -784,7 +783,7 @@ static int rd_pick_intra16x16mby_mode(VP8_COMP *cpi, - int this_rd; - MACROBLOCKD *xd = &x->e_mbd; - -- //Y Search for 16x16 intra prediction mode -+ /* Y Search for 16x16 intra prediction mode */ - for (mode = DC_PRED; mode <= TM_PRED; mode++) - { - xd->mode_info_context->mbmi.mode = mode; -@@ -873,7 +872,8 @@ static int rd_inter4x4_uv(VP8_COMP *cpi, MACROBLOCK *x, int *rate, - return RDCOST(x->rdmult, x->rddiv, *rate, *distortion); - } - --static void rd_pick_intra_mbuv_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate, int *rate_tokenonly, int *distortion) -+static void rd_pick_intra_mbuv_mode(MACROBLOCK *x, int *rate, -+ int *rate_tokenonly, int *distortion) - { - MB_PREDICTION_MODE mode; - MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected); -@@ -981,8 +981,9 @@ static int labels2mode( - m = ABOVE4X4; - else - { -- // the only time we should do costing for new motion vector or mode -- // is when we are on a new label (jbb May 08, 2007) -+ /* the only time we should do costing for new motion vector -+ * or mode is when we are on a new label (jbb May 08, 2007) -+ */ - switch (m = this_mode) - { - case NEW4X4 : -@@ -1001,7 +1002,7 @@ static int labels2mode( - break; - } - -- if (m == ABOVE4X4) // replace above with left if same -+ if (m == ABOVE4X4) /* replace above with left if same */ - { - int_mv left_mv; - -@@ -1062,9 +1063,6 @@ static unsigned int vp8_encode_inter_mb_segment(MACROBLOCK *x, int const *labels - vp8_build_inter_predictors_b(bd, 16, base_pre, pre_stride, x->e_mbd.subpixel_predict); - vp8_subtract_b(be, bd, 16); - x->short_fdct4x4(be->src_diff, be->coeff, 32); -- -- // set to 0 no way to account for 2nd order DC so discount -- //be->coeff[0] = 0; - x->quantize_b(be, bd); - - 
distortion += vp8_block_error(be->coeff, bd->dqcoeff); -@@ -1095,8 +1093,8 @@ typedef struct - int mvthresh; - int *mdcounts; - -- int_mv sv_mvp[4]; // save 4 mvp from 8x8 -- int sv_istep[2]; // save 2 initial step_param for 16x8/8x16 -+ int_mv sv_mvp[4]; /* save 4 mvp from 8x8 */ -+ int sv_istep[2]; /* save 2 initial step_param for 16x8/8x16 */ - - } BEST_SEG_INFO; - -@@ -1143,13 +1141,13 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, - labels = vp8_mbsplits[segmentation]; - label_count = vp8_mbsplit_count[segmentation]; - -- // 64 makes this threshold really big effectively -- // making it so that we very rarely check mvs on -- // segments. setting this to 1 would make mv thresh -- // roughly equal to what it is for macroblocks -+ /* 64 makes this threshold really big effectively making it so that we -+ * very rarely check mvs on segments. setting this to 1 would make mv -+ * thresh roughly equal to what it is for macroblocks -+ */ - label_mv_thresh = 1 * bsi->mvthresh / label_count ; - -- // Segmentation method overheads -+ /* Segmentation method overheads */ - rate = vp8_cost_token(vp8_mbsplit_tree, vp8_mbsplit_probs, vp8_mbsplit_encodings + segmentation); - rate += vp8_cost_mv_ref(SPLITMV, bsi->mdcounts); - this_segment_rd += RDCOST(x->rdmult, x->rddiv, rate, 0); -@@ -1162,7 +1160,7 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, - B_PREDICTION_MODE mode_selected = ZERO4X4; - int bestlabelyrate = 0; - -- // search for the best motion vector on this segment -+ /* search for the best motion vector on this segment */ - for (this_mode = LEFT4X4; this_mode <= NEW4X4 ; this_mode ++) - { - int this_rd; -@@ -1191,7 +1189,9 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, - BLOCK *c; - BLOCKD *e; - -- // Is the best so far sufficiently good that we cant justify doing and new motion search. -+ /* Is the best so far sufficiently good that we cant justify -+ * doing a new motion search. -+ */ - if (best_label_rd < label_mv_thresh) - break; - -@@ -1206,7 +1206,9 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, - step_param = bsi->sv_istep[i]; - } - -- // use previous block's result as next block's MV predictor. -+ /* use previous block's result as next block's MV -+ * predictor. -+ */ - if (segmentation == BLOCK_4X4 && i>0) - { - bsi->mvp.as_int = x->e_mbd.block[i-1].bmi.mv.as_int; -@@ -1225,7 +1227,7 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, - mvp_full.as_mv.row = bsi->mvp.as_mv.row >>3; - mvp_full.as_mv.col = bsi->mvp.as_mv.col >>3; - -- // find first label -+ /* find first label */ - n = vp8_mbsplit_offset[segmentation][i]; - - c = &x->block[n]; -@@ -1265,7 +1267,7 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, - - sseshift = segmentation_to_sseshift[segmentation]; - -- // Should we do a full search (best quality only) -+ /* Should we do a full search (best quality only) */ - if ((cpi->compressor_speed == 0) && (bestsme >> sseshift) > 4000) - { - /* Check if mvp_full is within the range. 
*/ -@@ -1282,7 +1284,9 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, - } - else - { -- // The full search result is actually worse so re-instate the previous best vector -+ /* The full search result is actually worse so -+ * re-instate the previous best vector -+ */ - e->bmi.mv.as_int = mode_mv[NEW4X4].as_int; - } - } -@@ -1302,7 +1306,7 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, - rate = labels2mode(x, labels, i, this_mode, &mode_mv[this_mode], - bsi->ref_mv, x->mvcost); - -- // Trap vectors that reach beyond the UMV borders -+ /* Trap vectors that reach beyond the UMV borders */ - if (((mode_mv[this_mode].as_mv.row >> 3) < x->mv_row_min) || ((mode_mv[this_mode].as_mv.row >> 3) > x->mv_row_max) || - ((mode_mv[this_mode].as_mv.col >> 3) < x->mv_col_min) || ((mode_mv[this_mode].as_mv.col >> 3) > x->mv_col_max)) - { -@@ -1354,7 +1358,7 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, - bsi->segment_rd = this_segment_rd; - bsi->segment_num = segmentation; - -- // store everything needed to come back to this!! -+ /* store everything needed to come back to this!! */ - for (i = 0; i < 16; i++) - { - bsi->mvs[i].as_mv = x->partition_info->bmi[i].mv.as_mv; -@@ -1516,7 +1520,7 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, - return bsi.segment_rd; - } - --//The improved MV prediction -+/* The improved MV prediction */ - void vp8_mv_pred - ( - VP8_COMP *cpi, -@@ -1550,7 +1554,9 @@ void vp8_mv_pred - near_mvs[0].as_int = near_mvs[1].as_int = near_mvs[2].as_int = near_mvs[3].as_int = near_mvs[4].as_int = near_mvs[5].as_int = near_mvs[6].as_int = near_mvs[7].as_int = 0; - near_ref[0] = near_ref[1] = near_ref[2] = near_ref[3] = near_ref[4] = near_ref[5] = near_ref[6] = near_ref[7] = 0; - -- // read in 3 nearby block's MVs from current frame as prediction candidates. -+ /* read in 3 nearby block's MVs from current frame as prediction -+ * candidates. -+ */ - if (above->mbmi.ref_frame != INTRA_FRAME) - { - near_mvs[vcnt].as_int = above->mbmi.mv.as_int; -@@ -1573,12 +1579,12 @@ void vp8_mv_pred - } - vcnt++; - -- // read in 5 nearby block's MVs from last frame. -+ /* read in 5 nearby block's MVs from last frame. 
*/ - if(cpi->common.last_frame_type != KEY_FRAME) - { - mb_offset = (-xd->mb_to_top_edge/128 + 1) * (xd->mode_info_stride +1) + (-xd->mb_to_left_edge/128 +1) ; - -- // current in last frame -+ /* current in last frame */ - if (cpi->lf_ref_frame[mb_offset] != INTRA_FRAME) - { - near_mvs[vcnt].as_int = cpi->lfmv[mb_offset].as_int; -@@ -1587,7 +1593,7 @@ void vp8_mv_pred - } - vcnt++; - -- // above in last frame -+ /* above in last frame */ - if (cpi->lf_ref_frame[mb_offset - xd->mode_info_stride-1] != INTRA_FRAME) - { - near_mvs[vcnt].as_int = cpi->lfmv[mb_offset - xd->mode_info_stride-1].as_int; -@@ -1596,7 +1602,7 @@ void vp8_mv_pred - } - vcnt++; - -- // left in last frame -+ /* left in last frame */ - if (cpi->lf_ref_frame[mb_offset-1] != INTRA_FRAME) - { - near_mvs[vcnt].as_int = cpi->lfmv[mb_offset -1].as_int; -@@ -1605,7 +1611,7 @@ void vp8_mv_pred - } - vcnt++; - -- // right in last frame -+ /* right in last frame */ - if (cpi->lf_ref_frame[mb_offset +1] != INTRA_FRAME) - { - near_mvs[vcnt].as_int = cpi->lfmv[mb_offset +1].as_int; -@@ -1614,7 +1620,7 @@ void vp8_mv_pred - } - vcnt++; - -- // below in last frame -+ /* below in last frame */ - if (cpi->lf_ref_frame[mb_offset + xd->mode_info_stride +1] != INTRA_FRAME) - { - near_mvs[vcnt].as_int = cpi->lfmv[mb_offset + xd->mode_info_stride +1].as_int; -@@ -1655,7 +1661,9 @@ void vp8_mv_pred - mv.as_mv.col = mvy[vcnt/2]; - - find = 1; -- //sr is set to 0 to allow calling function to decide the search range. -+ /* sr is set to 0 to allow calling function to decide the search -+ * range. -+ */ - *sr = 0; - } - } -@@ -1667,33 +1675,36 @@ void vp8_mv_pred - - void vp8_cal_sad(VP8_COMP *cpi, MACROBLOCKD *xd, MACROBLOCK *x, int recon_yoffset, int near_sadidx[]) - { -- -- int near_sad[8] = {0}; // 0-cf above, 1-cf left, 2-cf aboveleft, 3-lf current, 4-lf above, 5-lf left, 6-lf right, 7-lf below -+ /* near_sad indexes: -+ * 0-cf above, 1-cf left, 2-cf aboveleft, -+ * 3-lf current, 4-lf above, 5-lf left, 6-lf right, 7-lf below -+ */ -+ int near_sad[8] = {0}; - BLOCK *b = &x->block[0]; - unsigned char *src_y_ptr = *(b->base_src); - -- //calculate sad for current frame 3 nearby MBs. -+ /* calculate sad for current frame 3 nearby MBs. */ - if( xd->mb_to_top_edge==0 && xd->mb_to_left_edge ==0) - { - near_sad[0] = near_sad[1] = near_sad[2] = INT_MAX; - }else if(xd->mb_to_top_edge==0) -- { //only has left MB for sad calculation. -+ { /* only has left MB for sad calculation. */ - near_sad[0] = near_sad[2] = INT_MAX; -- near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - 16,xd->dst.y_stride, 0x7fffffff); -+ near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - 16,xd->dst.y_stride, UINT_MAX); - }else if(xd->mb_to_left_edge ==0) -- { //only has left MB for sad calculation. -+ { /* only has left MB for sad calculation. 
*/ - near_sad[1] = near_sad[2] = INT_MAX; -- near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16,xd->dst.y_stride, 0x7fffffff); -+ near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16,xd->dst.y_stride, UINT_MAX); - }else - { -- near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16,xd->dst.y_stride, 0x7fffffff); -- near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - 16,xd->dst.y_stride, 0x7fffffff); -- near_sad[2] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16 -16,xd->dst.y_stride, 0x7fffffff); -+ near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16,xd->dst.y_stride, UINT_MAX); -+ near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - 16,xd->dst.y_stride, UINT_MAX); -+ near_sad[2] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16 -16,xd->dst.y_stride, UINT_MAX); - } - - if(cpi->common.last_frame_type != KEY_FRAME) - { -- //calculate sad for last frame 5 nearby MBs. -+ /* calculate sad for last frame 5 nearby MBs. */ - unsigned char *pre_y_buffer = cpi->common.yv12_fb[cpi->common.lst_fb_idx].y_buffer + recon_yoffset; - int pre_y_stride = cpi->common.yv12_fb[cpi->common.lst_fb_idx].y_stride; - -@@ -1703,14 +1714,14 @@ void vp8_cal_sad(VP8_COMP *cpi, MACROBLOCKD *xd, MACROBLOCK *x, int recon_yoffse - if(xd->mb_to_bottom_edge==0) near_sad[7] = INT_MAX; - - if(near_sad[4] != INT_MAX) -- near_sad[4] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer - pre_y_stride *16, pre_y_stride, 0x7fffffff); -+ near_sad[4] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer - pre_y_stride *16, pre_y_stride, UINT_MAX); - if(near_sad[5] != INT_MAX) -- near_sad[5] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer - 16, pre_y_stride, 0x7fffffff); -- near_sad[3] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer, pre_y_stride, 0x7fffffff); -+ near_sad[5] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer - 16, pre_y_stride, UINT_MAX); -+ near_sad[3] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer, pre_y_stride, UINT_MAX); - if(near_sad[6] != INT_MAX) -- near_sad[6] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer + 16, pre_y_stride, 0x7fffffff); -+ near_sad[6] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer + 16, pre_y_stride, UINT_MAX); - if(near_sad[7] != INT_MAX) -- near_sad[7] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer + pre_y_stride *16, pre_y_stride, 0x7fffffff); -+ near_sad[7] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer + pre_y_stride *16, pre_y_stride, UINT_MAX); - } - - if(cpi->common.last_frame_type != KEY_FRAME) -@@ -1732,18 +1743,18 @@ static void rd_update_mvcount(VP8_COMP *cpi, MACROBLOCK *x, int_mv *best_ref_mv) - { - if (x->partition_info->bmi[i].mode == NEW4X4) - { -- cpi->MVcount[0][mv_max+((x->partition_info->bmi[i].mv.as_mv.row -+ x->MVcount[0][mv_max+((x->partition_info->bmi[i].mv.as_mv.row - - best_ref_mv->as_mv.row) >> 1)]++; -- cpi->MVcount[1][mv_max+((x->partition_info->bmi[i].mv.as_mv.col -+ x->MVcount[1][mv_max+((x->partition_info->bmi[i].mv.as_mv.col - - best_ref_mv->as_mv.col) >> 1)]++; - } - } 
- } - else if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV) - { -- cpi->MVcount[0][mv_max+((x->e_mbd.mode_info_context->mbmi.mv.as_mv.row -+ x->MVcount[0][mv_max+((x->e_mbd.mode_info_context->mbmi.mv.as_mv.row - - best_ref_mv->as_mv.row) >> 1)]++; -- cpi->MVcount[1][mv_max+((x->e_mbd.mode_info_context->mbmi.mv.as_mv.col -+ x->MVcount[1][mv_max+((x->e_mbd.mode_info_context->mbmi.mv.as_mv.col - - best_ref_mv->as_mv.col) >> 1)]++; - } - } -@@ -1766,7 +1777,7 @@ static int evaluate_inter_mode_rd(int mdcounts[4], - { - unsigned int sse; - unsigned int var; -- int threshold = (xd->block[0].dequant[1] -+ unsigned int threshold = (xd->block[0].dequant[1] - * xd->block[0].dequant[1] >>4); - - if(threshold < x->encode_breakout) -@@ -1784,8 +1795,8 @@ static int evaluate_inter_mode_rd(int mdcounts[4], - if ((sse - var < q2dc * q2dc >>4) || - (sse /2 > var && sse-var < 64)) - { -- // Check u and v to make sure skip is ok -- int sse2= VP8_UVSSE(x); -+ /* Check u and v to make sure skip is ok */ -+ unsigned int sse2 = VP8_UVSSE(x); - if (sse2 * 2 < threshold) - { - x->skip = 1; -@@ -1805,17 +1816,15 @@ static int evaluate_inter_mode_rd(int mdcounts[4], - } - - -- //intermodecost[mode_index] = vp8_cost_mv_ref(this_mode, mdcounts); // Experimental debug code -- -- // Add in the Mv/mode cost -+ /* Add in the Mv/mode cost */ - rd->rate2 += vp8_cost_mv_ref(this_mode, mdcounts); - -- // Y cost and distortion -+ /* Y cost and distortion */ - macro_block_yrd(x, &rd->rate_y, &distortion); - rd->rate2 += rd->rate_y; - rd->distortion2 += distortion; - -- // UV cost and distortion -+ /* UV cost and distortion */ - rd_inter16x16_uv(cpi, x, &rd->rate_uv, &rd->distortion_uv, - cpi->common.full_pixel); - rd->rate2 += rd->rate_uv; -@@ -1832,9 +1841,11 @@ static int calculate_final_rd_costs(int this_rd, - VP8_COMP *cpi, MACROBLOCK *x) - { - MB_PREDICTION_MODE this_mode = x->e_mbd.mode_info_context->mbmi.mode; -- // Where skip is allowable add in the default per mb cost for the no skip case. -- // where we then decide to skip we have to delete this and replace it with the -- // cost of signallying a skip -+ -+ /* Where skip is allowable add in the default per mb cost for the no -+ * skip case. 
where we then decide to skip we have to delete this and -+ * replace it with the cost of signalling a skip -+ */ - if (cpi->common.mb_no_coeff_skip) - { - *other_cost += vp8_cost_bit(cpi->prob_skip_false, 0); -@@ -1849,7 +1860,10 @@ static int calculate_final_rd_costs(int this_rd, - - if (!disable_skip) - { -- // Test for the condition where skip block will be activated because there are no non zero coefficients and make any necessary adjustment for rate -+ /* Test for the condition where skip block will be activated -+ * because there are no non zero coefficients and make any -+ * necessary adjustment for rate -+ */ - if (cpi->common.mb_no_coeff_skip) - { - int i; -@@ -1874,10 +1888,10 @@ static int calculate_final_rd_costs(int this_rd, - if (tteob == 0) - { - rd->rate2 -= (rd->rate_y + rd->rate_uv); -- //for best_yrd calculation -+ /* for best_yrd calculation */ - rd->rate_uv = 0; - -- // Back out no skip flag costing and add in skip flag costing -+ /* Back out no skip flag costing and add in skip flag costing */ - if (cpi->prob_skip_false) - { - int prob_skip_cost; -@@ -1889,7 +1903,7 @@ static int calculate_final_rd_costs(int this_rd, - } - } - } -- // Calculate the final RD estimate for this mode -+ /* Calculate the final RD estimate for this mode */ - this_rd = RDCOST(x->rdmult, x->rddiv, rd->rate2, rd->distortion2); - if (this_rd < INT_MAX && x->e_mbd.mode_info_context->mbmi.ref_frame - == INTRA_FRAME) -@@ -1953,7 +1967,8 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - int_mv mvp; - int near_sadidx[8] = {0, 1, 2, 3, 4, 5, 6, 7}; - int saddone=0; -- int sr=0; //search range got from mv_pred(). It uses step_param levels. (0-7) -+ /* search range got from mv_pred(). It uses step_param levels. (0-7) */ -+ int sr=0; - - unsigned char *plane[4][3]; - int ref_frame_map[4]; -@@ -1962,6 +1977,11 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - int intra_rd_penalty = 10* vp8_dc_quant(cpi->common.base_qindex, - cpi->common.y1dc_delta_q); - -+#if CONFIG_TEMPORAL_DENOISING -+ unsigned int zero_mv_sse = INT_MAX, best_sse = INT_MAX, -+ best_rd_sse = INT_MAX; -+#endif -+ - mode_mv = mode_mv_sb[sign_bias]; - best_ref_mv.as_int = 0; - best_mode.rd = INT_MAX; -@@ -1994,7 +2014,8 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - get_predictor_pointers(cpi, plane, recon_yoffset, recon_uvoffset); - - *returnintra = INT_MAX; -- cpi->mbs_tested_so_far++; // Count of the number of MBs tested so far this frame -+ /* Count of the number of MBs tested so far this frame */ -+ x->mbs_tested_so_far++; - - x->skip = 0; - -@@ -2005,14 +2026,16 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - int other_cost = 0; - int this_ref_frame = ref_frame_map[vp8_ref_frame_order[mode_index]]; - -- // Test best rd so far against threshold for trying this mode. -- if (best_mode.rd <= cpi->rd_threshes[mode_index]) -+ /* Test best rd so far against threshold for trying this mode. 
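calculate_final_rd_costs ends by folding rate and distortion into one Lagrangian cost through RDCOST, with x->rdmult playing the role of lambda and x->rddiv rescaling distortion. A sketch of the usual fixed-point form (assumed, not copied from the tree):

    /* J = D*DM + lambda*R, with lambda carried as RM in 1/256 units (assumed). */
    #define RDCOST(RM, DM, R, D)  (((128 + (R) * (RM)) >> 8) + ((DM) * (D)))

    /* usage as in the hunk above:
     *   this_rd = RDCOST(x->rdmult, x->rddiv, rd->rate2, rd->distortion2);
     */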
*/ -+ if (best_mode.rd <= x->rd_threshes[mode_index]) - continue; - - if (this_ref_frame < 0) - continue; - -- // These variables hold are rolling total cost and distortion for this mode -+ /* These variables hold are rolling total cost and distortion for -+ * this mode -+ */ - rd.rate2 = 0; - rd.distortion2 = 0; - -@@ -2021,9 +2044,10 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - x->e_mbd.mode_info_context->mbmi.mode = this_mode; - x->e_mbd.mode_info_context->mbmi.ref_frame = this_ref_frame; - -- // Only consider ZEROMV/ALTREF_FRAME for alt ref frame, -- // unless ARNR filtering is enabled in which case we want -- // an unfiltered alternative -+ /* Only consider ZEROMV/ALTREF_FRAME for alt ref frame, -+ * unless ARNR filtering is enabled in which case we want -+ * an unfiltered alternative -+ */ - if (cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) - { - if (this_mode != ZEROMV || x->e_mbd.mode_info_context->mbmi.ref_frame != ALTREF_FRAME) -@@ -2045,45 +2069,56 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - } - } - -- // Check to see if the testing frequency for this mode is at its max -- // If so then prevent it from being tested and increase the threshold for its testing -- if (cpi->mode_test_hit_counts[mode_index] && (cpi->mode_check_freq[mode_index] > 1)) -+ /* Check to see if the testing frequency for this mode is at its -+ * max If so then prevent it from being tested and increase the -+ * threshold for its testing -+ */ -+ if (x->mode_test_hit_counts[mode_index] && (cpi->mode_check_freq[mode_index] > 1)) - { -- if (cpi->mbs_tested_so_far <= cpi->mode_check_freq[mode_index] * cpi->mode_test_hit_counts[mode_index]) -+ if (x->mbs_tested_so_far <= cpi->mode_check_freq[mode_index] * x->mode_test_hit_counts[mode_index]) - { -- // Increase the threshold for coding this mode to make it less likely to be chosen -- cpi->rd_thresh_mult[mode_index] += 4; -+ /* Increase the threshold for coding this mode to make it -+ * less likely to be chosen -+ */ -+ x->rd_thresh_mult[mode_index] += 4; - -- if (cpi->rd_thresh_mult[mode_index] > MAX_THRESHMULT) -- cpi->rd_thresh_mult[mode_index] = MAX_THRESHMULT; -+ if (x->rd_thresh_mult[mode_index] > MAX_THRESHMULT) -+ x->rd_thresh_mult[mode_index] = MAX_THRESHMULT; - -- cpi->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) * cpi->rd_thresh_mult[mode_index]; -+ x->rd_threshes[mode_index] = -+ (cpi->rd_baseline_thresh[mode_index] >> 7) * -+ x->rd_thresh_mult[mode_index]; - - continue; - } - } - -- // We have now reached the point where we are going to test the current mode so increment the counter for the number of times it has been tested -- cpi->mode_test_hit_counts[mode_index] ++; -+ /* We have now reached the point where we are going to test the -+ * current mode so increment the counter for the number of times -+ * it has been tested -+ */ -+ x->mode_test_hit_counts[mode_index] ++; - -- // Experimental code. Special case for gf and arf zeromv modes. Increase zbin size to supress noise -- if (cpi->zbin_mode_boost_enabled) -+ /* Experimental code. Special case for gf and arf zeromv modes. 
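The gate above throttles how often an expensive mode is evaluated: once a mode has been tried as often as its check frequency allows, it is skipped for this macroblock and its RD threshold multiplier is inflated so it becomes even less attractive next time. A compact restatement with illustrative names (a sketch, not the tree's own helper):

    /* Returns nonzero if the mode should be skipped this time around. */
    static int throttle_mode(int mode, int mbs_tested_so_far,
                             const int *check_freq, int *hit_counts,
                             int *thresh_mult, int *threshes,
                             const int *baseline_thresh, int max_mult)
    {
        if (hit_counts[mode] && check_freq[mode] > 1 &&
            mbs_tested_so_far <= check_freq[mode] * hit_counts[mode]) {
            thresh_mult[mode] += 4;               /* make it even less likely  */
            if (thresh_mult[mode] > max_mult)
                thresh_mult[mode] = max_mult;
            threshes[mode] = (baseline_thresh[mode] >> 7) * thresh_mult[mode];
            return 1;                             /* caller skips this mode    */
        }
        hit_counts[mode]++;                       /* mode will be tested now   */
        return 0;
    }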
-+ * Increase zbin size to supress noise -+ */ -+ if (x->zbin_mode_boost_enabled) - { - if ( this_ref_frame == INTRA_FRAME ) -- cpi->zbin_mode_boost = 0; -+ x->zbin_mode_boost = 0; - else - { - if (vp8_mode_order[mode_index] == ZEROMV) - { - if (this_ref_frame != LAST_FRAME) -- cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST; -+ x->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST; - else -- cpi->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST; -+ x->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST; - } - else if (vp8_mode_order[mode_index] == SPLITMV) -- cpi->zbin_mode_boost = 0; -+ x->zbin_mode_boost = 0; - else -- cpi->zbin_mode_boost = MV_ZBIN_BOOST; -+ x->zbin_mode_boost = MV_ZBIN_BOOST; - } - - vp8_update_zbin_extra(cpi, x); -@@ -2091,7 +2126,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - - if(!uv_intra_done && this_ref_frame == INTRA_FRAME) - { -- rd_pick_intra_mbuv_mode(cpi, x, &uv_intra_rate, -+ rd_pick_intra_mbuv_mode(x, &uv_intra_rate, - &uv_intra_rate_tokenonly, - &uv_intra_distortion); - uv_intra_mode = x->e_mbd.mode_info_context->mbmi.uv_mode; -@@ -2113,9 +2148,11 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - { - int tmp_rd; - -- // Note the rate value returned here includes the cost of coding the BPRED mode : x->mbmode_cost[x->e_mbd.frame_type][BPRED]; -+ /* Note the rate value returned here includes the cost of -+ * coding the BPRED mode: x->mbmode_cost[x->e_mbd.frame_type][BPRED] -+ */ - int distortion; -- tmp_rd = rd_pick_intra4x4mby_modes(cpi, x, &rate, &rd.rate_y, &distortion, best_mode.yrd); -+ tmp_rd = rd_pick_intra4x4mby_modes(x, &rate, &rd.rate_y, &distortion, best_mode.yrd); - rd.rate2 += rate; - rd.distortion2 += distortion; - -@@ -2140,8 +2177,10 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - int this_rd_thresh; - int distortion; - -- this_rd_thresh = (vp8_ref_frame_order[mode_index] == 1) ? cpi->rd_threshes[THR_NEW1] : cpi->rd_threshes[THR_NEW3]; -- this_rd_thresh = (vp8_ref_frame_order[mode_index] == 2) ? cpi->rd_threshes[THR_NEW2] : this_rd_thresh; -+ this_rd_thresh = (vp8_ref_frame_order[mode_index] == 1) ? -+ x->rd_threshes[THR_NEW1] : x->rd_threshes[THR_NEW3]; -+ this_rd_thresh = (vp8_ref_frame_order[mode_index] == 2) ? -+ x->rd_threshes[THR_NEW2] : this_rd_thresh; - - tmp_rd = vp8_rd_pick_best_mbsegmentation(cpi, x, &best_ref_mv, - best_mode.yrd, mdcounts, -@@ -2150,10 +2189,12 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - rd.rate2 += rate; - rd.distortion2 += distortion; - -- // If even the 'Y' rd value of split is higher than best so far then dont bother looking at UV -+ /* If even the 'Y' rd value of split is higher than best so far -+ * then dont bother looking at UV -+ */ - if (tmp_rd < best_mode.yrd) - { -- // Now work out UV cost and add it in -+ /* Now work out UV cost and add it in */ - rd_inter4x4_uv(cpi, x, &rd.rate_uv, &rd.distortion_uv, cpi->common.full_pixel); - rd.rate2 += rd.rate_uv; - rd.distortion2 += rd.distortion_uv; -@@ -2225,7 +2266,9 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - mvp_full.as_mv.col = mvp.as_mv.col>>3; - mvp_full.as_mv.row = mvp.as_mv.row>>3; - -- // Get intersection of UMV window and valid MV window to reduce # of checks in diamond search. -+ /* Get intersection of UMV window and valid MV window to -+ * reduce # of checks in diamond search. 
-+ */ - if (x->mv_col_min < col_min ) - x->mv_col_min = col_min; - if (x->mv_col_max > col_max ) -@@ -2235,11 +2278,11 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - if (x->mv_row_max > row_max ) - x->mv_row_max = row_max; - -- //adjust search range according to sr from mv prediction -+ /* adjust search range according to sr from mv prediction */ - if(sr > step_param) - step_param = sr; - -- // Initial step/diamond search -+ /* Initial step/diamond search */ - { - bestsme = cpi->diamond_search_sad(x, b, d, &mvp_full, &d->bmi.mv, - step_param, sadpb, &num00, -@@ -2247,7 +2290,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - x->mvcost, &best_ref_mv); - mode_mv[NEWMV].as_int = d->bmi.mv.as_int; - -- // Further step/diamond searches as necessary -+ /* Further step/diamond searches as necessary */ - n = 0; - further_steps = (cpi->sf.max_step_search_steps - 1) - step_param; - -@@ -2293,11 +2336,8 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - { - int search_range; - -- //It seems not a good way to set search_range. Need further investigation. -- //search_range = MAXF(abs((mvp.row>>3) - d->bmi.mv.as_mv.row), abs((mvp.col>>3) - d->bmi.mv.as_mv.col)); - search_range = 8; - -- //thissme = cpi->full_search_sad(x, b, d, &d->bmi.mv.as_mv, sadpb, search_range, &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &best_ref_mv); - thissme = cpi->refining_search_sad(x, b, d, &d->bmi.mv, sadpb, - search_range, &cpi->fn_ptr[BLOCK_16X16], - x->mvcost, &best_ref_mv); -@@ -2330,24 +2370,31 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - - mode_mv[NEWMV].as_int = d->bmi.mv.as_int; - -- // Add the new motion vector cost to our rolling cost variable -+ /* Add the new motion vector cost to our rolling cost variable */ - rd.rate2 += vp8_mv_bit_cost(&mode_mv[NEWMV], &best_ref_mv, x->mvcost, 96); - } - - case NEARESTMV: - case NEARMV: -- // Clip "next_nearest" so that it does not extend to far out of image -+ /* Clip "next_nearest" so that it does not extend to far out -+ * of image -+ */ - vp8_clamp_mv2(&mode_mv[this_mode], xd); - -- // Do not bother proceeding if the vector (from newmv,nearest or near) is 0,0 as this should then be coded using the zeromv mode. -+ /* Do not bother proceeding if the vector (from newmv, nearest -+ * or near) is 0,0 as this should then be coded using the zeromv -+ * mode. -+ */ - if (((this_mode == NEARMV) || (this_mode == NEARESTMV)) && (mode_mv[this_mode].as_int == 0)) - continue; - - case ZEROMV: - -- // Trap vectors that reach beyond the UMV borders -- // Note that ALL New MV, Nearest MV Near MV and Zero MV code drops through to this point -- // because of the lack of break statements in the previous two cases. -+ /* Trap vectors that reach beyond the UMV borders -+ * Note that ALL New MV, Nearest MV Near MV and Zero MV code -+ * drops through to this point because of the lack of break -+ * statements in the previous two cases. 
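The window intersection above narrows the search to motion vectors that are both inside the UMV border and valid for this block, so the diamond search never probes unusable positions. A tiny illustrative helper:

    /* Clamp an existing [lo, hi] range to its intersection with [other_lo, other_hi]. */
    static void intersect_range(int *lo, int *hi, int other_lo, int other_hi)
    {
        if (*lo < other_lo)
            *lo = other_lo;
        if (*hi > other_hi)
            *hi = other_hi;
    }

    /* e.g. intersect_range(&x->mv_col_min, &x->mv_col_max, col_min, col_max);
     *      and likewise for the row limits. */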
-+ */ - if (((mode_mv[this_mode].as_mv.row >> 3) < x->mv_row_min) || ((mode_mv[this_mode].as_mv.row >> 3) > x->mv_row_max) || - ((mode_mv[this_mode].as_mv.col >> 3) < x->mv_col_min) || ((mode_mv[this_mode].as_mv.col >> 3) > x->mv_col_max)) - continue; -@@ -2365,35 +2412,52 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - disable_skip, uv_intra_tteob, - intra_rd_penalty, cpi, x); - -- // Keep record of best intra distortion -+ /* Keep record of best intra distortion */ - if ((x->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME) && - (this_rd < best_mode.intra_rd) ) - { - best_mode.intra_rd = this_rd; - *returnintra = rd.distortion2 ; - } -- - #if CONFIG_TEMPORAL_DENOISING - if (cpi->oxcf.noise_sensitivity) - { -- // Store the best NEWMV in x for later use in the denoiser. -- // We are restricted to the LAST_FRAME since the denoiser only keeps -- // one filter state. -- if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV && -- x->e_mbd.mode_info_context->mbmi.ref_frame == LAST_FRAME) -- { -- x->e_mbd.best_sse_inter_mode = NEWMV; -- x->e_mbd.best_sse_mv = x->e_mbd.mode_info_context->mbmi.mv; -- x->e_mbd.need_to_clamp_best_mvs = -- x->e_mbd.mode_info_context->mbmi.need_to_clamp_mvs; -- } -+ unsigned int sse; -+ vp8_get_inter_mbpred_error(x,&cpi->fn_ptr[BLOCK_16X16],&sse, -+ mode_mv[this_mode]); -+ -+ if (sse < best_rd_sse) -+ best_rd_sse = sse; -+ -+ /* Store for later use by denoiser. */ -+ if (this_mode == ZEROMV && sse < zero_mv_sse ) -+ { -+ zero_mv_sse = sse; -+ x->best_zeromv_reference_frame = -+ x->e_mbd.mode_info_context->mbmi.ref_frame; -+ } -+ -+ /* Store the best NEWMV in x for later use in the denoiser. */ -+ if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV && -+ sse < best_sse) -+ { -+ best_sse = sse; -+ vp8_get_inter_mbpred_error(x,&cpi->fn_ptr[BLOCK_16X16],&best_sse, -+ mode_mv[this_mode]); -+ x->best_sse_inter_mode = NEWMV; -+ x->best_sse_mv = x->e_mbd.mode_info_context->mbmi.mv; -+ x->need_to_clamp_best_mvs = -+ x->e_mbd.mode_info_context->mbmi.need_to_clamp_mvs; -+ x->best_reference_frame = -+ x->e_mbd.mode_info_context->mbmi.ref_frame; -+ } - } - #endif - -- // Did this mode help.. i.i is it the new best mode -+ /* Did this mode help.. i.i is it the new best mode */ - if (this_rd < best_mode.rd || x->skip) - { -- // Note index of best mode so far -+ /* Note index of best mode so far */ - best_mode_index = mode_index; - *returnrate = rd.rate2; - *returndistortion = rd.distortion2; -@@ -2406,95 +2470,103 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - update_best_mode(&best_mode, this_rd, &rd, other_cost, x); - - -- // Testing this mode gave rise to an improvement in best error score. Lower threshold a bit for next time -- cpi->rd_thresh_mult[mode_index] = (cpi->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) ? cpi->rd_thresh_mult[mode_index] - 2 : MIN_THRESHMULT; -- cpi->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) * cpi->rd_thresh_mult[mode_index]; -+ /* Testing this mode gave rise to an improvement in best error -+ * score. Lower threshold a bit for next time -+ */ -+ x->rd_thresh_mult[mode_index] = -+ (x->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) ? -+ x->rd_thresh_mult[mode_index] - 2 : MIN_THRESHMULT; - } - -- // If the mode did not help improve the best error case then raise the threshold for testing that mode next time around. -+ /* If the mode did not help improve the best error case then raise -+ * the threshold for testing that mode next time around. 
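Motion vectors in this code are stored in 1/8-pel units, so the border trap above shifts by 3 to compare whole pixels against the UMV window. A minimal restatement:

    /* Nonzero when a 1/8-pel MV (3 fractional bits), converted to full pixels,
     * leaves the allowed window; mirrors the fall-through trap above. */
    static int mv_outside_umv_window(int row_q3, int col_q3,
                                     int row_min, int row_max,
                                     int col_min, int col_max)
    {
        const int row = row_q3 >> 3;   /* 1/8-pel -> full-pel */
        const int col = col_q3 >> 3;
        return row < row_min || row > row_max || col < col_min || col > col_max;
    }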
-+ */ - else - { -- cpi->rd_thresh_mult[mode_index] += 4; -- -- if (cpi->rd_thresh_mult[mode_index] > MAX_THRESHMULT) -- cpi->rd_thresh_mult[mode_index] = MAX_THRESHMULT; -+ x->rd_thresh_mult[mode_index] += 4; - -- cpi->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) * cpi->rd_thresh_mult[mode_index]; -+ if (x->rd_thresh_mult[mode_index] > MAX_THRESHMULT) -+ x->rd_thresh_mult[mode_index] = MAX_THRESHMULT; - } -+ x->rd_threshes[mode_index] = -+ (cpi->rd_baseline_thresh[mode_index] >> 7) * -+ x->rd_thresh_mult[mode_index]; - - if (x->skip) - break; - - } - -- // Reduce the activation RD thresholds for the best choice mode -+ /* Reduce the activation RD thresholds for the best choice mode */ - if ((cpi->rd_baseline_thresh[best_mode_index] > 0) && (cpi->rd_baseline_thresh[best_mode_index] < (INT_MAX >> 2))) - { -- int best_adjustment = (cpi->rd_thresh_mult[best_mode_index] >> 2); -- -- cpi->rd_thresh_mult[best_mode_index] = (cpi->rd_thresh_mult[best_mode_index] >= (MIN_THRESHMULT + best_adjustment)) ? cpi->rd_thresh_mult[best_mode_index] - best_adjustment : MIN_THRESHMULT; -- cpi->rd_threshes[best_mode_index] = (cpi->rd_baseline_thresh[best_mode_index] >> 7) * cpi->rd_thresh_mult[best_mode_index]; -- -- // If we chose a split mode then reset the new MV thresholds as well -- /*if ( vp8_mode_order[best_mode_index] == SPLITMV ) -- { -- best_adjustment = 4; //(cpi->rd_thresh_mult[THR_NEWMV] >> 4); -- cpi->rd_thresh_mult[THR_NEWMV] = (cpi->rd_thresh_mult[THR_NEWMV] >= (MIN_THRESHMULT+best_adjustment)) ? cpi->rd_thresh_mult[THR_NEWMV]-best_adjustment: MIN_THRESHMULT; -- cpi->rd_threshes[THR_NEWMV] = (cpi->rd_baseline_thresh[THR_NEWMV] >> 7) * cpi->rd_thresh_mult[THR_NEWMV]; -- -- best_adjustment = 4; //(cpi->rd_thresh_mult[THR_NEWG] >> 4); -- cpi->rd_thresh_mult[THR_NEWG] = (cpi->rd_thresh_mult[THR_NEWG] >= (MIN_THRESHMULT+best_adjustment)) ? cpi->rd_thresh_mult[THR_NEWG]-best_adjustment: MIN_THRESHMULT; -- cpi->rd_threshes[THR_NEWG] = (cpi->rd_baseline_thresh[THR_NEWG] >> 7) * cpi->rd_thresh_mult[THR_NEWG]; -- -- best_adjustment = 4; //(cpi->rd_thresh_mult[THR_NEWA] >> 4); -- cpi->rd_thresh_mult[THR_NEWA] = (cpi->rd_thresh_mult[THR_NEWA] >= (MIN_THRESHMULT+best_adjustment)) ? cpi->rd_thresh_mult[THR_NEWA]-best_adjustment: MIN_THRESHMULT; -- cpi->rd_threshes[THR_NEWA] = (cpi->rd_baseline_thresh[THR_NEWA] >> 7) * cpi->rd_thresh_mult[THR_NEWA]; -- }*/ -- -+ int best_adjustment = (x->rd_thresh_mult[best_mode_index] >> 2); -+ -+ x->rd_thresh_mult[best_mode_index] = -+ (x->rd_thresh_mult[best_mode_index] >= -+ (MIN_THRESHMULT + best_adjustment)) ? -+ x->rd_thresh_mult[best_mode_index] - best_adjustment : -+ MIN_THRESHMULT; -+ x->rd_threshes[best_mode_index] = -+ (cpi->rd_baseline_thresh[best_mode_index] >> 7) * -+ x->rd_thresh_mult[best_mode_index]; - } - -- // Note how often each mode chosen as best -+ /* Note how often each mode chosen as best */ - cpi->mode_chosen_counts[best_mode_index] ++; - - #if CONFIG_TEMPORAL_DENOISING - if (cpi->oxcf.noise_sensitivity) - { -- if (x->e_mbd.best_sse_inter_mode == DC_PRED) { -- // No best MV found. -- x->e_mbd.best_sse_inter_mode = best_mode.mbmode.mode; -- x->e_mbd.best_sse_mv = best_mode.mbmode.mv; -- x->e_mbd.need_to_clamp_best_mvs = best_mode.mbmode.need_to_clamp_mvs; -- } -- -- // TODO(holmer): No SSEs are calculated in rdopt.c. What else can be used? -- vp8_denoiser_denoise_mb(&cpi->denoiser, x, 0, 0, -- recon_yoffset, recon_uvoffset); -- // Reevalute ZEROMV if the current mode is INTRA. 
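Together with the per-mode throttle earlier, this forms a simple feedback loop on the RD activation thresholds: a mode that improves the best cost gets its multiplier lowered (tested more eagerly next time), one that does not gets it raised, and the winning mode receives an extra reduction after the search. A one-function sketch with illustrative names:

    /* mode_helped: nonzero if the mode just produced a new best RD cost. */
    static void adapt_mode_threshold(int mode, int mode_helped,
                                     int *thresh_mult, int *threshes,
                                     const int *baseline_thresh,
                                     int min_mult, int max_mult)
    {
        if (mode_helped)
            thresh_mult[mode] = (thresh_mult[mode] >= min_mult + 2)
                                    ? thresh_mult[mode] - 2 : min_mult;
        else if ((thresh_mult[mode] += 4) > max_mult)
            thresh_mult[mode] = max_mult;

        threshes[mode] = (baseline_thresh[mode] >> 7) * thresh_mult[mode];
    }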
-- if (best_mode.mbmode.ref_frame == INTRA_FRAME) -- { -- int this_rd = INT_MAX; -- int disable_skip = 0; -- int other_cost = 0; -- vpx_memset(&rd, 0, sizeof(rd)); -- x->e_mbd.mode_info_context->mbmi.ref_frame = LAST_FRAME; -- rd.rate2 += x->ref_frame_cost[LAST_FRAME]; -- rd.rate2 += vp8_cost_mv_ref(ZEROMV, mdcounts); -- x->e_mbd.mode_info_context->mbmi.mode = ZEROMV; -- x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED; -- x->e_mbd.mode_info_context->mbmi.mv.as_int = 0; -- this_rd = evaluate_inter_mode_rd(mdcounts, &rd, &disable_skip, cpi, x); -- this_rd = calculate_final_rd_costs(this_rd, &rd, &other_cost, -- disable_skip, uv_intra_tteob, -- intra_rd_penalty, cpi, x); -- if (this_rd < best_mode.rd || x->skip) -+ if (x->best_sse_inter_mode == DC_PRED) - { -- // Note index of best mode so far -- best_mode_index = mode_index; -- *returnrate = rd.rate2; -- *returndistortion = rd.distortion2; -- update_best_mode(&best_mode, this_rd, &rd, other_cost, x); -+ /* No best MV found. */ -+ x->best_sse_inter_mode = best_mode.mbmode.mode; -+ x->best_sse_mv = best_mode.mbmode.mv; -+ x->need_to_clamp_best_mvs = best_mode.mbmode.need_to_clamp_mvs; -+ x->best_reference_frame = best_mode.mbmode.ref_frame; -+ best_sse = best_rd_sse; -+ } -+ vp8_denoiser_denoise_mb(&cpi->denoiser, x, best_sse, zero_mv_sse, -+ recon_yoffset, recon_uvoffset); -+ -+ -+ /* Reevaluate ZEROMV after denoising. */ -+ if (best_mode.mbmode.ref_frame == INTRA_FRAME && -+ x->best_zeromv_reference_frame != INTRA_FRAME) -+ { -+ int this_rd = INT_MAX; -+ int disable_skip = 0; -+ int other_cost = 0; -+ int this_ref_frame = x->best_zeromv_reference_frame; -+ rd.rate2 = x->ref_frame_cost[this_ref_frame] + -+ vp8_cost_mv_ref(ZEROMV, mdcounts); -+ rd.distortion2 = 0; -+ -+ /* set up the proper prediction buffers for the frame */ -+ x->e_mbd.mode_info_context->mbmi.ref_frame = this_ref_frame; -+ x->e_mbd.pre.y_buffer = plane[this_ref_frame][0]; -+ x->e_mbd.pre.u_buffer = plane[this_ref_frame][1]; -+ x->e_mbd.pre.v_buffer = plane[this_ref_frame][2]; -+ -+ x->e_mbd.mode_info_context->mbmi.mode = ZEROMV; -+ x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED; -+ x->e_mbd.mode_info_context->mbmi.mv.as_int = 0; -+ -+ this_rd = evaluate_inter_mode_rd(mdcounts, &rd, &disable_skip, cpi, x); -+ this_rd = calculate_final_rd_costs(this_rd, &rd, &other_cost, -+ disable_skip, uv_intra_tteob, -+ intra_rd_penalty, cpi, x); -+ if (this_rd < best_mode.rd || x->skip) -+ { -+ /* Note index of best mode so far */ -+ best_mode_index = mode_index; -+ *returnrate = rd.rate2; -+ *returndistortion = rd.distortion2; -+ update_best_mode(&best_mode, this_rd, &rd, other_cost, x); -+ } - } -- } -+ - } - #endif - -@@ -2512,7 +2584,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - } - - -- // macroblock modes -+ /* macroblock modes */ - vpx_memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mode.mbmode, sizeof(MB_MODE_INFO)); - - if (best_mode.mbmode.mode == B_PRED) -@@ -2539,7 +2611,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, - rd_update_mvcount(cpi, x, &best_ref_mv); - } - --void vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate_) -+void vp8_rd_pick_intra_mode(MACROBLOCK *x, int *rate_) - { - int error4x4, error16x16; - int rate4x4, rate16x16 = 0, rateuv; -@@ -2551,15 +2623,13 @@ void vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate_) - - x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME; - -- rd_pick_intra_mbuv_mode(cpi, x, &rateuv, &rateuv_tokenonly, &distuv); -+ 
rd_pick_intra_mbuv_mode(x, &rateuv, &rateuv_tokenonly, &distuv); - rate = rateuv; - -- error16x16 = rd_pick_intra16x16mby_mode(cpi, x, -- &rate16x16, &rate16x16_tokenonly, -+ error16x16 = rd_pick_intra16x16mby_mode(x, &rate16x16, &rate16x16_tokenonly, - &dist16x16); - -- error4x4 = rd_pick_intra4x4mby_modes(cpi, x, -- &rate4x4, &rate4x4_tokenonly, -+ error4x4 = rd_pick_intra4x4mby_modes(x, &rate4x4, &rate4x4_tokenonly, - &dist4x4, error16x16); - - if (error4x4 < error16x16) -diff --git a/vp8/encoder/rdopt.h b/vp8/encoder/rdopt.h -index db939f9..1e11fa7 100644 ---- a/vp8/encoder/rdopt.h -+++ b/vp8/encoder/rdopt.h -@@ -65,9 +65,9 @@ static void insertsortsad(int arr[],int idx[], int len) - } - } - --extern void vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue); -+extern void vp8_initialize_rd_consts(VP8_COMP *cpi, MACROBLOCK *x, int Qvalue); - extern void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int recon_uvoffset, int *returnrate, int *returndistortion, int *returnintra); --extern void vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate); -+extern void vp8_rd_pick_intra_mode(MACROBLOCK *x, int *rate); - - - static void get_plane_pointers(const YV12_BUFFER_CONFIG *fb, -@@ -86,15 +86,15 @@ static void get_predictor_pointers(const VP8_COMP *cpi, - unsigned int recon_yoffset, - unsigned int recon_uvoffset) - { -- if (cpi->ref_frame_flags & VP8_LAST_FLAG) -+ if (cpi->ref_frame_flags & VP8_LAST_FRAME) - get_plane_pointers(&cpi->common.yv12_fb[cpi->common.lst_fb_idx], - plane[LAST_FRAME], recon_yoffset, recon_uvoffset); - -- if (cpi->ref_frame_flags & VP8_GOLD_FLAG) -+ if (cpi->ref_frame_flags & VP8_GOLD_FRAME) - get_plane_pointers(&cpi->common.yv12_fb[cpi->common.gld_fb_idx], - plane[GOLDEN_FRAME], recon_yoffset, recon_uvoffset); - -- if (cpi->ref_frame_flags & VP8_ALT_FLAG) -+ if (cpi->ref_frame_flags & VP8_ALTR_FRAME) - get_plane_pointers(&cpi->common.yv12_fb[cpi->common.alt_fb_idx], - plane[ALTREF_FRAME], recon_yoffset, recon_uvoffset); - } -@@ -106,11 +106,11 @@ static void get_reference_search_order(const VP8_COMP *cpi, - int i=0; - - ref_frame_map[i++] = INTRA_FRAME; -- if (cpi->ref_frame_flags & VP8_LAST_FLAG) -+ if (cpi->ref_frame_flags & VP8_LAST_FRAME) - ref_frame_map[i++] = LAST_FRAME; -- if (cpi->ref_frame_flags & VP8_GOLD_FLAG) -+ if (cpi->ref_frame_flags & VP8_GOLD_FRAME) - ref_frame_map[i++] = GOLDEN_FRAME; -- if (cpi->ref_frame_flags & VP8_ALT_FLAG) -+ if (cpi->ref_frame_flags & VP8_ALTR_FRAME) - ref_frame_map[i++] = ALTREF_FRAME; - for(; i<4; i++) - ref_frame_map[i] = -1; -diff --git a/vp8/encoder/segmentation.c b/vp8/encoder/segmentation.c -index fc0967d..37972e2 100644 ---- a/vp8/encoder/segmentation.c -+++ b/vp8/encoder/segmentation.c -@@ -22,22 +22,24 @@ void vp8_update_gf_useage_maps(VP8_COMP *cpi, VP8_COMMON *cm, MACROBLOCK *x) - - if ((cm->frame_type == KEY_FRAME) || (cm->refresh_golden_frame)) - { -- // Reset Gf useage monitors -+ /* Reset Gf useage monitors */ - vpx_memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols)); - cpi->gf_active_count = cm->mb_rows * cm->mb_cols; - } - else - { -- // for each macroblock row in image -+ /* for each macroblock row in image */ - for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) - { -- // for each macroblock col in image -+ /* for each macroblock col in image */ - for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) - { - -- // If using golden then set GF active flag if not already set. 
-- // If using last frame 0,0 mode then leave flag as it is -- // else if using non 0,0 motion or intra modes then clear flag if it is currently set -+ /* If using golden then set GF active flag if not already set. -+ * If using last frame 0,0 mode then leave flag as it is -+ * else if using non 0,0 motion or intra modes then clear -+ * flag if it is currently set -+ */ - if ((this_mb_mode_info->mbmi.ref_frame == GOLDEN_FRAME) || (this_mb_mode_info->mbmi.ref_frame == ALTREF_FRAME)) - { - if (*(x->gf_active_ptr) == 0) -@@ -52,12 +54,12 @@ void vp8_update_gf_useage_maps(VP8_COMP *cpi, VP8_COMMON *cm, MACROBLOCK *x) - cpi->gf_active_count--; - } - -- x->gf_active_ptr++; // Step onto next entry -- this_mb_mode_info++; // skip to next mb -+ x->gf_active_ptr++; /* Step onto next entry */ -+ this_mb_mode_info++; /* skip to next mb */ - - } - -- // this is to account for the border -+ /* this is to account for the border */ - this_mb_mode_info++; - } - } -diff --git a/vp8/encoder/temporal_filter.c b/vp8/encoder/temporal_filter.c -index 6c61b36..b83ae89 100644 ---- a/vp8/encoder/temporal_filter.c -+++ b/vp8/encoder/temporal_filter.c -@@ -30,8 +30,8 @@ - #include - #include - --#define ALT_REF_MC_ENABLED 1 // dis/enable MC in AltRef filtering --#define ALT_REF_SUBPEL_ENABLED 1 // dis/enable subpel in MC AltRef filtering -+#define ALT_REF_MC_ENABLED 1 /* dis/enable MC in AltRef filtering */ -+#define ALT_REF_SUBPEL_ENABLED 1 /* dis/enable subpel in MC AltRef filtering */ - - #if VP8_TEMPORAL_ALT_REF - -@@ -50,7 +50,7 @@ static void vp8_temporal_filter_predictors_mb_c - int offset; - unsigned char *yptr, *uptr, *vptr; - -- // Y -+ /* Y */ - yptr = y_mb_ptr + (mv_row >> 3) * stride + (mv_col >> 3); - - if ((mv_row | mv_col) & 7) -@@ -63,7 +63,7 @@ static void vp8_temporal_filter_predictors_mb_c - vp8_copy_mem16x16(yptr, stride, &pred[0], 16); - } - -- // U & V -+ /* U & V */ - mv_row >>= 1; - mv_col >>= 1; - stride = (stride + 1) >> 1; -@@ -109,9 +109,10 @@ void vp8_temporal_filter_apply_c - int pixel_value = *frame2++; - - modifier = src_byte - pixel_value; -- // This is an integer approximation of: -- // float coeff = (3.0 * modifer * modifier) / pow(2, strength); -- // modifier = (int)roundf(coeff > 16 ? 0 : 16-coeff); -+ /* This is an integer approximation of: -+ * float coeff = (3.0 * modifer * modifier) / pow(2, strength); -+ * modifier = (int)roundf(coeff > 16 ? 
0 : 16-coeff); -+ */ - modifier *= modifier; - modifier *= 3; - modifier += 1 << (strength - 1); -@@ -134,7 +135,6 @@ void vp8_temporal_filter_apply_c - } - - #if ALT_REF_MC_ENABLED --static int dummy_cost[2*mv_max+1]; - - static int vp8_temporal_filter_find_matching_mb_c - ( -@@ -155,10 +155,7 @@ static int vp8_temporal_filter_find_matching_mb_c - int_mv best_ref_mv1; - int_mv best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */ - -- int *mvcost[2] = { &dummy_cost[mv_max+1], &dummy_cost[mv_max+1] }; -- int *mvsadcost[2] = { &dummy_cost[mv_max+1], &dummy_cost[mv_max+1] }; -- -- // Save input state -+ /* Save input state */ - unsigned char **base_src = b->base_src; - int src = b->src; - int src_stride = b->src_stride; -@@ -170,7 +167,7 @@ static int vp8_temporal_filter_find_matching_mb_c - best_ref_mv1_full.as_mv.col = best_ref_mv1.as_mv.col >>3; - best_ref_mv1_full.as_mv.row = best_ref_mv1.as_mv.row >>3; - -- // Setup frame pointers -+ /* Setup frame pointers */ - b->base_src = &arf_frame->y_buffer; - b->src_stride = arf_frame->y_stride; - b->src = mb_offset; -@@ -179,7 +176,7 @@ static int vp8_temporal_filter_find_matching_mb_c - x->e_mbd.pre.y_stride = frame_ptr->y_stride; - d->offset = mb_offset; - -- // Further step/diamond searches as necessary -+ /* Further step/diamond searches as necessary */ - if (cpi->Speed < 8) - { - step_param = cpi->sf.first_step + (cpi->Speed > 5); -@@ -189,29 +186,29 @@ static int vp8_temporal_filter_find_matching_mb_c - step_param = cpi->sf.first_step + 2; - } - -- /*cpi->sf.search_method == HEX*/ -- // TODO Check that the 16x16 vf & sdf are selected here -- bestsme = vp8_hex_search(x, b, d, -- &best_ref_mv1_full, &d->bmi.mv, -- step_param, -- sadpb, -- &cpi->fn_ptr[BLOCK_16X16], -- mvsadcost, mvcost, &best_ref_mv1); -+ /* TODO Check that the 16x16 vf & sdf are selected here */ -+ /* Ignore mv costing by sending NULL cost arrays */ -+ bestsme = vp8_hex_search(x, b, d, &best_ref_mv1_full, &d->bmi.mv, -+ step_param, sadpb, -+ &cpi->fn_ptr[BLOCK_16X16], -+ NULL, NULL, &best_ref_mv1); - - #if ALT_REF_SUBPEL_ENABLED -- // Try sub-pixel MC? -- //if (bestsme > error_thresh && bestsme < INT_MAX) -+ /* Try sub-pixel MC? */ - { - int distortion; - unsigned int sse; -+ /* Ignore mv costing by sending NULL cost array */ - bestsme = cpi->find_fractional_mv_step(x, b, d, -- &d->bmi.mv, &best_ref_mv1, -- x->errorperbit, &cpi->fn_ptr[BLOCK_16X16], -- mvcost, &distortion, &sse); -+ &d->bmi.mv, -+ &best_ref_mv1, -+ x->errorperbit, -+ &cpi->fn_ptr[BLOCK_16X16], -+ NULL, &distortion, &sse); - } - #endif - -- // Save input state -+ /* Save input state */ - b->base_src = base_src; - b->src = src; - b->src_stride = src_stride; -@@ -246,7 +243,7 @@ static void vp8_temporal_filter_iterate_c - unsigned char *dst1, *dst2; - DECLARE_ALIGNED_ARRAY(16, unsigned char, predictor, 16*16 + 8*8 + 8*8); - -- // Save input state -+ /* Save input state */ - unsigned char *y_buffer = mbd->pre.y_buffer; - unsigned char *u_buffer = mbd->pre.u_buffer; - unsigned char *v_buffer = mbd->pre.v_buffer; -@@ -254,16 +251,17 @@ static void vp8_temporal_filter_iterate_c - for (mb_row = 0; mb_row < mb_rows; mb_row++) - { - #if ALT_REF_MC_ENABLED -- // Source frames are extended to 16 pixels. This is different than -- // L/A/G reference frames that have a border of 32 (VP8BORDERINPIXELS) -- // A 6 tap filter is used for motion search. This requires 2 pixels -- // before and 3 pixels after. So the largest Y mv on a border would -- // then be 16 - 3. 
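The integer sequence above (square, times 3, add a rounding bias, shift by strength) replaces the float expression quoted in the comment. Assuming the usual continuation of this routine (shift, clamp to 16, invert), the per-pixel temporal-filter weight can be sketched as a standalone function:

    /* Approximates round(max(0, 16 - 3*d*d / 2^strength)) using integer math
     * only; the clamp-and-invert tail is assumed from context, not quoted above. */
    static int temporal_filter_pixel_weight(int src_byte, int pixel_value,
                                            int strength)
    {
        int modifier = src_byte - pixel_value;  /* d                        */
        modifier *= modifier;                   /* d^2                      */
        modifier *= 3;                          /* 3 * d^2                  */
        modifier += 1 << (strength - 1);        /* rounding bias            */
        modifier >>= strength;                  /* / 2^strength             */
        if (modifier > 16)
            modifier = 16;
        return 16 - modifier;                   /* big difference -> weight 0 */
    }

The resulting weight is then scaled by the per-frame filter_weight and accumulated into the accumulator/count arrays, which the normalization pass further down divides back out.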
The UV blocks are half the size of the Y and -- // therefore only extended by 8. The largest mv that a UV block -- // can support is 8 - 3. A UV mv is half of a Y mv. -- // (16 - 3) >> 1 == 6 which is greater than 8 - 3. -- // To keep the mv in play for both Y and UV planes the max that it -- // can be on a border is therefore 16 - 5. -+ /* Source frames are extended to 16 pixels. This is different than -+ * L/A/G reference frames that have a border of 32 (VP8BORDERINPIXELS) -+ * A 6 tap filter is used for motion search. This requires 2 pixels -+ * before and 3 pixels after. So the largest Y mv on a border would -+ * then be 16 - 3. The UV blocks are half the size of the Y and -+ * therefore only extended by 8. The largest mv that a UV block -+ * can support is 8 - 3. A UV mv is half of a Y mv. -+ * (16 - 3) >> 1 == 6 which is greater than 8 - 3. -+ * To keep the mv in play for both Y and UV planes the max that it -+ * can be on a border is therefore 16 - 5. -+ */ - cpi->mb.mv_row_min = -((mb_row * 16) + (16 - 5)); - cpi->mb.mv_row_max = ((cpi->common.mb_rows - 1 - mb_row) * 16) - + (16 - 5); -@@ -285,36 +283,41 @@ static void vp8_temporal_filter_iterate_c - - for (frame = 0; frame < frame_count; frame++) - { -- int err = 0; -- - if (cpi->frames[frame] == NULL) - continue; - - mbd->block[0].bmi.mv.as_mv.row = 0; - mbd->block[0].bmi.mv.as_mv.col = 0; - -+ if (frame == alt_ref_index) -+ { -+ filter_weight = 2; -+ } -+ else -+ { -+ int err = 0; - #if ALT_REF_MC_ENABLED - #define THRESH_LOW 10000 - #define THRESH_HIGH 20000 -- -- // Find best match in this frame by MC -- err = vp8_temporal_filter_find_matching_mb_c -- (cpi, -- cpi->frames[alt_ref_index], -- cpi->frames[frame], -- mb_y_offset, -- THRESH_LOW); -- -+ /* Find best match in this frame by MC */ -+ err = vp8_temporal_filter_find_matching_mb_c -+ (cpi, -+ cpi->frames[alt_ref_index], -+ cpi->frames[frame], -+ mb_y_offset, -+ THRESH_LOW); - #endif -- // Assign higher weight to matching MB if it's error -- // score is lower. If not applying MC default behavior -- // is to weight all MBs equal. 
-- filter_weight = errframes[frame]->y_buffer + mb_y_offset, -@@ -325,7 +328,7 @@ static void vp8_temporal_filter_iterate_c - mbd->block[0].bmi.mv.as_mv.col, - predictor); - -- // Apply the filter (YUV) -+ /* Apply the filter (YUV) */ - vp8_temporal_filter_apply - (f->y_buffer + mb_y_offset, - f->y_stride, -@@ -358,7 +361,7 @@ static void vp8_temporal_filter_iterate_c - } - } - -- // Normalize filter output to produce AltRef frame -+ /* Normalize filter output to produce AltRef frame */ - dst1 = cpi->alt_ref_buffer.y_buffer; - stride = cpi->alt_ref_buffer.y_stride; - byte = mb_y_offset; -@@ -372,7 +375,7 @@ static void vp8_temporal_filter_iterate_c - - dst1[byte] = (unsigned char)pval; - -- // move to next pixel -+ /* move to next pixel */ - byte++; - } - -@@ -389,19 +392,19 @@ static void vp8_temporal_filter_iterate_c - { - int m=k+64; - -- // U -+ /* U */ - unsigned int pval = accumulator[k] + (count[k] >> 1); - pval *= cpi->fixed_divide[count[k]]; - pval >>= 19; - dst1[byte] = (unsigned char)pval; - -- // V -+ /* V */ - pval = accumulator[m] + (count[m] >> 1); - pval *= cpi->fixed_divide[count[m]]; - pval >>= 19; - dst2[byte] = (unsigned char)pval; - -- // move to next pixel -+ /* move to next pixel */ - byte++; - } - -@@ -416,7 +419,7 @@ static void vp8_temporal_filter_iterate_c - mb_uv_offset += 8*(f->uv_stride-mb_cols); - } - -- // Restore input state -+ /* Restore input state */ - mbd->pre.y_buffer = y_buffer; - mbd->pre.u_buffer = u_buffer; - mbd->pre.v_buffer = v_buffer; -@@ -450,8 +453,7 @@ void vp8_temporal_filter_prepare_c - switch (blur_type) - { - case 1: -- ///////////////////////////////////////// -- // Backward Blur -+ /* Backward Blur */ - - frames_to_blur_backward = num_frames_backward; - -@@ -462,8 +464,7 @@ void vp8_temporal_filter_prepare_c - break; - - case 2: -- ///////////////////////////////////////// -- // Forward Blur -+ /* Forward Blur */ - - frames_to_blur_forward = num_frames_forward; - -@@ -475,8 +476,7 @@ void vp8_temporal_filter_prepare_c - - case 3: - default: -- ///////////////////////////////////////// -- // Center Blur -+ /* Center Blur */ - frames_to_blur_forward = num_frames_forward; - frames_to_blur_backward = num_frames_backward; - -@@ -486,7 +486,7 @@ void vp8_temporal_filter_prepare_c - if (frames_to_blur_backward > frames_to_blur_forward) - frames_to_blur_backward = frames_to_blur_forward; - -- // When max_frames is even we have 1 more frame backward than forward -+ /* When max_frames is even we have 1 more frame backward than forward */ - if (frames_to_blur_forward > (max_frames - 1) / 2) - frames_to_blur_forward = ((max_frames - 1) / 2); - -@@ -499,21 +499,7 @@ void vp8_temporal_filter_prepare_c - - start_frame = distance + frames_to_blur_forward; - --#ifdef DEBUGFWG -- // DEBUG FWG -- printf("max:%d FBCK:%d FFWD:%d ftb:%d ftbbck:%d ftbfwd:%d sei:%d lasei:%d start:%d" -- , max_frames -- , num_frames_backward -- , num_frames_forward -- , frames_to_blur -- , frames_to_blur_backward -- , frames_to_blur_forward -- , cpi->source_encode_index -- , cpi->last_alt_ref_sei -- , start_frame); --#endif -- -- // Setup frame pointers, NULL indicates frame not included in filter -+ /* Setup frame pointers, NULL indicates frame not included in filter */ - vpx_memset(cpi->frames, 0, max_frames*sizeof(YV12_BUFFER_CONFIG *)); - for (frame = 0; frame < frames_to_blur; frame++) - { -diff --git a/vp8/encoder/tokenize.c b/vp8/encoder/tokenize.c -index ef41fa8..3b5268b 100644 ---- a/vp8/encoder/tokenize.c -+++ b/vp8/encoder/tokenize.c -@@ -23,7 +23,7 @@ - #ifdef 
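The normalization loops above turn the accumulated weighted sums back into pixels without a hardware divide: fixed_divide[c] is assumed to hold 2^19 / c, so multiplying by it and shifting right by 19 performs a rounded division by the total weight. A scalar sketch:

    /* (acc + cnt/2) * (2^19 / cnt) >> 19  ~=  round(acc / cnt); fixed_divide[]
     * is assumed to be the precomputed reciprocal table used above. */
    static unsigned char normalize_filtered_pixel(unsigned int acc,
                                                  unsigned int cnt,
                                                  const unsigned int *fixed_divide)
    {
        unsigned int pval = acc + (cnt >> 1);   /* rounding                  */
        pval *= fixed_divide[cnt];              /* multiply by reciprocal    */
        pval >>= 19;                            /* scale back to pixel range */
        return (unsigned char)pval;
    }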
ENTROPY_STATS - _int64 context_counters[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; - #endif --void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) ; -+void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t) ; - void vp8_fix_contexts(MACROBLOCKD *x); - - #include "dct_value_tokens.h" -@@ -102,11 +102,12 @@ static void fill_value_tokens() - - static void tokenize2nd_order_b - ( -- MACROBLOCKD *x, -+ MACROBLOCK *x, - TOKENEXTRA **tp, - VP8_COMP *cpi - ) - { -+ MACROBLOCKD *xd = &x->e_mbd; - int pt; /* near block/prev token context index */ - int c; /* start at DC */ - TOKENEXTRA *t = *tp;/* store tokens starting here */ -@@ -117,11 +118,11 @@ static void tokenize2nd_order_b - int band, rc, v, token; - int eob; - -- b = x->block + 24; -+ b = xd->block + 24; - qcoeff_ptr = b->qcoeff; -- a = (ENTROPY_CONTEXT *)x->above_context + 8; -- l = (ENTROPY_CONTEXT *)x->left_context + 8; -- eob = x->eobs[24]; -+ a = (ENTROPY_CONTEXT *)xd->above_context + 8; -+ l = (ENTROPY_CONTEXT *)xd->left_context + 8; -+ eob = xd->eobs[24]; - VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l); - - if(!eob) -@@ -131,7 +132,7 @@ static void tokenize2nd_order_b - t->context_tree = cpi->common.fc.coef_probs [1] [0] [pt]; - t->skip_eob_node = 0; - -- ++cpi->coef_counts [1] [0] [pt] [DCT_EOB_TOKEN]; -+ ++x->coef_counts [1] [0] [pt] [DCT_EOB_TOKEN]; - t++; - *tp = t; - *a = *l = 0; -@@ -145,7 +146,7 @@ static void tokenize2nd_order_b - - t->context_tree = cpi->common.fc.coef_probs [1] [0] [pt]; - t->skip_eob_node = 0; -- ++cpi->coef_counts [1] [0] [pt] [token]; -+ ++x->coef_counts [1] [0] [pt] [token]; - pt = vp8_prev_token_class[token]; - t++; - c = 1; -@@ -164,7 +165,7 @@ static void tokenize2nd_order_b - - t->skip_eob_node = ((pt == 0)); - -- ++cpi->coef_counts [1] [band] [pt] [token]; -+ ++x->coef_counts [1] [band] [pt] [token]; - - pt = vp8_prev_token_class[token]; - t++; -@@ -177,7 +178,7 @@ static void tokenize2nd_order_b - - t->skip_eob_node = 0; - -- ++cpi->coef_counts [1] [band] [pt] [DCT_EOB_TOKEN]; -+ ++x->coef_counts [1] [band] [pt] [DCT_EOB_TOKEN]; - - t++; - } -@@ -189,12 +190,13 @@ static void tokenize2nd_order_b - - static void tokenize1st_order_b - ( -- MACROBLOCKD *x, -+ MACROBLOCK *x, - TOKENEXTRA **tp, - int type, /* which plane: 0=Y no DC, 1=Y2, 2=UV, 3=Y with DC */ - VP8_COMP *cpi - ) - { -+ MACROBLOCKD *xd = &x->e_mbd; - unsigned int block; - const BLOCKD *b; - int pt; /* near block/prev token context index */ -@@ -207,15 +209,15 @@ static void tokenize1st_order_b - int band, rc, v; - int tmp1, tmp2; - -- b = x->block; -+ b = xd->block; - /* Luma */ - for (block = 0; block < 16; block++, b++) - { - tmp1 = vp8_block2above[block]; - tmp2 = vp8_block2left[block]; - qcoeff_ptr = b->qcoeff; -- a = (ENTROPY_CONTEXT *)x->above_context + tmp1; -- l = (ENTROPY_CONTEXT *)x->left_context + tmp2; -+ a = (ENTROPY_CONTEXT *)xd->above_context + tmp1; -+ l = (ENTROPY_CONTEXT *)xd->left_context + tmp2; - - VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l); - -@@ -228,7 +230,7 @@ static void tokenize1st_order_b - t->context_tree = cpi->common.fc.coef_probs [type] [c] [pt]; - t->skip_eob_node = 0; - -- ++cpi->coef_counts [type] [c] [pt] [DCT_EOB_TOKEN]; -+ ++x->coef_counts [type] [c] [pt] [DCT_EOB_TOKEN]; - t++; - *tp = t; - *a = *l = 0; -@@ -243,7 +245,7 @@ static void tokenize1st_order_b - - t->context_tree = cpi->common.fc.coef_probs [type] [c] [pt]; - t->skip_eob_node = 0; -- ++cpi->coef_counts [type] [c] [pt] [token]; -+ ++x->coef_counts [type] [c] [pt] [token]; - pt = 
vp8_prev_token_class[token]; - t++; - c++; -@@ -261,7 +263,7 @@ static void tokenize1st_order_b - t->context_tree = cpi->common.fc.coef_probs [type] [band] [pt]; - - t->skip_eob_node = (pt == 0); -- ++cpi->coef_counts [type] [band] [pt] [token]; -+ ++x->coef_counts [type] [band] [pt] [token]; - - pt = vp8_prev_token_class[token]; - t++; -@@ -273,7 +275,7 @@ static void tokenize1st_order_b - t->context_tree = cpi->common.fc.coef_probs [type] [band] [pt]; - - t->skip_eob_node = 0; -- ++cpi->coef_counts [type] [band] [pt] [DCT_EOB_TOKEN]; -+ ++x->coef_counts [type] [band] [pt] [DCT_EOB_TOKEN]; - - t++; - } -@@ -287,8 +289,8 @@ static void tokenize1st_order_b - tmp1 = vp8_block2above[block]; - tmp2 = vp8_block2left[block]; - qcoeff_ptr = b->qcoeff; -- a = (ENTROPY_CONTEXT *)x->above_context + tmp1; -- l = (ENTROPY_CONTEXT *)x->left_context + tmp2; -+ a = (ENTROPY_CONTEXT *)xd->above_context + tmp1; -+ l = (ENTROPY_CONTEXT *)xd->left_context + tmp2; - - VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l); - -@@ -299,7 +301,7 @@ static void tokenize1st_order_b - t->context_tree = cpi->common.fc.coef_probs [2] [0] [pt]; - t->skip_eob_node = 0; - -- ++cpi->coef_counts [2] [0] [pt] [DCT_EOB_TOKEN]; -+ ++x->coef_counts [2] [0] [pt] [DCT_EOB_TOKEN]; - t++; - *tp = t; - *a = *l = 0; -@@ -314,7 +316,7 @@ static void tokenize1st_order_b - - t->context_tree = cpi->common.fc.coef_probs [2] [0] [pt]; - t->skip_eob_node = 0; -- ++cpi->coef_counts [2] [0] [pt] [token]; -+ ++x->coef_counts [2] [0] [pt] [token]; - pt = vp8_prev_token_class[token]; - t++; - c = 1; -@@ -333,7 +335,7 @@ static void tokenize1st_order_b - - t->skip_eob_node = (pt == 0); - -- ++cpi->coef_counts [2] [band] [pt] [token]; -+ ++x->coef_counts [2] [band] [pt] [token]; - - pt = vp8_prev_token_class[token]; - t++; -@@ -346,7 +348,7 @@ static void tokenize1st_order_b - - t->skip_eob_node = 0; - -- ++cpi->coef_counts [2] [band] [pt] [DCT_EOB_TOKEN]; -+ ++x->coef_counts [2] [band] [pt] [DCT_EOB_TOKEN]; - - t++; - } -@@ -374,16 +376,18 @@ static int mb_is_skippable(MACROBLOCKD *x, int has_y2_block) - } - - --void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) -+void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t) - { -+ MACROBLOCKD *xd = &x->e_mbd; - int plane_type; - int has_y2_block; - -- has_y2_block = (x->mode_info_context->mbmi.mode != B_PRED -- && x->mode_info_context->mbmi.mode != SPLITMV); -+ has_y2_block = (xd->mode_info_context->mbmi.mode != B_PRED -+ && xd->mode_info_context->mbmi.mode != SPLITMV); - -- x->mode_info_context->mbmi.mb_skip_coeff = mb_is_skippable(x, has_y2_block); -- if (x->mode_info_context->mbmi.mb_skip_coeff) -+ xd->mode_info_context->mbmi.mb_skip_coeff = -+ mb_is_skippable(xd, has_y2_block); -+ if (xd->mode_info_context->mbmi.mb_skip_coeff) - { - if (!cpi->common.mb_no_coeff_skip) - { -@@ -391,8 +395,8 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) - } - else - { -- vp8_fix_contexts(x); -- cpi->skip_true_count++; -+ vp8_fix_contexts(xd); -+ x->skip_true_count++; - } - - return; -@@ -488,7 +492,8 @@ static void stuff2nd_order_b - TOKENEXTRA **tp, - ENTROPY_CONTEXT *a, - ENTROPY_CONTEXT *l, -- VP8_COMP *cpi -+ VP8_COMP *cpi, -+ MACROBLOCK *x - ) - { - int pt; /* near block/prev token context index */ -@@ -498,13 +503,12 @@ static void stuff2nd_order_b - t->Token = DCT_EOB_TOKEN; - t->context_tree = cpi->common.fc.coef_probs [1] [0] [pt]; - t->skip_eob_node = 0; -- ++cpi->coef_counts [1] [0] [pt] [DCT_EOB_TOKEN]; -+ ++x->coef_counts [1] [0] [pt] [DCT_EOB_TOKEN]; - ++t; - - *tp 
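Every token emitted in these loops is coded against a context pt derived from the left and above entropy contexts. The combining macro is assumed to reduce each neighbour to a "had nonzero coefficients" flag and add them, giving pt in {0, 1, 2}:

    /* Assumed form of VP8_COMBINEENTROPYCONTEXTS used throughout the hunks above. */
    #define COMBINE_ENTROPY_CONTEXTS(pt, a, l)  ((pt) = ((a) != 0) + ((l) != 0))

After a block is tokenized, *a and *l are rewritten (zero for an all-zero block), so the next block along each direction sees the updated context.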
= t; - pt = 0; - *a = *l = pt; -- - } - - static void stuff1st_order_b -@@ -513,7 +517,8 @@ static void stuff1st_order_b - ENTROPY_CONTEXT *a, - ENTROPY_CONTEXT *l, - int type, -- VP8_COMP *cpi -+ VP8_COMP *cpi, -+ MACROBLOCK *x - ) - { - int pt; /* near block/prev token context index */ -@@ -524,20 +529,21 @@ static void stuff1st_order_b - t->Token = DCT_EOB_TOKEN; - t->context_tree = cpi->common.fc.coef_probs [type] [band] [pt]; - t->skip_eob_node = 0; -- ++cpi->coef_counts [type] [band] [pt] [DCT_EOB_TOKEN]; -+ ++x->coef_counts [type] [band] [pt] [DCT_EOB_TOKEN]; - ++t; - *tp = t; - pt = 0; /* 0 <-> all coeff data is zero */ - *a = *l = pt; -- - } -+ - static - void stuff1st_order_buv - ( - TOKENEXTRA **tp, - ENTROPY_CONTEXT *a, - ENTROPY_CONTEXT *l, -- VP8_COMP *cpi -+ VP8_COMP *cpi, -+ MACROBLOCK *x - ) - { - int pt; /* near block/prev token context index */ -@@ -547,38 +553,38 @@ void stuff1st_order_buv - t->Token = DCT_EOB_TOKEN; - t->context_tree = cpi->common.fc.coef_probs [2] [0] [pt]; - t->skip_eob_node = 0; -- ++cpi->coef_counts[2] [0] [pt] [DCT_EOB_TOKEN]; -+ ++x->coef_counts[2] [0] [pt] [DCT_EOB_TOKEN]; - ++t; - *tp = t; - pt = 0; /* 0 <-> all coeff data is zero */ - *a = *l = pt; -- - } - --void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) -+void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t) - { -- ENTROPY_CONTEXT * A = (ENTROPY_CONTEXT *)x->above_context; -- ENTROPY_CONTEXT * L = (ENTROPY_CONTEXT *)x->left_context; -+ MACROBLOCKD *xd = &x->e_mbd; -+ ENTROPY_CONTEXT * A = (ENTROPY_CONTEXT *)xd->above_context; -+ ENTROPY_CONTEXT * L = (ENTROPY_CONTEXT *)xd->left_context; - int plane_type; - int b; - plane_type = 3; -- if((x->mode_info_context->mbmi.mode != B_PRED -- && x->mode_info_context->mbmi.mode != SPLITMV)) -+ if((xd->mode_info_context->mbmi.mode != B_PRED -+ && xd->mode_info_context->mbmi.mode != SPLITMV)) - { - stuff2nd_order_b(t, -- A + vp8_block2above[24], L + vp8_block2left[24], cpi); -+ A + vp8_block2above[24], L + vp8_block2left[24], cpi, x); - plane_type = 0; - } - - for (b = 0; b < 16; b++) - stuff1st_order_b(t, - A + vp8_block2above[b], -- L + vp8_block2left[b], plane_type, cpi); -+ L + vp8_block2left[b], plane_type, cpi, x); - - for (b = 16; b < 24; b++) - stuff1st_order_buv(t, - A + vp8_block2above[b], -- L + vp8_block2left[b], cpi); -+ L + vp8_block2left[b], cpi, x); - - } - void vp8_fix_contexts(MACROBLOCKD *x) -diff --git a/vp8/encoder/x86/dct_mmx.asm b/vp8/encoder/x86/dct_mmx.asm -index f07b030..6f188cb 100644 ---- a/vp8/encoder/x86/dct_mmx.asm -+++ b/vp8/encoder/x86/dct_mmx.asm -@@ -12,7 +12,7 @@ - %include "vpx_ports/x86_abi_support.asm" - - ;void vp8_short_fdct4x4_mmx(short *input, short *output, int pitch) --global sym(vp8_short_fdct4x4_mmx) -+global sym(vp8_short_fdct4x4_mmx) PRIVATE - sym(vp8_short_fdct4x4_mmx): - push rbp - mov rbp, rsp -diff --git a/vp8/encoder/x86/dct_sse2.asm b/vp8/encoder/x86/dct_sse2.asm -index 3d52a5d..d880ce0 100644 ---- a/vp8/encoder/x86/dct_sse2.asm -+++ b/vp8/encoder/x86/dct_sse2.asm -@@ -61,7 +61,7 @@ - %endmacro - - ;void vp8_short_fdct4x4_sse2(short *input, short *output, int pitch) --global sym(vp8_short_fdct4x4_sse2) -+global sym(vp8_short_fdct4x4_sse2) PRIVATE - sym(vp8_short_fdct4x4_sse2): - - STACK_FRAME_CREATE -@@ -166,7 +166,7 @@ sym(vp8_short_fdct4x4_sse2): - STACK_FRAME_DESTROY - - ;void vp8_short_fdct8x4_sse2(short *input, short *output, int pitch) --global sym(vp8_short_fdct8x4_sse2) -+global sym(vp8_short_fdct8x4_sse2) PRIVATE - sym(vp8_short_fdct8x4_sse2): - - 
STACK_FRAME_CREATE -diff --git a/vp8/encoder/x86/denoising_sse2.c b/vp8/encoder/x86/denoising_sse2.c -new file mode 100644 -index 0000000..c1ac6c1 ---- /dev/null -+++ b/vp8/encoder/x86/denoising_sse2.c -@@ -0,0 +1,119 @@ -+/* -+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved. -+ * -+ * Use of this source code is governed by a BSD-style license -+ * that can be found in the LICENSE file in the root of the source -+ * tree. An additional intellectual property rights grant can be found -+ * in the file PATENTS. All contributing project authors may -+ * be found in the AUTHORS file in the root of the source tree. -+ */ -+ -+#include "vp8/encoder/denoising.h" -+#include "vp8/common/reconinter.h" -+#include "vpx/vpx_integer.h" -+#include "vpx_mem/vpx_mem.h" -+#include "vpx_rtcd.h" -+ -+#include -+ -+union sum_union { -+ __m128i v; -+ signed char e[16]; -+}; -+ -+int vp8_denoiser_filter_sse2(YV12_BUFFER_CONFIG *mc_running_avg, -+ YV12_BUFFER_CONFIG *running_avg, -+ MACROBLOCK *signal, unsigned int motion_magnitude, -+ int y_offset, int uv_offset) -+{ -+ unsigned char *sig = signal->thismb; -+ int sig_stride = 16; -+ unsigned char *mc_running_avg_y = mc_running_avg->y_buffer + y_offset; -+ int mc_avg_y_stride = mc_running_avg->y_stride; -+ unsigned char *running_avg_y = running_avg->y_buffer + y_offset; -+ int avg_y_stride = running_avg->y_stride; -+ int r; -+ __m128i acc_diff = _mm_setzero_si128(); -+ const __m128i k_0 = _mm_setzero_si128(); -+ const __m128i k_4 = _mm_set1_epi8(4); -+ const __m128i k_8 = _mm_set1_epi8(8); -+ const __m128i k_16 = _mm_set1_epi8(16); -+ /* Modify each level's adjustment according to motion_magnitude. */ -+ const __m128i l3 = _mm_set1_epi8( -+ (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ? 7 : 6); -+ /* Difference between level 3 and level 2 is 2. */ -+ const __m128i l32 = _mm_set1_epi8(2); -+ /* Difference between level 2 and level 1 is 1. */ -+ const __m128i l21 = _mm_set1_epi8(1); -+ -+ for (r = 0; r < 16; ++r) -+ { -+ /* Calculate differences */ -+ const __m128i v_sig = _mm_loadu_si128((__m128i *)(&sig[0])); -+ const __m128i v_mc_running_avg_y = _mm_loadu_si128( -+ (__m128i *)(&mc_running_avg_y[0])); -+ __m128i v_running_avg_y; -+ const __m128i pdiff = _mm_subs_epu8(v_mc_running_avg_y, v_sig); -+ const __m128i ndiff = _mm_subs_epu8(v_sig, v_mc_running_avg_y); -+ /* Obtain the sign. FF if diff is negative. */ -+ const __m128i diff_sign = _mm_cmpeq_epi8(pdiff, k_0); -+ /* Clamp absolute difference to 16 to be used to get mask. Doing this -+ * allows us to use _mm_cmpgt_epi8, which operates on signed byte. */ -+ const __m128i clamped_absdiff = _mm_min_epu8( -+ _mm_or_si128(pdiff, ndiff), k_16); -+ /* Get masks for l2 l1 and l0 adjustments */ -+ const __m128i mask2 = _mm_cmpgt_epi8(k_16, clamped_absdiff); -+ const __m128i mask1 = _mm_cmpgt_epi8(k_8, clamped_absdiff); -+ const __m128i mask0 = _mm_cmpgt_epi8(k_4, clamped_absdiff); -+ /* Get adjustments for l2, l1, and l0 */ -+ __m128i adj2 = _mm_and_si128(mask2, l32); -+ const __m128i adj1 = _mm_and_si128(mask1, l21); -+ const __m128i adj0 = _mm_and_si128(mask0, clamped_absdiff); -+ __m128i adj, padj, nadj; -+ -+ /* Combine the adjustments and get absolute adjustments. */ -+ adj2 = _mm_add_epi8(adj2, adj1); -+ adj = _mm_sub_epi8(l3, adj2); -+ adj = _mm_andnot_si128(mask0, adj); -+ adj = _mm_or_si128(adj, adj0); -+ -+ /* Restore the sign and get positive and negative adjustments. 
*/ -+ padj = _mm_andnot_si128(diff_sign, adj); -+ nadj = _mm_and_si128(diff_sign, adj); -+ -+ /* Calculate filtered value. */ -+ v_running_avg_y = _mm_adds_epu8(v_sig, padj); -+ v_running_avg_y = _mm_subs_epu8(v_running_avg_y, nadj); -+ _mm_storeu_si128((__m128i *)running_avg_y, v_running_avg_y); -+ -+ /* Adjustments <=7, and each element in acc_diff can fit in signed -+ * char. -+ */ -+ acc_diff = _mm_adds_epi8(acc_diff, padj); -+ acc_diff = _mm_subs_epi8(acc_diff, nadj); -+ -+ /* Update pointers for next iteration. */ -+ sig += sig_stride; -+ mc_running_avg_y += mc_avg_y_stride; -+ running_avg_y += avg_y_stride; -+ } -+ -+ { -+ /* Compute the sum of all pixel differences of this MB. */ -+ union sum_union s; -+ int sum_diff = 0; -+ s.v = acc_diff; -+ sum_diff = s.e[0] + s.e[1] + s.e[2] + s.e[3] + s.e[4] + s.e[5] -+ + s.e[6] + s.e[7] + s.e[8] + s.e[9] + s.e[10] + s.e[11] -+ + s.e[12] + s.e[13] + s.e[14] + s.e[15]; -+ -+ if (abs(sum_diff) > SUM_DIFF_THRESHOLD) -+ { -+ return COPY_BLOCK; -+ } -+ } -+ -+ vp8_copy_mem16x16(running_avg->y_buffer + y_offset, avg_y_stride, -+ signal->thismb, sig_stride); -+ return FILTER_BLOCK; -+} -diff --git a/vp8/encoder/x86/encodeopt.asm b/vp8/encoder/x86/encodeopt.asm -index 7ec7d60..fe26b18 100644 ---- a/vp8/encoder/x86/encodeopt.asm -+++ b/vp8/encoder/x86/encodeopt.asm -@@ -12,7 +12,7 @@ - %include "vpx_ports/x86_abi_support.asm" - - ;int vp8_block_error_xmm(short *coeff_ptr, short *dcoef_ptr) --global sym(vp8_block_error_xmm) -+global sym(vp8_block_error_xmm) PRIVATE - sym(vp8_block_error_xmm): - push rbp - mov rbp, rsp -@@ -60,7 +60,7 @@ sym(vp8_block_error_xmm): - ret - - ;int vp8_block_error_mmx(short *coeff_ptr, short *dcoef_ptr) --global sym(vp8_block_error_mmx) -+global sym(vp8_block_error_mmx) PRIVATE - sym(vp8_block_error_mmx): - push rbp - mov rbp, rsp -@@ -126,7 +126,7 @@ sym(vp8_block_error_mmx): - - - ;int vp8_mbblock_error_mmx_impl(short *coeff_ptr, short *dcoef_ptr, int dc); --global sym(vp8_mbblock_error_mmx_impl) -+global sym(vp8_mbblock_error_mmx_impl) PRIVATE - sym(vp8_mbblock_error_mmx_impl): - push rbp - mov rbp, rsp -@@ -203,7 +203,7 @@ sym(vp8_mbblock_error_mmx_impl): - - - ;int vp8_mbblock_error_xmm_impl(short *coeff_ptr, short *dcoef_ptr, int dc); --global sym(vp8_mbblock_error_xmm_impl) -+global sym(vp8_mbblock_error_xmm_impl) PRIVATE - sym(vp8_mbblock_error_xmm_impl): - push rbp - mov rbp, rsp -@@ -273,7 +273,7 @@ sym(vp8_mbblock_error_xmm_impl): - - - ;int vp8_mbuverror_mmx_impl(short *s_ptr, short *d_ptr); --global sym(vp8_mbuverror_mmx_impl) -+global sym(vp8_mbuverror_mmx_impl) PRIVATE - sym(vp8_mbuverror_mmx_impl): - push rbp - mov rbp, rsp -@@ -330,7 +330,7 @@ sym(vp8_mbuverror_mmx_impl): - - - ;int vp8_mbuverror_xmm_impl(short *s_ptr, short *d_ptr); --global sym(vp8_mbuverror_xmm_impl) -+global sym(vp8_mbuverror_xmm_impl) PRIVATE - sym(vp8_mbuverror_xmm_impl): - push rbp - mov rbp, rsp -diff --git a/vp8/encoder/x86/fwalsh_sse2.asm b/vp8/encoder/x86/fwalsh_sse2.asm -index 71efd56..f498927 100644 ---- a/vp8/encoder/x86/fwalsh_sse2.asm -+++ b/vp8/encoder/x86/fwalsh_sse2.asm -@@ -12,7 +12,7 @@ - %include "vpx_ports/x86_abi_support.asm" - - ;void vp8_short_walsh4x4_sse2(short *input, short *output, int pitch) --global sym(vp8_short_walsh4x4_sse2) -+global sym(vp8_short_walsh4x4_sse2) PRIVATE - sym(vp8_short_walsh4x4_sse2): - push rbp - mov rbp, rsp -diff --git a/vp8/encoder/x86/quantize_mmx.asm b/vp8/encoder/x86/quantize_mmx.asm -index f29a54e..2864ce1 100644 ---- a/vp8/encoder/x86/quantize_mmx.asm -+++ 
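The vector code above implements, sixteen pixels at a time, a small piecewise adjustment of the signal toward the motion-compensated running average. A scalar restatement of the per-pixel rule (the level constants follow l3/l32/l21 above; names are illustrative):

    #include <stdlib.h>

    /* diff = mc_running_avg - sig for one pixel; returns the signed amount
     * added to sig.  Small differences are followed exactly, larger ones are
     * progressively damped. */
    static int denoiser_adjustment(int diff, int motion_is_small)
    {
        const int l3 = motion_is_small ? 7 : 6;   /* strongest step          */
        const int absdiff = abs(diff);
        int adj;

        if (absdiff < 4)
            adj = absdiff;          /* level 0: track the signal exactly     */
        else if (absdiff < 8)
            adj = l3 - 3;           /* level 1                               */
        else if (absdiff < 16)
            adj = l3 - 2;           /* level 2                               */
        else
            adj = l3;               /* level 3: clamp the step               */

        return diff > 0 ? adj : -adj;             /* restore the sign        */
    }

The adjustments are also summed over the whole macroblock; if the absolute sum exceeds SUM_DIFF_THRESHOLD the filtered result is rejected (COPY_BLOCK) instead of being written back as the new running average.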
b/vp8/encoder/x86/quantize_mmx.asm -@@ -15,7 +15,7 @@ - ; short *qcoeff_ptr,short *dequant_ptr, - ; short *scan_mask, short *round_ptr, - ; short *quant_ptr, short *dqcoeff_ptr); --global sym(vp8_fast_quantize_b_impl_mmx) -+global sym(vp8_fast_quantize_b_impl_mmx) PRIVATE - sym(vp8_fast_quantize_b_impl_mmx): - push rbp - mov rbp, rsp -diff --git a/vp8/encoder/x86/quantize_sse2.asm b/vp8/encoder/x86/quantize_sse2.asm -index 7c249ff..724e54c 100644 ---- a/vp8/encoder/x86/quantize_sse2.asm -+++ b/vp8/encoder/x86/quantize_sse2.asm -@@ -16,7 +16,7 @@ - ; (BLOCK *b, | 0 - ; BLOCKD *d) | 1 - --global sym(vp8_regular_quantize_b_sse2) -+global sym(vp8_regular_quantize_b_sse2) PRIVATE - sym(vp8_regular_quantize_b_sse2): - push rbp - mov rbp, rsp -@@ -240,7 +240,7 @@ ZIGZAG_LOOP 15 - ; (BLOCK *b, | 0 - ; BLOCKD *d) | 1 - --global sym(vp8_fast_quantize_b_sse2) -+global sym(vp8_fast_quantize_b_sse2) PRIVATE - sym(vp8_fast_quantize_b_sse2): - push rbp - mov rbp, rsp -diff --git a/vp8/encoder/x86/quantize_sse4.asm b/vp8/encoder/x86/quantize_sse4.asm -index 70eac0c..f0e5d40 100644 ---- a/vp8/encoder/x86/quantize_sse4.asm -+++ b/vp8/encoder/x86/quantize_sse4.asm -@@ -16,7 +16,7 @@ - ; (BLOCK *b, | 0 - ; BLOCKD *d) | 1 - --global sym(vp8_regular_quantize_b_sse4) -+global sym(vp8_regular_quantize_b_sse4) PRIVATE - sym(vp8_regular_quantize_b_sse4): - - %if ABI_IS_32BIT -diff --git a/vp8/encoder/x86/quantize_ssse3.asm b/vp8/encoder/x86/quantize_ssse3.asm -index e698e90..dd526f4 100644 ---- a/vp8/encoder/x86/quantize_ssse3.asm -+++ b/vp8/encoder/x86/quantize_ssse3.asm -@@ -17,7 +17,7 @@ - ; BLOCKD *d) | 1 - ; - --global sym(vp8_fast_quantize_b_ssse3) -+global sym(vp8_fast_quantize_b_ssse3) PRIVATE - sym(vp8_fast_quantize_b_ssse3): - push rbp - mov rbp, rsp -diff --git a/vp8/encoder/x86/ssim_opt.asm b/vp8/encoder/x86/ssim_opt.asm -index c6db3d1..5964a85 100644 ---- a/vp8/encoder/x86/ssim_opt.asm -+++ b/vp8/encoder/x86/ssim_opt.asm -@@ -61,7 +61,7 @@ - ; or pavgb At this point this is just meant to be first pass for calculating - ; all the parms needed for 16x16 ssim so we can play with dssim as distortion - ; in mode selection code. --global sym(vp8_ssim_parms_16x16_sse2) -+global sym(vp8_ssim_parms_16x16_sse2) PRIVATE - sym(vp8_ssim_parms_16x16_sse2): - push rbp - mov rbp, rsp -@@ -151,7 +151,7 @@ sym(vp8_ssim_parms_16x16_sse2): - ; or pavgb At this point this is just meant to be first pass for calculating - ; all the parms needed for 16x16 ssim so we can play with dssim as distortion - ; in mode selection code. 
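The recurring change in these assembly files appends PRIVATE to each global declaration; the macro is assumed to mark the symbol with hidden visibility so these internal helpers are no longer exported from the shared library. For illustration only, the C-level analogue on ELF targets:

    /* Hypothetical helper, shown only to illustrate hidden visibility; the asm
     * PRIVATE suffix is assumed to have the equivalent effect. */
    __attribute__((visibility("hidden")))
    void vp8_internal_helper(void);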
--global sym(vp8_ssim_parms_8x8_sse2) -+global sym(vp8_ssim_parms_8x8_sse2) PRIVATE - sym(vp8_ssim_parms_8x8_sse2): - push rbp - mov rbp, rsp -diff --git a/vp8/encoder/x86/subtract_mmx.asm b/vp8/encoder/x86/subtract_mmx.asm -index 75e8aa3..794dd22 100644 ---- a/vp8/encoder/x86/subtract_mmx.asm -+++ b/vp8/encoder/x86/subtract_mmx.asm -@@ -14,7 +14,7 @@ - ;void vp8_subtract_b_mmx_impl(unsigned char *z, int src_stride, - ; short *diff, unsigned char *Predictor, - ; int pitch); --global sym(vp8_subtract_b_mmx_impl) -+global sym(vp8_subtract_b_mmx_impl) PRIVATE - sym(vp8_subtract_b_mmx_impl): - push rbp - mov rbp, rsp -@@ -75,7 +75,7 @@ sym(vp8_subtract_b_mmx_impl): - - ;void vp8_subtract_mby_mmx(short *diff, unsigned char *src, int src_stride, - ;unsigned char *pred, int pred_stride) --global sym(vp8_subtract_mby_mmx) -+global sym(vp8_subtract_mby_mmx) PRIVATE - sym(vp8_subtract_mby_mmx): - push rbp - mov rbp, rsp -@@ -150,7 +150,7 @@ sym(vp8_subtract_mby_mmx): - ; int src_stride, unsigned char *upred, - ; unsigned char *vpred, int pred_stride) - --global sym(vp8_subtract_mbuv_mmx) -+global sym(vp8_subtract_mbuv_mmx) PRIVATE - sym(vp8_subtract_mbuv_mmx): - push rbp - mov rbp, rsp -diff --git a/vp8/encoder/x86/subtract_sse2.asm b/vp8/encoder/x86/subtract_sse2.asm -index 008e9c7..a5d17f5 100644 ---- a/vp8/encoder/x86/subtract_sse2.asm -+++ b/vp8/encoder/x86/subtract_sse2.asm -@@ -14,7 +14,7 @@ - ;void vp8_subtract_b_sse2_impl(unsigned char *z, int src_stride, - ; short *diff, unsigned char *Predictor, - ; int pitch); --global sym(vp8_subtract_b_sse2_impl) -+global sym(vp8_subtract_b_sse2_impl) PRIVATE - sym(vp8_subtract_b_sse2_impl): - push rbp - mov rbp, rsp -@@ -73,7 +73,7 @@ sym(vp8_subtract_b_sse2_impl): - - ;void vp8_subtract_mby_sse2(short *diff, unsigned char *src, int src_stride, - ;unsigned char *pred, int pred_stride) --global sym(vp8_subtract_mby_sse2) -+global sym(vp8_subtract_mby_sse2) PRIVATE - sym(vp8_subtract_mby_sse2): - push rbp - mov rbp, rsp -@@ -146,7 +146,7 @@ sym(vp8_subtract_mby_sse2): - ;vp8_subtract_mbuv_sse2(short *diff, unsigned char *usrc, unsigned char *vsrc, - ; int src_stride, unsigned char *upred, - ; unsigned char *vpred, int pred_stride) --global sym(vp8_subtract_mbuv_sse2) -+global sym(vp8_subtract_mbuv_sse2) PRIVATE - sym(vp8_subtract_mbuv_sse2): - push rbp - mov rbp, rsp -diff --git a/vp8/encoder/x86/temporal_filter_apply_sse2.asm b/vp8/encoder/x86/temporal_filter_apply_sse2.asm -index b97c694..ce9d983 100644 ---- a/vp8/encoder/x86/temporal_filter_apply_sse2.asm -+++ b/vp8/encoder/x86/temporal_filter_apply_sse2.asm -@@ -20,7 +20,7 @@ - ; int filter_weight, | 5 - ; unsigned int *accumulator, | 6 - ; unsigned short *count) | 7 --global sym(vp8_temporal_filter_apply_sse2) -+global sym(vp8_temporal_filter_apply_sse2) PRIVATE - sym(vp8_temporal_filter_apply_sse2): - - push rbp -diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk -index 3a7b146..a328f46 100644 ---- a/vp8/vp8_common.mk -+++ b/vp8/vp8_common.mk -@@ -30,7 +30,6 @@ VP8_COMMON_SRCS-yes += common/findnearmv.c - VP8_COMMON_SRCS-yes += common/generic/systemdependent.c - VP8_COMMON_SRCS-yes += common/idct_blk.c - VP8_COMMON_SRCS-yes += common/idctllm.c --VP8_COMMON_SRCS-yes += common/idctllm_test.cc - VP8_COMMON_SRCS-yes += common/alloccommon.h - VP8_COMMON_SRCS-yes += common/blockd.h - VP8_COMMON_SRCS-yes += common/common.h -@@ -85,7 +84,6 @@ VP8_COMMON_SRCS-$(CONFIG_POSTPROC) += common/postproc.c - VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/dequantize_mmx.asm - VP8_COMMON_SRCS-$(HAVE_MMX) += 
common/x86/idct_blk_mmx.c - VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/idctllm_mmx.asm --VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/idctllm_mmx_test.cc - VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/iwalsh_mmx.asm - VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/loopfilter_mmx.asm - VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/recon_mmx.asm -@@ -122,6 +120,14 @@ VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/loopfilter_block_sse2.asm - endif - - # common (c) -+VP8_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/idctllm_dspr2.c -+VP8_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/filter_dspr2.c -+VP8_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/loopfilter_filters_dspr2.c -+VP8_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/reconinter_dspr2.c -+VP8_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/idct_blk_dspr2.c -+VP8_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/dequantize_dspr2.c -+ -+# common (c) - VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/filter_arm.c - VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/loopfilter_arm.c - VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/reconintra_arm.c -diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c -index 5fb74c4..eeac3a8 100644 ---- a/vp8/vp8_cx_iface.c -+++ b/vp8/vp8_cx_iface.c -@@ -9,6 +9,7 @@ - */ - - -+#include "vpx_rtcd.h" - #include "vpx/vpx_codec.h" - #include "vpx/internal/vpx_codec_internal.h" - #include "vpx_version.h" -@@ -22,7 +23,6 @@ - struct vp8_extracfg - { - struct vpx_codec_pkt_list *pkt_list; -- vp8e_encoding_mode encoding_mode; /** best, good, realtime */ - int cpu_used; /** available cpu percentage in 1/16*/ - unsigned int enable_auto_alt_ref; /** if encoder decides to uses alternate reference frame */ - unsigned int noise_sensitivity; -@@ -51,10 +51,8 @@ static const struct extraconfig_map extracfg_map[] = - { - NULL, - #if !(CONFIG_REALTIME_ONLY) -- VP8_BEST_QUALITY_ENCODING, /* Encoding Mode */ - 0, /* cpu_used */ - #else -- VP8_REAL_TIME_ENCODING, /* Encoding Mode */ - 4, /* cpu_used */ - #endif - 0, /* enable_auto_alt_ref */ -@@ -88,7 +86,8 @@ struct vpx_codec_alg_priv - vpx_image_t preview_img; - unsigned int next_frame_flag; - vp8_postproc_cfg_t preview_ppcfg; -- vpx_codec_pkt_list_decl(64) pkt_list; // changed to accomendate the maximum number of lagged frames allowed -+ /* pkt_list size depends on the maximum number of lagged frames allowed. 
*/ -+ vpx_codec_pkt_list_decl(64) pkt_list; - unsigned int fixed_kf_cntr; - }; - -@@ -146,25 +145,39 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, - RANGE_CHECK_HI(cfg, rc_max_quantizer, 63); - RANGE_CHECK_HI(cfg, rc_min_quantizer, cfg->rc_max_quantizer); - RANGE_CHECK_HI(cfg, g_threads, 64); --#if !(CONFIG_REALTIME_ONLY) -- RANGE_CHECK_HI(cfg, g_lag_in_frames, 25); --#else -+#if CONFIG_REALTIME_ONLY - RANGE_CHECK_HI(cfg, g_lag_in_frames, 0); -+#elif CONFIG_MULTI_RES_ENCODING -+ if (ctx->base.enc.total_encoders > 1) -+ RANGE_CHECK_HI(cfg, g_lag_in_frames, 0); -+#else -+ RANGE_CHECK_HI(cfg, g_lag_in_frames, 25); - #endif - RANGE_CHECK(cfg, rc_end_usage, VPX_VBR, VPX_CQ); - RANGE_CHECK_HI(cfg, rc_undershoot_pct, 1000); - RANGE_CHECK_HI(cfg, rc_overshoot_pct, 1000); - RANGE_CHECK_HI(cfg, rc_2pass_vbr_bias_pct, 100); - RANGE_CHECK(cfg, kf_mode, VPX_KF_DISABLED, VPX_KF_AUTO); -- //RANGE_CHECK_BOOL(cfg, g_delete_firstpassfile); -- RANGE_CHECK_BOOL(cfg, rc_resize_allowed); -+ -+/* TODO: add spatial re-sampling support and frame dropping in -+ * multi-res-encoder.*/ -+#if CONFIG_MULTI_RES_ENCODING -+ if (ctx->base.enc.total_encoders > 1) -+ RANGE_CHECK_HI(cfg, rc_resize_allowed, 0); -+#else -+ RANGE_CHECK_BOOL(cfg, rc_resize_allowed); -+#endif - RANGE_CHECK_HI(cfg, rc_dropframe_thresh, 100); - RANGE_CHECK_HI(cfg, rc_resize_up_thresh, 100); - RANGE_CHECK_HI(cfg, rc_resize_down_thresh, 100); --#if !(CONFIG_REALTIME_ONLY) -- RANGE_CHECK(cfg, g_pass, VPX_RC_ONE_PASS, VPX_RC_LAST_PASS); --#else -+ -+#if CONFIG_REALTIME_ONLY - RANGE_CHECK(cfg, g_pass, VPX_RC_ONE_PASS, VPX_RC_ONE_PASS); -+#elif CONFIG_MULTI_RES_ENCODING -+ if (ctx->base.enc.total_encoders > 1) -+ RANGE_CHECK(cfg, g_pass, VPX_RC_ONE_PASS, VPX_RC_ONE_PASS); -+#else -+ RANGE_CHECK(cfg, g_pass, VPX_RC_ONE_PASS, VPX_RC_LAST_PASS); - #endif - - /* VP8 does not support a lower bound on the keyframe interval in -@@ -177,11 +190,6 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, - - RANGE_CHECK_BOOL(vp8_cfg, enable_auto_alt_ref); - RANGE_CHECK(vp8_cfg, cpu_used, -16, 16); --#if !(CONFIG_REALTIME_ONLY) -- RANGE_CHECK(vp8_cfg, encoding_mode, VP8_BEST_QUALITY_ENCODING, VP8_REAL_TIME_ENCODING); --#else -- RANGE_CHECK(vp8_cfg, encoding_mode, VP8_REAL_TIME_ENCODING, VP8_REAL_TIME_ENCODING); --#endif - - #if CONFIG_REALTIME_ONLY && !CONFIG_TEMPORAL_DENOISING - RANGE_CHECK(vp8_cfg, noise_sensitivity, 0, 0); -@@ -189,7 +197,8 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, - RANGE_CHECK_HI(vp8_cfg, noise_sensitivity, 6); - #endif - -- RANGE_CHECK(vp8_cfg, token_partitions, VP8_ONE_TOKENPARTITION, VP8_EIGHT_TOKENPARTITION); -+ RANGE_CHECK(vp8_cfg, token_partitions, VP8_ONE_TOKENPARTITION, -+ VP8_EIGHT_TOKENPARTITION); - RANGE_CHECK_HI(vp8_cfg, Sharpness, 7); - RANGE_CHECK(vp8_cfg, arnr_max_frames, 0, 15); - RANGE_CHECK_HI(vp8_cfg, arnr_strength, 6); -@@ -203,7 +212,8 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, - if (cfg->g_pass == VPX_RC_LAST_PASS) - { - size_t packet_sz = sizeof(FIRSTPASS_STATS); -- int n_packets = cfg->rc_twopass_stats_in.sz / packet_sz; -+ int n_packets = (int)(cfg->rc_twopass_stats_in.sz / -+ packet_sz); - FIRSTPASS_STATS *stats; - - if (!cfg->rc_twopass_stats_in.buf) -@@ -227,7 +237,7 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, - - if (cfg->ts_number_layers > 1) - { -- int i; -+ unsigned int i; - RANGE_CHECK_HI(cfg, ts_periodicity, 16); - - for (i=1; its_number_layers; i++) -@@ -299,7 +309,7 @@ static vpx_codec_err_t 
set_vp8e_config(VP8_CONFIG *oxcf, - break; - } - -- if (cfg.g_pass == VPX_RC_FIRST_PASS) -+ if (cfg.g_pass == VPX_RC_FIRST_PASS || cfg.g_pass == VPX_RC_ONE_PASS) - { - oxcf->allow_lag = 0; - oxcf->lag_in_frames = 0; -@@ -355,7 +365,6 @@ static vpx_codec_err_t set_vp8e_config(VP8_CONFIG *oxcf, - - oxcf->auto_key = cfg.kf_mode == VPX_KF_AUTO - && cfg.kf_min_dist != cfg.kf_max_dist; -- //oxcf->kf_min_dist = cfg.kf_min_dis; - oxcf->key_freq = cfg.kf_max_dist; - - oxcf->number_of_layers = cfg.ts_number_layers; -@@ -385,9 +394,6 @@ static vpx_codec_err_t set_vp8e_config(VP8_CONFIG *oxcf, - } - #endif - -- //oxcf->delete_first_pass_file = cfg.g_delete_firstpassfile; -- //strcpy(oxcf->first_pass_file, cfg.g_firstpass_file); -- - oxcf->cpu_used = vp8_cfg.cpu_used; - oxcf->encode_breakout = vp8_cfg.static_thresh; - oxcf->play_alternate = vp8_cfg.enable_auto_alt_ref; -@@ -447,7 +453,7 @@ static vpx_codec_err_t vp8e_set_config(vpx_codec_alg_priv_t *ctx, - vpx_codec_err_t res; - - if (((cfg->g_w != ctx->cfg.g_w) || (cfg->g_h != ctx->cfg.g_h)) -- && cfg->g_lag_in_frames > 1) -+ && (cfg->g_lag_in_frames > 1 || cfg->g_pass != VPX_RC_ONE_PASS)) - ERROR("Cannot change width or height after initialization"); - - /* Prevent increasing lag_in_frames. This check is stricter than it needs -@@ -542,19 +548,27 @@ static vpx_codec_err_t vp8e_mr_alloc_mem(const vpx_codec_enc_cfg_t *cfg, - vpx_codec_err_t res = 0; - - #if CONFIG_MULTI_RES_ENCODING -+ LOWER_RES_FRAME_INFO *shared_mem_loc; - int mb_rows = ((cfg->g_w + 15) >>4); - int mb_cols = ((cfg->g_h + 15) >>4); - -- *mem_loc = calloc(mb_rows*mb_cols, sizeof(LOWER_RES_INFO)); -- if(!(*mem_loc)) -+ shared_mem_loc = calloc(1, sizeof(LOWER_RES_FRAME_INFO)); -+ if(!shared_mem_loc) -+ { -+ res = VPX_CODEC_MEM_ERROR; -+ } -+ -+ shared_mem_loc->mb_info = calloc(mb_rows*mb_cols, sizeof(LOWER_RES_MB_INFO)); -+ if(!(shared_mem_loc->mb_info)) - { -- free(*mem_loc); - res = VPX_CODEC_MEM_ERROR; - } - else -+ { -+ *mem_loc = (void *)shared_mem_loc; - res = VPX_CODEC_OK; -+ } - #endif -- - return res; - } - -@@ -568,6 +582,8 @@ static vpx_codec_err_t vp8e_init(vpx_codec_ctx_t *ctx, - - struct VP8_COMP *optr; - -+ vpx_rtcd(); -+ - if (!ctx->priv) - { - priv = calloc(1, sizeof(struct vpx_codec_alg_priv)); -@@ -616,15 +632,15 @@ static vpx_codec_err_t vp8e_init(vpx_codec_ctx_t *ctx, - return VPX_CODEC_MEM_ERROR; - } - -+ if(mr_cfg) -+ ctx->priv->enc.total_encoders = mr_cfg->mr_total_resolutions; -+ else -+ ctx->priv->enc.total_encoders = 1; -+ - res = validate_config(priv, &priv->cfg, &priv->vp8_cfg, 0); - - if (!res) - { -- if(mr_cfg) -- ctx->priv->enc.total_encoders = mr_cfg->mr_total_resolutions; -- else -- ctx->priv->enc.total_encoders = 1; -- - set_vp8e_config(&ctx->priv->alg_priv->oxcf, - ctx->priv->alg_priv->cfg, - ctx->priv->alg_priv->vp8_cfg, -@@ -647,7 +663,11 @@ static vpx_codec_err_t vp8e_destroy(vpx_codec_alg_priv_t *ctx) - #if CONFIG_MULTI_RES_ENCODING - /* Free multi-encoder shared memory */ - if (ctx->oxcf.mr_total_resolutions > 0 && (ctx->oxcf.mr_encoder_id == ctx->oxcf.mr_total_resolutions-1)) -+ { -+ LOWER_RES_FRAME_INFO *shared_mem_loc = (LOWER_RES_FRAME_INFO *)ctx->oxcf.mr_low_res_mode_info; -+ free(shared_mem_loc->mb_info); - free(ctx->oxcf.mr_low_res_mode_info); -+ } - #endif - - free(ctx->cx_data); -@@ -673,7 +693,7 @@ static vpx_codec_err_t image2yuvconfig(const vpx_image_t *img, - yv12->uv_stride = img->stride[VPX_PLANE_U]; - - yv12->border = (img->stride[VPX_PLANE_Y] - img->w) / 2; -- yv12->clrtype = (img->fmt == VPX_IMG_FMT_VPXI420 || img->fmt == 
VPX_IMG_FMT_VPXYV12); //REG_YUV = 0 -+ yv12->clrtype = (img->fmt == VPX_IMG_FMT_VPXI420 || img->fmt == VPX_IMG_FMT_VPXYV12); - return res; - } - -@@ -733,6 +753,9 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx, - if (!ctx->cfg.rc_target_bitrate) - return res; - -+ if (!ctx->cfg.rc_target_bitrate) -+ return res; -+ - if (img) - res = validate_img(ctx, img); - -@@ -756,13 +779,13 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx, - int ref = 7; - - if (flags & VP8_EFLAG_NO_REF_LAST) -- ref ^= VP8_LAST_FLAG; -+ ref ^= VP8_LAST_FRAME; - - if (flags & VP8_EFLAG_NO_REF_GF) -- ref ^= VP8_GOLD_FLAG; -+ ref ^= VP8_GOLD_FRAME; - - if (flags & VP8_EFLAG_NO_REF_ARF) -- ref ^= VP8_ALT_FLAG; -+ ref ^= VP8_ALTR_FRAME; - - vp8_use_as_reference(ctx->cpi, ref); - } -@@ -774,13 +797,13 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx, - int upd = 7; - - if (flags & VP8_EFLAG_NO_UPD_LAST) -- upd ^= VP8_LAST_FLAG; -+ upd ^= VP8_LAST_FRAME; - - if (flags & VP8_EFLAG_NO_UPD_GF) -- upd ^= VP8_GOLD_FLAG; -+ upd ^= VP8_GOLD_FRAME; - - if (flags & VP8_EFLAG_NO_UPD_ARF) -- upd ^= VP8_ALT_FLAG; -+ upd ^= VP8_ALTR_FRAME; - - vp8_update_reference(ctx->cpi, upd); - } -@@ -869,15 +892,16 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx, - VP8_COMP *cpi = (VP8_COMP *)ctx->cpi; - - /* Add the frame packet to the list of returned packets. */ -- round = 1000000 * ctx->cfg.g_timebase.num / 2 - 1; -+ round = (vpx_codec_pts_t)1000000 -+ * ctx->cfg.g_timebase.num / 2 - 1; - delta = (dst_end_time_stamp - dst_time_stamp); - pkt.kind = VPX_CODEC_CX_FRAME_PKT; - pkt.data.frame.pts = - (dst_time_stamp * ctx->cfg.g_timebase.den + round) - / ctx->cfg.g_timebase.num / 10000000; -- pkt.data.frame.duration = -- (delta * ctx->cfg.g_timebase.den + round) -- / ctx->cfg.g_timebase.num / 10000000; -+ pkt.data.frame.duration = (unsigned long) -+ ((delta * ctx->cfg.g_timebase.den + round) -+ / ctx->cfg.g_timebase.num / 10000000); - pkt.data.frame.flags = lib_flags << 16; - - if (lib_flags & FRAMEFLAGS_KEY) -@@ -887,10 +911,11 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx, - { - pkt.data.frame.flags |= VPX_FRAME_IS_INVISIBLE; - -- // This timestamp should be as close as possible to the -- // prior PTS so that if a decoder uses pts to schedule when -- // to do this, we start right after last frame was decoded. -- // Invisible frames have no duration. -+ /* This timestamp should be as close as possible to the -+ * prior PTS so that if a decoder uses pts to schedule when -+ * to do this, we start right after last frame was decoded. -+ * Invisible frames have no duration. 
-+ */ - pkt.data.frame.pts = ((cpi->last_time_stamp_seen - * ctx->cfg.g_timebase.den + round) - / ctx->cfg.g_timebase.num / 10000000) + 1; -@@ -942,8 +967,6 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx, - cx_data += size; - cx_data_sz -= size; - } -- -- //printf("timestamp: %lld, duration: %d\n", pkt->data.frame.pts, pkt->data.frame.duration); - } - } - } -diff --git a/vp8/vp8_dx_iface.c b/vp8/vp8_dx_iface.c -index 37773db..c13d697 100644 ---- a/vp8/vp8_dx_iface.c -+++ b/vp8/vp8_dx_iface.c -@@ -11,12 +11,19 @@ - - #include - #include -+#include "vpx_rtcd.h" - #include "vpx/vpx_decoder.h" - #include "vpx/vp8dx.h" - #include "vpx/internal/vpx_codec_internal.h" - #include "vpx_version.h" - #include "common/onyxd.h" - #include "decoder/onyxd_int.h" -+#include "common/alloccommon.h" -+#include "vpx_mem/vpx_mem.h" -+#if CONFIG_ERROR_CONCEALMENT -+#include "decoder/error_concealment.h" -+#endif -+#include "decoder/decoderthreading.h" - - #define VP8_CAP_POSTPROC (CONFIG_POSTPROC ? VPX_CODEC_CAP_POSTPROC : 0) - #define VP8_CAP_ERROR_CONCEALMENT (CONFIG_ERROR_CONCEALMENT ? \ -@@ -69,7 +76,7 @@ struct vpx_codec_alg_priv - #endif - vpx_image_t img; - int img_setup; -- int img_avail; -+ void *user_priv; - }; - - static unsigned long vp8_priv_sz(const vpx_codec_dec_cfg_t *si, vpx_codec_flags_t flags) -@@ -187,6 +194,8 @@ static vpx_codec_err_t vp8_init(vpx_codec_ctx_t *ctx, - vpx_codec_err_t res = VPX_CODEC_OK; - (void) data; - -+ vpx_rtcd(); -+ - /* This function only allocates space for the vpx_codec_alg_priv_t - * structure. More memory may be required at the time the stream - * information becomes known. -@@ -341,16 +350,30 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t *ctx, - long deadline) - { - vpx_codec_err_t res = VPX_CODEC_OK; -- -- ctx->img_avail = 0; -+ unsigned int resolution_change = 0; -+ unsigned int w, h; - - /* Determine the stream parameters. Note that we rely on peek_si to - * validate that we have a buffer that does not wrap around the top - * of the heap. 
- */ -- if (!ctx->si.h) -- res = ctx->base.iface->dec.peek_si(data, data_sz, &ctx->si); -+ w = ctx->si.w; -+ h = ctx->si.h; -+ -+ res = ctx->base.iface->dec.peek_si(data, data_sz, &ctx->si); -+ -+ if((res == VPX_CODEC_UNSUP_BITSTREAM) && !ctx->si.is_kf) -+ { -+ /* the peek function returns an error for non keyframes, however for -+ * this case, it is not an error */ -+ res = VPX_CODEC_OK; -+ } -+ -+ if(!ctx->decoder_init && !ctx->si.is_kf) -+ res = VPX_CODEC_UNSUP_BITSTREAM; - -+ if ((ctx->si.h != h) || (ctx->si.w != w)) -+ resolution_change = 1; - - /* Perform deferred allocations, if required */ - if (!res && ctx->defer_alloc) -@@ -426,6 +449,122 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t *ctx, - - if (!res && ctx->pbi) - { -+ if(resolution_change) -+ { -+ VP8D_COMP *pbi = ctx->pbi; -+ VP8_COMMON *const pc = & pbi->common; -+ MACROBLOCKD *const xd = & pbi->mb; -+#if CONFIG_MULTITHREAD -+ int i; -+#endif -+ pc->Width = ctx->si.w; -+ pc->Height = ctx->si.h; -+ { -+ int prev_mb_rows = pc->mb_rows; -+ -+ if (setjmp(pbi->common.error.jmp)) -+ { -+ pbi->common.error.setjmp = 0; -+ /* same return value as used in vp8dx_receive_compressed_data */ -+ return -1; -+ } -+ -+ pbi->common.error.setjmp = 1; -+ -+ if (pc->Width <= 0) -+ { -+ pc->Width = w; -+ vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, -+ "Invalid frame width"); -+ } -+ -+ if (pc->Height <= 0) -+ { -+ pc->Height = h; -+ vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, -+ "Invalid frame height"); -+ } -+ -+ if (vp8_alloc_frame_buffers(pc, pc->Width, pc->Height)) -+ vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR, -+ "Failed to allocate frame buffers"); -+ -+ xd->pre = pc->yv12_fb[pc->lst_fb_idx]; -+ xd->dst = pc->yv12_fb[pc->new_fb_idx]; -+ -+#if CONFIG_MULTITHREAD -+ for (i = 0; i < pbi->allocated_decoding_thread_count; i++) -+ { -+ pbi->mb_row_di[i].mbd.dst = pc->yv12_fb[pc->new_fb_idx]; -+ vp8_build_block_doffsets(&pbi->mb_row_di[i].mbd); -+ } -+#endif -+ vp8_build_block_doffsets(&pbi->mb); -+ -+ /* allocate memory for last frame MODE_INFO array */ -+#if CONFIG_ERROR_CONCEALMENT -+ -+ if (pbi->ec_enabled) -+ { -+ /* old prev_mip was released by vp8_de_alloc_frame_buffers() -+ * called in vp8_alloc_frame_buffers() */ -+ pc->prev_mip = vpx_calloc( -+ (pc->mb_cols + 1) * (pc->mb_rows + 1), -+ sizeof(MODE_INFO)); -+ -+ if (!pc->prev_mip) -+ { -+ vp8_de_alloc_frame_buffers(pc); -+ vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR, -+ "Failed to allocate" -+ "last frame MODE_INFO array"); -+ } -+ -+ pc->prev_mi = pc->prev_mip + pc->mode_info_stride + 1; -+ -+ if (vp8_alloc_overlap_lists(pbi)) -+ vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR, -+ "Failed to allocate overlap lists " -+ "for error concealment"); -+ } -+ -+#endif -+ -+#if CONFIG_MULTITHREAD -+ if (pbi->b_multithreaded_rd) -+ vp8mt_alloc_temp_buffers(pbi, pc->Width, prev_mb_rows); -+#else -+ (void)prev_mb_rows; -+#endif -+ } -+ -+ pbi->common.error.setjmp = 0; -+ -+ /* required to get past the first get_free_fb() call */ -+ ctx->pbi->common.fb_idx_ref_cnt[0] = 0; -+ } -+ -+ ctx->user_priv = user_priv; -+ if (vp8dx_receive_compressed_data(ctx->pbi, data_sz, data, deadline)) -+ { -+ VP8D_COMP *pbi = (VP8D_COMP *)ctx->pbi; -+ res = update_error_state(ctx, &pbi->common.error); -+ } -+ } -+ -+ return res; -+} -+ -+static vpx_image_t *vp8_get_frame(vpx_codec_alg_priv_t *ctx, -+ vpx_codec_iter_t *iter) -+{ -+ vpx_image_t *img = NULL; -+ -+ /* iter acts as a flip flop, so an image is only returned on the first -+ * call to get_frame. 
-+ */ -+ if (!(*iter)) -+ { - YV12_BUFFER_CONFIG sd; - int64_t time_stamp = 0, time_end_stamp = 0; - vp8_ppflags_t flags = {0}; -@@ -451,34 +590,10 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t *ctx, - #endif - } - -- if (vp8dx_receive_compressed_data(ctx->pbi, data_sz, data, deadline)) -- { -- VP8D_COMP *pbi = (VP8D_COMP *)ctx->pbi; -- res = update_error_state(ctx, &pbi->common.error); -- } -- -- if (!res && 0 == vp8dx_get_raw_frame(ctx->pbi, &sd, &time_stamp, &time_end_stamp, &flags)) -+ if (0 == vp8dx_get_raw_frame(ctx->pbi, &sd, &time_stamp, &time_end_stamp, &flags)) - { -- yuvconfig2image(&ctx->img, &sd, user_priv); -- ctx->img_avail = 1; -- } -- } -+ yuvconfig2image(&ctx->img, &sd, ctx->user_priv); - -- return res; --} -- --static vpx_image_t *vp8_get_frame(vpx_codec_alg_priv_t *ctx, -- vpx_codec_iter_t *iter) --{ -- vpx_image_t *img = NULL; -- -- if (ctx->img_avail) -- { -- /* iter acts as a flip flop, so an image is only returned on the first -- * call to get_frame. -- */ -- if (!(*iter)) -- { - img = &ctx->img; - *iter = img; - } -diff --git a/vp8/vp8cx.mk b/vp8/vp8cx.mk -index 019edbd..0ae2f10 100644 ---- a/vp8/vp8cx.mk -+++ b/vp8/vp8cx.mk -@@ -22,16 +22,9 @@ ifeq ($(ARCH_ARM),yes) - include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8cx_arm.mk - endif - --VP8_CX_SRCS-yes += vp8_cx_iface.c -+VP8_CX_SRCS-yes += vp8cx.mk - --# encoder --#INCLUDES += algo/vpx_common/vpx_mem/include --#INCLUDES += common --#INCLUDES += common --#INCLUDES += common --#INCLUDES += algo/vpx_ref/cpu_id/include --#INCLUDES += common --#INCLUDES += encoder -+VP8_CX_SRCS-yes += vp8_cx_iface.c - - VP8_CX_SRCS-yes += encoder/asm_enc_offsets.c - VP8_CX_SRCS-yes += encoder/defaultcoefcounts.h -@@ -99,6 +92,14 @@ VP8_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp8_enc_stubs_mmx.c - VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_sse2.asm - VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/fwalsh_sse2.asm - VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/quantize_sse2.asm -+ -+ifeq ($(CONFIG_TEMPORAL_DENOISING),yes) -+VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/denoising_sse2.c -+ifeq ($(HAVE_SSE2),yes) -+vp8/encoder/x86/denoising_sse2.c.o: CFLAGS += -msse2 -+endif -+endif -+ - VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/subtract_sse2.asm - VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/temporal_filter_apply_sse2.asm - VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp8_enc_stubs_sse2.c -diff --git a/vp8/vp8cx_arm.mk b/vp8/vp8cx_arm.mk -index b16615d..b030ee5 100644 ---- a/vp8/vp8cx_arm.mk -+++ b/vp8/vp8cx_arm.mk -@@ -9,7 +9,7 @@ - ## - - --#VP8_CX_SRCS list is modified according to different platforms. 
-+VP8_CX_SRCS-$(ARCH_ARM) += vp8cx_arm.mk - - #File list for arm - # encoder -diff --git a/vp8/vp8dx.mk b/vp8/vp8dx.mk -index 2cfd280..dd39190 100644 ---- a/vp8/vp8dx.mk -+++ b/vp8/vp8dx.mk -@@ -18,6 +18,8 @@ VP8_DX_SRCS-no += $(VP8_COMMON_SRCS-no) - VP8_DX_SRCS_REMOVE-yes += $(VP8_COMMON_SRCS_REMOVE-yes) - VP8_DX_SRCS_REMOVE-no += $(VP8_COMMON_SRCS_REMOVE-no) - -+VP8_DX_SRCS-yes += vp8dx.mk -+ - VP8_DX_SRCS-yes += vp8_dx_iface.c - - # common -diff --git a/vp8_multi_resolution_encoder.c b/vp8_multi_resolution_encoder.c -index 78f50c2..eae36a4 100644 ---- a/vp8_multi_resolution_encoder.c -+++ b/vp8_multi_resolution_encoder.c -@@ -164,7 +164,7 @@ static void write_ivf_file_header(FILE *outfile, - mem_put_le32(header+24, frame_cnt); /* length */ - mem_put_le32(header+28, 0); /* unused */ - -- if(fwrite(header, 1, 32, outfile)); -+ (void) fwrite(header, 1, 32, outfile); - } - - static void write_ivf_frame_header(FILE *outfile, -@@ -181,7 +181,7 @@ static void write_ivf_frame_header(FILE *outfile, - mem_put_le32(header+4, pts&0xFFFFFFFF); - mem_put_le32(header+8, pts >> 32); - -- if(fwrite(header, 1, 12, outfile)); -+ (void) fwrite(header, 1, 12, outfile); - } - - int main(int argc, char **argv) -@@ -273,7 +273,7 @@ int main(int argc, char **argv) - cfg[0].g_w = width; - cfg[0].g_h = height; - cfg[0].g_threads = 1; /* number of threads used */ -- cfg[0].rc_dropframe_thresh = 0; -+ cfg[0].rc_dropframe_thresh = 30; - cfg[0].rc_end_usage = VPX_CBR; - cfg[0].rc_resize_allowed = 0; - cfg[0].rc_min_quantizer = 4; -@@ -283,13 +283,17 @@ int main(int argc, char **argv) - cfg[0].rc_buf_initial_sz = 500; - cfg[0].rc_buf_optimal_sz = 600; - cfg[0].rc_buf_sz = 1000; -- //cfg[0].rc_dropframe_thresh = 10; - cfg[0].g_error_resilient = 1; /* Enable error resilient mode */ - cfg[0].g_lag_in_frames = 0; - - /* Disable automatic keyframe placement */ -+ /* Note: These 3 settings are copied to all levels. But, except the lowest -+ * resolution level, all other levels are set to VPX_KF_DISABLED internally. 
-+ */ - //cfg[0].kf_mode = VPX_KF_DISABLED; -- cfg[0].kf_min_dist = cfg[0].kf_max_dist = 1000; -+ cfg[0].kf_mode = VPX_KF_AUTO; -+ cfg[0].kf_min_dist = 3000; -+ cfg[0].kf_max_dist = 3000; - - cfg[0].rc_target_bitrate = target_bitrate[0]; /* Set target bitrate */ - cfg[0].g_timebase.num = 1; /* Set fps */ -@@ -361,6 +365,12 @@ int main(int argc, char **argv) - if(vpx_codec_control(&codec[i], VP8E_SET_STATIC_THRESHOLD, static_thresh)) - die_codec(&codec[i], "Failed to set static threshold"); - } -+ /* Set NOISE_SENSITIVITY to do TEMPORAL_DENOISING */ -+ for ( i=0; i< NUM_ENCODERS; i++) -+ { -+ if(vpx_codec_control(&codec[i], VP8E_SET_NOISE_SENSITIVITY, 0)) -+ die_codec(&codec[i], "Failed to set noise_sensitivity"); -+ } - - frame_avail = 1; - got_data = 0; -@@ -405,8 +415,8 @@ int main(int argc, char **argv) - switch(pkt[i]->kind) { - case VPX_CODEC_CX_FRAME_PKT: - write_ivf_frame_header(outfile[i], pkt[i]); -- if(fwrite(pkt[i]->data.frame.buf, 1, pkt[i]->data.frame.sz, -- outfile[i])); -+ (void) fwrite(pkt[i]->data.frame.buf, 1, -+ pkt[i]->data.frame.sz, outfile[i]); - break; - case VPX_CODEC_PSNR_PKT: - if (show_psnr) -diff --git a/vp8_scalable_patterns.c b/vp8_scalable_patterns.c -index 4311b1a..06270fe 100644 ---- a/vp8_scalable_patterns.c -+++ b/vp8_scalable_patterns.c -@@ -93,7 +93,7 @@ static void write_ivf_file_header(FILE *outfile, - mem_put_le32(header+24, frame_cnt); /* length */ - mem_put_le32(header+28, 0); /* unused */ - -- if(fwrite(header, 1, 32, outfile)); -+ (void) fwrite(header, 1, 32, outfile); - } - - -@@ -111,10 +111,10 @@ static void write_ivf_frame_header(FILE *outfile, - mem_put_le32(header+4, pts&0xFFFFFFFF); - mem_put_le32(header+8, pts >> 32); - -- if(fwrite(header, 1, 12, outfile)); -+ (void) fwrite(header, 1, 12, outfile); - } - --static int mode_to_num_layers[9] = {2, 2, 3, 3, 3, 3, 5, 2, 3}; -+static int mode_to_num_layers[12] = {1, 2, 2, 3, 3, 3, 3, 5, 2, 3, 3, 3}; - - int main(int argc, char **argv) { - FILE *infile, *outfile[VPX_TS_MAX_LAYERS]; -@@ -129,8 +129,8 @@ int main(int argc, char **argv) { - int got_data; - int flags = 0; - int i; -- int pts = 0; // PTS starts at 0 -- int frame_duration = 1; // 1 timebase tick per frame -+ int pts = 0; /* PTS starts at 0 */ -+ int frame_duration = 1; /* 1 timebase tick per frame */ - - int layering_mode = 0; - int frames_in_layer[VPX_TS_MAX_LAYERS] = {0}; -@@ -138,7 +138,7 @@ int main(int argc, char **argv) { - int flag_periodicity; - int max_intra_size_pct; - -- // Check usage and arguments -+ /* Check usage and arguments */ - if (argc < 9) - die("Usage: %s " - " ... 
\n", argv[0]); -@@ -150,43 +150,43 @@ int main(int argc, char **argv) { - - if (!sscanf(argv[7], "%d", &layering_mode)) - die ("Invalid mode %s", argv[7]); -- if (layering_mode<0 || layering_mode>8) -- die ("Invalid mode (0..8) %s", argv[7]); -+ if (layering_mode<0 || layering_mode>11) -+ die ("Invalid mode (0..11) %s", argv[7]); - - if (argc != 8+mode_to_num_layers[layering_mode]) - die ("Invalid number of arguments"); - -- if (!vpx_img_alloc (&raw, VPX_IMG_FMT_I420, width, height, 1)) -+ if (!vpx_img_alloc (&raw, VPX_IMG_FMT_I420, width, height, 32)) - die ("Failed to allocate image", width, height); - - printf("Using %s\n",vpx_codec_iface_name(interface)); - -- // Populate encoder configuration -+ /* Populate encoder configuration */ - res = vpx_codec_enc_config_default(interface, &cfg, 0); - if(res) { - printf("Failed to get config: %s\n", vpx_codec_err_to_string(res)); - return EXIT_FAILURE; - } - -- // Update the default configuration with our settings -+ /* Update the default configuration with our settings */ - cfg.g_w = width; - cfg.g_h = height; - -- // Timebase format e.g. 30fps: numerator=1, demoninator=30 -+ /* Timebase format e.g. 30fps: numerator=1, demoninator=30 */ - if (!sscanf (argv[5], "%d", &cfg.g_timebase.num )) - die ("Invalid timebase numerator %s", argv[5]); - if (!sscanf (argv[6], "%d", &cfg.g_timebase.den )) - die ("Invalid timebase denominator %s", argv[6]); - - for (i=8; i<8+mode_to_num_layers[layering_mode]; i++) -- if (!sscanf(argv[i], "%d", &cfg.ts_target_bitrate[i-8])) -+ if (!sscanf(argv[i], "%ud", &cfg.ts_target_bitrate[i-8])) - die ("Invalid data rate %s", argv[i]); - -- // Real time parameters -- cfg.rc_dropframe_thresh = 0; // 30 -+ /* Real time parameters */ -+ cfg.rc_dropframe_thresh = 0; - cfg.rc_end_usage = VPX_CBR; - cfg.rc_resize_allowed = 0; -- cfg.rc_min_quantizer = 8; -+ cfg.rc_min_quantizer = 2; - cfg.rc_max_quantizer = 56; - cfg.rc_undershoot_pct = 100; - cfg.rc_overshoot_pct = 15; -@@ -194,25 +194,44 @@ int main(int argc, char **argv) { - cfg.rc_buf_optimal_sz = 600; - cfg.rc_buf_sz = 1000; - -- // Enable error resilient mode -+ /* Enable error resilient mode */ - cfg.g_error_resilient = 1; - cfg.g_lag_in_frames = 0; - cfg.kf_mode = VPX_KF_DISABLED; - -- // Disable automatic keyframe placement -- cfg.kf_min_dist = cfg.kf_max_dist = 1000; -+ /* Disable automatic keyframe placement */ -+ cfg.kf_min_dist = cfg.kf_max_dist = 3000; - -- // Temporal scaling parameters: -- // NOTE: The 3 prediction frames cannot be used interchangeably due to -- // differences in the way they are handled throughout the code. The -- // frames should be allocated to layers in the order LAST, GF, ARF. -- // Other combinations work, but may produce slightly inferior results. -+ /* Default setting for bitrate: used in special case of 1 layer (case 0). */ -+ cfg.rc_target_bitrate = cfg.ts_target_bitrate[0]; -+ -+ /* Temporal scaling parameters: */ -+ /* NOTE: The 3 prediction frames cannot be used interchangeably due to -+ * differences in the way they are handled throughout the code. The -+ * frames should be allocated to layers in the order LAST, GF, ARF. -+ * Other combinations work, but may produce slightly inferior results. -+ */ - switch (layering_mode) - { -- - case 0: - { -- // 2-layers, 2-frame period -+ /* 1-layer */ -+ int ids[1] = {0}; -+ cfg.ts_number_layers = 1; -+ cfg.ts_periodicity = 1; -+ cfg.ts_rate_decimator[0] = 1; -+ memcpy(cfg.ts_layer_id, ids, sizeof(ids)); -+ -+ flag_periodicity = cfg.ts_periodicity; -+ -+ // Update L only. 
-+ layer_flags[0] = VPX_EFLAG_FORCE_KF | -+ VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; -+ break; -+ } -+ case 1: -+ { -+ /* 2-layers, 2-frame period */ - int ids[2] = {0,1}; - cfg.ts_number_layers = 2; - cfg.ts_periodicity = 2; -@@ -222,14 +241,14 @@ int main(int argc, char **argv) { - - flag_periodicity = cfg.ts_periodicity; - #if 1 -- // 0=L, 1=GF, Intra-layer prediction enabled -+ /* 0=L, 1=GF, Intra-layer prediction enabled */ - layer_flags[0] = VPX_EFLAG_FORCE_KF | - VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | - VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF; - layer_flags[1] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST | - VP8_EFLAG_NO_REF_ARF; - #else -- // 0=L, 1=GF, Intra-layer prediction disabled -+ /* 0=L, 1=GF, Intra-layer prediction disabled */ - layer_flags[0] = VPX_EFLAG_FORCE_KF | - VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | - VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF; -@@ -239,9 +258,9 @@ int main(int argc, char **argv) { - break; - } - -- case 1: -+ case 2: - { -- // 2-layers, 3-frame period -+ /* 2-layers, 3-frame period */ - int ids[3] = {0,1,1}; - cfg.ts_number_layers = 2; - cfg.ts_periodicity = 3; -@@ -251,7 +270,7 @@ int main(int argc, char **argv) { - - flag_periodicity = cfg.ts_periodicity; - -- // 0=L, 1=GF, Intra-layer prediction enabled -+ /* 0=L, 1=GF, Intra-layer prediction enabled */ - layer_flags[0] = VPX_EFLAG_FORCE_KF | - VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | - VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; -@@ -262,9 +281,9 @@ int main(int argc, char **argv) { - break; - } - -- case 2: -+ case 3: - { -- // 3-layers, 6-frame period -+ /* 3-layers, 6-frame period */ - int ids[6] = {0,2,2,1,2,2}; - cfg.ts_number_layers = 3; - cfg.ts_periodicity = 6; -@@ -275,7 +294,7 @@ int main(int argc, char **argv) { - - flag_periodicity = cfg.ts_periodicity; - -- // 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled -+ /* 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled */ - layer_flags[0] = VPX_EFLAG_FORCE_KF | - VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | - VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; -@@ -288,9 +307,9 @@ int main(int argc, char **argv) { - break; - } - -- case 3: -+ case 4: - { -- // 3-layers, 4-frame period -+ /* 3-layers, 4-frame period */ - int ids[4] = {0,2,1,2}; - cfg.ts_number_layers = 3; - cfg.ts_periodicity = 4; -@@ -301,7 +320,7 @@ int main(int argc, char **argv) { - - flag_periodicity = cfg.ts_periodicity; - -- // 0=L, 1=GF, 2=ARF, Intra-layer prediction disabled -+ /* 0=L, 1=GF, 2=ARF, Intra-layer prediction disabled */ - layer_flags[0] = VPX_EFLAG_FORCE_KF | - VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | - VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; -@@ -315,9 +334,9 @@ int main(int argc, char **argv) { - break; - } - -- case 4: -+ case 5: - { -- // 3-layers, 4-frame period -+ /* 3-layers, 4-frame period */ - int ids[4] = {0,2,1,2}; - cfg.ts_number_layers = 3; - cfg.ts_periodicity = 4; -@@ -328,8 +347,9 @@ int main(int argc, char **argv) { - - flag_periodicity = cfg.ts_periodicity; - -- // 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled in layer 1, -- // disabled in layer 2 -+ /* 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled in layer 1, -+ * disabled in layer 2 -+ */ - layer_flags[0] = VPX_EFLAG_FORCE_KF | - VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | - VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; -@@ -342,9 +362,9 @@ int main(int argc, char **argv) { - break; - } - -- case 5: -+ case 6: - { -- // 3-layers, 4-frame period -+ /* 3-layers, 4-frame period */ - int ids[4] = {0,2,1,2}; - cfg.ts_number_layers = 3; - cfg.ts_periodicity = 
4; -@@ -355,7 +375,7 @@ int main(int argc, char **argv) { - - flag_periodicity = cfg.ts_periodicity; - -- // 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled -+ /* 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled */ - layer_flags[0] = VPX_EFLAG_FORCE_KF | - VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | - VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; -@@ -366,11 +386,11 @@ int main(int argc, char **argv) { - break; - } - -- case 6: -+ case 7: - { -- // NOTE: Probably of academic interest only -+ /* NOTE: Probably of academic interest only */ - -- // 5-layers, 16-frame period -+ /* 5-layers, 16-frame period */ - int ids[16] = {0,4,3,4,2,4,3,4,1,4,3,4,2,4,3,4}; - cfg.ts_number_layers = 5; - cfg.ts_periodicity = 16; -@@ -405,9 +425,9 @@ int main(int argc, char **argv) { - break; - } - -- case 7: -+ case 8: - { -- // 2-layers -+ /* 2-layers, with sync point at first frame of layer 1. */ - int ids[2] = {0,1}; - cfg.ts_number_layers = 2; - cfg.ts_periodicity = 2; -@@ -417,30 +437,49 @@ int main(int argc, char **argv) { - - flag_periodicity = 8; - -- // 0=L, 1=GF -+ /* 0=L, 1=GF */ -+ // ARF is used as predictor for all frames, and is only updated on -+ // key frame. Sync point every 8 frames. -+ -+ // Layer 0: predict from L and ARF, update L and G. - layer_flags[0] = VPX_EFLAG_FORCE_KF | -- VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | -- VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; -- layer_flags[1] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | -- VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ARF; -- layer_flags[2] = -- layer_flags[4] = -- layer_flags[6] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | -- VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; -- layer_flags[3] = -- layer_flags[5] = VP8_EFLAG_NO_REF_ARF | -- VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST; -- layer_flags[7] = VP8_EFLAG_NO_REF_ARF | -- VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF | -- VP8_EFLAG_NO_UPD_ARF | -+ VP8_EFLAG_NO_REF_GF | -+ VP8_EFLAG_NO_UPD_ARF; -+ -+ // Layer 1: sync point: predict from L and ARF, and update G. -+ layer_flags[1] = VP8_EFLAG_NO_REF_GF | -+ VP8_EFLAG_NO_UPD_LAST | -+ VP8_EFLAG_NO_UPD_ARF; -+ -+ // Layer 0, predict from L and ARF, update L. -+ layer_flags[2] = VP8_EFLAG_NO_REF_GF | -+ VP8_EFLAG_NO_UPD_GF | -+ VP8_EFLAG_NO_UPD_ARF; -+ -+ // Layer 1: predict from L, G and ARF, and update G. -+ layer_flags[3] = VP8_EFLAG_NO_UPD_ARF | -+ VP8_EFLAG_NO_UPD_LAST | - VP8_EFLAG_NO_UPD_ENTROPY; -+ -+ // Layer 0 -+ layer_flags[4] = layer_flags[2]; -+ -+ // Layer 1 -+ layer_flags[5] = layer_flags[3]; -+ -+ // Layer 0 -+ layer_flags[6] = layer_flags[4]; -+ -+ // Layer 1 -+ layer_flags[7] = layer_flags[5]; - break; - } - -- case 8: -- default: -+ case 9: - { -- // 3-layers -+ /* 3-layers */ -+ // Sync points for layer 1 and 2 every 8 frames. -+ - int ids[4] = {0,2,1,2}; - cfg.ts_number_layers = 3; - cfg.ts_periodicity = 4; -@@ -451,7 +490,7 @@ int main(int argc, char **argv) { - - flag_periodicity = 8; - -- // 0=L, 1=GF, 2=ARF -+ /* 0=L, 1=GF, 2=ARF */ - layer_flags[0] = VPX_EFLAG_FORCE_KF | - VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | - VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; -@@ -470,13 +509,109 @@ int main(int argc, char **argv) { - VP8_EFLAG_NO_UPD_ENTROPY; - break; - } -+ case 10: -+ { -+ // 3-layers structure where ARF is used as predictor for all frames, -+ // and is only updated on key frame. -+ // Sync points for layer 1 and 2 every 8 frames. 
-+ -+ int ids[4] = {0,2,1,2}; -+ cfg.ts_number_layers = 3; -+ cfg.ts_periodicity = 4; -+ cfg.ts_rate_decimator[0] = 4; -+ cfg.ts_rate_decimator[1] = 2; -+ cfg.ts_rate_decimator[2] = 1; -+ memcpy(cfg.ts_layer_id, ids, sizeof(ids)); -+ -+ flag_periodicity = 8; -+ -+ /* 0=L, 1=GF, 2=ARF */ -+ -+ // Layer 0: predict from L and ARF; update L and G. -+ layer_flags[0] = VPX_EFLAG_FORCE_KF | -+ VP8_EFLAG_NO_UPD_ARF | -+ VP8_EFLAG_NO_REF_GF; -+ -+ // Layer 2: sync point: predict from L and ARF; update none. -+ layer_flags[1] = VP8_EFLAG_NO_REF_GF | -+ VP8_EFLAG_NO_UPD_GF | -+ VP8_EFLAG_NO_UPD_ARF | -+ VP8_EFLAG_NO_UPD_LAST | -+ VP8_EFLAG_NO_UPD_ENTROPY; -+ -+ // Layer 1: sync point: predict from L and ARF; update G. -+ layer_flags[2] = VP8_EFLAG_NO_REF_GF | -+ VP8_EFLAG_NO_UPD_ARF | -+ VP8_EFLAG_NO_UPD_LAST; -+ -+ // Layer 2: predict from L, G, ARF; update none. -+ layer_flags[3] = VP8_EFLAG_NO_UPD_GF | -+ VP8_EFLAG_NO_UPD_ARF | -+ VP8_EFLAG_NO_UPD_LAST | -+ VP8_EFLAG_NO_UPD_ENTROPY; -+ -+ // Layer 0: predict from L and ARF; update L. -+ layer_flags[4] = VP8_EFLAG_NO_UPD_GF | -+ VP8_EFLAG_NO_UPD_ARF | -+ VP8_EFLAG_NO_REF_GF; -+ -+ // Layer 2: predict from L, G, ARF; update none. -+ layer_flags[5] = layer_flags[3]; -+ -+ // Layer 1: predict from L, G, ARF; update G. -+ layer_flags[6] = VP8_EFLAG_NO_UPD_ARF | -+ VP8_EFLAG_NO_UPD_LAST; -+ -+ // Layer 2: predict from L, G, ARF; update none. -+ layer_flags[7] = layer_flags[3]; -+ break; -+ } -+ case 11: -+ default: -+ { -+ // 3-layers structure as in case 10, but no sync/refresh points for -+ // layer 1 and 2. -+ -+ int ids[4] = {0,2,1,2}; -+ cfg.ts_number_layers = 3; -+ cfg.ts_periodicity = 4; -+ cfg.ts_rate_decimator[0] = 4; -+ cfg.ts_rate_decimator[1] = 2; -+ cfg.ts_rate_decimator[2] = 1; -+ memcpy(cfg.ts_layer_id, ids, sizeof(ids)); -+ -+ flag_periodicity = 8; -+ -+ /* 0=L, 1=GF, 2=ARF */ -+ -+ // Layer 0: predict from L and ARF; update L. -+ layer_flags[0] = VP8_EFLAG_NO_UPD_GF | -+ VP8_EFLAG_NO_UPD_ARF | -+ VP8_EFLAG_NO_REF_GF; -+ layer_flags[4] = layer_flags[0]; -+ -+ // Layer 1: predict from L, G, ARF; update G. -+ layer_flags[2] = VP8_EFLAG_NO_UPD_ARF | -+ VP8_EFLAG_NO_UPD_LAST; -+ layer_flags[6] = layer_flags[2]; -+ -+ // Layer 2: predict from L, G, ARF; update none. -+ layer_flags[1] = VP8_EFLAG_NO_UPD_GF | -+ VP8_EFLAG_NO_UPD_ARF | -+ VP8_EFLAG_NO_UPD_LAST | -+ VP8_EFLAG_NO_UPD_ENTROPY; -+ layer_flags[3] = layer_flags[1]; -+ layer_flags[5] = layer_flags[1]; -+ layer_flags[7] = layer_flags[1]; -+ break; -+ } - } - -- // Open input file -+ /* Open input file */ - if(!(infile = fopen(argv[1], "rb"))) - die("Failed to open %s for reading", argv[1]); - -- // Open an output file for each stream -+ /* Open an output file for each stream */ - for (i=0; i(_tokenPartitions)); - - frame_avail = 1; - while (frame_avail || got_data) { -@@ -517,8 +651,8 @@ int main(int argc, char **argv) { - 1, flags, VPX_DL_REALTIME)) - die_codec(&codec, "Failed to encode frame"); - -- // Reset KF flag -- if (layering_mode != 6) -+ /* Reset KF flag */ -+ if (layering_mode != 7) - layer_flags[0] &= ~VPX_EFLAG_FORCE_KF; - - got_data = 0; -@@ -530,29 +664,25 @@ int main(int argc, char **argv) { - idata.frame.buf, 1, pkt->data.frame.sz, -- outfile[i])); -+ (void) fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, -+ outfile[i]); - frames_in_layer[i]++; - } - break; - default: - break; - } -- printf (pkt->kind == VPX_CODEC_CX_FRAME_PKT -- && (pkt->data.frame.flags & VPX_FRAME_IS_KEY)? 
"K":"."); -- fflush (stdout); - } - frame_cnt++; - pts += frame_duration; - } -- printf ("\n"); - fclose (infile); - - printf ("Processed %d frames.\n",frame_cnt-1); - if (vpx_codec_destroy(&codec)) - die_codec (&codec, "Failed to destroy codec"); - -- // Try to rewrite the output file headers with the actual frame count -+ /* Try to rewrite the output file headers with the actual frame count */ - for (i=0; inum; - mr_cfg.mr_down_sampling_factor.den = dsf->den; - -+ /* Force Key-frame synchronization. Namely, encoder at higher -+ * resolution always use the same frame_type chosen by the -+ * lowest-resolution encoder. -+ */ -+ if(mr_cfg.mr_encoder_id) -+ cfg->kf_mode = VPX_KF_DISABLED; -+ - ctx->iface = iface; - ctx->name = iface->name; - ctx->priv = NULL; -@@ -126,8 +133,20 @@ vpx_codec_err_t vpx_codec_enc_init_multi_ver(vpx_codec_ctx_t *ctx, - - if (res) - { -- ctx->err_detail = ctx->priv ? ctx->priv->err_detail : NULL; -+ const char *error_detail = -+ ctx->priv ? ctx->priv->err_detail : NULL; -+ /* Destroy current ctx */ -+ ctx->err_detail = error_detail; - vpx_codec_destroy(ctx); -+ -+ /* Destroy already allocated high-level ctx */ -+ while (i) -+ { -+ ctx--; -+ ctx->err_detail = error_detail; -+ vpx_codec_destroy(ctx); -+ i--; -+ } - } - - if (ctx->priv) -diff --git a/vpx/vp8cx.h b/vpx/vp8cx.h -index 0af631c..a3c95d2 100644 ---- a/vpx/vp8cx.h -+++ b/vpx/vp8cx.h -@@ -204,8 +204,8 @@ typedef struct vpx_roi_map - unsigned char *roi_map; /**< specify an id between 0 and 3 for each 16x16 region within a frame */ - unsigned int rows; /**< number of rows */ - unsigned int cols; /**< number of cols */ -- int delta_q[4]; /**< quantizer delta [-64, 64] off baseline for regions with id between 0 and 3*/ -- int delta_lf[4]; /**< loop filter strength delta [-32, 32] for regions with id between 0 and 3 */ -+ int delta_q[4]; /**< quantizer delta [-63, 63] off baseline for regions with id between 0 and 3*/ -+ int delta_lf[4]; /**< loop filter strength delta [-63, 63] for regions with id between 0 and 3 */ - unsigned int static_threshold[4];/**< threshold for region to be treated as static */ - } vpx_roi_map_t; - -@@ -234,18 +234,6 @@ typedef struct vpx_scaling_mode - VPX_SCALING_MODE v_scaling_mode; /**< vertical scaling mode */ - } vpx_scaling_mode_t; - --/*!\brief VP8 encoding mode -- * -- * This defines VP8 encoding mode -- * -- */ --typedef enum --{ -- VP8_BEST_QUALITY_ENCODING, -- VP8_GOOD_QUALITY_ENCODING, -- VP8_REAL_TIME_ENCODING --} vp8e_encoding_mode; -- - /*!\brief VP8 token partition mode - * - * This defines VP8 partitioning mode for compressed data, i.e., the number of -@@ -298,12 +286,12 @@ VPX_CTRL_USE_TYPE(VP8E_SET_ENABLEAUTOALTREF, unsigned int) - VPX_CTRL_USE_TYPE(VP8E_SET_NOISE_SENSITIVITY, unsigned int) - VPX_CTRL_USE_TYPE(VP8E_SET_SHARPNESS, unsigned int) - VPX_CTRL_USE_TYPE(VP8E_SET_STATIC_THRESHOLD, unsigned int) --VPX_CTRL_USE_TYPE(VP8E_SET_TOKEN_PARTITIONS, vp8e_token_partitions) -+VPX_CTRL_USE_TYPE(VP8E_SET_TOKEN_PARTITIONS, int) /* vp8e_token_partitions */ - - VPX_CTRL_USE_TYPE(VP8E_SET_ARNR_MAXFRAMES, unsigned int) - VPX_CTRL_USE_TYPE(VP8E_SET_ARNR_STRENGTH , unsigned int) - VPX_CTRL_USE_TYPE(VP8E_SET_ARNR_TYPE , unsigned int) --VPX_CTRL_USE_TYPE(VP8E_SET_TUNING, vp8e_tuning) -+VPX_CTRL_USE_TYPE(VP8E_SET_TUNING, int) /* vp8e_tuning */ - VPX_CTRL_USE_TYPE(VP8E_SET_CQ_LEVEL , unsigned int) - - VPX_CTRL_USE_TYPE(VP8E_GET_LAST_QUANTIZER, int *) -diff --git a/vpx/vpx_codec.h b/vpx/vpx_codec.h -index d92e165..243b7a5 100644 ---- a/vpx/vpx_codec.h -+++ b/vpx/vpx_codec.h -@@ 
-49,15 +49,22 @@ extern "C" { - #ifndef DEPRECATED - #if defined(__GNUC__) && __GNUC__ - #define DEPRECATED __attribute__ ((deprecated)) --#define DECLSPEC_DEPRECATED /**< \copydoc #DEPRECATED */ - #elif defined(_MSC_VER) - #define DEPRECATED --#define DECLSPEC_DEPRECATED __declspec(deprecated) /**< \copydoc #DEPRECATED */ - #else - #define DEPRECATED --#define DECLSPEC_DEPRECATED /**< \copydoc #DEPRECATED */ - #endif -+#endif /* DEPRECATED */ -+ -+#ifndef DECLSPEC_DEPRECATED -+#if defined(__GNUC__) && __GNUC__ -+#define DECLSPEC_DEPRECATED /**< \copydoc #DEPRECATED */ -+#elif defined(_MSC_VER) -+#define DECLSPEC_DEPRECATED __declspec(deprecated) /**< \copydoc #DEPRECATED */ -+#else -+#define DECLSPEC_DEPRECATED /**< \copydoc #DEPRECATED */ - #endif -+#endif /* DECLSPEC_DEPRECATED */ - - /*!\brief Decorator indicating a function is potentially unused */ - #ifdef UNUSED -diff --git a/vpx/vpx_codec.mk b/vpx/vpx_codec.mk -index 427fd0f..ffa123f 100644 ---- a/vpx/vpx_codec.mk -+++ b/vpx/vpx_codec.mk -@@ -11,6 +11,21 @@ - - API_EXPORTS += exports - -+API_SRCS-$(CONFIG_VP8_ENCODER) += vp8.h -+API_SRCS-$(CONFIG_VP8_ENCODER) += vp8cx.h -+API_DOC_SRCS-$(CONFIG_VP8_ENCODER) += vp8.h -+API_DOC_SRCS-$(CONFIG_VP8_ENCODER) += vp8cx.h -+ -+API_SRCS-$(CONFIG_VP8_DECODER) += vp8.h -+API_SRCS-$(CONFIG_VP8_DECODER) += vp8dx.h -+API_DOC_SRCS-$(CONFIG_VP8_DECODER) += vp8.h -+API_DOC_SRCS-$(CONFIG_VP8_DECODER) += vp8dx.h -+ -+API_DOC_SRCS-yes += vpx_codec.h -+API_DOC_SRCS-yes += vpx_decoder.h -+API_DOC_SRCS-yes += vpx_encoder.h -+API_DOC_SRCS-yes += vpx_image.h -+ - API_SRCS-yes += src/vpx_decoder.c - API_SRCS-yes += vpx_decoder.h - API_SRCS-yes += src/vpx_encoder.c -@@ -23,3 +38,4 @@ API_SRCS-yes += vpx_codec.mk - API_SRCS-yes += vpx_codec_impl_bottom.h - API_SRCS-yes += vpx_codec_impl_top.h - API_SRCS-yes += vpx_image.h -+API_SRCS-$(BUILD_LIBVPX) += vpx_integer.h -diff --git a/vpx/vpx_decoder.h b/vpx/vpx_decoder.h -index 7992cc4..1ccf1c5 100644 ---- a/vpx/vpx_decoder.h -+++ b/vpx/vpx_decoder.h -@@ -113,6 +113,10 @@ extern "C" { - * function directly, to ensure that the ABI version number parameter - * is properly initialized. - * -+ * If the library was configured with --disable-multithread, this call -+ * is not thread safe and should be guarded with a lock if being used -+ * in a multithreaded context. -+ * - * In XMA mode (activated by setting VPX_CODEC_USE_XMA in the flags - * parameter), the storage pointed to by the cfg parameter must be - * kept readable and stable until all memory maps have been set. -diff --git a/vpx/vpx_encoder.h b/vpx/vpx_encoder.h -index 239036e..67d9033 100644 ---- a/vpx/vpx_encoder.h -+++ b/vpx/vpx_encoder.h -@@ -655,6 +655,10 @@ extern "C" { - * function directly, to ensure that the ABI version number parameter - * is properly initialized. - * -+ * If the library was configured with --disable-multithread, this call -+ * is not thread safe and should be guarded with a lock if being used -+ * in a multithreaded context. -+ * - * In XMA mode (activated by setting VPX_CODEC_USE_XMA in the flags - * parameter), the storage pointed to by the cfg parameter must be - * kept readable and stable until all memory maps have been set. -diff --git a/vpx_ports/arm_cpudetect.c b/vpx_ports/arm_cpudetect.c -index ebe428d..8ff95a1 100644 ---- a/vpx_ports/arm_cpudetect.c -+++ b/vpx_ports/arm_cpudetect.c -@@ -32,8 +32,33 @@ static int arm_cpu_env_mask(void) - return env && *env ? 
(int)strtol(env, NULL, 0) : ~0; - } - -+#if !CONFIG_RUNTIME_CPU_DETECT - --#if defined(_MSC_VER) -+int arm_cpu_caps(void) -+{ -+ /* This function should actually be a no-op. There is no way to adjust any of -+ * these because the RTCD tables do not exist: the functions are called -+ * statically */ -+ int flags; -+ int mask; -+ if (!arm_cpu_env_flags(&flags)) -+ { -+ return flags; -+ } -+ mask = arm_cpu_env_mask(); -+#if HAVE_EDSP -+ flags |= HAS_EDSP; -+#endif /* HAVE_EDSP */ -+#if HAVE_MEDIA -+ flags |= HAS_MEDIA; -+#endif /* HAVE_MEDIA */ -+#if HAVE_NEON -+ flags |= HAS_NEON; -+#endif /* HAVE_NEON */ -+ return flags & mask; -+} -+ -+#elif defined(_MSC_VER) /* end !CONFIG_RUNTIME_CPU_DETECT */ - /*For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.*/ - #define WIN32_LEAN_AND_MEAN - #define WIN32_EXTRA_LEAN -@@ -52,7 +77,7 @@ int arm_cpu_caps(void) - * instructions via their assembled hex code. - * All of these instructions should be essentially nops. - */ --#if defined(HAVE_EDSP) -+#if HAVE_EDSP - if (mask & HAS_EDSP) - { - __try -@@ -66,7 +91,7 @@ int arm_cpu_caps(void) - /*Ignore exception.*/ - } - } --#if defined(HAVE_MEDIA) -+#if HAVE_MEDIA - if (mask & HAS_MEDIA) - __try - { -@@ -79,7 +104,7 @@ int arm_cpu_caps(void) - /*Ignore exception.*/ - } - } --#if defined(HAVE_NEON) -+#if HAVE_NEON - if (mask & HAS_NEON) - { - __try -@@ -93,14 +118,13 @@ int arm_cpu_caps(void) - /*Ignore exception.*/ - } - } --#endif --#endif --#endif -+#endif /* HAVE_NEON */ -+#endif /* HAVE_MEDIA */ -+#endif /* HAVE_EDSP */ - return flags & mask; - } - --#elif defined(__linux__) --#if defined(__ANDROID__) -+#elif defined(__ANDROID__) /* end _MSC_VER */ - #include - - int arm_cpu_caps(void) -@@ -115,19 +139,20 @@ int arm_cpu_caps(void) - mask = arm_cpu_env_mask(); - features = android_getCpuFeatures(); - --#if defined(HAVE_EDSP) -+#if HAVE_EDSP - flags |= HAS_EDSP; --#endif --#if defined(HAVE_MEDIA) -+#endif /* HAVE_EDSP */ -+#if HAVE_MEDIA - flags |= HAS_MEDIA; --#endif --#if defined(HAVE_NEON) -+#endif /* HAVE_MEDIA */ -+#if HAVE_NEON - if (features & ANDROID_CPU_ARM_FEATURE_NEON) - flags |= HAS_NEON; --#endif -+#endif /* HAVE_NEON */ - return flags & mask; - } --#else // !defined(__ANDROID__) -+ -+#elif defined(__linux__) /* end __ANDROID__ */ - #include - - int arm_cpu_caps(void) -@@ -153,27 +178,27 @@ int arm_cpu_caps(void) - char buf[512]; - while (fgets(buf, 511, fin) != NULL) - { --#if defined(HAVE_EDSP) || defined(HAVE_NEON) -+#if HAVE_EDSP || HAVE_NEON - if (memcmp(buf, "Features", 8) == 0) - { - char *p; --#if defined(HAVE_EDSP) -+#if HAVE_EDSP - p=strstr(buf, " edsp"); - if (p != NULL && (p[5] == ' ' || p[5] == '\n')) - { - flags |= HAS_EDSP; - } --#if defined(HAVE_NEON) -+#if HAVE_NEON - p = strstr(buf, " neon"); - if (p != NULL && (p[5] == ' ' || p[5] == '\n')) - { - flags |= HAS_NEON; - } --#endif --#endif -+#endif /* HAVE_NEON */ -+#endif /* HAVE_EDSP */ - } --#endif --#if defined(HAVE_MEDIA) -+#endif /* HAVE_EDSP || HAVE_NEON */ -+#if HAVE_MEDIA - if (memcmp(buf, "CPU architecture:",17) == 0){ - int version; - version = atoi(buf+17); -@@ -182,37 +207,13 @@ int arm_cpu_caps(void) - flags |= HAS_MEDIA; - } - } --#endif -+#endif /* HAVE_MEDIA */ - } - fclose(fin); - } - return flags & mask; - } --#endif // defined(__linux__) --#elif !CONFIG_RUNTIME_CPU_DETECT -- --int arm_cpu_caps(void) --{ -- int flags; -- int mask; -- if (!arm_cpu_env_flags(&flags)) -- { -- return flags; -- } -- mask = arm_cpu_env_mask(); --#if defined(HAVE_EDSP) -- flags |= HAS_EDSP; --#endif --#if defined(HAVE_MEDIA) 
-- flags |= HAS_MEDIA; --#endif --#if defined(HAVE_NEON) -- flags |= HAS_NEON; --#endif -- return flags & mask; --} -- --#else -+#else /* end __linux__ */ - #error "--enable-runtime-cpu-detect selected, but no CPU detection method " \ -- "available for your platform. Reconfigure without --enable-runtime-cpu-detect." -+ "available for your platform. Reconfigure with --disable-runtime-cpu-detect." - #endif -diff --git a/vpx_ports/asm_offsets.h b/vpx_ports/asm_offsets.h -index d3b4fc7..7b6ae4a 100644 ---- a/vpx_ports/asm_offsets.h -+++ b/vpx_ports/asm_offsets.h -@@ -19,11 +19,11 @@ - static void assert_##name(void) {switch(0){case 0:case !!(cond):;}} - - #if INLINE_ASM --#define DEFINE(sym, val) asm("\n" #sym " EQU %0" : : "i" (val)); -+#define DEFINE(sym, val) asm("\n" #sym " EQU %0" : : "i" (val)) - #define BEGIN int main(void) { - #define END return 0; } - #else --#define DEFINE(sym, val) int sym = val; -+#define DEFINE(sym, val) const int sym = val - #define BEGIN - #define END - #endif -diff --git a/vpx_ports/emms.asm b/vpx_ports/emms.asm -index 306e235..efad1a5 100644 ---- a/vpx_ports/emms.asm -+++ b/vpx_ports/emms.asm -@@ -12,14 +12,14 @@ - %include "vpx_ports/x86_abi_support.asm" - - section .text -- global sym(vpx_reset_mmx_state) -+global sym(vpx_reset_mmx_state) PRIVATE - sym(vpx_reset_mmx_state): - emms - ret - - - %ifidn __OUTPUT_FORMAT__,x64 --global sym(vpx_winx64_fldcw) -+global sym(vpx_winx64_fldcw) PRIVATE - sym(vpx_winx64_fldcw): - sub rsp, 8 - mov [rsp], rcx ; win x64 specific -@@ -28,7 +28,7 @@ sym(vpx_winx64_fldcw): - ret - - --global sym(vpx_winx64_fstcw) -+global sym(vpx_winx64_fstcw) PRIVATE - sym(vpx_winx64_fstcw): - sub rsp, 8 - fstcw [rsp] -diff --git a/vpx_ports/mem_ops.h b/vpx_ports/mem_ops.h -index 0e52368..dec28d5 100644 ---- a/vpx_ports/mem_ops.h -+++ b/vpx_ports/mem_ops.h -@@ -145,27 +145,27 @@ static unsigned MEM_VALUE_T mem_get_le32(const void *vmem) - - #undef mem_get_sbe16 - #define mem_get_sbe16 mem_ops_wrap_symbol(mem_get_sbe16) --mem_get_s_generic(be, 16); -+mem_get_s_generic(be, 16) - - #undef mem_get_sbe24 - #define mem_get_sbe24 mem_ops_wrap_symbol(mem_get_sbe24) --mem_get_s_generic(be, 24); -+mem_get_s_generic(be, 24) - - #undef mem_get_sbe32 - #define mem_get_sbe32 mem_ops_wrap_symbol(mem_get_sbe32) --mem_get_s_generic(be, 32); -+mem_get_s_generic(be, 32) - - #undef mem_get_sle16 - #define mem_get_sle16 mem_ops_wrap_symbol(mem_get_sle16) --mem_get_s_generic(le, 16); -+mem_get_s_generic(le, 16) - - #undef mem_get_sle24 - #define mem_get_sle24 mem_ops_wrap_symbol(mem_get_sle24) --mem_get_s_generic(le, 24); -+mem_get_s_generic(le, 24) - - #undef mem_get_sle32 - #define mem_get_sle32 mem_ops_wrap_symbol(mem_get_sle32) --mem_get_s_generic(le, 32); -+mem_get_s_generic(le, 32) - - #undef mem_put_be16 - #define mem_put_be16 mem_ops_wrap_symbol(mem_put_be16) -diff --git a/vpx_ports/mem_ops_aligned.h b/vpx_ports/mem_ops_aligned.h -index 0fbba65..fca653a 100644 ---- a/vpx_ports/mem_ops_aligned.h -+++ b/vpx_ports/mem_ops_aligned.h -@@ -99,51 +99,51 @@ - - #undef mem_get_be16_aligned - #define mem_get_be16_aligned mem_ops_wrap_symbol(mem_get_be16_aligned) --mem_get_be_aligned_generic(16); -+mem_get_be_aligned_generic(16) - - #undef mem_get_be32_aligned - #define mem_get_be32_aligned mem_ops_wrap_symbol(mem_get_be32_aligned) --mem_get_be_aligned_generic(32); -+mem_get_be_aligned_generic(32) - - #undef mem_get_le16_aligned - #define mem_get_le16_aligned mem_ops_wrap_symbol(mem_get_le16_aligned) --mem_get_le_aligned_generic(16); -+mem_get_le_aligned_generic(16) - 
- #undef mem_get_le32_aligned - #define mem_get_le32_aligned mem_ops_wrap_symbol(mem_get_le32_aligned) --mem_get_le_aligned_generic(32); -+mem_get_le_aligned_generic(32) - - #undef mem_get_sbe16_aligned - #define mem_get_sbe16_aligned mem_ops_wrap_symbol(mem_get_sbe16_aligned) --mem_get_sbe_aligned_generic(16); -+mem_get_sbe_aligned_generic(16) - - #undef mem_get_sbe32_aligned - #define mem_get_sbe32_aligned mem_ops_wrap_symbol(mem_get_sbe32_aligned) --mem_get_sbe_aligned_generic(32); -+mem_get_sbe_aligned_generic(32) - - #undef mem_get_sle16_aligned - #define mem_get_sle16_aligned mem_ops_wrap_symbol(mem_get_sle16_aligned) --mem_get_sle_aligned_generic(16); -+mem_get_sle_aligned_generic(16) - - #undef mem_get_sle32_aligned - #define mem_get_sle32_aligned mem_ops_wrap_symbol(mem_get_sle32_aligned) --mem_get_sle_aligned_generic(32); -+mem_get_sle_aligned_generic(32) - - #undef mem_put_be16_aligned - #define mem_put_be16_aligned mem_ops_wrap_symbol(mem_put_be16_aligned) --mem_put_be_aligned_generic(16); -+mem_put_be_aligned_generic(16) - - #undef mem_put_be32_aligned - #define mem_put_be32_aligned mem_ops_wrap_symbol(mem_put_be32_aligned) --mem_put_be_aligned_generic(32); -+mem_put_be_aligned_generic(32) - - #undef mem_put_le16_aligned - #define mem_put_le16_aligned mem_ops_wrap_symbol(mem_put_le16_aligned) --mem_put_le_aligned_generic(16); -+mem_put_le_aligned_generic(16) - - #undef mem_put_le32_aligned - #define mem_put_le32_aligned mem_ops_wrap_symbol(mem_put_le32_aligned) --mem_put_le_aligned_generic(32); -+mem_put_le_aligned_generic(32) - - #undef mem_get_ne_aligned_generic - #undef mem_get_se_aligned_generic -diff --git a/vpx_ports/vpx_ports.mk b/vpx_ports/vpx_ports.mk -new file mode 100644 -index 0000000..e6cb52f ---- /dev/null -+++ b/vpx_ports/vpx_ports.mk -@@ -0,0 +1,26 @@ -+## -+## Copyright (c) 2012 The WebM project authors. All Rights Reserved. -+## -+## Use of this source code is governed by a BSD-style license -+## that can be found in the LICENSE file in the root of the source -+## tree. An additional intellectual property rights grant can be found -+## in the file PATENTS. All contributing project authors may -+## be found in the AUTHORS file in the root of the source tree. -+## -+ -+ -+PORTS_SRCS-yes += vpx_ports.mk -+ -+PORTS_SRCS-$(BUILD_LIBVPX) += asm_offsets.h -+PORTS_SRCS-$(BUILD_LIBVPX) += mem.h -+PORTS_SRCS-$(BUILD_LIBVPX) += vpx_timer.h -+ -+ifeq ($(ARCH_X86)$(ARCH_X86_64),yes) -+PORTS_SRCS-$(BUILD_LIBVPX) += emms.asm -+PORTS_SRCS-$(BUILD_LIBVPX) += x86.h -+PORTS_SRCS-$(BUILD_LIBVPX) += x86_abi_support.asm -+PORTS_SRCS-$(BUILD_LIBVPX) += x86_cpuid.c -+endif -+ -+PORTS_SRCS-$(ARCH_ARM) += arm_cpudetect.c -+PORTS_SRCS-$(ARCH_ARM) += arm.h -diff --git a/vpx_ports/x86.h b/vpx_ports/x86.h -index 1341c7f..9dd8c4b 100644 ---- a/vpx_ports/x86.h -+++ b/vpx_ports/x86.h -@@ -162,7 +162,7 @@ x86_readtsc(void) - return tsc; - #else - #if ARCH_X86_64 -- return __rdtsc(); -+ return (unsigned int)__rdtsc(); - #else - __asm rdtsc; - #endif -diff --git a/vpx_ports/x86_abi_support.asm b/vpx_ports/x86_abi_support.asm -index cef6a0b..0c9fe37 100644 ---- a/vpx_ports/x86_abi_support.asm -+++ b/vpx_ports/x86_abi_support.asm -@@ -88,12 +88,41 @@ - %define sym(x) x - %elifidn __OUTPUT_FORMAT__,elf64 - %define sym(x) x -+%elifidn __OUTPUT_FORMAT__,elfx32 -+%define sym(x) x - %elifidn __OUTPUT_FORMAT__,x64 - %define sym(x) x - %else - %define sym(x) _ %+ x - %endif - -+; PRIVATE -+; Macro for the attribute to hide a global symbol for the target ABI. 
-+; This is only active if CHROMIUM is defined. -+; -+; Chromium doesn't like exported global symbols due to symbol clashing with -+; plugins among other things. -+; -+; Requires Chromium's patched copy of yasm: -+; http://src.chromium.org/viewvc/chrome?view=rev&revision=73761 -+; http://www.tortall.net/projects/yasm/ticket/236 -+; -+%ifdef CHROMIUM -+ %ifidn __OUTPUT_FORMAT__,elf32 -+ %define PRIVATE :hidden -+ %elifidn __OUTPUT_FORMAT__,elf64 -+ %define PRIVATE :hidden -+ %elifidn __OUTPUT_FORMAT__,elfx32 -+ %define PRIVATE :hidden -+ %elifidn __OUTPUT_FORMAT__,x64 -+ %define PRIVATE -+ %else -+ %define PRIVATE :private_extern -+ %endif -+%else -+ %define PRIVATE -+%endif -+ - ; arg() - ; Return the address specification of the given argument - ; -@@ -181,7 +210,16 @@ - %endmacro - %endif - %endif -- %define HIDDEN_DATA(x) x -+ -+ %ifdef CHROMIUM -+ %ifidn __OUTPUT_FORMAT__,macho32 -+ %define HIDDEN_DATA(x) x:private_extern -+ %else -+ %define HIDDEN_DATA(x) x -+ %endif -+ %else -+ %define HIDDEN_DATA(x) x -+ %endif - %else - %macro GET_GOT 1 - %endmacro -@@ -189,6 +227,9 @@ - %ifidn __OUTPUT_FORMAT__,elf64 - %define WRT_PLT wrt ..plt - %define HIDDEN_DATA(x) x:data hidden -+ %elifidn __OUTPUT_FORMAT__,elfx32 -+ %define WRT_PLT wrt ..plt -+ %define HIDDEN_DATA(x) x:data hidden - %else - %define HIDDEN_DATA(x) x - %endif -@@ -330,5 +371,8 @@ section .text - %elifidn __OUTPUT_FORMAT__,elf64 - section .note.GNU-stack noalloc noexec nowrite progbits - section .text -+%elifidn __OUTPUT_FORMAT__,elfx32 -+section .note.GNU-stack noalloc noexec nowrite progbits -+section .text - %endif - -diff --git a/vpx_scale/arm/neon/yv12extend_arm.c b/vpx_scale/arm/neon/yv12extend_arm.c -index 7529fc6..eabd495 100644 ---- a/vpx_scale/arm/neon/yv12extend_arm.c -+++ b/vpx_scale/arm/neon/yv12extend_arm.c -@@ -8,18 +8,14 @@ - * be found in the AUTHORS file in the root of the source tree. 
- */ - -+#include "./vpx_rtcd.h" - --#include "vpx_scale/yv12config.h" --#include "vpx_mem/vpx_mem.h" --#include "vpx_scale/vpxscale.h" -+extern void vp8_yv12_copy_frame_func_neon(struct yv12_buffer_config *src_ybc, -+ struct yv12_buffer_config *dst_ybc); - --extern void vp8_yv12_copy_frame_func_neon(YV12_BUFFER_CONFIG *src_ybc, -- YV12_BUFFER_CONFIG *dst_ybc); -+void vp8_yv12_copy_frame_neon(struct yv12_buffer_config *src_ybc, -+ struct yv12_buffer_config *dst_ybc) { -+ vp8_yv12_copy_frame_func_neon(src_ybc, dst_ybc); - --void vp8_yv12_copy_frame_neon(YV12_BUFFER_CONFIG *src_ybc, -- YV12_BUFFER_CONFIG *dst_ybc) --{ -- vp8_yv12_copy_frame_func_neon(src_ybc, dst_ybc); -- -- vp8_yv12_extend_frame_borders_neon(dst_ybc); -+ vp8_yv12_extend_frame_borders_neon(dst_ybc); - } -diff --git a/vpx_scale/generic/bicubic_scaler.c b/vpx_scale/generic/bicubic_scaler.c -index 4468e9d..c116740 100644 ---- a/vpx_scale/generic/bicubic_scaler.c -+++ b/vpx_scale/generic/bicubic_scaler.c -@@ -46,245 +46,229 @@ static float a = -0.6; - // 3 2 - // C0 = a*t - a*t - // --static short c0_fixed(unsigned int t) --{ -- // put t in Q16 notation -- unsigned short v1, v2; -- -- // Q16 -- v1 = (a_i * t) >> 16; -- v1 = (v1 * t) >> 16; -- -- // Q16 -- v2 = (a_i * t) >> 16; -- v2 = (v2 * t) >> 16; -- v2 = (v2 * t) >> 16; -- -- // Q12 -- return -((v1 - v2) >> 4); -+static short c0_fixed(unsigned int t) { -+ // put t in Q16 notation -+ unsigned short v1, v2; -+ -+ // Q16 -+ v1 = (a_i * t) >> 16; -+ v1 = (v1 * t) >> 16; -+ -+ // Q16 -+ v2 = (a_i * t) >> 16; -+ v2 = (v2 * t) >> 16; -+ v2 = (v2 * t) >> 16; -+ -+ // Q12 -+ return -((v1 - v2) >> 4); - } - - // 2 3 - // C1 = a*t + (3-2*a)*t - (2-a)*t - // --static short c1_fixed(unsigned int t) --{ -- unsigned short v1, v2, v3; -- unsigned short two, three; -- -- // Q16 -- v1 = (a_i * t) >> 16; -- -- // Q13 -- two = 2 << 13; -- v2 = two - (a_i >> 3); -- v2 = (v2 * t) >> 16; -- v2 = (v2 * t) >> 16; -- v2 = (v2 * t) >> 16; -- -- // Q13 -- three = 3 << 13; -- v3 = three - (2 * (a_i >> 3)); -- v3 = (v3 * t) >> 16; -- v3 = (v3 * t) >> 16; -- -- // Q12 -- return (((v1 >> 3) - v2 + v3) >> 1); -+static short c1_fixed(unsigned int t) { -+ unsigned short v1, v2, v3; -+ unsigned short two, three; -+ -+ // Q16 -+ v1 = (a_i * t) >> 16; -+ -+ // Q13 -+ two = 2 << 13; -+ v2 = two - (a_i >> 3); -+ v2 = (v2 * t) >> 16; -+ v2 = (v2 * t) >> 16; -+ v2 = (v2 * t) >> 16; -+ -+ // Q13 -+ three = 3 << 13; -+ v3 = three - (2 * (a_i >> 3)); -+ v3 = (v3 * t) >> 16; -+ v3 = (v3 * t) >> 16; -+ -+ // Q12 -+ return (((v1 >> 3) - v2 + v3) >> 1); - - } - - // 2 3 - // C2 = 1 - (3-a)*t + (2-a)*t - // --static short c2_fixed(unsigned int t) --{ -- unsigned short v1, v2, v3; -- unsigned short two, three; -- -- // Q13 -- v1 = 1 << 13; -- -- // Q13 -- three = 3 << 13; -- v2 = three - (a_i >> 3); -- v2 = (v2 * t) >> 16; -- v2 = (v2 * t) >> 16; -- -- // Q13 -- two = 2 << 13; -- v3 = two - (a_i >> 3); -- v3 = (v3 * t) >> 16; -- v3 = (v3 * t) >> 16; -- v3 = (v3 * t) >> 16; -- -- // Q12 -- return (v1 - v2 + v3) >> 1; -+static short c2_fixed(unsigned int t) { -+ unsigned short v1, v2, v3; -+ unsigned short two, three; -+ -+ // Q13 -+ v1 = 1 << 13; -+ -+ // Q13 -+ three = 3 << 13; -+ v2 = three - (a_i >> 3); -+ v2 = (v2 * t) >> 16; -+ v2 = (v2 * t) >> 16; -+ -+ // Q13 -+ two = 2 << 13; -+ v3 = two - (a_i >> 3); -+ v3 = (v3 * t) >> 16; -+ v3 = (v3 * t) >> 16; -+ v3 = (v3 * t) >> 16; -+ -+ // Q12 -+ return (v1 - v2 + v3) >> 1; - } - - // 2 3 - // C3 = a*t - 2*a*t + a*t - // --static short c3_fixed(unsigned int t) --{ -- int v1, 
v2, v3; -+static short c3_fixed(unsigned int t) { -+ int v1, v2, v3; - -- // Q16 -- v1 = (a_i * t) >> 16; -+ // Q16 -+ v1 = (a_i * t) >> 16; - -- // Q15 -- v2 = 2 * (a_i >> 1); -- v2 = (v2 * t) >> 16; -- v2 = (v2 * t) >> 16; -+ // Q15 -+ v2 = 2 * (a_i >> 1); -+ v2 = (v2 * t) >> 16; -+ v2 = (v2 * t) >> 16; - -- // Q16 -- v3 = (a_i * t) >> 16; -- v3 = (v3 * t) >> 16; -- v3 = (v3 * t) >> 16; -+ // Q16 -+ v3 = (a_i * t) >> 16; -+ v3 = (v3 * t) >> 16; -+ v3 = (v3 * t) >> 16; - -- // Q12 -- return ((v2 - (v1 >> 1) - (v3 >> 1)) >> 3); -+ // Q12 -+ return ((v2 - (v1 >> 1) - (v3 >> 1)) >> 3); - } - #else - // 3 2 - // C0 = -a*t + a*t - // --float C0(float t) --{ -- return -a * t * t * t + a * t * t; -+float C0(float t) { -+ return -a * t * t * t + a * t * t; - } - - // 2 3 - // C1 = -a*t + (2*a+3)*t - (a+2)*t - // --float C1(float t) --{ -- return -(a + 2.0f) * t * t * t + (2.0f * a + 3.0f) * t * t - a * t; -+float C1(float t) { -+ return -(a + 2.0f) * t * t * t + (2.0f * a + 3.0f) * t * t - a * t; - } - - // 2 3 - // C2 = 1 - (a+3)*t + (a+2)*t - // --float C2(float t) --{ -- return (a + 2.0f) * t * t * t - (a + 3.0f) * t * t + 1.0f; -+float C2(float t) { -+ return (a + 2.0f) * t * t * t - (a + 3.0f) * t * t + 1.0f; - } - - // 2 3 - // C3 = a*t - 2*a*t + a*t - // --float C3(float t) --{ -- return a * t * t * t - 2.0f * a * t * t + a * t; -+float C3(float t) { -+ return a * t * t * t - 2.0f * a * t * t + a * t; - } - #endif - - #if 0 --int compare_real_fixed() --{ -- int i, errors = 0; -- float mult = 1.0 / 10000.0; -- unsigned int fixed_mult = mult * 4294967296;//65536; -- unsigned int phase_offset_int; -- float phase_offset_real; -- -- for (i = 0; i < 10000; i++) -- { -- int fixed0, fixed1, fixed2, fixed3, fixed_total; -- int real0, real1, real2, real3, real_total; -- -- phase_offset_real = (float)i * mult; -- phase_offset_int = (fixed_mult * i) >> 16; -+int compare_real_fixed() { -+ int i, errors = 0; -+ float mult = 1.0 / 10000.0; -+ unsigned int fixed_mult = mult * 4294967296;// 65536; -+ unsigned int phase_offset_int; -+ float phase_offset_real; -+ -+ for (i = 0; i < 10000; i++) { -+ int fixed0, fixed1, fixed2, fixed3, fixed_total; -+ int real0, real1, real2, real3, real_total; -+ -+ phase_offset_real = (float)i * mult; -+ phase_offset_int = (fixed_mult * i) >> 16; - // phase_offset_int = phase_offset_real * 65536; - -- fixed0 = c0_fixed(phase_offset_int); -- real0 = C0(phase_offset_real) * 4096.0; -+ fixed0 = c0_fixed(phase_offset_int); -+ real0 = C0(phase_offset_real) * 4096.0; - -- if ((abs(fixed0) > (abs(real0) + 1)) || (abs(fixed0) < (abs(real0) - 1))) -- errors++; -+ if ((abs(fixed0) > (abs(real0) + 1)) || (abs(fixed0) < (abs(real0) - 1))) -+ errors++; - -- fixed1 = c1_fixed(phase_offset_int); -- real1 = C1(phase_offset_real) * 4096.0; -+ fixed1 = c1_fixed(phase_offset_int); -+ real1 = C1(phase_offset_real) * 4096.0; - -- if ((abs(fixed1) > (abs(real1) + 1)) || (abs(fixed1) < (abs(real1) - 1))) -- errors++; -+ if ((abs(fixed1) > (abs(real1) + 1)) || (abs(fixed1) < (abs(real1) - 1))) -+ errors++; - -- fixed2 = c2_fixed(phase_offset_int); -- real2 = C2(phase_offset_real) * 4096.0; -+ fixed2 = c2_fixed(phase_offset_int); -+ real2 = C2(phase_offset_real) * 4096.0; - -- if ((abs(fixed2) > (abs(real2) + 1)) || (abs(fixed2) < (abs(real2) - 1))) -- errors++; -+ if ((abs(fixed2) > (abs(real2) + 1)) || (abs(fixed2) < (abs(real2) - 1))) -+ errors++; - -- fixed3 = c3_fixed(phase_offset_int); -- real3 = C3(phase_offset_real) * 4096.0; -+ fixed3 = c3_fixed(phase_offset_int); -+ real3 = 
C3(phase_offset_real) * 4096.0; - -- if ((abs(fixed3) > (abs(real3) + 1)) || (abs(fixed3) < (abs(real3) - 1))) -- errors++; -+ if ((abs(fixed3) > (abs(real3) + 1)) || (abs(fixed3) < (abs(real3) - 1))) -+ errors++; - -- fixed_total = fixed0 + fixed1 + fixed2 + fixed3; -- real_total = real0 + real1 + real2 + real3; -+ fixed_total = fixed0 + fixed1 + fixed2 + fixed3; -+ real_total = real0 + real1 + real2 + real3; - -- if ((fixed_total > 4097) || (fixed_total < 4094)) -- errors ++; -+ if ((fixed_total > 4097) || (fixed_total < 4094)) -+ errors++; - -- if ((real_total > 4097) || (real_total < 4095)) -- errors ++; -- } -+ if ((real_total > 4097) || (real_total < 4095)) -+ errors++; -+ } - -- return errors; -+ return errors; - } - #endif - - // Find greatest common denominator between two integers. Method used here is - // slow compared to Euclid's algorithm, but does not require any division. --int gcd(int a, int b) --{ -- // Problem with this algorithm is that if a or b = 0 this function -- // will never exit. Don't want to return 0 because any computation -- // that was based on a common denoninator and tried to reduce by -- // dividing by 0 would fail. Best solution that could be thought of -- // would to be fail by returing a 1; -- if (a <= 0 || b <= 0) -- return 1; -- -- while (a != b) -- { -- if (b > a) -- b = b - a; -- else -- { -- int tmp = a;//swap large and -- a = b; //small -- b = tmp; -- } -+int gcd(int a, int b) { -+ // Problem with this algorithm is that if a or b = 0 this function -+ // will never exit. Don't want to return 0 because any computation -+ // that was based on a common denoninator and tried to reduce by -+ // dividing by 0 would fail. Best solution that could be thought of -+ // would to be fail by returing a 1; -+ if (a <= 0 || b <= 0) -+ return 1; -+ -+ while (a != b) { -+ if (b > a) -+ b = b - a; -+ else { -+ int tmp = a;// swap large and -+ a = b; // small -+ b = tmp; - } -+ } - -- return b; -+ return b; - } - --void bicubic_coefficient_init() --{ -- vpx_memset(&g_b_scaler, 0, sizeof(BICUBIC_SCALER_STRUCT)); -- g_first_time = 0; -+void bicubic_coefficient_init() { -+ vpx_memset(&g_b_scaler, 0, sizeof(BICUBIC_SCALER_STRUCT)); -+ g_first_time = 0; - } - --void bicubic_coefficient_destroy() --{ -- if (!g_first_time) -- { -- vpx_free(g_b_scaler.l_w); -+void bicubic_coefficient_destroy() { -+ if (!g_first_time) { -+ vpx_free(g_b_scaler.l_w); - -- vpx_free(g_b_scaler.l_h); -+ vpx_free(g_b_scaler.l_h); - -- vpx_free(g_b_scaler.l_h_uv); -+ vpx_free(g_b_scaler.l_h_uv); - -- vpx_free(g_b_scaler.c_w); -+ vpx_free(g_b_scaler.c_w); - -- vpx_free(g_b_scaler.c_h); -+ vpx_free(g_b_scaler.c_h); - -- vpx_free(g_b_scaler.c_h_uv); -+ vpx_free(g_b_scaler.c_h_uv); - -- vpx_memset(&g_b_scaler, 0, sizeof(BICUBIC_SCALER_STRUCT)); -- } -+ vpx_memset(&g_b_scaler, 0, sizeof(BICUBIC_SCALER_STRUCT)); -+ } - } - - // Create the coeffients that will be used for the cubic interpolation. -@@ -292,311 +276,294 @@ void bicubic_coefficient_destroy() - // regimes the phase offsets will be different. There are 4 coefficents - // for each point, two on each side. The layout is that there are the - // 4 coefficents for each phase in the array and then the next phase. 
--int bicubic_coefficient_setup(int in_width, int in_height, int out_width, int out_height) --{ -- int i; -+int bicubic_coefficient_setup(int in_width, int in_height, int out_width, int out_height) { -+ int i; - #ifdef FIXED_POINT -- int phase_offset_int; -- unsigned int fixed_mult; -- int product_val = 0; -+ int phase_offset_int; -+ unsigned int fixed_mult; -+ int product_val = 0; - #else -- float phase_offset; -+ float phase_offset; - #endif -- int gcd_w, gcd_h, gcd_h_uv, d_w, d_h, d_h_uv; -+ int gcd_w, gcd_h, gcd_h_uv, d_w, d_h, d_h_uv; - -- if (g_first_time) -- bicubic_coefficient_init(); -+ if (g_first_time) -+ bicubic_coefficient_init(); - - -- // check to see if the coefficents have already been set up correctly -- if ((in_width == g_b_scaler.in_width) && (in_height == g_b_scaler.in_height) -- && (out_width == g_b_scaler.out_width) && (out_height == g_b_scaler.out_height)) -- return 0; -+ // check to see if the coefficents have already been set up correctly -+ if ((in_width == g_b_scaler.in_width) && (in_height == g_b_scaler.in_height) -+ && (out_width == g_b_scaler.out_width) && (out_height == g_b_scaler.out_height)) -+ return 0; - -- g_b_scaler.in_width = in_width; -- g_b_scaler.in_height = in_height; -- g_b_scaler.out_width = out_width; -- g_b_scaler.out_height = out_height; -+ g_b_scaler.in_width = in_width; -+ g_b_scaler.in_height = in_height; -+ g_b_scaler.out_width = out_width; -+ g_b_scaler.out_height = out_height; - -- // Don't want to allow crazy scaling, just try and prevent a catastrophic -- // failure here. Want to fail after setting the member functions so if -- // if the scaler is called the member functions will not scale. -- if (out_width <= 0 || out_height <= 0) -- return -1; -+ // Don't want to allow crazy scaling, just try and prevent a catastrophic -+ // failure here. Want to fail after setting the member functions so if -+ // if the scaler is called the member functions will not scale. -+ if (out_width <= 0 || out_height <= 0) -+ return -1; - -- // reduce in/out width and height ratios using the gcd -- gcd_w = gcd(out_width, in_width); -- gcd_h = gcd(out_height, in_height); -- gcd_h_uv = gcd(out_height, in_height / 2); -+ // reduce in/out width and height ratios using the gcd -+ gcd_w = gcd(out_width, in_width); -+ gcd_h = gcd(out_height, in_height); -+ gcd_h_uv = gcd(out_height, in_height / 2); - -- // the numerator width and height are to be saved in -- // globals so they can be used during the scaling process -- // without having to be recalculated. -- g_b_scaler.nw = out_width / gcd_w; -- d_w = in_width / gcd_w; -+ // the numerator width and height are to be saved in -+ // globals so they can be used during the scaling process -+ // without having to be recalculated. 
-+ g_b_scaler.nw = out_width / gcd_w; -+ d_w = in_width / gcd_w; - -- g_b_scaler.nh = out_height / gcd_h; -- d_h = in_height / gcd_h; -+ g_b_scaler.nh = out_height / gcd_h; -+ d_h = in_height / gcd_h; - -- g_b_scaler.nh_uv = out_height / gcd_h_uv; -- d_h_uv = (in_height / 2) / gcd_h_uv; -+ g_b_scaler.nh_uv = out_height / gcd_h_uv; -+ d_h_uv = (in_height / 2) / gcd_h_uv; - -- // allocate memory for the coefficents -- vpx_free(g_b_scaler.l_w); -+ // allocate memory for the coefficents -+ vpx_free(g_b_scaler.l_w); - -- vpx_free(g_b_scaler.l_h); -+ vpx_free(g_b_scaler.l_h); - -- vpx_free(g_b_scaler.l_h_uv); -+ vpx_free(g_b_scaler.l_h_uv); - -- g_b_scaler.l_w = (short *)vpx_memalign(32, out_width * 2); -- g_b_scaler.l_h = (short *)vpx_memalign(32, out_height * 2); -- g_b_scaler.l_h_uv = (short *)vpx_memalign(32, out_height * 2); -+ g_b_scaler.l_w = (short *)vpx_memalign(32, out_width * 2); -+ g_b_scaler.l_h = (short *)vpx_memalign(32, out_height * 2); -+ g_b_scaler.l_h_uv = (short *)vpx_memalign(32, out_height * 2); - -- vpx_free(g_b_scaler.c_w); -+ vpx_free(g_b_scaler.c_w); - -- vpx_free(g_b_scaler.c_h); -+ vpx_free(g_b_scaler.c_h); - -- vpx_free(g_b_scaler.c_h_uv); -+ vpx_free(g_b_scaler.c_h_uv); - -- g_b_scaler.c_w = (short *)vpx_memalign(32, g_b_scaler.nw * 4 * 2); -- g_b_scaler.c_h = (short *)vpx_memalign(32, g_b_scaler.nh * 4 * 2); -- g_b_scaler.c_h_uv = (short *)vpx_memalign(32, g_b_scaler.nh_uv * 4 * 2); -+ g_b_scaler.c_w = (short *)vpx_memalign(32, g_b_scaler.nw * 4 * 2); -+ g_b_scaler.c_h = (short *)vpx_memalign(32, g_b_scaler.nh * 4 * 2); -+ g_b_scaler.c_h_uv = (short *)vpx_memalign(32, g_b_scaler.nh_uv * 4 * 2); - -- g_b_scaler.hbuf = g_hbuf; -- g_b_scaler.hbuf_uv = g_hbuf_uv; -+ g_b_scaler.hbuf = g_hbuf; -+ g_b_scaler.hbuf_uv = g_hbuf_uv; - -- // Set up polyphase filter taps. This needs to be done before -- // the scaling because of the floating point math required. The -- // coefficients are multiplied by 2^12 so that fixed point math -- // can be used in the main scaling loop. -+ // Set up polyphase filter taps. This needs to be done before -+ // the scaling because of the floating point math required. The -+ // coefficients are multiplied by 2^12 so that fixed point math -+ // can be used in the main scaling loop. 
- #ifdef FIXED_POINT -- fixed_mult = (1.0 / (float)g_b_scaler.nw) * 4294967296; -+ fixed_mult = (1.0 / (float)g_b_scaler.nw) * 4294967296; - -- product_val = 0; -+ product_val = 0; - -- for (i = 0; i < g_b_scaler.nw; i++) -- { -- if (product_val > g_b_scaler.nw) -- product_val -= g_b_scaler.nw; -+ for (i = 0; i < g_b_scaler.nw; i++) { -+ if (product_val > g_b_scaler.nw) -+ product_val -= g_b_scaler.nw; - -- phase_offset_int = (fixed_mult * product_val) >> 16; -+ phase_offset_int = (fixed_mult * product_val) >> 16; - -- g_b_scaler.c_w[i*4] = c3_fixed(phase_offset_int); -- g_b_scaler.c_w[i*4+1] = c2_fixed(phase_offset_int); -- g_b_scaler.c_w[i*4+2] = c1_fixed(phase_offset_int); -- g_b_scaler.c_w[i*4+3] = c0_fixed(phase_offset_int); -+ g_b_scaler.c_w[i * 4] = c3_fixed(phase_offset_int); -+ g_b_scaler.c_w[i * 4 + 1] = c2_fixed(phase_offset_int); -+ g_b_scaler.c_w[i * 4 + 2] = c1_fixed(phase_offset_int); -+ g_b_scaler.c_w[i * 4 + 3] = c0_fixed(phase_offset_int); - -- product_val += d_w; -- } -+ product_val += d_w; -+ } - - -- fixed_mult = (1.0 / (float)g_b_scaler.nh) * 4294967296; -+ fixed_mult = (1.0 / (float)g_b_scaler.nh) * 4294967296; - -- product_val = 0; -+ product_val = 0; - -- for (i = 0; i < g_b_scaler.nh; i++) -- { -- if (product_val > g_b_scaler.nh) -- product_val -= g_b_scaler.nh; -+ for (i = 0; i < g_b_scaler.nh; i++) { -+ if (product_val > g_b_scaler.nh) -+ product_val -= g_b_scaler.nh; - -- phase_offset_int = (fixed_mult * product_val) >> 16; -+ phase_offset_int = (fixed_mult * product_val) >> 16; - -- g_b_scaler.c_h[i*4] = c0_fixed(phase_offset_int); -- g_b_scaler.c_h[i*4+1] = c1_fixed(phase_offset_int); -- g_b_scaler.c_h[i*4+2] = c2_fixed(phase_offset_int); -- g_b_scaler.c_h[i*4+3] = c3_fixed(phase_offset_int); -+ g_b_scaler.c_h[i * 4] = c0_fixed(phase_offset_int); -+ g_b_scaler.c_h[i * 4 + 1] = c1_fixed(phase_offset_int); -+ g_b_scaler.c_h[i * 4 + 2] = c2_fixed(phase_offset_int); -+ g_b_scaler.c_h[i * 4 + 3] = c3_fixed(phase_offset_int); - -- product_val += d_h; -- } -+ product_val += d_h; -+ } - -- fixed_mult = (1.0 / (float)g_b_scaler.nh_uv) * 4294967296; -+ fixed_mult = (1.0 / (float)g_b_scaler.nh_uv) * 4294967296; - -- product_val = 0; -+ product_val = 0; - -- for (i = 0; i < g_b_scaler.nh_uv; i++) -- { -- if (product_val > g_b_scaler.nh_uv) -- product_val -= g_b_scaler.nh_uv; -+ for (i = 0; i < g_b_scaler.nh_uv; i++) { -+ if (product_val > g_b_scaler.nh_uv) -+ product_val -= g_b_scaler.nh_uv; - -- phase_offset_int = (fixed_mult * product_val) >> 16; -+ phase_offset_int = (fixed_mult * product_val) >> 16; - -- g_b_scaler.c_h_uv[i*4] = c0_fixed(phase_offset_int); -- g_b_scaler.c_h_uv[i*4+1] = c1_fixed(phase_offset_int); -- g_b_scaler.c_h_uv[i*4+2] = c2_fixed(phase_offset_int); -- g_b_scaler.c_h_uv[i*4+3] = c3_fixed(phase_offset_int); -+ g_b_scaler.c_h_uv[i * 4] = c0_fixed(phase_offset_int); -+ g_b_scaler.c_h_uv[i * 4 + 1] = c1_fixed(phase_offset_int); -+ g_b_scaler.c_h_uv[i * 4 + 2] = c2_fixed(phase_offset_int); -+ g_b_scaler.c_h_uv[i * 4 + 3] = c3_fixed(phase_offset_int); - -- product_val += d_h_uv; -- } -+ product_val += d_h_uv; -+ } - - #else - -- for (i = 0; i < g_nw; i++) -- { -- phase_offset = (float)((i * d_w) % g_nw) / (float)g_nw; -- g_c_w[i*4] = (C3(phase_offset) * 4096.0); -- g_c_w[i*4+1] = (C2(phase_offset) * 4096.0); -- g_c_w[i*4+2] = (C1(phase_offset) * 4096.0); -- g_c_w[i*4+3] = (C0(phase_offset) * 4096.0); -- } -- -- for (i = 0; i < g_nh; i++) -- { -- phase_offset = (float)((i * d_h) % g_nh) / (float)g_nh; -- g_c_h[i*4] = (C0(phase_offset) * 4096.0); -- 
g_c_h[i*4+1] = (C1(phase_offset) * 4096.0); -- g_c_h[i*4+2] = (C2(phase_offset) * 4096.0); -- g_c_h[i*4+3] = (C3(phase_offset) * 4096.0); -- } -- -- for (i = 0; i < g_nh_uv; i++) -- { -- phase_offset = (float)((i * d_h_uv) % g_nh_uv) / (float)g_nh_uv; -- g_c_h_uv[i*4] = (C0(phase_offset) * 4096.0); -- g_c_h_uv[i*4+1] = (C1(phase_offset) * 4096.0); -- g_c_h_uv[i*4+2] = (C2(phase_offset) * 4096.0); -- g_c_h_uv[i*4+3] = (C3(phase_offset) * 4096.0); -- } -+ for (i = 0; i < g_nw; i++) { -+ phase_offset = (float)((i * d_w) % g_nw) / (float)g_nw; -+ g_c_w[i * 4] = (C3(phase_offset) * 4096.0); -+ g_c_w[i * 4 + 1] = (C2(phase_offset) * 4096.0); -+ g_c_w[i * 4 + 2] = (C1(phase_offset) * 4096.0); -+ g_c_w[i * 4 + 3] = (C0(phase_offset) * 4096.0); -+ } -+ -+ for (i = 0; i < g_nh; i++) { -+ phase_offset = (float)((i * d_h) % g_nh) / (float)g_nh; -+ g_c_h[i * 4] = (C0(phase_offset) * 4096.0); -+ g_c_h[i * 4 + 1] = (C1(phase_offset) * 4096.0); -+ g_c_h[i * 4 + 2] = (C2(phase_offset) * 4096.0); -+ g_c_h[i * 4 + 3] = (C3(phase_offset) * 4096.0); -+ } -+ -+ for (i = 0; i < g_nh_uv; i++) { -+ phase_offset = (float)((i * d_h_uv) % g_nh_uv) / (float)g_nh_uv; -+ g_c_h_uv[i * 4] = (C0(phase_offset) * 4096.0); -+ g_c_h_uv[i * 4 + 1] = (C1(phase_offset) * 4096.0); -+ g_c_h_uv[i * 4 + 2] = (C2(phase_offset) * 4096.0); -+ g_c_h_uv[i * 4 + 3] = (C3(phase_offset) * 4096.0); -+ } - - #endif - -- // Create an array that corresponds input lines to output lines. -- // This doesn't require floating point math, but it does require -- // a division and because hardware division is not present that -- // is a call. -- for (i = 0; i < out_width; i++) -- { -- g_b_scaler.l_w[i] = (i * d_w) / g_b_scaler.nw; -+ // Create an array that corresponds input lines to output lines. -+ // This doesn't require floating point math, but it does require -+ // a division and because hardware division is not present that -+ // is a call. -+ for (i = 0; i < out_width; i++) { -+ g_b_scaler.l_w[i] = (i * d_w) / g_b_scaler.nw; - -- if ((g_b_scaler.l_w[i] + 2) <= in_width) -- g_b_scaler.max_usable_out_width = i; -+ if ((g_b_scaler.l_w[i] + 2) <= in_width) -+ g_b_scaler.max_usable_out_width = i; - -- } -+ } - -- for (i = 0; i < out_height + 1; i++) -- { -- g_b_scaler.l_h[i] = (i * d_h) / g_b_scaler.nh; -- g_b_scaler.l_h_uv[i] = (i * d_h_uv) / g_b_scaler.nh_uv; -- } -+ for (i = 0; i < out_height + 1; i++) { -+ g_b_scaler.l_h[i] = (i * d_h) / g_b_scaler.nh; -+ g_b_scaler.l_h_uv[i] = (i * d_h_uv) / g_b_scaler.nh_uv; -+ } - -- return 0; -+ return 0; - } - - int bicubic_scale(int in_width, int in_height, int in_stride, - int out_width, int out_height, int out_stride, -- unsigned char *input_image, unsigned char *output_image) --{ -- short *RESTRICT l_w, * RESTRICT l_h; -- short *RESTRICT c_w, * RESTRICT c_h; -- unsigned char *RESTRICT ip, * RESTRICT op; -- unsigned char *RESTRICT hbuf; -- int h, w, lw, lh; -- int temp_sum; -- int phase_offset_w, phase_offset_h; -- -- c_w = g_b_scaler.c_w; -- c_h = g_b_scaler.c_h; -- -- op = output_image; -- -- l_w = g_b_scaler.l_w; -- l_h = g_b_scaler.l_h; -- -- phase_offset_h = 0; -- -- for (h = 0; h < out_height; h++) -- { -- // select the row to work on -- lh = l_h[h]; -- ip = input_image + (in_stride * lh); -- -- // vp8_filter the row vertically into an temporary buffer. -- // If the phase offset == 0 then all the multiplication -- // is going to result in the output equalling the input. -- // So instead point the temporary buffer to the input. 
-- // Also handle the boundry condition of not being able to -- // filter that last lines. -- if (phase_offset_h && (lh < in_height - 2)) -- { -- hbuf = g_b_scaler.hbuf; -- -- for (w = 0; w < in_width; w++) -- { -- temp_sum = c_h[phase_offset_h*4+3] * ip[w - in_stride]; -- temp_sum += c_h[phase_offset_h*4+2] * ip[w]; -- temp_sum += c_h[phase_offset_h*4+1] * ip[w + in_stride]; -- temp_sum += c_h[phase_offset_h*4] * ip[w + 2*in_stride]; -- -- hbuf[w] = temp_sum >> 12; -- } -- } -- else -- hbuf = ip; -- -- // increase the phase offset for the next time around. -- if (++phase_offset_h >= g_b_scaler.nh) -- phase_offset_h = 0; -- -- // now filter and expand it horizontally into the final -- // output buffer -+ unsigned char *input_image, unsigned char *output_image) { -+ short *RESTRICT l_w, * RESTRICT l_h; -+ short *RESTRICT c_w, * RESTRICT c_h; -+ unsigned char *RESTRICT ip, * RESTRICT op; -+ unsigned char *RESTRICT hbuf; -+ int h, w, lw, lh; -+ int temp_sum; -+ int phase_offset_w, phase_offset_h; -+ -+ c_w = g_b_scaler.c_w; -+ c_h = g_b_scaler.c_h; -+ -+ op = output_image; -+ -+ l_w = g_b_scaler.l_w; -+ l_h = g_b_scaler.l_h; -+ -+ phase_offset_h = 0; -+ -+ for (h = 0; h < out_height; h++) { -+ // select the row to work on -+ lh = l_h[h]; -+ ip = input_image + (in_stride * lh); -+ -+ // vp8_filter the row vertically into an temporary buffer. -+ // If the phase offset == 0 then all the multiplication -+ // is going to result in the output equalling the input. -+ // So instead point the temporary buffer to the input. -+ // Also handle the boundry condition of not being able to -+ // filter that last lines. -+ if (phase_offset_h && (lh < in_height - 2)) { -+ hbuf = g_b_scaler.hbuf; -+ -+ for (w = 0; w < in_width; w++) { -+ temp_sum = c_h[phase_offset_h * 4 + 3] * ip[w - in_stride]; -+ temp_sum += c_h[phase_offset_h * 4 + 2] * ip[w]; -+ temp_sum += c_h[phase_offset_h * 4 + 1] * ip[w + in_stride]; -+ temp_sum += c_h[phase_offset_h * 4] * ip[w + 2 * in_stride]; -+ -+ hbuf[w] = temp_sum >> 12; -+ } -+ } else -+ hbuf = ip; -+ -+ // increase the phase offset for the next time around. 
-+ if (++phase_offset_h >= g_b_scaler.nh) -+ phase_offset_h = 0; -+ -+ // now filter and expand it horizontally into the final -+ // output buffer -+ phase_offset_w = 0; -+ -+ for (w = 0; w < out_width; w++) { -+ // get the index to use to expand the image -+ lw = l_w[w]; -+ -+ temp_sum = c_w[phase_offset_w * 4] * hbuf[lw - 1]; -+ temp_sum += c_w[phase_offset_w * 4 + 1] * hbuf[lw]; -+ temp_sum += c_w[phase_offset_w * 4 + 2] * hbuf[lw + 1]; -+ temp_sum += c_w[phase_offset_w * 4 + 3] * hbuf[lw + 2]; -+ temp_sum = temp_sum >> 12; -+ -+ if (++phase_offset_w >= g_b_scaler.nw) - phase_offset_w = 0; - -- for (w = 0; w < out_width; w++) -- { -- // get the index to use to expand the image -- lw = l_w[w]; -- -- temp_sum = c_w[phase_offset_w*4] * hbuf[lw - 1]; -- temp_sum += c_w[phase_offset_w*4+1] * hbuf[lw]; -- temp_sum += c_w[phase_offset_w*4+2] * hbuf[lw + 1]; -- temp_sum += c_w[phase_offset_w*4+3] * hbuf[lw + 2]; -- temp_sum = temp_sum >> 12; -+ // boundry conditions -+ if ((lw + 2) >= in_width) -+ temp_sum = hbuf[lw]; - -- if (++phase_offset_w >= g_b_scaler.nw) -- phase_offset_w = 0; -+ if (lw == 0) -+ temp_sum = hbuf[0]; - -- // boundry conditions -- if ((lw + 2) >= in_width) -- temp_sum = hbuf[lw]; -- -- if (lw == 0) -- temp_sum = hbuf[0]; -- -- op[w] = temp_sum; -- } -- -- op += out_stride; -+ op[w] = temp_sum; - } - -- return 0; -+ op += out_stride; -+ } -+ -+ return 0; - } - --void bicubic_scale_frame_reset() --{ -- g_b_scaler.out_width = 0; -- g_b_scaler.out_height = 0; -+void bicubic_scale_frame_reset() { -+ g_b_scaler.out_width = 0; -+ g_b_scaler.out_height = 0; - } - - void bicubic_scale_frame(YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, -- int new_width, int new_height) --{ -+ int new_width, int new_height) { - -- dst->y_width = new_width; -- dst->y_height = new_height; -- dst->uv_width = new_width / 2; -- dst->uv_height = new_height / 2; -+ dst->y_width = new_width; -+ dst->y_height = new_height; -+ dst->uv_width = new_width / 2; -+ dst->uv_height = new_height / 2; - -- dst->y_stride = dst->y_width; -- dst->uv_stride = dst->uv_width; -+ dst->y_stride = dst->y_width; -+ dst->uv_stride = dst->uv_width; - -- bicubic_scale(src->y_width, src->y_height, src->y_stride, -- new_width, new_height, dst->y_stride, -- src->y_buffer, dst->y_buffer); -+ bicubic_scale(src->y_width, src->y_height, src->y_stride, -+ new_width, new_height, dst->y_stride, -+ src->y_buffer, dst->y_buffer); - -- bicubic_scale(src->uv_width, src->uv_height, src->uv_stride, -- new_width / 2, new_height / 2, dst->uv_stride, -- src->u_buffer, dst->u_buffer); -+ bicubic_scale(src->uv_width, src->uv_height, src->uv_stride, -+ new_width / 2, new_height / 2, dst->uv_stride, -+ src->u_buffer, dst->u_buffer); - -- bicubic_scale(src->uv_width, src->uv_height, src->uv_stride, -- new_width / 2, new_height / 2, dst->uv_stride, -- src->v_buffer, dst->v_buffer); -+ bicubic_scale(src->uv_width, src->uv_height, src->uv_stride, -+ new_width / 2, new_height / 2, dst->uv_stride, -+ src->v_buffer, dst->v_buffer); - } -diff --git a/vpx_scale/generic/gen_scalers.c b/vpx_scale/generic/gen_scalers.c -index 9beb162..60c21fb 100644 ---- a/vpx_scale/generic/gen_scalers.c -+++ b/vpx_scale/generic/gen_scalers.c -@@ -34,47 +34,42 @@ - * SPECIAL NOTES : None. 
- * - ****************************************************************************/ --void vp8_horizontal_line_4_5_scale_c --( -- const unsigned char *source, -- unsigned int source_width, -- unsigned char *dest, -- unsigned int dest_width --) --{ -- unsigned i; -- unsigned int a, b, c; -- unsigned char *des = dest; -- const unsigned char *src = source; -- -- (void) dest_width; -- -- for (i = 0; i < source_width - 4; i += 4) -- { -- a = src[0]; -- b = src[1]; -- des [0] = (unsigned char) a; -- des [1] = (unsigned char)((a * 51 + 205 * b + 128) >> 8); -- c = src[2] * 154; -- a = src[3]; -- des [2] = (unsigned char)((b * 102 + c + 128) >> 8); -- des [3] = (unsigned char)((c + 102 * a + 128) >> 8); -- b = src[4]; -- des [4] = (unsigned char)((a * 205 + 51 * b + 128) >> 8); -- -- src += 4; -- des += 5; -- } -- -+void vp8_horizontal_line_4_5_scale_c(const unsigned char *source, -+ unsigned int source_width, -+ unsigned char *dest, -+ unsigned int dest_width) { -+ unsigned i; -+ unsigned int a, b, c; -+ unsigned char *des = dest; -+ const unsigned char *src = source; -+ -+ (void) dest_width; -+ -+ for (i = 0; i < source_width - 4; i += 4) { - a = src[0]; - b = src[1]; -- des [0] = (unsigned char)(a); -+ des [0] = (unsigned char) a; - des [1] = (unsigned char)((a * 51 + 205 * b + 128) >> 8); - c = src[2] * 154; - a = src[3]; - des [2] = (unsigned char)((b * 102 + c + 128) >> 8); - des [3] = (unsigned char)((c + 102 * a + 128) >> 8); -- des [4] = (unsigned char)(a); -+ b = src[4]; -+ des [4] = (unsigned char)((a * 205 + 51 * b + 128) >> 8); -+ -+ src += 4; -+ des += 5; -+ } -+ -+ a = src[0]; -+ b = src[1]; -+ des [0] = (unsigned char)(a); -+ des [1] = (unsigned char)((a * 51 + 205 * b + 128) >> 8); -+ c = src[2] * 154; -+ a = src[3]; -+ des [2] = (unsigned char)((b * 102 + c + 128) >> 8); -+ des [3] = (unsigned char)((c + 102 * a + 128) >> 8); -+ des [4] = (unsigned char)(a); - - } - -@@ -97,31 +92,31 @@ void vp8_horizontal_line_4_5_scale_c - * the current band. 
- * - ****************************************************************************/ --void vp8_vertical_band_4_5_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) --{ -- unsigned int i; -- unsigned int a, b, c, d; -- unsigned char *des = dest; -+void vp8_vertical_band_4_5_scale_c(unsigned char *dest, -+ unsigned int dest_pitch, -+ unsigned int dest_width) { -+ unsigned int i; -+ unsigned int a, b, c, d; -+ unsigned char *des = dest; - -- for (i = 0; i < dest_width; i++) -- { -- a = des [0]; -- b = des [dest_pitch]; -+ for (i = 0; i < dest_width; i++) { -+ a = des [0]; -+ b = des [dest_pitch]; - -- des[dest_pitch] = (unsigned char)((a * 51 + 205 * b + 128) >> 8); -+ des[dest_pitch] = (unsigned char)((a * 51 + 205 * b + 128) >> 8); - -- c = des[dest_pitch*2] * 154; -- d = des[dest_pitch*3]; -+ c = des[dest_pitch * 2] * 154; -+ d = des[dest_pitch * 3]; - -- des [dest_pitch*2] = (unsigned char)((b * 102 + c + 128) >> 8); -- des [dest_pitch*3] = (unsigned char)((c + 102 * d + 128) >> 8); -+ des [dest_pitch * 2] = (unsigned char)((b * 102 + c + 128) >> 8); -+ des [dest_pitch * 3] = (unsigned char)((c + 102 * d + 128) >> 8); - -- /* First line in next band */ -- a = des [dest_pitch * 5]; -- des [dest_pitch * 4] = (unsigned char)((d * 205 + 51 * a + 128) >> 8); -+ /* First line in next band */ -+ a = des [dest_pitch * 5]; -+ des [dest_pitch * 4] = (unsigned char)((d * 205 + 51 * a + 128) >> 8); - -- des ++; -- } -+ des++; -+ } - } - - /**************************************************************************** -@@ -144,30 +139,30 @@ void vp8_vertical_band_4_5_scale_c(unsigned char *dest, unsigned int dest_pitch, - * last band. - * - ****************************************************************************/ --void vp8_last_vertical_band_4_5_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) --{ -- unsigned int i; -- unsigned int a, b, c, d; -- unsigned char *des = dest; -+void vp8_last_vertical_band_4_5_scale_c(unsigned char *dest, -+ unsigned int dest_pitch, -+ unsigned int dest_width) { -+ unsigned int i; -+ unsigned int a, b, c, d; -+ unsigned char *des = dest; - -- for (i = 0; i < dest_width; ++i) -- { -- a = des[0]; -- b = des[dest_pitch]; -+ for (i = 0; i < dest_width; ++i) { -+ a = des[0]; -+ b = des[dest_pitch]; - -- des[dest_pitch] = (unsigned char)((a * 51 + 205 * b + 128) >> 8); -+ des[dest_pitch] = (unsigned char)((a * 51 + 205 * b + 128) >> 8); - -- c = des[dest_pitch*2] * 154; -- d = des[dest_pitch*3]; -+ c = des[dest_pitch * 2] * 154; -+ d = des[dest_pitch * 3]; - -- des [dest_pitch*2] = (unsigned char)((b * 102 + c + 128) >> 8); -- des [dest_pitch*3] = (unsigned char)((c + 102 * d + 128) >> 8); -+ des [dest_pitch * 2] = (unsigned char)((b * 102 + c + 128) >> 8); -+ des [dest_pitch * 3] = (unsigned char)((c + 102 * d + 128) >> 8); - -- /* No other line for interplation of this line, so .. */ -- des[dest_pitch*4] = (unsigned char) d; -+ /* No other line for interplation of this line, so .. 
*/ -+ des[dest_pitch * 4] = (unsigned char) d; - -- des++; -- } -+ des++; -+ } - } - - /**************************************************************************** -@@ -190,40 +185,35 @@ void vp8_last_vertical_band_4_5_scale_c(unsigned char *dest, unsigned int dest_p - * - * - ****************************************************************************/ --void vp8_horizontal_line_2_3_scale_c --( -- const unsigned char *source, -- unsigned int source_width, -- unsigned char *dest, -- unsigned int dest_width --) --{ -- unsigned int i; -- unsigned int a, b, c; -- unsigned char *des = dest; -- const unsigned char *src = source; -- -- (void) dest_width; -- -- for (i = 0; i < source_width - 2; i += 2) -- { -- a = src[0]; -- b = src[1]; -- c = src[2]; -- -- des [0] = (unsigned char)(a); -- des [1] = (unsigned char)((a * 85 + 171 * b + 128) >> 8); -- des [2] = (unsigned char)((b * 171 + 85 * c + 128) >> 8); -- -- src += 2; -- des += 3; -- } -- -+void vp8_horizontal_line_2_3_scale_c(const unsigned char *source, -+ unsigned int source_width, -+ unsigned char *dest, -+ unsigned int dest_width) { -+ unsigned int i; -+ unsigned int a, b, c; -+ unsigned char *des = dest; -+ const unsigned char *src = source; -+ -+ (void) dest_width; -+ -+ for (i = 0; i < source_width - 2; i += 2) { - a = src[0]; - b = src[1]; -+ c = src[2]; -+ - des [0] = (unsigned char)(a); - des [1] = (unsigned char)((a * 85 + 171 * b + 128) >> 8); -- des [2] = (unsigned char)(b); -+ des [2] = (unsigned char)((b * 171 + 85 * c + 128) >> 8); -+ -+ src += 2; -+ des += 3; -+ } -+ -+ a = src[0]; -+ b = src[1]; -+ des [0] = (unsigned char)(a); -+ des [1] = (unsigned char)((a * 85 + 171 * b + 128) >> 8); -+ des [2] = (unsigned char)(b); - } - - -@@ -246,22 +236,22 @@ void vp8_horizontal_line_2_3_scale_c - * the current band. - * - ****************************************************************************/ --void vp8_vertical_band_2_3_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) --{ -- unsigned int i; -- unsigned int a, b, c; -- unsigned char *des = dest; -- -- for (i = 0; i < dest_width; i++) -- { -- a = des [0]; -- b = des [dest_pitch]; -- c = des[dest_pitch*3]; -- des [dest_pitch ] = (unsigned char)((a * 85 + 171 * b + 128) >> 8); -- des [dest_pitch*2] = (unsigned char)((b * 171 + 85 * c + 128) >> 8); -- -- des++; -- } -+void vp8_vertical_band_2_3_scale_c(unsigned char *dest, -+ unsigned int dest_pitch, -+ unsigned int dest_width) { -+ unsigned int i; -+ unsigned int a, b, c; -+ unsigned char *des = dest; -+ -+ for (i = 0; i < dest_width; i++) { -+ a = des [0]; -+ b = des [dest_pitch]; -+ c = des[dest_pitch * 3]; -+ des [dest_pitch ] = (unsigned char)((a * 85 + 171 * b + 128) >> 8); -+ des [dest_pitch * 2] = (unsigned char)((b * 171 + 85 * c + 128) >> 8); -+ -+ des++; -+ } - } - - /**************************************************************************** -@@ -284,21 +274,21 @@ void vp8_vertical_band_2_3_scale_c(unsigned char *dest, unsigned int dest_pitch, - * last band. 
- * - ****************************************************************************/ --void vp8_last_vertical_band_2_3_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) --{ -- unsigned int i; -- unsigned int a, b; -- unsigned char *des = dest; -- -- for (i = 0; i < dest_width; ++i) -- { -- a = des [0]; -- b = des [dest_pitch]; -- -- des [dest_pitch ] = (unsigned char)((a * 85 + 171 * b + 128) >> 8); -- des [dest_pitch*2] = (unsigned char)(b); -- des++; -- } -+void vp8_last_vertical_band_2_3_scale_c(unsigned char *dest, -+ unsigned int dest_pitch, -+ unsigned int dest_width) { -+ unsigned int i; -+ unsigned int a, b; -+ unsigned char *des = dest; -+ -+ for (i = 0; i < dest_width; ++i) { -+ a = des [0]; -+ b = des [dest_pitch]; -+ -+ des [dest_pitch ] = (unsigned char)((a * 85 + 171 * b + 128) >> 8); -+ des [dest_pitch * 2] = (unsigned char)(b); -+ des++; -+ } - } - - /**************************************************************************** -@@ -321,49 +311,44 @@ void vp8_last_vertical_band_2_3_scale_c(unsigned char *dest, unsigned int dest_p - * - * - ****************************************************************************/ --void vp8_horizontal_line_3_5_scale_c --( -- const unsigned char *source, -- unsigned int source_width, -- unsigned char *dest, -- unsigned int dest_width --) --{ -- unsigned int i; -- unsigned int a, b, c; -- unsigned char *des = dest; -- const unsigned char *src = source; -- -- (void) dest_width; -- -- for (i = 0; i < source_width - 3; i += 3) -- { -- a = src[0]; -- b = src[1]; -- des [0] = (unsigned char)(a); -- des [1] = (unsigned char)((a * 102 + 154 * b + 128) >> 8); -- -- c = src[2] ; -- des [2] = (unsigned char)((b * 205 + c * 51 + 128) >> 8); -- des [3] = (unsigned char)((b * 51 + c * 205 + 128) >> 8); -- -- a = src[3]; -- des [4] = (unsigned char)((c * 154 + a * 102 + 128) >> 8); -- -- src += 3; -- des += 5; -- } -- -+void vp8_horizontal_line_3_5_scale_c(const unsigned char *source, -+ unsigned int source_width, -+ unsigned char *dest, -+ unsigned int dest_width) { -+ unsigned int i; -+ unsigned int a, b, c; -+ unsigned char *des = dest; -+ const unsigned char *src = source; -+ -+ (void) dest_width; -+ -+ for (i = 0; i < source_width - 3; i += 3) { - a = src[0]; - b = src[1]; - des [0] = (unsigned char)(a); -- - des [1] = (unsigned char)((a * 102 + 154 * b + 128) >> 8); -- c = src[2] ; -+ -+ c = src[2]; - des [2] = (unsigned char)((b * 205 + c * 51 + 128) >> 8); - des [3] = (unsigned char)((b * 51 + c * 205 + 128) >> 8); - -- des [4] = (unsigned char)(c); -+ a = src[3]; -+ des [4] = (unsigned char)((c * 154 + a * 102 + 128) >> 8); -+ -+ src += 3; -+ des += 5; -+ } -+ -+ a = src[0]; -+ b = src[1]; -+ des [0] = (unsigned char)(a); -+ -+ des [1] = (unsigned char)((a * 102 + 154 * b + 128) >> 8); -+ c = src[2]; -+ des [2] = (unsigned char)((b * 205 + c * 51 + 128) >> 8); -+ des [3] = (unsigned char)((b * 51 + c * 205 + 128) >> 8); -+ -+ des [4] = (unsigned char)(c); - } - - /**************************************************************************** -@@ -385,28 +370,28 @@ void vp8_horizontal_line_3_5_scale_c - * the current band. 
- * - ****************************************************************************/ --void vp8_vertical_band_3_5_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) --{ -- unsigned int i; -- unsigned int a, b, c; -- unsigned char *des = dest; -- -- for (i = 0; i < dest_width; i++) -- { -- a = des [0]; -- b = des [dest_pitch]; -- des [dest_pitch] = (unsigned char)((a * 102 + 154 * b + 128) >> 8); -- -- c = des[dest_pitch*2]; -- des [dest_pitch*2] = (unsigned char)((b * 205 + c * 51 + 128) >> 8); -- des [dest_pitch*3] = (unsigned char)((b * 51 + c * 205 + 128) >> 8); -- -- /* First line in next band... */ -- a = des [dest_pitch * 5]; -- des [dest_pitch * 4] = (unsigned char)((c * 154 + a * 102 + 128) >> 8); -- -- des++; -- } -+void vp8_vertical_band_3_5_scale_c(unsigned char *dest, -+ unsigned int dest_pitch, -+ unsigned int dest_width) { -+ unsigned int i; -+ unsigned int a, b, c; -+ unsigned char *des = dest; -+ -+ for (i = 0; i < dest_width; i++) { -+ a = des [0]; -+ b = des [dest_pitch]; -+ des [dest_pitch] = (unsigned char)((a * 102 + 154 * b + 128) >> 8); -+ -+ c = des[dest_pitch * 2]; -+ des [dest_pitch * 2] = (unsigned char)((b * 205 + c * 51 + 128) >> 8); -+ des [dest_pitch * 3] = (unsigned char)((b * 51 + c * 205 + 128) >> 8); -+ -+ /* First line in next band... */ -+ a = des [dest_pitch * 5]; -+ des [dest_pitch * 4] = (unsigned char)((c * 154 + a * 102 + 128) >> 8); -+ -+ des++; -+ } - } - - /**************************************************************************** -@@ -429,28 +414,28 @@ void vp8_vertical_band_3_5_scale_c(unsigned char *dest, unsigned int dest_pitch, - * last band. - * - ****************************************************************************/ --void vp8_last_vertical_band_3_5_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) --{ -- unsigned int i; -- unsigned int a, b, c; -- unsigned char *des = dest; -+void vp8_last_vertical_band_3_5_scale_c(unsigned char *dest, -+ unsigned int dest_pitch, -+ unsigned int dest_width) { -+ unsigned int i; -+ unsigned int a, b, c; -+ unsigned char *des = dest; - -- for (i = 0; i < dest_width; ++i) -- { -- a = des [0]; -- b = des [dest_pitch]; -+ for (i = 0; i < dest_width; ++i) { -+ a = des [0]; -+ b = des [dest_pitch]; - -- des [ dest_pitch ] = (unsigned char)((a * 102 + 154 * b + 128) >> 8); -+ des [ dest_pitch ] = (unsigned char)((a * 102 + 154 * b + 128) >> 8); - -- c = des[dest_pitch*2]; -- des [dest_pitch*2] = (unsigned char)((b * 205 + c * 51 + 128) >> 8); -- des [dest_pitch*3] = (unsigned char)((b * 51 + c * 205 + 128) >> 8); -+ c = des[dest_pitch * 2]; -+ des [dest_pitch * 2] = (unsigned char)((b * 205 + c * 51 + 128) >> 8); -+ des [dest_pitch * 3] = (unsigned char)((b * 51 + c * 205 + 128) >> 8); - -- /* No other line for interplation of this line, so .. */ -- des [ dest_pitch * 4 ] = (unsigned char)(c) ; -+ /* No other line for interplation of this line, so .. 
*/ -+ des [ dest_pitch * 4 ] = (unsigned char)(c); - -- des++; -- } -+ des++; -+ } - } - - /**************************************************************************** -@@ -473,46 +458,41 @@ void vp8_last_vertical_band_3_5_scale_c(unsigned char *dest, unsigned int dest_p - * - * - ****************************************************************************/ --void vp8_horizontal_line_3_4_scale_c --( -- const unsigned char *source, -- unsigned int source_width, -- unsigned char *dest, -- unsigned int dest_width --) --{ -- unsigned int i; -- unsigned int a, b, c; -- unsigned char *des = dest; -- const unsigned char *src = source; -- -- (void) dest_width; -- -- for (i = 0; i < source_width - 3; i += 3) -- { -- a = src[0]; -- b = src[1]; -- des [0] = (unsigned char)(a); -- des [1] = (unsigned char)((a * 64 + b * 192 + 128) >> 8); -- -- c = src[2]; -- des [2] = (unsigned char)((b + c + 1) >> 1); -- -- a = src[3]; -- des [3] = (unsigned char)((c * 192 + a * 64 + 128) >> 8); -- -- src += 3; -- des += 4; -- } -- -+void vp8_horizontal_line_3_4_scale_c(const unsigned char *source, -+ unsigned int source_width, -+ unsigned char *dest, -+ unsigned int dest_width) { -+ unsigned int i; -+ unsigned int a, b, c; -+ unsigned char *des = dest; -+ const unsigned char *src = source; -+ -+ (void) dest_width; -+ -+ for (i = 0; i < source_width - 3; i += 3) { - a = src[0]; - b = src[1]; - des [0] = (unsigned char)(a); - des [1] = (unsigned char)((a * 64 + b * 192 + 128) >> 8); - -- c = src[2] ; -+ c = src[2]; - des [2] = (unsigned char)((b + c + 1) >> 1); -- des [3] = (unsigned char)(c); -+ -+ a = src[3]; -+ des [3] = (unsigned char)((c * 192 + a * 64 + 128) >> 8); -+ -+ src += 3; -+ des += 4; -+ } -+ -+ a = src[0]; -+ b = src[1]; -+ des [0] = (unsigned char)(a); -+ des [1] = (unsigned char)((a * 64 + b * 192 + 128) >> 8); -+ -+ c = src[2]; -+ des [2] = (unsigned char)((b + c + 1) >> 1); -+ des [3] = (unsigned char)(c); - } - - /**************************************************************************** -@@ -534,27 +514,27 @@ void vp8_horizontal_line_3_4_scale_c - * the current band. - * - ****************************************************************************/ --void vp8_vertical_band_3_4_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) --{ -- unsigned int i; -- unsigned int a, b, c; -- unsigned char *des = dest; -- -- for (i = 0; i < dest_width; i++) -- { -- a = des [0]; -- b = des [dest_pitch]; -- des [dest_pitch] = (unsigned char)((a * 64 + b * 192 + 128) >> 8); -- -- c = des[dest_pitch*2]; -- des [dest_pitch*2] = (unsigned char)((b + c + 1) >> 1); -- -- /* First line in next band... */ -- a = des [dest_pitch*4]; -- des [dest_pitch*3] = (unsigned char)((c * 192 + a * 64 + 128) >> 8); -- -- des++; -- } -+void vp8_vertical_band_3_4_scale_c(unsigned char *dest, -+ unsigned int dest_pitch, -+ unsigned int dest_width) { -+ unsigned int i; -+ unsigned int a, b, c; -+ unsigned char *des = dest; -+ -+ for (i = 0; i < dest_width; i++) { -+ a = des [0]; -+ b = des [dest_pitch]; -+ des [dest_pitch] = (unsigned char)((a * 64 + b * 192 + 128) >> 8); -+ -+ c = des[dest_pitch * 2]; -+ des [dest_pitch * 2] = (unsigned char)((b + c + 1) >> 1); -+ -+ /* First line in next band... 
*/ -+ a = des [dest_pitch * 4]; -+ des [dest_pitch * 3] = (unsigned char)((c * 192 + a * 64 + 128) >> 8); -+ -+ des++; -+ } - } - - /**************************************************************************** -@@ -577,27 +557,27 @@ void vp8_vertical_band_3_4_scale_c(unsigned char *dest, unsigned int dest_pitch, - * last band. - * - ****************************************************************************/ --void vp8_last_vertical_band_3_4_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) --{ -- unsigned int i; -- unsigned int a, b, c; -- unsigned char *des = dest; -+void vp8_last_vertical_band_3_4_scale_c(unsigned char *dest, -+ unsigned int dest_pitch, -+ unsigned int dest_width) { -+ unsigned int i; -+ unsigned int a, b, c; -+ unsigned char *des = dest; - -- for (i = 0; i < dest_width; ++i) -- { -- a = des [0]; -- b = des [dest_pitch]; -+ for (i = 0; i < dest_width; ++i) { -+ a = des [0]; -+ b = des [dest_pitch]; - -- des [dest_pitch] = (unsigned char)((a * 64 + b * 192 + 128) >> 8); -+ des [dest_pitch] = (unsigned char)((a * 64 + b * 192 + 128) >> 8); - -- c = des[dest_pitch*2]; -- des [dest_pitch*2] = (unsigned char)((b + c + 1) >> 1); -+ c = des[dest_pitch * 2]; -+ des [dest_pitch * 2] = (unsigned char)((b + c + 1) >> 1); - -- /* No other line for interplation of this line, so .. */ -- des [dest_pitch*3] = (unsigned char)(c); -+ /* No other line for interplation of this line, so .. */ -+ des [dest_pitch * 3] = (unsigned char)(c); - -- des++; -- } -+ des++; -+ } - } - - /**************************************************************************** -@@ -619,34 +599,29 @@ void vp8_last_vertical_band_3_4_scale_c(unsigned char *dest, unsigned int dest_p - * SPECIAL NOTES : None. - * - ****************************************************************************/ --void vp8_horizontal_line_1_2_scale_c --( -- const unsigned char *source, -- unsigned int source_width, -- unsigned char *dest, -- unsigned int dest_width --) --{ -- unsigned int i; -- unsigned int a, b; -- unsigned char *des = dest; -- const unsigned char *src = source; -- -- (void) dest_width; -- -- for (i = 0; i < source_width - 1; i += 1) -- { -- a = src[0]; -- b = src[1]; -- des [0] = (unsigned char)(a); -- des [1] = (unsigned char)((a + b + 1) >> 1); -- src += 1; -- des += 2; -- } -- -+void vp8_horizontal_line_1_2_scale_c(const unsigned char *source, -+ unsigned int source_width, -+ unsigned char *dest, -+ unsigned int dest_width) { -+ unsigned int i; -+ unsigned int a, b; -+ unsigned char *des = dest; -+ const unsigned char *src = source; -+ -+ (void) dest_width; -+ -+ for (i = 0; i < source_width - 1; i += 1) { - a = src[0]; -+ b = src[1]; - des [0] = (unsigned char)(a); -- des [1] = (unsigned char)(a); -+ des [1] = (unsigned char)((a + b + 1) >> 1); -+ src += 1; -+ des += 2; -+ } -+ -+ a = src[0]; -+ des [0] = (unsigned char)(a); -+ des [1] = (unsigned char)(a); - } - - /**************************************************************************** -@@ -668,21 +643,21 @@ void vp8_horizontal_line_1_2_scale_c - * the current band. 
- * - ****************************************************************************/ --void vp8_vertical_band_1_2_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) --{ -- unsigned int i; -- unsigned int a, b; -- unsigned char *des = dest; -+void vp8_vertical_band_1_2_scale_c(unsigned char *dest, -+ unsigned int dest_pitch, -+ unsigned int dest_width) { -+ unsigned int i; -+ unsigned int a, b; -+ unsigned char *des = dest; - -- for (i = 0; i < dest_width; i++) -- { -- a = des [0]; -- b = des [dest_pitch * 2]; -+ for (i = 0; i < dest_width; i++) { -+ a = des [0]; -+ b = des [dest_pitch * 2]; - -- des[dest_pitch] = (unsigned char)((a + b + 1) >> 1); -+ des[dest_pitch] = (unsigned char)((a + b + 1) >> 1); - -- des++; -- } -+ des++; -+ } - } - - /**************************************************************************** -@@ -705,16 +680,16 @@ void vp8_vertical_band_1_2_scale_c(unsigned char *dest, unsigned int dest_pitch, - * last band. - * - ****************************************************************************/ --void vp8_last_vertical_band_1_2_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) --{ -- unsigned int i; -- unsigned char *des = dest; -- -- for (i = 0; i < dest_width; ++i) -- { -- des[dest_pitch] = des[0]; -- des++; -- } -+void vp8_last_vertical_band_1_2_scale_c(unsigned char *dest, -+ unsigned int dest_pitch, -+ unsigned int dest_width) { -+ unsigned int i; -+ unsigned char *des = dest; -+ -+ for (i = 0; i < dest_width; ++i) { -+ des[dest_pitch] = des[0]; -+ des++; -+ } - } - - -@@ -740,67 +715,64 @@ void vp8_last_vertical_band_1_2_scale_c(unsigned char *dest, unsigned int dest_p - * SPECIAL NOTES : None. - * - ****************************************************************************/ --void vp8_horizontal_line_5_4_scale_c --( -- const unsigned char *source, -- unsigned int source_width, -- unsigned char *dest, -- unsigned int dest_width --) --{ -- unsigned i; -- unsigned int a, b, c, d, e; -- unsigned char *des = dest; -- const unsigned char *src = source; -- -- (void) dest_width; -- -- for (i = 0; i < source_width; i += 5) -- { -- a = src[0]; -- b = src[1]; -- c = src[2]; -- d = src[3]; -- e = src[4]; -- -- des[0] = (unsigned char) a; -- des[1] = (unsigned char)((b * 192 + c * 64 + 128) >> 8); -- des[2] = (unsigned char)((c * 128 + d * 128 + 128) >> 8); -- des[3] = (unsigned char)((d * 64 + e * 192 + 128) >> 8); -- -- src += 5; -- des += 4; -- } -+void vp8_horizontal_line_5_4_scale_c(const unsigned char *source, -+ unsigned int source_width, -+ unsigned char *dest, -+ unsigned int dest_width) { -+ unsigned i; -+ unsigned int a, b, c, d, e; -+ unsigned char *des = dest; -+ const unsigned char *src = source; -+ -+ (void) dest_width; -+ -+ for (i = 0; i < source_width; i += 5) { -+ a = src[0]; -+ b = src[1]; -+ c = src[2]; -+ d = src[3]; -+ e = src[4]; -+ -+ des[0] = (unsigned char) a; -+ des[1] = (unsigned char)((b * 192 + c * 64 + 128) >> 8); -+ des[2] = (unsigned char)((c * 128 + d * 128 + 128) >> 8); -+ des[3] = (unsigned char)((d * 64 + e * 192 + 128) >> 8); -+ -+ src += 5; -+ des += 4; -+ } - } - - - - --void vp8_vertical_band_5_4_scale_c(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) --{ -- unsigned int i; -- unsigned int a, b, c, d, e; -- unsigned char *des = dest; -- unsigned char *src = source; -+void vp8_vertical_band_5_4_scale_c(unsigned char *source, -+ unsigned int src_pitch, -+ unsigned char *dest, -+ unsigned int dest_pitch, -+ 
unsigned int dest_width) { -+ unsigned int i; -+ unsigned int a, b, c, d, e; -+ unsigned char *des = dest; -+ unsigned char *src = source; - -- for (i = 0; i < dest_width; i++) -- { -+ for (i = 0; i < dest_width; i++) { - -- a = src[0 * src_pitch]; -- b = src[1 * src_pitch]; -- c = src[2 * src_pitch]; -- d = src[3 * src_pitch]; -- e = src[4 * src_pitch]; -+ a = src[0 * src_pitch]; -+ b = src[1 * src_pitch]; -+ c = src[2 * src_pitch]; -+ d = src[3 * src_pitch]; -+ e = src[4 * src_pitch]; - -- des[0 * dest_pitch] = (unsigned char) a; -- des[1 * dest_pitch] = (unsigned char)((b * 192 + c * 64 + 128) >> 8); -- des[2 * dest_pitch] = (unsigned char)((c * 128 + d * 128 + 128) >> 8); -- des[3 * dest_pitch] = (unsigned char)((d * 64 + e * 192 + 128) >> 8); -+ des[0 * dest_pitch] = (unsigned char) a; -+ des[1 * dest_pitch] = (unsigned char)((b * 192 + c * 64 + 128) >> 8); -+ des[2 * dest_pitch] = (unsigned char)((c * 128 + d * 128 + 128) >> 8); -+ des[3 * dest_pitch] = (unsigned char)((d * 64 + e * 192 + 128) >> 8); - -- src ++; -- des ++; -+ src++; -+ des++; - -- } -+ } - } - - -@@ -824,63 +796,60 @@ void vp8_vertical_band_5_4_scale_c(unsigned char *source, unsigned int src_pitch - * - * - ****************************************************************************/ --void vp8_horizontal_line_5_3_scale_c --( -- const unsigned char *source, -- unsigned int source_width, -- unsigned char *dest, -- unsigned int dest_width --) --{ -- unsigned int i; -- unsigned int a, b, c, d , e; -- unsigned char *des = dest; -- const unsigned char *src = source; -- -- (void) dest_width; -- -- for (i = 0; i < source_width; i += 5) -- { -- a = src[0]; -- b = src[1]; -- c = src[2]; -- d = src[3]; -- e = src[4]; -- -- des[0] = (unsigned char) a; -- des[1] = (unsigned char)((b * 85 + c * 171 + 128) >> 8); -- des[2] = (unsigned char)((d * 171 + e * 85 + 128) >> 8); -- -- src += 5; -- des += 3; -- } -+void vp8_horizontal_line_5_3_scale_c(const unsigned char *source, -+ unsigned int source_width, -+ unsigned char *dest, -+ unsigned int dest_width) { -+ unsigned int i; -+ unsigned int a, b, c, d, e; -+ unsigned char *des = dest; -+ const unsigned char *src = source; -+ -+ (void) dest_width; -+ -+ for (i = 0; i < source_width; i += 5) { -+ a = src[0]; -+ b = src[1]; -+ c = src[2]; -+ d = src[3]; -+ e = src[4]; -+ -+ des[0] = (unsigned char) a; -+ des[1] = (unsigned char)((b * 85 + c * 171 + 128) >> 8); -+ des[2] = (unsigned char)((d * 171 + e * 85 + 128) >> 8); -+ -+ src += 5; -+ des += 3; -+ } - - } - --void vp8_vertical_band_5_3_scale_c(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) --{ -- unsigned int i; -- unsigned int a, b, c, d, e; -- unsigned char *des = dest; -- unsigned char *src = source; -+void vp8_vertical_band_5_3_scale_c(unsigned char *source, -+ unsigned int src_pitch, -+ unsigned char *dest, -+ unsigned int dest_pitch, -+ unsigned int dest_width) { -+ unsigned int i; -+ unsigned int a, b, c, d, e; -+ unsigned char *des = dest; -+ unsigned char *src = source; - -- for (i = 0; i < dest_width; i++) -- { -+ for (i = 0; i < dest_width; i++) { - -- a = src[0 * src_pitch]; -- b = src[1 * src_pitch]; -- c = src[2 * src_pitch]; -- d = src[3 * src_pitch]; -- e = src[4 * src_pitch]; -+ a = src[0 * src_pitch]; -+ b = src[1 * src_pitch]; -+ c = src[2 * src_pitch]; -+ d = src[3 * src_pitch]; -+ e = src[4 * src_pitch]; - -- des[0 * dest_pitch] = (unsigned char) a; -- des[1 * dest_pitch] = (unsigned char)((b * 85 + c * 171 + 128) >> 8); -- des[2 * 
dest_pitch] = (unsigned char)((d * 171 + e * 85 + 128) >> 8); -+ des[0 * dest_pitch] = (unsigned char) a; -+ des[1 * dest_pitch] = (unsigned char)((b * 85 + c * 171 + 128) >> 8); -+ des[2 * dest_pitch] = (unsigned char)((d * 171 + e * 85 + 128) >> 8); - -- src ++; -- des ++; -+ src++; -+ des++; - -- } -+ } - } - - /**************************************************************************** -@@ -902,55 +871,52 @@ void vp8_vertical_band_5_3_scale_c(unsigned char *source, unsigned int src_pitch - * SPECIAL NOTES : None. - * - ****************************************************************************/ --void vp8_horizontal_line_2_1_scale_c --( -- const unsigned char *source, -- unsigned int source_width, -- unsigned char *dest, -- unsigned int dest_width --) --{ -- unsigned int i; -- unsigned int a; -- unsigned char *des = dest; -- const unsigned char *src = source; -- -- (void) dest_width; -- -- for (i = 0; i < source_width; i += 2) -- { -- a = src[0]; -- des [0] = (unsigned char)(a); -- src += 2; -- des += 1; -- } -- -- -- --} --void vp8_vertical_band_2_1_scale_c(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) --{ -- (void) dest_pitch; -- (void) src_pitch; -- vpx_memcpy(dest, source, dest_width); -+void vp8_horizontal_line_2_1_scale_c(const unsigned char *source, -+ unsigned int source_width, -+ unsigned char *dest, -+ unsigned int dest_width) { -+ unsigned int i; -+ unsigned int a; -+ unsigned char *des = dest; -+ const unsigned char *src = source; -+ -+ (void) dest_width; -+ -+ for (i = 0; i < source_width; i += 2) { -+ a = src[0]; -+ des [0] = (unsigned char)(a); -+ src += 2; -+ des += 1; -+ } - } - --void vp8_vertical_band_2_1_scale_i_c(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) --{ -- int i; -- int temp; -- int width = dest_width; -- -- (void) dest_pitch; -- -- for (i = 0; i < width; i++) -- { -- temp = 8; -- temp += source[i-(int)src_pitch] * 3; -- temp += source[i] * 10; -- temp += source[i+src_pitch] * 3; -- temp >>= 4 ; -- dest[i] = (unsigned char)(temp); -- } -+void vp8_vertical_band_2_1_scale_c(unsigned char *source, -+ unsigned int src_pitch, -+ unsigned char *dest, -+ unsigned int dest_pitch, -+ unsigned int dest_width) { -+ (void) dest_pitch; -+ (void) src_pitch; -+ vpx_memcpy(dest, source, dest_width); -+} - -+void vp8_vertical_band_2_1_scale_i_c(unsigned char *source, -+ unsigned int src_pitch, -+ unsigned char *dest, -+ unsigned int dest_pitch, -+ unsigned int dest_width) { -+ int i; -+ int temp; -+ int width = dest_width; -+ -+ (void) dest_pitch; -+ -+ for (i = 0; i < width; i++) { -+ temp = 8; -+ temp += source[i - (int)src_pitch] * 3; -+ temp += source[i] * 10; -+ temp += source[i + src_pitch] * 3; -+ temp >>= 4; -+ dest[i] = (unsigned char)(temp); -+ } - } -diff --git a/vpx_scale/generic/vpxscale.c b/vpx_scale/generic/vpxscale.c -index c02e4ff..7de85ca 100644 ---- a/vpx_scale/generic/vpxscale.c -+++ b/vpx_scale/generic/vpxscale.c -@@ -20,23 +20,22 @@ - /**************************************************************************** - * Header Files - ****************************************************************************/ --#include "vpx_rtcd.h" -+#include "./vpx_rtcd.h" - #include "vpx_mem/vpx_mem.h" - #include "vpx_scale/yv12config.h" - #include "vpx_scale/scale_mode.h" - --typedef struct --{ -- int expanded_frame_width; -- int expanded_frame_height; -+typedef struct { -+ int expanded_frame_width; -+ int 
expanded_frame_height; - -- int HScale; -- int HRatio; -- int VScale; -- int VRatio; -+ int HScale; -+ int HRatio; -+ int VScale; -+ int VRatio; - -- YV12_BUFFER_CONFIG *src_yuv_config; -- YV12_BUFFER_CONFIG *dst_yuv_config; -+ YV12_BUFFER_CONFIG *src_yuv_config; -+ YV12_BUFFER_CONFIG *dst_yuv_config; - - } SCALE_VARS; - -@@ -60,15 +59,14 @@ typedef struct - ****************************************************************************/ - static - void horizontal_line_copy( -- const unsigned char *source, -- unsigned int source_width, -- unsigned char *dest, -- unsigned int dest_width --) --{ -- (void) dest_width; -- -- duck_memcpy(dest, source, source_width); -+ const unsigned char *source, -+ unsigned int source_width, -+ unsigned char *dest, -+ unsigned int dest_width -+) { -+ (void) dest_width; -+ -+ duck_memcpy(dest, source, source_width); - } - /**************************************************************************** - * -@@ -90,16 +88,15 @@ void horizontal_line_copy( - ****************************************************************************/ - static - void null_scale( -- unsigned char *dest, -- unsigned int dest_pitch, -- unsigned int dest_width --) --{ -- (void) dest; -- (void) dest_pitch; -- (void) dest_width; -- -- return; -+ unsigned char *dest, -+ unsigned int dest_pitch, -+ unsigned int dest_width -+) { -+ (void) dest; -+ (void) dest_pitch; -+ (void) dest_width; -+ -+ return; - } - - /**************************************************************************** -@@ -127,35 +124,33 @@ void null_scale( - static - void scale1d_2t1_i - ( -- const unsigned char *source, -- int source_step, -- unsigned int source_scale, -- unsigned int source_length, -- unsigned char *dest, -- int dest_step, -- unsigned int dest_scale, -- unsigned int dest_length --) --{ -- unsigned int i, j; -- unsigned int temp; -- int source_pitch = source_step; -- (void) source_length; -- (void) source_scale; -- (void) dest_scale; -- -- source_step *= 2; -- dest[0] = source[0]; -- -- for (i = dest_step, j = source_step; i < dest_length * dest_step; i += dest_step, j += source_step) -- { -- temp = 8; -- temp += 3 * source[j-source_pitch]; -- temp += 10 * source[j]; -- temp += 3 * source[j+source_pitch]; -- temp >>= 4; -- dest[i] = (char)(temp); -- } -+ const unsigned char *source, -+ int source_step, -+ unsigned int source_scale, -+ unsigned int source_length, -+ unsigned char *dest, -+ int dest_step, -+ unsigned int dest_scale, -+ unsigned int dest_length -+) { -+ unsigned int i, j; -+ unsigned int temp; -+ int source_pitch = source_step; -+ (void) source_length; -+ (void) source_scale; -+ (void) dest_scale; -+ -+ source_step *= 2; -+ dest[0] = source[0]; -+ -+ for (i = dest_step, j = source_step; i < dest_length * dest_step; i += dest_step, j += source_step) { -+ temp = 8; -+ temp += 3 * source[j - source_pitch]; -+ temp += 10 * source[j]; -+ temp += 3 * source[j + source_pitch]; -+ temp >>= 4; -+ dest[i] = (char)(temp); -+ } - } - - /**************************************************************************** -@@ -183,27 +178,26 @@ void scale1d_2t1_i - static - void scale1d_2t1_ps - ( -- const unsigned char *source, -- int source_step, -- unsigned int source_scale, -- unsigned int source_length, -- unsigned char *dest, -- int dest_step, -- unsigned int dest_scale, -- unsigned int dest_length --) --{ -- unsigned int i, j; -- -- (void) source_length; -- (void) source_scale; -- (void) dest_scale; -- -- source_step *= 2; -- j = 0; -- -- for (i = 0; i < dest_length * dest_step; i += dest_step, j += 
source_step) -- dest[i] = source[j]; -+ const unsigned char *source, -+ int source_step, -+ unsigned int source_scale, -+ unsigned int source_length, -+ unsigned char *dest, -+ int dest_step, -+ unsigned int dest_scale, -+ unsigned int dest_length -+) { -+ unsigned int i, j; -+ -+ (void) source_length; -+ (void) source_scale; -+ (void) dest_scale; -+ -+ source_step *= 2; -+ j = 0; -+ -+ for (i = 0; i < dest_length * dest_step; i += dest_step, j += source_step) -+ dest[i] = source[j]; - } - /**************************************************************************** - * -@@ -230,45 +224,42 @@ void scale1d_2t1_ps - static - void scale1d_c - ( -- const unsigned char *source, -- int source_step, -- unsigned int source_scale, -- unsigned int source_length, -- unsigned char *dest, -- int dest_step, -- unsigned int dest_scale, -- unsigned int dest_length --) --{ -- unsigned int i; -- unsigned int round_value = dest_scale / 2; -- unsigned int left_modifier = dest_scale; -- unsigned int right_modifier = 0; -- unsigned char left_pixel = *source; -- unsigned char right_pixel = *(source + source_step); -- -- (void) source_length; -- -- /* These asserts are needed if there are boundary issues... */ -- /*assert ( dest_scale > source_scale );*/ -- /*assert ( (source_length-1) * dest_scale >= (dest_length-1) * source_scale );*/ -- -- for (i = 0; i < dest_length * dest_step; i += dest_step) -- { -- dest[i] = (char)((left_modifier * left_pixel + right_modifier * right_pixel + round_value) / dest_scale); -- -- right_modifier += source_scale; -- -- while (right_modifier > dest_scale) -- { -- right_modifier -= dest_scale; -- source += source_step; -- left_pixel = *source; -- right_pixel = *(source + source_step); -- } -- -- left_modifier = dest_scale - right_modifier; -+ const unsigned char *source, -+ int source_step, -+ unsigned int source_scale, -+ unsigned int source_length, -+ unsigned char *dest, -+ int dest_step, -+ unsigned int dest_scale, -+ unsigned int dest_length -+) { -+ unsigned int i; -+ unsigned int round_value = dest_scale / 2; -+ unsigned int left_modifier = dest_scale; -+ unsigned int right_modifier = 0; -+ unsigned char left_pixel = *source; -+ unsigned char right_pixel = *(source + source_step); -+ -+ (void) source_length; -+ -+ /* These asserts are needed if there are boundary issues... 
*/ -+ /*assert ( dest_scale > source_scale );*/ -+ /*assert ( (source_length-1) * dest_scale >= (dest_length-1) * source_scale );*/ -+ -+ for (i = 0; i < dest_length * dest_step; i += dest_step) { -+ dest[i] = (char)((left_modifier * left_pixel + right_modifier * right_pixel + round_value) / dest_scale); -+ -+ right_modifier += source_scale; -+ -+ while (right_modifier > dest_scale) { -+ right_modifier -= dest_scale; -+ source += source_step; -+ left_pixel = *source; -+ right_pixel = *(source + source_step); - } -+ -+ left_modifier = dest_scale - right_modifier; -+ } - } - - /**************************************************************************** -@@ -304,246 +295,221 @@ void scale1d_c - static - void Scale2D - ( -- /*const*/ -- unsigned char *source, -- int source_pitch, -- unsigned int source_width, -- unsigned int source_height, -- unsigned char *dest, -- int dest_pitch, -- unsigned int dest_width, -- unsigned int dest_height, -- unsigned char *temp_area, -- unsigned char temp_area_height, -- unsigned int hscale, -- unsigned int hratio, -- unsigned int vscale, -- unsigned int vratio, -- unsigned int interlaced --) --{ -- /*unsigned*/ -- int i, j, k; -- int bands; -- int dest_band_height; -- int source_band_height; -- -- typedef void (*Scale1D)(const unsigned char * source, int source_step, unsigned int source_scale, unsigned int source_length, -- unsigned char * dest, int dest_step, unsigned int dest_scale, unsigned int dest_length); -- -- Scale1D Scale1Dv = scale1d_c; -- Scale1D Scale1Dh = scale1d_c; -- -- void (*horiz_line_scale)(const unsigned char *, unsigned int, unsigned char *, unsigned int) = NULL; -- void (*vert_band_scale)(unsigned char *, unsigned int, unsigned char *, unsigned int, unsigned int) = NULL; -- -- int ratio_scalable = 1; -- int interpolation = 0; -- -- unsigned char *source_base; /* = (unsigned char *) ((source_pitch >= 0) ? source : (source + ((source_height-1) * source_pitch))); */ -- unsigned char *line_src; -- -- -- source_base = (unsigned char *)source; -- -- if (source_pitch < 0) -- { -- int offset; -- -- offset = (source_height - 1); -- offset *= source_pitch; -- -- source_base += offset; -- } -- -- /* find out the ratio for each direction */ -- switch (hratio * 10 / hscale) -- { -+ /*const*/ -+ unsigned char *source, -+ int source_pitch, -+ unsigned int source_width, -+ unsigned int source_height, -+ unsigned char *dest, -+ int dest_pitch, -+ unsigned int dest_width, -+ unsigned int dest_height, -+ unsigned char *temp_area, -+ unsigned char temp_area_height, -+ unsigned int hscale, -+ unsigned int hratio, -+ unsigned int vscale, -+ unsigned int vratio, -+ unsigned int interlaced -+) { -+ /*unsigned*/ -+ int i, j, k; -+ int bands; -+ int dest_band_height; -+ int source_band_height; -+ -+ typedef void (*Scale1D)(const unsigned char * source, int source_step, unsigned int source_scale, unsigned int source_length, -+ unsigned char * dest, int dest_step, unsigned int dest_scale, unsigned int dest_length); -+ -+ Scale1D Scale1Dv = scale1d_c; -+ Scale1D Scale1Dh = scale1d_c; -+ -+ void (*horiz_line_scale)(const unsigned char *, unsigned int, unsigned char *, unsigned int) = NULL; -+ void (*vert_band_scale)(unsigned char *, unsigned int, unsigned char *, unsigned int, unsigned int) = NULL; -+ -+ int ratio_scalable = 1; -+ int interpolation = 0; -+ -+ unsigned char *source_base; /* = (unsigned char *) ((source_pitch >= 0) ? 
source : (source + ((source_height-1) * source_pitch))); */ -+ unsigned char *line_src; -+ -+ -+ source_base = (unsigned char *)source; -+ -+ if (source_pitch < 0) { -+ int offset; -+ -+ offset = (source_height - 1); -+ offset *= source_pitch; -+ -+ source_base += offset; -+ } -+ -+ /* find out the ratio for each direction */ -+ switch (hratio * 10 / hscale) { - case 8: -- /* 4-5 Scale in Width direction */ -- horiz_line_scale = vp8_horizontal_line_5_4_scale; -- break; -+ /* 4-5 Scale in Width direction */ -+ horiz_line_scale = vp8_horizontal_line_5_4_scale; -+ break; - case 6: -- /* 3-5 Scale in Width direction */ -- horiz_line_scale = vp8_horizontal_line_5_3_scale; -- break; -+ /* 3-5 Scale in Width direction */ -+ horiz_line_scale = vp8_horizontal_line_5_3_scale; -+ break; - case 5: -- /* 1-2 Scale in Width direction */ -- horiz_line_scale = vp8_horizontal_line_2_1_scale; -- break; -+ /* 1-2 Scale in Width direction */ -+ horiz_line_scale = vp8_horizontal_line_2_1_scale; -+ break; - default: -- /* The ratio is not acceptable now */ -- /* throw("The ratio is not acceptable for now!"); */ -- ratio_scalable = 0; -- break; -- } -+ /* The ratio is not acceptable now */ -+ /* throw("The ratio is not acceptable for now!"); */ -+ ratio_scalable = 0; -+ break; -+ } - -- switch (vratio * 10 / vscale) -- { -+ switch (vratio * 10 / vscale) { - case 8: -- /* 4-5 Scale in vertical direction */ -- vert_band_scale = vp8_vertical_band_5_4_scale; -- source_band_height = 5; -- dest_band_height = 4; -- break; -+ /* 4-5 Scale in vertical direction */ -+ vert_band_scale = vp8_vertical_band_5_4_scale; -+ source_band_height = 5; -+ dest_band_height = 4; -+ break; - case 6: -- /* 3-5 Scale in vertical direction */ -- vert_band_scale = vp8_vertical_band_5_3_scale; -- source_band_height = 5; -- dest_band_height = 3; -- break; -+ /* 3-5 Scale in vertical direction */ -+ vert_band_scale = vp8_vertical_band_5_3_scale; -+ source_band_height = 5; -+ dest_band_height = 3; -+ break; - case 5: -- /* 1-2 Scale in vertical direction */ -+ /* 1-2 Scale in vertical direction */ - -- if (interlaced) -- { -- /* if the content is interlaced, point sampling is used */ -- vert_band_scale = vp8_vertical_band_2_1_scale; -- } -- else -- { -+ if (interlaced) { -+ /* if the content is interlaced, point sampling is used */ -+ vert_band_scale = vp8_vertical_band_2_1_scale; -+ } else { - -- interpolation = 1; -- /* if the content is progressive, interplo */ -- vert_band_scale = vp8_vertical_band_2_1_scale_i; -+ interpolation = 1; -+ /* if the content is progressive, interplo */ -+ vert_band_scale = vp8_vertical_band_2_1_scale_i; - -- } -+ } - -- source_band_height = 2; -- dest_band_height = 1; -- break; -+ source_band_height = 2; -+ dest_band_height = 1; -+ break; - default: -- /* The ratio is not acceptable now */ -- /* throw("The ratio is not acceptable for now!"); */ -- ratio_scalable = 0; -- break; -+ /* The ratio is not acceptable now */ -+ /* throw("The ratio is not acceptable for now!"); */ -+ ratio_scalable = 0; -+ break; -+ } -+ -+ if (ratio_scalable) { -+ if (source_height == dest_height) { -+ /* for each band of the image */ -+ for (k = 0; k < (int)dest_height; k++) { -+ horiz_line_scale(source, source_width, dest, dest_width); -+ source += source_pitch; -+ dest += dest_pitch; -+ } -+ -+ return; - } - -- if (ratio_scalable) -- { -- if (source_height == dest_height) -- { -- /* for each band of the image */ -- for (k = 0; k < (int)dest_height; k++) -- { -- horiz_line_scale(source, source_width, dest, dest_width); -- source += 
source_pitch; -- dest += dest_pitch; -- } -- -- return; -- } -- -- if (interpolation) -- { -- if (source < source_base) -- source = source_base; -- -- horiz_line_scale(source, source_width, temp_area, dest_width); -- } -- -- for (k = 0; k < (int)(dest_height + dest_band_height - 1) / dest_band_height; k++) -- { -- /* scale one band horizontally */ -- for (i = 0; i < source_band_height; i++) -- { -- /* Trap case where we could read off the base of the source buffer */ -- -- line_src = (unsigned char *)source + i * source_pitch; -- -- if (line_src < source_base) -- line_src = source_base; -- -- horiz_line_scale(line_src, source_width, -- temp_area + (i + 1)*dest_pitch, dest_width); -- } -- -- /* Vertical scaling is in place */ -- vert_band_scale(temp_area + dest_pitch, dest_pitch, dest, dest_pitch, dest_width); -- -- if (interpolation) -- vpx_memcpy(temp_area, temp_area + source_band_height * dest_pitch, dest_width); -- -- /* Next band... */ -- source += (unsigned long) source_band_height * source_pitch; -- dest += (unsigned long) dest_band_height * dest_pitch; -- } -- -- return; -+ if (interpolation) { -+ if (source < source_base) -+ source = source_base; -+ -+ horiz_line_scale(source, source_width, temp_area, dest_width); - } - -- if (hscale == 2 && hratio == 1) -- Scale1Dh = scale1d_2t1_ps; -+ for (k = 0; k < (int)(dest_height + dest_band_height - 1) / dest_band_height; k++) { -+ /* scale one band horizontally */ -+ for (i = 0; i < source_band_height; i++) { -+ /* Trap case where we could read off the base of the source buffer */ - -- if (vscale == 2 && vratio == 1) -- { -- if (interlaced) -- Scale1Dv = scale1d_2t1_ps; -- else -- Scale1Dv = scale1d_2t1_i; -- } -+ line_src = (unsigned char *)source + i * source_pitch; - -- if (source_height == dest_height) -- { -- /* for each band of the image */ -- for (k = 0; k < (int)dest_height; k++) -- { -- Scale1Dh(source, 1, hscale, source_width + 1, dest, 1, hratio, dest_width); -- source += source_pitch; -- dest += dest_pitch; -- } -- -- return; -- } -+ if (line_src < source_base) -+ line_src = source_base; -+ -+ horiz_line_scale(line_src, source_width, -+ temp_area + (i + 1)*dest_pitch, dest_width); -+ } -+ -+ /* Vertical scaling is in place */ -+ vert_band_scale(temp_area + dest_pitch, dest_pitch, dest, dest_pitch, dest_width); - -- if (dest_height > source_height) -- { -- dest_band_height = temp_area_height - 1; -- source_band_height = dest_band_height * source_height / dest_height; -+ if (interpolation) -+ vpx_memcpy(temp_area, temp_area + source_band_height * dest_pitch, dest_width); -+ -+ /* Next band... 
*/ -+ source += (unsigned long) source_band_height * source_pitch; -+ dest += (unsigned long) dest_band_height * dest_pitch; - } -+ -+ return; -+ } -+ -+ if (hscale == 2 && hratio == 1) -+ Scale1Dh = scale1d_2t1_ps; -+ -+ if (vscale == 2 && vratio == 1) { -+ if (interlaced) -+ Scale1Dv = scale1d_2t1_ps; - else -- { -- source_band_height = temp_area_height - 1; -- dest_band_height = source_band_height * vratio / vscale; -+ Scale1Dv = scale1d_2t1_i; -+ } -+ -+ if (source_height == dest_height) { -+ /* for each band of the image */ -+ for (k = 0; k < (int)dest_height; k++) { -+ Scale1Dh(source, 1, hscale, source_width + 1, dest, 1, hratio, dest_width); -+ source += source_pitch; -+ dest += dest_pitch; - } - -- /* first row needs to be done so that we can stay one row ahead for vertical zoom */ -- Scale1Dh(source, 1, hscale, source_width + 1, temp_area, 1, hratio, dest_width); -+ return; -+ } -+ -+ if (dest_height > source_height) { -+ dest_band_height = temp_area_height - 1; -+ source_band_height = dest_band_height * source_height / dest_height; -+ } else { -+ source_band_height = temp_area_height - 1; -+ dest_band_height = source_band_height * vratio / vscale; -+ } - -- /* for each band of the image */ -- bands = (dest_height + dest_band_height - 1) / dest_band_height; -- -- for (k = 0; k < bands; k++) -- { -- /* scale one band horizontally */ -- for (i = 1; i < source_band_height + 1; i++) -- { -- if (k * source_band_height + i < (int) source_height) -- { -- Scale1Dh(source + i * source_pitch, 1, hscale, source_width + 1, -- temp_area + i * dest_pitch, 1, hratio, dest_width); -- } -- else /* Duplicate the last row */ -- { -- /* copy temp_area row 0 over from last row in the past */ -- duck_memcpy(temp_area + i * dest_pitch, temp_area + (i - 1)*dest_pitch, dest_pitch); -- } -- } -- -- /* scale one band vertically */ -- for (j = 0; j < (int)dest_width; j++) -- { -- Scale1Dv(&temp_area[j], dest_pitch, vscale, source_band_height + 1, -- &dest[j], dest_pitch, vratio, dest_band_height); -- } -+ /* first row needs to be done so that we can stay one row ahead for vertical zoom */ -+ Scale1Dh(source, 1, hscale, source_width + 1, temp_area, 1, hratio, dest_width); - -+ /* for each band of the image */ -+ bands = (dest_height + dest_band_height - 1) / dest_band_height; -+ -+ for (k = 0; k < bands; k++) { -+ /* scale one band horizontally */ -+ for (i = 1; i < source_band_height + 1; i++) { -+ if (k * source_band_height + i < (int) source_height) { -+ Scale1Dh(source + i * source_pitch, 1, hscale, source_width + 1, -+ temp_area + i * dest_pitch, 1, hratio, dest_width); -+ } else { /* Duplicate the last row */ - /* copy temp_area row 0 over from last row in the past */ -- duck_memcpy(temp_area, temp_area + source_band_height * dest_pitch, dest_pitch); -+ duck_memcpy(temp_area + i * dest_pitch, temp_area + (i - 1)*dest_pitch, dest_pitch); -+ } -+ } - -- /* move to the next band */ -- source += source_band_height * source_pitch; -- dest += dest_band_height * dest_pitch; -+ /* scale one band vertically */ -+ for (j = 0; j < (int)dest_width; j++) { -+ Scale1Dv(&temp_area[j], dest_pitch, vscale, source_band_height + 1, -+ &dest[j], dest_pitch, vratio, dest_band_height); - } -+ -+ /* copy temp_area row 0 over from last row in the past */ -+ duck_memcpy(temp_area, temp_area + source_band_height * dest_pitch, dest_pitch); -+ -+ /* move to the next band */ -+ source += source_band_height * source_pitch; -+ dest += dest_band_height * dest_pitch; -+ } - } - - 
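For orientation while reading the Scale2D hunk above: the banded path picks its kernels purely from the integer ratio (hratio * 10 / hscale yields 8 for 4:5, 6 for 3:5 and 5 for 1:2 scaling), and the progressive 1:2 path filters each output row from three source rows with a 3-10-3 kernel in 4 fractional bits. A minimal standalone C sketch of those two calculations follows (illustration only, not part of the patch; the sample values are made up):

#include <stdio.h>

/* Map a scale ratio to the banded kernel the same way Scale2D does:
 * (hratio * 10) / hscale == 8 -> 5-to-4, 6 -> 5-to-3, 5 -> 2-to-1. */
static const char *pick_kernel(unsigned int hscale, unsigned int hratio) {
  switch (hratio * 10 / hscale) {
    case 8: return "5->4 band (weights 192/64, 128/128, 64/192)";
    case 6: return "5->3 band (weights 85/171, 171/85)";
    case 5: return "2->1 band (point sample, or 3-10-3 when progressive)";
    default: return "ratio not handled by the banded path";
  }
}

/* The interpolating 2:1 kernel: weight the rows 3-10-3, round, shift by 4. */
static unsigned char scale_2_1_i(unsigned char above, unsigned char cur,
                                 unsigned char below) {
  int temp = 8;                       /* rounding term for the >> 4 below */
  temp += 3 * above;
  temp += 10 * cur;
  temp += 3 * below;
  return (unsigned char)(temp >> 4);
}

int main(void) {
  printf("%s\n", pick_kernel(5, 4));            /* selects the 5->4 band  */
  printf("%d\n", scale_2_1_i(100, 120, 140));   /* prints 120             */
  return 0;
}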
/**************************************************************************** -@@ -572,57 +538,56 @@ void Scale2D - ****************************************************************************/ - void vp8_scale_frame - ( -- YV12_BUFFER_CONFIG *src, -- YV12_BUFFER_CONFIG *dst, -- unsigned char *temp_area, -- unsigned char temp_height, -- unsigned int hscale, -- unsigned int hratio, -- unsigned int vscale, -- unsigned int vratio, -- unsigned int interlaced --) --{ -- int i; -- int dw = (hscale - 1 + src->y_width * hratio) / hscale; -- int dh = (vscale - 1 + src->y_height * vratio) / vscale; -- -- /* call our internal scaling routines!! */ -- Scale2D((unsigned char *) src->y_buffer, src->y_stride, src->y_width, src->y_height, -- (unsigned char *) dst->y_buffer, dst->y_stride, dw, dh, -- temp_area, temp_height, hscale, hratio, vscale, vratio, interlaced); -- -- if (dw < (int)dst->y_width) -- for (i = 0; i < dh; i++) -- duck_memset(dst->y_buffer + i * dst->y_stride + dw - 1, dst->y_buffer[i*dst->y_stride+dw-2], dst->y_width - dw + 1); -- -- if (dh < (int)dst->y_height) -- for (i = dh - 1; i < (int)dst->y_height; i++) -- duck_memcpy(dst->y_buffer + i * dst->y_stride, dst->y_buffer + (dh - 2) * dst->y_stride, dst->y_width + 1); -- -- Scale2D((unsigned char *) src->u_buffer, src->uv_stride, src->uv_width, src->uv_height, -- (unsigned char *) dst->u_buffer, dst->uv_stride, dw / 2, dh / 2, -- temp_area, temp_height, hscale, hratio, vscale, vratio, interlaced); -- -- if (dw / 2 < (int)dst->uv_width) -- for (i = 0; i < dst->uv_height; i++) -- duck_memset(dst->u_buffer + i * dst->uv_stride + dw / 2 - 1, dst->u_buffer[i*dst->uv_stride+dw/2-2], dst->uv_width - dw / 2 + 1); -- -- if (dh / 2 < (int)dst->uv_height) -- for (i = dh / 2 - 1; i < (int)dst->y_height / 2; i++) -- duck_memcpy(dst->u_buffer + i * dst->uv_stride, dst->u_buffer + (dh / 2 - 2)*dst->uv_stride, dst->uv_width); -- -- Scale2D((unsigned char *) src->v_buffer, src->uv_stride, src->uv_width, src->uv_height, -- (unsigned char *) dst->v_buffer, dst->uv_stride, dw / 2, dh / 2, -- temp_area, temp_height, hscale, hratio, vscale, vratio, interlaced); -- -- if (dw / 2 < (int)dst->uv_width) -- for (i = 0; i < dst->uv_height; i++) -- duck_memset(dst->v_buffer + i * dst->uv_stride + dw / 2 - 1, dst->v_buffer[i*dst->uv_stride+dw/2-2], dst->uv_width - dw / 2 + 1); -- -- if (dh / 2 < (int) dst->uv_height) -- for (i = dh / 2 - 1; i < (int)dst->y_height / 2; i++) -- duck_memcpy(dst->v_buffer + i * dst->uv_stride, dst->v_buffer + (dh / 2 - 2)*dst->uv_stride, dst->uv_width); -+ YV12_BUFFER_CONFIG *src, -+ YV12_BUFFER_CONFIG *dst, -+ unsigned char *temp_area, -+ unsigned char temp_height, -+ unsigned int hscale, -+ unsigned int hratio, -+ unsigned int vscale, -+ unsigned int vratio, -+ unsigned int interlaced -+) { -+ int i; -+ int dw = (hscale - 1 + src->y_width * hratio) / hscale; -+ int dh = (vscale - 1 + src->y_height * vratio) / vscale; -+ -+ /* call our internal scaling routines!! 
*/ -+ Scale2D((unsigned char *) src->y_buffer, src->y_stride, src->y_width, src->y_height, -+ (unsigned char *) dst->y_buffer, dst->y_stride, dw, dh, -+ temp_area, temp_height, hscale, hratio, vscale, vratio, interlaced); -+ -+ if (dw < (int)dst->y_width) -+ for (i = 0; i < dh; i++) -+ duck_memset(dst->y_buffer + i * dst->y_stride + dw - 1, dst->y_buffer[i * dst->y_stride + dw - 2], dst->y_width - dw + 1); -+ -+ if (dh < (int)dst->y_height) -+ for (i = dh - 1; i < (int)dst->y_height; i++) -+ duck_memcpy(dst->y_buffer + i * dst->y_stride, dst->y_buffer + (dh - 2) * dst->y_stride, dst->y_width + 1); -+ -+ Scale2D((unsigned char *) src->u_buffer, src->uv_stride, src->uv_width, src->uv_height, -+ (unsigned char *) dst->u_buffer, dst->uv_stride, dw / 2, dh / 2, -+ temp_area, temp_height, hscale, hratio, vscale, vratio, interlaced); -+ -+ if (dw / 2 < (int)dst->uv_width) -+ for (i = 0; i < dst->uv_height; i++) -+ duck_memset(dst->u_buffer + i * dst->uv_stride + dw / 2 - 1, dst->u_buffer[i * dst->uv_stride + dw / 2 - 2], dst->uv_width - dw / 2 + 1); -+ -+ if (dh / 2 < (int)dst->uv_height) -+ for (i = dh / 2 - 1; i < (int)dst->y_height / 2; i++) -+ duck_memcpy(dst->u_buffer + i * dst->uv_stride, dst->u_buffer + (dh / 2 - 2)*dst->uv_stride, dst->uv_width); -+ -+ Scale2D((unsigned char *) src->v_buffer, src->uv_stride, src->uv_width, src->uv_height, -+ (unsigned char *) dst->v_buffer, dst->uv_stride, dw / 2, dh / 2, -+ temp_area, temp_height, hscale, hratio, vscale, vratio, interlaced); -+ -+ if (dw / 2 < (int)dst->uv_width) -+ for (i = 0; i < dst->uv_height; i++) -+ duck_memset(dst->v_buffer + i * dst->uv_stride + dw / 2 - 1, dst->v_buffer[i * dst->uv_stride + dw / 2 - 2], dst->uv_width - dw / 2 + 1); -+ -+ if (dh / 2 < (int) dst->uv_height) -+ for (i = dh / 2 - 1; i < (int)dst->y_height / 2; i++) -+ duck_memcpy(dst->v_buffer + i * dst->uv_stride, dst->v_buffer + (dh / 2 - 2)*dst->uv_stride, dst->uv_width); - } - /**************************************************************************** - * -@@ -651,183 +616,177 @@ void vp8_scale_frame - static - int any_ratio_2d_scale - ( -- SCALE_VARS *si, -- const unsigned char *source, -- int source_pitch, -- unsigned int source_width, -- unsigned int source_height, -- unsigned char *dest, -- unsigned int dest_pitch, -- unsigned int dest_width, -- unsigned int dest_height --) --{ -- unsigned int i, k; -- unsigned int src_band_height = 0; -- unsigned int dest_band_height = 0; -- -- /* suggested scale factors */ -- int hs = si->HScale; -- int hr = si->HRatio; -- int vs = si->VScale; -- int vr = si->VRatio; -- -- /* assume the ratios are scalable instead of should be centered */ -- int ratio_scalable = 1; -- -- const unsigned char *source_base = ((source_pitch >= 0) ? 
source : (source + ((source_height - 1) * source_pitch))); -- const unsigned char *line_src; -- -- void (*horiz_line_scale)(const unsigned char *, unsigned int, unsigned char *, unsigned int) = NULL; -- void (*vert_band_scale)(unsigned char *, unsigned int, unsigned int) = NULL; -- void (*last_vert_band_scale)(unsigned char *, unsigned int, unsigned int) = NULL; -- -- (void) si; -- -- /* find out the ratio for each direction */ -- switch (hr * 30 / hs) -- { -+ SCALE_VARS *si, -+ const unsigned char *source, -+ int source_pitch, -+ unsigned int source_width, -+ unsigned int source_height, -+ unsigned char *dest, -+ unsigned int dest_pitch, -+ unsigned int dest_width, -+ unsigned int dest_height -+) { -+ unsigned int i, k; -+ unsigned int src_band_height = 0; -+ unsigned int dest_band_height = 0; -+ -+ /* suggested scale factors */ -+ int hs = si->HScale; -+ int hr = si->HRatio; -+ int vs = si->VScale; -+ int vr = si->VRatio; -+ -+ /* assume the ratios are scalable instead of should be centered */ -+ int ratio_scalable = 1; -+ -+ const unsigned char *source_base = ((source_pitch >= 0) ? source : (source + ((source_height - 1) * source_pitch))); -+ const unsigned char *line_src; -+ -+ void (*horiz_line_scale)(const unsigned char *, unsigned int, unsigned char *, unsigned int) = NULL; -+ void (*vert_band_scale)(unsigned char *, unsigned int, unsigned int) = NULL; -+ void (*last_vert_band_scale)(unsigned char *, unsigned int, unsigned int) = NULL; -+ -+ (void) si; -+ -+ /* find out the ratio for each direction */ -+ switch (hr * 30 / hs) { - case 24: -- /* 4-5 Scale in Width direction */ -- horiz_line_scale = vp8_horizontal_line_4_5_scale; -- break; -+ /* 4-5 Scale in Width direction */ -+ horiz_line_scale = vp8_horizontal_line_4_5_scale; -+ break; - case 22: -- /* 3-4 Scale in Width direction */ -- horiz_line_scale = vp8_horizontal_line_3_4_scale; -- break; -+ /* 3-4 Scale in Width direction */ -+ horiz_line_scale = vp8_horizontal_line_3_4_scale; -+ break; - - case 20: -- /* 4-5 Scale in Width direction */ -- horiz_line_scale = vp8_horizontal_line_2_3_scale; -- break; -+ /* 4-5 Scale in Width direction */ -+ horiz_line_scale = vp8_horizontal_line_2_3_scale; -+ break; - case 18: -- /* 3-5 Scale in Width direction */ -- horiz_line_scale = vp8_horizontal_line_3_5_scale; -- break; -+ /* 3-5 Scale in Width direction */ -+ horiz_line_scale = vp8_horizontal_line_3_5_scale; -+ break; - case 15: -- /* 1-2 Scale in Width direction */ -- horiz_line_scale = vp8_horizontal_line_1_2_scale; -- break; -+ /* 1-2 Scale in Width direction */ -+ horiz_line_scale = vp8_horizontal_line_1_2_scale; -+ break; - case 30: -- /* no scale in Width direction */ -- horiz_line_scale = horizontal_line_copy; -- break; -+ /* no scale in Width direction */ -+ horiz_line_scale = horizontal_line_copy; -+ break; - default: -- /* The ratio is not acceptable now */ -- /* throw("The ratio is not acceptable for now!"); */ -- ratio_scalable = 0; -- break; -- } -+ /* The ratio is not acceptable now */ -+ /* throw("The ratio is not acceptable for now!"); */ -+ ratio_scalable = 0; -+ break; -+ } - -- switch (vr * 30 / vs) -- { -+ switch (vr * 30 / vs) { - case 24: -- /* 4-5 Scale in vertical direction */ -- vert_band_scale = vp8_vertical_band_4_5_scale; -- last_vert_band_scale = vp8_last_vertical_band_4_5_scale; -- src_band_height = 4; -- dest_band_height = 5; -- break; -+ /* 4-5 Scale in vertical direction */ -+ vert_band_scale = vp8_vertical_band_4_5_scale; -+ last_vert_band_scale = vp8_last_vertical_band_4_5_scale; -+ src_band_height = 
4; -+ dest_band_height = 5; -+ break; - case 22: -- /* 3-4 Scale in vertical direction */ -- vert_band_scale = vp8_vertical_band_3_4_scale; -- last_vert_band_scale = vp8_last_vertical_band_3_4_scale; -- src_band_height = 3; -- dest_band_height = 4; -- break; -+ /* 3-4 Scale in vertical direction */ -+ vert_band_scale = vp8_vertical_band_3_4_scale; -+ last_vert_band_scale = vp8_last_vertical_band_3_4_scale; -+ src_band_height = 3; -+ dest_band_height = 4; -+ break; - case 20: -- /* 2-3 Scale in vertical direction */ -- vert_band_scale = vp8_vertical_band_2_3_scale; -- last_vert_band_scale = vp8_last_vertical_band_2_3_scale; -- src_band_height = 2; -- dest_band_height = 3; -- break; -+ /* 2-3 Scale in vertical direction */ -+ vert_band_scale = vp8_vertical_band_2_3_scale; -+ last_vert_band_scale = vp8_last_vertical_band_2_3_scale; -+ src_band_height = 2; -+ dest_band_height = 3; -+ break; - case 18: -- /* 3-5 Scale in vertical direction */ -- vert_band_scale = vp8_vertical_band_3_5_scale; -- last_vert_band_scale = vp8_last_vertical_band_3_5_scale; -- src_band_height = 3; -- dest_band_height = 5; -- break; -+ /* 3-5 Scale in vertical direction */ -+ vert_band_scale = vp8_vertical_band_3_5_scale; -+ last_vert_band_scale = vp8_last_vertical_band_3_5_scale; -+ src_band_height = 3; -+ dest_band_height = 5; -+ break; - case 15: -- /* 1-2 Scale in vertical direction */ -- vert_band_scale = vp8_vertical_band_1_2_scale; -- last_vert_band_scale = vp8_last_vertical_band_1_2_scale; -- src_band_height = 1; -- dest_band_height = 2; -- break; -+ /* 1-2 Scale in vertical direction */ -+ vert_band_scale = vp8_vertical_band_1_2_scale; -+ last_vert_band_scale = vp8_last_vertical_band_1_2_scale; -+ src_band_height = 1; -+ dest_band_height = 2; -+ break; - case 30: -- /* no scale in Width direction */ -- vert_band_scale = null_scale; -- last_vert_band_scale = null_scale; -- src_band_height = 4; -- dest_band_height = 4; -- break; -+ /* no scale in Width direction */ -+ vert_band_scale = null_scale; -+ last_vert_band_scale = null_scale; -+ src_band_height = 4; -+ dest_band_height = 4; -+ break; - default: -- /* The ratio is not acceptable now */ -- /* throw("The ratio is not acceptable for now!"); */ -- ratio_scalable = 0; -- break; -- } -+ /* The ratio is not acceptable now */ -+ /* throw("The ratio is not acceptable for now!"); */ -+ ratio_scalable = 0; -+ break; -+ } - -- if (ratio_scalable == 0) -- return ratio_scalable; -+ if (ratio_scalable == 0) -+ return ratio_scalable; - -- horiz_line_scale(source, source_width, dest, dest_width); -+ horiz_line_scale(source, source_width, dest, dest_width); - -- /* except last band */ -- for (k = 0; k < (dest_height + dest_band_height - 1) / dest_band_height - 1; k++) -- { -- /* scale one band horizontally */ -- for (i = 1; i < src_band_height; i++) -- { -- /* Trap case where we could read off the base of the source buffer */ -- line_src = source + i * source_pitch; -+ /* except last band */ -+ for (k = 0; k < (dest_height + dest_band_height - 1) / dest_band_height - 1; k++) { -+ /* scale one band horizontally */ -+ for (i = 1; i < src_band_height; i++) { -+ /* Trap case where we could read off the base of the source buffer */ -+ line_src = source + i * source_pitch; - -- if (line_src < source_base) -- line_src = source_base; -+ if (line_src < source_base) -+ line_src = source_base; - -- horiz_line_scale(line_src, source_width, -- dest + i * dest_pitch, dest_width); -- } -+ horiz_line_scale(line_src, source_width, -+ dest + i * dest_pitch, dest_width); -+ } - -- /* first 
line of next band */ -- /* Trap case where we could read off the base of the source buffer */ -- line_src = source + src_band_height * source_pitch; -+ /* first line of next band */ -+ /* Trap case where we could read off the base of the source buffer */ -+ line_src = source + src_band_height * source_pitch; - -- if (line_src < source_base) -- line_src = source_base; -+ if (line_src < source_base) -+ line_src = source_base; - -- horiz_line_scale(line_src, source_width, -- dest + dest_band_height * dest_pitch, -- dest_width); -+ horiz_line_scale(line_src, source_width, -+ dest + dest_band_height * dest_pitch, -+ dest_width); - -- /* Vertical scaling is in place */ -- vert_band_scale(dest, dest_pitch, dest_width); -+ /* Vertical scaling is in place */ -+ vert_band_scale(dest, dest_pitch, dest_width); - -- /* Next band... */ -- source += src_band_height * source_pitch; -- dest += dest_band_height * dest_pitch; -- } -+ /* Next band... */ -+ source += src_band_height * source_pitch; -+ dest += dest_band_height * dest_pitch; -+ } - -- /* scale one band horizontally */ -- for (i = 1; i < src_band_height; i++) -- { -- /* Trap case where we could read off the base of the source buffer */ -- line_src = source + i * source_pitch; -+ /* scale one band horizontally */ -+ for (i = 1; i < src_band_height; i++) { -+ /* Trap case where we could read off the base of the source buffer */ -+ line_src = source + i * source_pitch; - -- if (line_src < source_base) -- line_src = source_base; -+ if (line_src < source_base) -+ line_src = source_base; - -- horiz_line_scale(line_src, source_width, -- dest + i * dest_pitch, -- dest_width); -- } -+ horiz_line_scale(line_src, source_width, -+ dest + i * dest_pitch, -+ dest_width); -+ } - -- /* Vertical scaling is in place */ -- last_vert_band_scale(dest, dest_pitch, dest_width); -+ /* Vertical scaling is in place */ -+ last_vert_band_scale(dest, dest_pitch, dest_width); - -- return ratio_scalable; -+ return ratio_scalable; - } - - /**************************************************************************** -@@ -849,70 +808,69 @@ int any_ratio_2d_scale - * - ****************************************************************************/ - static --int any_ratio_frame_scale(SCALE_VARS *scale_vars, int YOffset, int UVOffset) --{ -- int i; -- int ew; -- int eh; -- -- /* suggested scale factors */ -- int hs = scale_vars->HScale; -- int hr = scale_vars->HRatio; -- int vs = scale_vars->VScale; -- int vr = scale_vars->VRatio; -- -- int ratio_scalable = 1; -- -- int sw = (scale_vars->expanded_frame_width * hr + hs - 1) / hs; -- int sh = (scale_vars->expanded_frame_height * vr + vs - 1) / vs; -- int dw = scale_vars->expanded_frame_width; -- int dh = scale_vars->expanded_frame_height; -- YV12_BUFFER_CONFIG *src_yuv_config = scale_vars->src_yuv_config; -- YV12_BUFFER_CONFIG *dst_yuv_config = scale_vars->dst_yuv_config; -- -- if (hr == 3) -- ew = (sw + 2) / 3 * 3 * hs / hr; -- else -- ew = (sw + 7) / 8 * 8 * hs / hr; -- -- if (vr == 3) -- eh = (sh + 2) / 3 * 3 * vs / vr; -- else -- eh = (sh + 7) / 8 * 8 * vs / vr; -- -- ratio_scalable = any_ratio_2d_scale(scale_vars, -- (const unsigned char *)src_yuv_config->y_buffer, -- src_yuv_config->y_stride, sw, sh, -- (unsigned char *) dst_yuv_config->y_buffer + YOffset, -- dst_yuv_config->y_stride, dw, dh); -- -- for (i = 0; i < eh; i++) -- duck_memset(dst_yuv_config->y_buffer + YOffset + i * dst_yuv_config->y_stride + dw, 0, ew - dw); -- -- for (i = dh; i < eh; i++) -- duck_memset(dst_yuv_config->y_buffer + YOffset + i * 
dst_yuv_config->y_stride, 0, ew); -- -- if (ratio_scalable == 0) -- return ratio_scalable; -+int any_ratio_frame_scale(SCALE_VARS *scale_vars, int YOffset, int UVOffset) { -+ int i; -+ int ew; -+ int eh; -+ -+ /* suggested scale factors */ -+ int hs = scale_vars->HScale; -+ int hr = scale_vars->HRatio; -+ int vs = scale_vars->VScale; -+ int vr = scale_vars->VRatio; -+ -+ int ratio_scalable = 1; -+ -+ int sw = (scale_vars->expanded_frame_width * hr + hs - 1) / hs; -+ int sh = (scale_vars->expanded_frame_height * vr + vs - 1) / vs; -+ int dw = scale_vars->expanded_frame_width; -+ int dh = scale_vars->expanded_frame_height; -+ YV12_BUFFER_CONFIG *src_yuv_config = scale_vars->src_yuv_config; -+ YV12_BUFFER_CONFIG *dst_yuv_config = scale_vars->dst_yuv_config; -+ -+ if (hr == 3) -+ ew = (sw + 2) / 3 * 3 * hs / hr; -+ else -+ ew = (sw + 7) / 8 * 8 * hs / hr; -+ -+ if (vr == 3) -+ eh = (sh + 2) / 3 * 3 * vs / vr; -+ else -+ eh = (sh + 7) / 8 * 8 * vs / vr; -+ -+ ratio_scalable = any_ratio_2d_scale(scale_vars, -+ (const unsigned char *)src_yuv_config->y_buffer, -+ src_yuv_config->y_stride, sw, sh, -+ (unsigned char *) dst_yuv_config->y_buffer + YOffset, -+ dst_yuv_config->y_stride, dw, dh); -+ -+ for (i = 0; i < eh; i++) -+ duck_memset(dst_yuv_config->y_buffer + YOffset + i * dst_yuv_config->y_stride + dw, 0, ew - dw); -+ -+ for (i = dh; i < eh; i++) -+ duck_memset(dst_yuv_config->y_buffer + YOffset + i * dst_yuv_config->y_stride, 0, ew); -+ -+ if (ratio_scalable == 0) -+ return ratio_scalable; - -- sw = (sw + 1) >> 1; -- sh = (sh + 1) >> 1; -- dw = (dw + 1) >> 1; -- dh = (dh + 1) >> 1; -+ sw = (sw + 1) >> 1; -+ sh = (sh + 1) >> 1; -+ dw = (dw + 1) >> 1; -+ dh = (dh + 1) >> 1; - -- any_ratio_2d_scale(scale_vars, -- (const unsigned char *)src_yuv_config->u_buffer, -- src_yuv_config->y_stride / 2, sw, sh, -- (unsigned char *)dst_yuv_config->u_buffer + UVOffset, -- dst_yuv_config->uv_stride, dw, dh); -+ any_ratio_2d_scale(scale_vars, -+ (const unsigned char *)src_yuv_config->u_buffer, -+ src_yuv_config->y_stride / 2, sw, sh, -+ (unsigned char *)dst_yuv_config->u_buffer + UVOffset, -+ dst_yuv_config->uv_stride, dw, dh); - -- any_ratio_2d_scale(scale_vars, -- (const unsigned char *)src_yuv_config->v_buffer, -- src_yuv_config->y_stride / 2, sw, sh, -- (unsigned char *)dst_yuv_config->v_buffer + UVOffset, -- dst_yuv_config->uv_stride, dw, dh); -+ any_ratio_2d_scale(scale_vars, -+ (const unsigned char *)src_yuv_config->v_buffer, -+ src_yuv_config->y_stride / 2, sw, sh, -+ (unsigned char *)dst_yuv_config->v_buffer + UVOffset, -+ dst_yuv_config->uv_stride, dw, dh); - -- return ratio_scalable; -+ return ratio_scalable; - } - - /**************************************************************************** -@@ -931,52 +889,48 @@ int any_ratio_frame_scale(SCALE_VARS *scale_vars, int YOffset, int UVOffset) - * - ****************************************************************************/ - static void --center_image(YV12_BUFFER_CONFIG *src_yuv_config, YV12_BUFFER_CONFIG *dst_yuv_config) --{ -- int i; -- int row_offset, col_offset; -- unsigned char *src_data_pointer; -- unsigned char *dst_data_pointer; -- -- /* center values */ -- row_offset = (dst_yuv_config->y_height - src_yuv_config->y_height) / 2; -- col_offset = (dst_yuv_config->y_width - src_yuv_config->y_width) / 2; -- -- /* Y's */ -- src_data_pointer = src_yuv_config->y_buffer; -- dst_data_pointer = (unsigned char *)dst_yuv_config->y_buffer + (row_offset * dst_yuv_config->y_stride) + col_offset; -- -- for (i = 0; i < src_yuv_config->y_height; i++) -- { -- 
duck_memcpy(dst_data_pointer, src_data_pointer, src_yuv_config->y_width); -- dst_data_pointer += dst_yuv_config->y_stride; -- src_data_pointer += src_yuv_config->y_stride; -- } -- -- row_offset /= 2; -- col_offset /= 2; -- -- /* U's */ -- src_data_pointer = src_yuv_config->u_buffer; -- dst_data_pointer = (unsigned char *)dst_yuv_config->u_buffer + (row_offset * dst_yuv_config->uv_stride) + col_offset; -- -- for (i = 0; i < src_yuv_config->uv_height; i++) -- { -- duck_memcpy(dst_data_pointer, src_data_pointer, src_yuv_config->uv_width); -- dst_data_pointer += dst_yuv_config->uv_stride; -- src_data_pointer += src_yuv_config->uv_stride; -- } -- -- /* V's */ -- src_data_pointer = src_yuv_config->v_buffer; -- dst_data_pointer = (unsigned char *)dst_yuv_config->v_buffer + (row_offset * dst_yuv_config->uv_stride) + col_offset; -- -- for (i = 0; i < src_yuv_config->uv_height; i++) -- { -- duck_memcpy(dst_data_pointer, src_data_pointer, src_yuv_config->uv_width); -- dst_data_pointer += dst_yuv_config->uv_stride; -- src_data_pointer += src_yuv_config->uv_stride; -- } -+center_image(YV12_BUFFER_CONFIG *src_yuv_config, YV12_BUFFER_CONFIG *dst_yuv_config) { -+ int i; -+ int row_offset, col_offset; -+ unsigned char *src_data_pointer; -+ unsigned char *dst_data_pointer; -+ -+ /* center values */ -+ row_offset = (dst_yuv_config->y_height - src_yuv_config->y_height) / 2; -+ col_offset = (dst_yuv_config->y_width - src_yuv_config->y_width) / 2; -+ -+ /* Y's */ -+ src_data_pointer = src_yuv_config->y_buffer; -+ dst_data_pointer = (unsigned char *)dst_yuv_config->y_buffer + (row_offset * dst_yuv_config->y_stride) + col_offset; -+ -+ for (i = 0; i < src_yuv_config->y_height; i++) { -+ duck_memcpy(dst_data_pointer, src_data_pointer, src_yuv_config->y_width); -+ dst_data_pointer += dst_yuv_config->y_stride; -+ src_data_pointer += src_yuv_config->y_stride; -+ } -+ -+ row_offset /= 2; -+ col_offset /= 2; -+ -+ /* U's */ -+ src_data_pointer = src_yuv_config->u_buffer; -+ dst_data_pointer = (unsigned char *)dst_yuv_config->u_buffer + (row_offset * dst_yuv_config->uv_stride) + col_offset; -+ -+ for (i = 0; i < src_yuv_config->uv_height; i++) { -+ duck_memcpy(dst_data_pointer, src_data_pointer, src_yuv_config->uv_width); -+ dst_data_pointer += dst_yuv_config->uv_stride; -+ src_data_pointer += src_yuv_config->uv_stride; -+ } -+ -+ /* V's */ -+ src_data_pointer = src_yuv_config->v_buffer; -+ dst_data_pointer = (unsigned char *)dst_yuv_config->v_buffer + (row_offset * dst_yuv_config->uv_stride) + col_offset; -+ -+ for (i = 0; i < src_yuv_config->uv_height; i++) { -+ duck_memcpy(dst_data_pointer, src_data_pointer, src_yuv_config->uv_width); -+ dst_data_pointer += dst_yuv_config->uv_stride; -+ src_data_pointer += src_yuv_config->uv_stride; -+ } - } - - /**************************************************************************** -@@ -999,61 +953,58 @@ center_image(YV12_BUFFER_CONFIG *src_yuv_config, YV12_BUFFER_CONFIG *dst_yuv_con - void - vp8_yv12_scale_or_center - ( -- YV12_BUFFER_CONFIG *src_yuv_config, -- YV12_BUFFER_CONFIG *dst_yuv_config, -- int expanded_frame_width, -- int expanded_frame_height, -- int scaling_mode, -- int HScale, -- int HRatio, -- int VScale, -- int VRatio --) --{ -- /*if ( ppi->post_processing_level ) -- update_umvborder ( ppi, frame_buffer );*/ -- -- -- switch (scaling_mode) -- { -+ YV12_BUFFER_CONFIG *src_yuv_config, -+ YV12_BUFFER_CONFIG *dst_yuv_config, -+ int expanded_frame_width, -+ int expanded_frame_height, -+ int scaling_mode, -+ int HScale, -+ int HRatio, -+ int VScale, -+ int VRatio -+) 
{ -+ /*if ( ppi->post_processing_level ) -+ update_umvborder ( ppi, frame_buffer );*/ -+ -+ -+ switch (scaling_mode) { - case SCALE_TO_FIT: -- case MAINTAIN_ASPECT_RATIO: -- { -- SCALE_VARS scale_vars; -- /* center values */ -+ case MAINTAIN_ASPECT_RATIO: { -+ SCALE_VARS scale_vars; -+ /* center values */ - #if 1 -- int row = (dst_yuv_config->y_height - expanded_frame_height) / 2; -- int col = (dst_yuv_config->y_width - expanded_frame_width) / 2; -- /*int YOffset = row * dst_yuv_config->y_width + col; -- int UVOffset = (row>>1) * dst_yuv_config->uv_width + (col>>1);*/ -- int YOffset = row * dst_yuv_config->y_stride + col; -- int UVOffset = (row >> 1) * dst_yuv_config->uv_stride + (col >> 1); -+ int row = (dst_yuv_config->y_height - expanded_frame_height) / 2; -+ int col = (dst_yuv_config->y_width - expanded_frame_width) / 2; -+ /*int YOffset = row * dst_yuv_config->y_width + col; -+ int UVOffset = (row>>1) * dst_yuv_config->uv_width + (col>>1);*/ -+ int YOffset = row * dst_yuv_config->y_stride + col; -+ int UVOffset = (row >> 1) * dst_yuv_config->uv_stride + (col >> 1); - #else -- int row = (src_yuv_config->y_height - expanded_frame_height) / 2; -- int col = (src_yuv_config->y_width - expanded_frame_width) / 2; -- int YOffset = row * src_yuv_config->y_width + col; -- int UVOffset = (row >> 1) * src_yuv_config->uv_width + (col >> 1); -+ int row = (src_yuv_config->y_height - expanded_frame_height) / 2; -+ int col = (src_yuv_config->y_width - expanded_frame_width) / 2; -+ int YOffset = row * src_yuv_config->y_width + col; -+ int UVOffset = (row >> 1) * src_yuv_config->uv_width + (col >> 1); - #endif - -- scale_vars.dst_yuv_config = dst_yuv_config; -- scale_vars.src_yuv_config = src_yuv_config; -- scale_vars.HScale = HScale; -- scale_vars.HRatio = HRatio; -- scale_vars.VScale = VScale; -- scale_vars.VRatio = VRatio; -- scale_vars.expanded_frame_width = expanded_frame_width; -- scale_vars.expanded_frame_height = expanded_frame_height; -+ scale_vars.dst_yuv_config = dst_yuv_config; -+ scale_vars.src_yuv_config = src_yuv_config; -+ scale_vars.HScale = HScale; -+ scale_vars.HRatio = HRatio; -+ scale_vars.VScale = VScale; -+ scale_vars.VRatio = VRatio; -+ scale_vars.expanded_frame_width = expanded_frame_width; -+ scale_vars.expanded_frame_height = expanded_frame_height; - -- /* perform center and scale */ -- any_ratio_frame_scale(&scale_vars, YOffset, UVOffset); -+ /* perform center and scale */ -+ any_ratio_frame_scale(&scale_vars, YOffset, UVOffset); - -- break; -+ break; - } - case CENTER: -- center_image(src_yuv_config, dst_yuv_config); -- break; -+ center_image(src_yuv_config, dst_yuv_config); -+ break; - - default: -- break; -- } -+ break; -+ } - } -diff --git a/vpx_scale/generic/yv12config.c b/vpx_scale/generic/yv12config.c -index eff594e..4cb2a41 100644 ---- a/vpx_scale/generic/yv12config.c -+++ b/vpx_scale/generic/yv12config.c -@@ -20,81 +20,73 @@ - * - ****************************************************************************/ - int --vp8_yv12_de_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf) --{ -- if (ybf) -- { -- vpx_free(ybf->buffer_alloc); -- -- /* buffer_alloc isn't accessed by most functions. Rather y_buffer, -- u_buffer and v_buffer point to buffer_alloc and are used. 
Clear out -- all of this so that a freed pointer isn't inadvertently used */ -- vpx_memset (ybf, 0, sizeof (YV12_BUFFER_CONFIG)); -- } -- else -- { -- return -1; -- } -- -- return 0; -+vp8_yv12_de_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf) { -+ if (ybf) { -+ vpx_free(ybf->buffer_alloc); -+ -+ /* buffer_alloc isn't accessed by most functions. Rather y_buffer, -+ u_buffer and v_buffer point to buffer_alloc and are used. Clear out -+ all of this so that a freed pointer isn't inadvertently used */ -+ vpx_memset(ybf, 0, sizeof(YV12_BUFFER_CONFIG)); -+ } else { -+ return -1; -+ } -+ -+ return 0; - } - - /**************************************************************************** - * - ****************************************************************************/ - int --vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height, int border) --{ --/*NOTE:*/ -- -- if (ybf) -- { -- int y_stride = ((width + 2 * border) + 31) & ~31; -- int yplane_size = (height + 2 * border) * y_stride; -- int uv_width = width >> 1; -- int uv_height = height >> 1; -- /** There is currently a bunch of code which assumes -- * uv_stride == y_stride/2, so enforce this here. */ -- int uv_stride = y_stride >> 1; -- int uvplane_size = (uv_height + border) * uv_stride; -- -- vp8_yv12_de_alloc_frame_buffer(ybf); -- -- /** Only support allocating buffers that have a height and width that -- * are multiples of 16, and a border that's a multiple of 32. -- * The border restriction is required to get 16-byte alignment of the -- * start of the chroma rows without intoducing an arbitrary gap -- * between planes, which would break the semantics of things like -- * vpx_img_set_rect(). */ -- if ((width & 0xf) | (height & 0xf) | (border & 0x1f)) -- return -3; -- -- ybf->y_width = width; -- ybf->y_height = height; -- ybf->y_stride = y_stride; -- -- ybf->uv_width = uv_width; -- ybf->uv_height = uv_height; -- ybf->uv_stride = uv_stride; -- -- ybf->border = border; -- ybf->frame_size = yplane_size + 2 * uvplane_size; -- -- ybf->buffer_alloc = (unsigned char *) vpx_memalign(32, ybf->frame_size); -- -- if (ybf->buffer_alloc == NULL) -- return -1; -- -- ybf->y_buffer = ybf->buffer_alloc + (border * y_stride) + border; -- ybf->u_buffer = ybf->buffer_alloc + yplane_size + (border / 2 * uv_stride) + border / 2; -- ybf->v_buffer = ybf->buffer_alloc + yplane_size + uvplane_size + (border / 2 * uv_stride) + border / 2; -- -- ybf->corrupted = 0; /* assume not currupted by errors */ -- } -- else -- { -- return -2; -- } -- -- return 0; -+vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height, int border) { -+ /*NOTE:*/ -+ -+ if (ybf) { -+ int y_stride = ((width + 2 * border) + 31) & ~31; -+ int yplane_size = (height + 2 * border) * y_stride; -+ int uv_width = width >> 1; -+ int uv_height = height >> 1; -+ /** There is currently a bunch of code which assumes -+ * uv_stride == y_stride/2, so enforce this here. */ -+ int uv_stride = y_stride >> 1; -+ int uvplane_size = (uv_height + border) * uv_stride; -+ -+ vp8_yv12_de_alloc_frame_buffer(ybf); -+ -+ /** Only support allocating buffers that have a height and width that -+ * are multiples of 16, and a border that's a multiple of 32. -+ * The border restriction is required to get 16-byte alignment of the -+ * start of the chroma rows without intoducing an arbitrary gap -+ * between planes, which would break the semantics of things like -+ * vpx_img_set_rect(). 
*/ -+ if ((width & 0xf) | (height & 0xf) | (border & 0x1f)) -+ return -3; -+ -+ ybf->y_width = width; -+ ybf->y_height = height; -+ ybf->y_stride = y_stride; -+ -+ ybf->uv_width = uv_width; -+ ybf->uv_height = uv_height; -+ ybf->uv_stride = uv_stride; -+ -+ ybf->border = border; -+ ybf->frame_size = yplane_size + 2 * uvplane_size; -+ -+ ybf->buffer_alloc = (unsigned char *) vpx_memalign(32, ybf->frame_size); -+ -+ if (ybf->buffer_alloc == NULL) -+ return -1; -+ -+ ybf->y_buffer = ybf->buffer_alloc + (border * y_stride) + border; -+ ybf->u_buffer = ybf->buffer_alloc + yplane_size + (border / 2 * uv_stride) + border / 2; -+ ybf->v_buffer = ybf->buffer_alloc + yplane_size + uvplane_size + (border / 2 * uv_stride) + border / 2; -+ -+ ybf->corrupted = 0; /* assume not currupted by errors */ -+ } else { -+ return -2; -+ } -+ -+ return 0; - } -diff --git a/vpx_scale/generic/yv12extend.c b/vpx_scale/generic/yv12extend.c -index 638633b..247078c 100644 ---- a/vpx_scale/generic/yv12extend.c -+++ b/vpx_scale/generic/yv12extend.c -@@ -21,184 +21,174 @@ - * - ****************************************************************************/ - void --vp8_yv12_extend_frame_borders_c(YV12_BUFFER_CONFIG *ybf) --{ -- int i; -- unsigned char *src_ptr1, *src_ptr2; -- unsigned char *dest_ptr1, *dest_ptr2; -- -- unsigned int Border; -- int plane_stride; -- int plane_height; -- int plane_width; -- -- /***********/ -- /* Y Plane */ -- /***********/ -- Border = ybf->border; -- plane_stride = ybf->y_stride; -- plane_height = ybf->y_height; -- plane_width = ybf->y_width; -- -- /* copy the left and right most columns out */ -- src_ptr1 = ybf->y_buffer; -- src_ptr2 = src_ptr1 + plane_width - 1; -- dest_ptr1 = src_ptr1 - Border; -- dest_ptr2 = src_ptr2 + 1; -- -- for (i = 0; i < plane_height; i++) -- { -- vpx_memset(dest_ptr1, src_ptr1[0], Border); -- vpx_memset(dest_ptr2, src_ptr2[0], Border); -- src_ptr1 += plane_stride; -- src_ptr2 += plane_stride; -- dest_ptr1 += plane_stride; -- dest_ptr2 += plane_stride; -- } -- -- /* Now copy the top and bottom source lines into each line of the respective borders */ -- src_ptr1 = ybf->y_buffer - Border; -- src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; -- dest_ptr1 = src_ptr1 - (Border * plane_stride); -- dest_ptr2 = src_ptr2 + plane_stride; -- -- for (i = 0; i < (int)Border; i++) -- { -- vpx_memcpy(dest_ptr1, src_ptr1, plane_stride); -- vpx_memcpy(dest_ptr2, src_ptr2, plane_stride); -- dest_ptr1 += plane_stride; -- dest_ptr2 += plane_stride; -- } -- -- -- /***********/ -- /* U Plane */ -- /***********/ -- plane_stride = ybf->uv_stride; -- plane_height = ybf->uv_height; -- plane_width = ybf->uv_width; -- Border /= 2; -- -- /* copy the left and right most columns out */ -- src_ptr1 = ybf->u_buffer; -- src_ptr2 = src_ptr1 + plane_width - 1; -- dest_ptr1 = src_ptr1 - Border; -- dest_ptr2 = src_ptr2 + 1; -- -- for (i = 0; i < plane_height; i++) -- { -- vpx_memset(dest_ptr1, src_ptr1[0], Border); -- vpx_memset(dest_ptr2, src_ptr2[0], Border); -- src_ptr1 += plane_stride; -- src_ptr2 += plane_stride; -- dest_ptr1 += plane_stride; -- dest_ptr2 += plane_stride; -- } -- -- /* Now copy the top and bottom source lines into each line of the respective borders */ -- src_ptr1 = ybf->u_buffer - Border; -- src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; -- dest_ptr1 = src_ptr1 - (Border * plane_stride); -- dest_ptr2 = src_ptr2 + plane_stride; -- -- for (i = 0; i < (int)(Border); i++) -- { -- vpx_memcpy(dest_ptr1, src_ptr1, plane_stride); -- 
vpx_memcpy(dest_ptr2, src_ptr2, plane_stride); -- dest_ptr1 += plane_stride; -- dest_ptr2 += plane_stride; -- } -- -- /***********/ -- /* V Plane */ -- /***********/ -- -- /* copy the left and right most columns out */ -- src_ptr1 = ybf->v_buffer; -- src_ptr2 = src_ptr1 + plane_width - 1; -- dest_ptr1 = src_ptr1 - Border; -- dest_ptr2 = src_ptr2 + 1; -- -- for (i = 0; i < plane_height; i++) -- { -- vpx_memset(dest_ptr1, src_ptr1[0], Border); -- vpx_memset(dest_ptr2, src_ptr2[0], Border); -- src_ptr1 += plane_stride; -- src_ptr2 += plane_stride; -- dest_ptr1 += plane_stride; -- dest_ptr2 += plane_stride; -- } -- -- /* Now copy the top and bottom source lines into each line of the respective borders */ -- src_ptr1 = ybf->v_buffer - Border; -- src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; -- dest_ptr1 = src_ptr1 - (Border * plane_stride); -- dest_ptr2 = src_ptr2 + plane_stride; -- -- for (i = 0; i < (int)(Border); i++) -- { -- vpx_memcpy(dest_ptr1, src_ptr1, plane_stride); -- vpx_memcpy(dest_ptr2, src_ptr2, plane_stride); -- dest_ptr1 += plane_stride; -- dest_ptr2 += plane_stride; -- } -+vp8_yv12_extend_frame_borders_c(YV12_BUFFER_CONFIG *ybf) { -+ int i; -+ unsigned char *src_ptr1, *src_ptr2; -+ unsigned char *dest_ptr1, *dest_ptr2; -+ -+ unsigned int Border; -+ int plane_stride; -+ int plane_height; -+ int plane_width; -+ -+ /***********/ -+ /* Y Plane */ -+ /***********/ -+ Border = ybf->border; -+ plane_stride = ybf->y_stride; -+ plane_height = ybf->y_height; -+ plane_width = ybf->y_width; -+ -+ /* copy the left and right most columns out */ -+ src_ptr1 = ybf->y_buffer; -+ src_ptr2 = src_ptr1 + plane_width - 1; -+ dest_ptr1 = src_ptr1 - Border; -+ dest_ptr2 = src_ptr2 + 1; -+ -+ for (i = 0; i < plane_height; i++) { -+ vpx_memset(dest_ptr1, src_ptr1[0], Border); -+ vpx_memset(dest_ptr2, src_ptr2[0], Border); -+ src_ptr1 += plane_stride; -+ src_ptr2 += plane_stride; -+ dest_ptr1 += plane_stride; -+ dest_ptr2 += plane_stride; -+ } -+ -+ /* Now copy the top and bottom source lines into each line of the respective borders */ -+ src_ptr1 = ybf->y_buffer - Border; -+ src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; -+ dest_ptr1 = src_ptr1 - (Border * plane_stride); -+ dest_ptr2 = src_ptr2 + plane_stride; -+ -+ for (i = 0; i < (int)Border; i++) { -+ vpx_memcpy(dest_ptr1, src_ptr1, plane_stride); -+ vpx_memcpy(dest_ptr2, src_ptr2, plane_stride); -+ dest_ptr1 += plane_stride; -+ dest_ptr2 += plane_stride; -+ } -+ -+ -+ /***********/ -+ /* U Plane */ -+ /***********/ -+ plane_stride = ybf->uv_stride; -+ plane_height = ybf->uv_height; -+ plane_width = ybf->uv_width; -+ Border /= 2; -+ -+ /* copy the left and right most columns out */ -+ src_ptr1 = ybf->u_buffer; -+ src_ptr2 = src_ptr1 + plane_width - 1; -+ dest_ptr1 = src_ptr1 - Border; -+ dest_ptr2 = src_ptr2 + 1; -+ -+ for (i = 0; i < plane_height; i++) { -+ vpx_memset(dest_ptr1, src_ptr1[0], Border); -+ vpx_memset(dest_ptr2, src_ptr2[0], Border); -+ src_ptr1 += plane_stride; -+ src_ptr2 += plane_stride; -+ dest_ptr1 += plane_stride; -+ dest_ptr2 += plane_stride; -+ } -+ -+ /* Now copy the top and bottom source lines into each line of the respective borders */ -+ src_ptr1 = ybf->u_buffer - Border; -+ src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; -+ dest_ptr1 = src_ptr1 - (Border * plane_stride); -+ dest_ptr2 = src_ptr2 + plane_stride; -+ -+ for (i = 0; i < (int)(Border); i++) { -+ vpx_memcpy(dest_ptr1, src_ptr1, plane_stride); -+ vpx_memcpy(dest_ptr2, src_ptr2, plane_stride); -+ 
dest_ptr1 += plane_stride; -+ dest_ptr2 += plane_stride; -+ } -+ -+ /***********/ -+ /* V Plane */ -+ /***********/ -+ -+ /* copy the left and right most columns out */ -+ src_ptr1 = ybf->v_buffer; -+ src_ptr2 = src_ptr1 + plane_width - 1; -+ dest_ptr1 = src_ptr1 - Border; -+ dest_ptr2 = src_ptr2 + 1; -+ -+ for (i = 0; i < plane_height; i++) { -+ vpx_memset(dest_ptr1, src_ptr1[0], Border); -+ vpx_memset(dest_ptr2, src_ptr2[0], Border); -+ src_ptr1 += plane_stride; -+ src_ptr2 += plane_stride; -+ dest_ptr1 += plane_stride; -+ dest_ptr2 += plane_stride; -+ } -+ -+ /* Now copy the top and bottom source lines into each line of the respective borders */ -+ src_ptr1 = ybf->v_buffer - Border; -+ src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; -+ dest_ptr1 = src_ptr1 - (Border * plane_stride); -+ dest_ptr2 = src_ptr2 + plane_stride; -+ -+ for (i = 0; i < (int)(Border); i++) { -+ vpx_memcpy(dest_ptr1, src_ptr1, plane_stride); -+ vpx_memcpy(dest_ptr2, src_ptr2, plane_stride); -+ dest_ptr1 += plane_stride; -+ dest_ptr2 += plane_stride; -+ } - } - - - static void --extend_frame_borders_yonly_c(YV12_BUFFER_CONFIG *ybf) --{ -- int i; -- unsigned char *src_ptr1, *src_ptr2; -- unsigned char *dest_ptr1, *dest_ptr2; -- -- unsigned int Border; -- int plane_stride; -- int plane_height; -- int plane_width; -- -- /***********/ -- /* Y Plane */ -- /***********/ -- Border = ybf->border; -- plane_stride = ybf->y_stride; -- plane_height = ybf->y_height; -- plane_width = ybf->y_width; -- -- /* copy the left and right most columns out */ -- src_ptr1 = ybf->y_buffer; -- src_ptr2 = src_ptr1 + plane_width - 1; -- dest_ptr1 = src_ptr1 - Border; -- dest_ptr2 = src_ptr2 + 1; -- -- for (i = 0; i < plane_height; i++) -- { -- vpx_memset(dest_ptr1, src_ptr1[0], Border); -- vpx_memset(dest_ptr2, src_ptr2[0], Border); -- src_ptr1 += plane_stride; -- src_ptr2 += plane_stride; -- dest_ptr1 += plane_stride; -- dest_ptr2 += plane_stride; -- } -- -- /* Now copy the top and bottom source lines into each line of the respective borders */ -- src_ptr1 = ybf->y_buffer - Border; -- src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; -- dest_ptr1 = src_ptr1 - (Border * plane_stride); -- dest_ptr2 = src_ptr2 + plane_stride; -- -- for (i = 0; i < (int)Border; i++) -- { -- vpx_memcpy(dest_ptr1, src_ptr1, plane_stride); -- vpx_memcpy(dest_ptr2, src_ptr2, plane_stride); -- dest_ptr1 += plane_stride; -- dest_ptr2 += plane_stride; -- } -- -- plane_stride /= 2; -- plane_height /= 2; -- plane_width /= 2; -- Border /= 2; -+extend_frame_borders_yonly_c(YV12_BUFFER_CONFIG *ybf) { -+ int i; -+ unsigned char *src_ptr1, *src_ptr2; -+ unsigned char *dest_ptr1, *dest_ptr2; -+ -+ unsigned int Border; -+ int plane_stride; -+ int plane_height; -+ int plane_width; -+ -+ /***********/ -+ /* Y Plane */ -+ /***********/ -+ Border = ybf->border; -+ plane_stride = ybf->y_stride; -+ plane_height = ybf->y_height; -+ plane_width = ybf->y_width; -+ -+ /* copy the left and right most columns out */ -+ src_ptr1 = ybf->y_buffer; -+ src_ptr2 = src_ptr1 + plane_width - 1; -+ dest_ptr1 = src_ptr1 - Border; -+ dest_ptr2 = src_ptr2 + 1; -+ -+ for (i = 0; i < plane_height; i++) { -+ vpx_memset(dest_ptr1, src_ptr1[0], Border); -+ vpx_memset(dest_ptr2, src_ptr2[0], Border); -+ src_ptr1 += plane_stride; -+ src_ptr2 += plane_stride; -+ dest_ptr1 += plane_stride; -+ dest_ptr2 += plane_stride; -+ } -+ -+ /* Now copy the top and bottom source lines into each line of the respective borders */ -+ src_ptr1 = ybf->y_buffer - Border; -+ src_ptr2 = 
src_ptr1 + (plane_height * plane_stride) - plane_stride; -+ dest_ptr1 = src_ptr1 - (Border * plane_stride); -+ dest_ptr2 = src_ptr2 + plane_stride; -+ -+ for (i = 0; i < (int)Border; i++) { -+ vpx_memcpy(dest_ptr1, src_ptr1, plane_stride); -+ vpx_memcpy(dest_ptr2, src_ptr2, plane_stride); -+ dest_ptr1 += plane_stride; -+ dest_ptr2 += plane_stride; -+ } -+ -+ plane_stride /= 2; -+ plane_height /= 2; -+ plane_width /= 2; -+ Border /= 2; - - } - -@@ -221,57 +211,53 @@ extend_frame_borders_yonly_c(YV12_BUFFER_CONFIG *ybf) - * - ****************************************************************************/ - void --vp8_yv12_copy_frame_c(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc) --{ -- int row; -- unsigned char *source, *dest; -- -- source = src_ybc->y_buffer; -- dest = dst_ybc->y_buffer; -- -- for (row = 0; row < src_ybc->y_height; row++) -- { -- vpx_memcpy(dest, source, src_ybc->y_width); -- source += src_ybc->y_stride; -- dest += dst_ybc->y_stride; -- } -- -- source = src_ybc->u_buffer; -- dest = dst_ybc->u_buffer; -- -- for (row = 0; row < src_ybc->uv_height; row++) -- { -- vpx_memcpy(dest, source, src_ybc->uv_width); -- source += src_ybc->uv_stride; -- dest += dst_ybc->uv_stride; -- } -- -- source = src_ybc->v_buffer; -- dest = dst_ybc->v_buffer; -- -- for (row = 0; row < src_ybc->uv_height; row++) -- { -- vpx_memcpy(dest, source, src_ybc->uv_width); -- source += src_ybc->uv_stride; -- dest += dst_ybc->uv_stride; -- } -- -- vp8_yv12_extend_frame_borders_c(dst_ybc); -+vp8_yv12_copy_frame_c(YV12_BUFFER_CONFIG *src_ybc, -+ YV12_BUFFER_CONFIG *dst_ybc) { -+ int row; -+ unsigned char *source, *dest; -+ -+ source = src_ybc->y_buffer; -+ dest = dst_ybc->y_buffer; -+ -+ for (row = 0; row < src_ybc->y_height; row++) { -+ vpx_memcpy(dest, source, src_ybc->y_width); -+ source += src_ybc->y_stride; -+ dest += dst_ybc->y_stride; -+ } -+ -+ source = src_ybc->u_buffer; -+ dest = dst_ybc->u_buffer; -+ -+ for (row = 0; row < src_ybc->uv_height; row++) { -+ vpx_memcpy(dest, source, src_ybc->uv_width); -+ source += src_ybc->uv_stride; -+ dest += dst_ybc->uv_stride; -+ } -+ -+ source = src_ybc->v_buffer; -+ dest = dst_ybc->v_buffer; -+ -+ for (row = 0; row < src_ybc->uv_height; row++) { -+ vpx_memcpy(dest, source, src_ybc->uv_width); -+ source += src_ybc->uv_stride; -+ dest += dst_ybc->uv_stride; -+ } -+ -+ vp8_yv12_extend_frame_borders_c(dst_ybc); - } - --void vp8_yv12_copy_y_c(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc) --{ -- int row; -- unsigned char *source, *dest; -+void vp8_yv12_copy_y_c(YV12_BUFFER_CONFIG *src_ybc, -+ YV12_BUFFER_CONFIG *dst_ybc) { -+ int row; -+ unsigned char *source, *dest; - - -- source = src_ybc->y_buffer; -- dest = dst_ybc->y_buffer; -+ source = src_ybc->y_buffer; -+ dest = dst_ybc->y_buffer; - -- for (row = 0; row < src_ybc->y_height; row++) -- { -- vpx_memcpy(dest, source, src_ybc->y_width); -- source += src_ybc->y_stride; -- dest += dst_ybc->y_stride; -- } -+ for (row = 0; row < src_ybc->y_height; row++) { -+ vpx_memcpy(dest, source, src_ybc->y_width); -+ source += src_ybc->y_stride; -+ dest += dst_ybc->y_stride; -+ } - } -diff --git a/vpx_scale/include/generic/vpxscale_arbitrary.h b/vpx_scale/include/generic/vpxscale_arbitrary.h -index 39de181..c535252 100644 ---- a/vpx_scale/include/generic/vpxscale_arbitrary.h -+++ b/vpx_scale/include/generic/vpxscale_arbitrary.h -@@ -14,33 +14,32 @@ - - #include "vpx_scale/yv12config.h" - --typedef struct --{ -- int in_width; -- int in_height; -- -- int out_width; -- int out_height; -- int max_usable_out_width; 
-- -- // numerator for the width and height -- int nw; -- int nh; -- int nh_uv; -- -- // output to input correspondance array -- short *l_w; -- short *l_h; -- short *l_h_uv; -- -- // polyphase coefficients -- short *c_w; -- short *c_h; -- short *c_h_uv; -- -- // buffer for horizontal filtering. -- unsigned char *hbuf; -- unsigned char *hbuf_uv; -+typedef struct { -+ int in_width; -+ int in_height; -+ -+ int out_width; -+ int out_height; -+ int max_usable_out_width; -+ -+ // numerator for the width and height -+ int nw; -+ int nh; -+ int nh_uv; -+ -+ // output to input correspondance array -+ short *l_w; -+ short *l_h; -+ short *l_h_uv; -+ -+ // polyphase coefficients -+ short *c_w; -+ short *c_h; -+ short *c_h_uv; -+ -+ // buffer for horizontal filtering. -+ unsigned char *hbuf; -+ unsigned char *hbuf_uv; - } BICUBIC_SCALER_STRUCT; - - int bicubic_coefficient_setup(int in_width, int in_height, int out_width, int out_height); -diff --git a/vpx_scale/scale_mode.h b/vpx_scale/scale_mode.h -index 1476e64..5581385 100644 ---- a/vpx_scale/scale_mode.h -+++ b/vpx_scale/scale_mode.h -@@ -17,12 +17,11 @@ - #ifndef SCALE_MODE_H - #define SCALE_MODE_H - --typedef enum --{ -- MAINTAIN_ASPECT_RATIO = 0x0, -- SCALE_TO_FIT = 0x1, -- CENTER = 0x2, -- OTHER = 0x3 -+typedef enum { -+ MAINTAIN_ASPECT_RATIO = 0x0, -+ SCALE_TO_FIT = 0x1, -+ CENTER = 0x2, -+ OTHER = 0x3 - } SCALE_MODE; - - -diff --git a/vpx_scale/vpxscale.h b/vpx_scale/vpxscale.h -index 8919a24..3c2194d 100644 ---- a/vpx_scale/vpxscale.h -+++ b/vpx_scale/vpxscale.h -@@ -14,29 +14,24 @@ - - #include "vpx_scale/yv12config.h" - --extern void vp8_yv12_scale_or_center --( -- YV12_BUFFER_CONFIG *src_yuv_config, -- YV12_BUFFER_CONFIG *dst_yuv_config, -- int expanded_frame_width, -- int expanded_frame_height, -- int scaling_mode, -- int HScale, -- int HRatio, -- int VScale, -- int VRatio --); --extern void vp8_scale_frame --( -- YV12_BUFFER_CONFIG *src, -- YV12_BUFFER_CONFIG *dst, -- unsigned char *temp_area, -- unsigned char temp_height, -- unsigned int hscale, -- unsigned int hratio, -- unsigned int vscale, -- unsigned int vratio, -- unsigned int interlaced --); -+extern void vp8_yv12_scale_or_center(YV12_BUFFER_CONFIG *src_yuv_config, -+ YV12_BUFFER_CONFIG *dst_yuv_config, -+ int expanded_frame_width, -+ int expanded_frame_height, -+ int scaling_mode, -+ int HScale, -+ int HRatio, -+ int VScale, -+ int VRatio); -+ -+extern void vp8_scale_frame(YV12_BUFFER_CONFIG *src, -+ YV12_BUFFER_CONFIG *dst, -+ unsigned char *temp_area, -+ unsigned char temp_height, -+ unsigned int hscale, -+ unsigned int hratio, -+ unsigned int vscale, -+ unsigned int vratio, -+ unsigned int interlaced); - - #endif -diff --git a/vpx_scale/win32/scaleopt.c b/vpx_scale/win32/scaleopt.c -index 3711fe5..2d96cc7 100644 ---- a/vpx_scale/win32/scaleopt.c -+++ b/vpx_scale/win32/scaleopt.c -@@ -61,114 +61,112 @@ __declspec(align(16)) const static unsigned short const35_1[] = { 102, 205, 51, - static - void horizontal_line_3_5_scale_mmx - ( -- const unsigned char *source, -- unsigned int source_width, -- unsigned char *dest, -- unsigned int dest_width --) --{ -- (void) dest_width; -+ const unsigned char *source, -+ unsigned int source_width, -+ unsigned char *dest, -+ unsigned int dest_width -+) { -+ (void) dest_width; - -- __asm -- { -+ __asm { - -- push ebx -+ push ebx - -- mov esi, source -- mov edi, dest -+ mov esi, source -+ mov edi, dest - -- mov ecx, source_width -- lea edx, [esi+ecx-3]; -+ mov ecx, source_width -+ lea edx, [esi+ecx-3]; - -- movq mm5, const35_1 // mm5 = 66 xx cd 
xx 33 xx 9a xx -- movq mm6, const35_2 // mm6 = 9a xx 33 xx cd xx 66 xx -+ movq mm5, const35_1 // mm5 = 66 xx cd xx 33 xx 9a xx -+ movq mm6, const35_2 // mm6 = 9a xx 33 xx cd xx 66 xx - -- movq mm4, round_values // mm4 = 80 xx 80 xx 80 xx 80 xx -- pxor mm7, mm7 // clear mm7 -+ movq mm4, round_values // mm4 = 80 xx 80 xx 80 xx 80 xx -+ pxor mm7, mm7 // clear mm7 - -- horiz_line_3_5_loop: -+ horiz_line_3_5_loop: - -- mov eax, DWORD PTR [esi] // eax = 00 01 02 03 -- mov ebx, eax -+ mov eax, DWORD PTR [esi] // eax = 00 01 02 03 -+ mov ebx, eax - -- and ebx, 0xffff00 // ebx = xx 01 02 xx -- mov ecx, eax // ecx = 00 01 02 03 -+ and ebx, 0xffff00 // ebx = xx 01 02 xx -+ mov ecx, eax // ecx = 00 01 02 03 - -- and eax, 0xffff0000 // eax = xx xx 02 03 -- xor ecx, eax // ecx = 00 01 xx xx -+ and eax, 0xffff0000 // eax = xx xx 02 03 -+ xor ecx, eax // ecx = 00 01 xx xx - -- shr ebx, 8 // ebx = 01 02 xx xx -- or eax, ebx // eax = 01 02 02 03 -+ shr ebx, 8 // ebx = 01 02 xx xx -+ or eax, ebx // eax = 01 02 02 03 - -- shl ebx, 16 // ebx = xx xx 01 02 -- movd mm1, eax // mm1 = 01 02 02 03 xx xx xx xx -+ shl ebx, 16 // ebx = xx xx 01 02 -+ movd mm1, eax // mm1 = 01 02 02 03 xx xx xx xx - -- or ebx, ecx // ebx = 00 01 01 02 -- punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 02 xx 03 xx -+ or ebx, ecx // ebx = 00 01 01 02 -+ punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 02 xx 03 xx - -- movd mm0, ebx // mm0 = 00 01 01 02 -- pmullw mm1, mm6 // -+ movd mm0, ebx // mm0 = 00 01 01 02 -+ pmullw mm1, mm6 // - -- punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 01 xx 02 xx -- pmullw mm0, mm5 // -+ punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 01 xx 02 xx -+ pmullw mm0, mm5 // - -- mov [edi], ebx // writeoutput 00 xx xx xx -- add esi, 3 -+ mov [edi], ebx // writeoutput 00 xx xx xx -+ add esi, 3 - -- add edi, 5 -- paddw mm0, mm1 -+ add edi, 5 -+ paddw mm0, mm1 - -- paddw mm0, mm4 -- psrlw mm0, 8 -+ paddw mm0, mm4 -+ psrlw mm0, 8 - -- cmp esi, edx -- packuswb mm0, mm7 -+ cmp esi, edx -+ packuswb mm0, mm7 - -- movd DWORD Ptr [edi-4], mm0 -- jl horiz_line_3_5_loop -+ movd DWORD Ptr [edi-4], mm0 -+ jl horiz_line_3_5_loop - --//Exit: -- mov eax, DWORD PTR [esi] // eax = 00 01 02 03 -- mov ebx, eax -+// Exit: -+ mov eax, DWORD PTR [esi] // eax = 00 01 02 03 -+ mov ebx, eax - -- and ebx, 0xffff00 // ebx = xx 01 02 xx -- mov ecx, eax // ecx = 00 01 02 03 -+ and ebx, 0xffff00 // ebx = xx 01 02 xx -+ mov ecx, eax // ecx = 00 01 02 03 - -- and eax, 0xffff0000 // eax = xx xx 02 03 -- xor ecx, eax // ecx = 00 01 xx xx -+ and eax, 0xffff0000 // eax = xx xx 02 03 -+ xor ecx, eax // ecx = 00 01 xx xx - -- shr ebx, 8 // ebx = 01 02 xx xx -- or eax, ebx // eax = 01 02 02 03 -+ shr ebx, 8 // ebx = 01 02 xx xx -+ or eax, ebx // eax = 01 02 02 03 - -- shl eax, 8 // eax = xx 01 02 02 -- and eax, 0xffff0000 // eax = xx xx 02 02 -+ shl eax, 8 // eax = xx 01 02 02 -+ and eax, 0xffff0000 // eax = xx xx 02 02 - -- or eax, ebx // eax = 01 02 02 02 -+ or eax, ebx // eax = 01 02 02 02 - -- shl ebx, 16 // ebx = xx xx 01 02 -- movd mm1, eax // mm1 = 01 02 02 02 xx xx xx xx -+ shl ebx, 16 // ebx = xx xx 01 02 -+ movd mm1, eax // mm1 = 01 02 02 02 xx xx xx xx - -- or ebx, ecx // ebx = 00 01 01 02 -- punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 02 xx 02 xx -+ or ebx, ecx // ebx = 00 01 01 02 -+ punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 02 xx 02 xx - -- movd mm0, ebx // mm0 = 00 01 01 02 -- pmullw mm1, mm6 // -+ movd mm0, ebx // mm0 = 00 01 01 02 -+ pmullw mm1, mm6 // - -- punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 01 xx 02 xx -- pmullw mm0, mm5 // -+ punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 
01 xx 02 xx -+ pmullw mm0, mm5 // - -- mov [edi], ebx // writeoutput 00 xx xx xx -- paddw mm0, mm1 -+ mov [edi], ebx // writeoutput 00 xx xx xx -+ paddw mm0, mm1 - -- paddw mm0, mm4 -- psrlw mm0, 8 -+ paddw mm0, mm4 -+ psrlw mm0, 8 - -- packuswb mm0, mm7 -- movd DWORD Ptr [edi+1], mm0 -+ packuswb mm0, mm7 -+ movd DWORD Ptr [edi+1], mm0 - -- pop ebx -+ pop ebx - -- } -+ } - - } - -@@ -194,120 +192,118 @@ void horizontal_line_3_5_scale_mmx - static - void horizontal_line_4_5_scale_mmx - ( -- const unsigned char *source, -- unsigned int source_width, -- unsigned char *dest, -- unsigned int dest_width --) --{ -- (void)dest_width; -+ const unsigned char *source, -+ unsigned int source_width, -+ unsigned char *dest, -+ unsigned int dest_width -+) { -+ (void)dest_width; - -- __asm -- { -+ __asm { - -- mov esi, source -- mov edi, dest -+ mov esi, source -+ mov edi, dest - -- mov ecx, source_width -- lea edx, [esi+ecx-8]; -+ mov ecx, source_width -+ lea edx, [esi+ecx-8]; - -- movq mm5, const45_1 // mm5 = 33 xx 66 xx 9a xx cd xx -- movq mm6, const45_2 // mm6 = cd xx 9a xx 66 xx 33 xx -+ movq mm5, const45_1 // mm5 = 33 xx 66 xx 9a xx cd xx -+ movq mm6, const45_2 // mm6 = cd xx 9a xx 66 xx 33 xx - -- movq mm4, round_values // mm4 = 80 xx 80 xx 80 xx 80 xx -- pxor mm7, mm7 // clear mm7 -+ movq mm4, round_values // mm4 = 80 xx 80 xx 80 xx 80 xx -+ pxor mm7, mm7 // clear mm7 - -- horiz_line_4_5_loop: -+ horiz_line_4_5_loop: - -- movq mm0, QWORD PTR [esi] // mm0 = 00 01 02 03 04 05 06 07 -- movq mm1, QWORD PTR [esi+1]; // mm1 = 01 02 03 04 05 06 07 08 -+ movq mm0, QWORD PTR [esi] // mm0 = 00 01 02 03 04 05 06 07 -+ movq mm1, QWORD PTR [esi+1]; // mm1 = 01 02 03 04 05 06 07 08 - -- movq mm2, mm0 // mm2 = 00 01 02 03 04 05 06 07 -- movq mm3, mm1 // mm3 = 01 02 03 04 05 06 07 08 -+ movq mm2, mm0 // mm2 = 00 01 02 03 04 05 06 07 -+ movq mm3, mm1 // mm3 = 01 02 03 04 05 06 07 08 - -- movd DWORD PTR [edi], mm0 // write output 00 xx xx xx -- punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 02 xx 03 xx -+ movd DWORD PTR [edi], mm0 // write output 00 xx xx xx -+ punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 02 xx 03 xx - -- punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 03 xx 04 xx -- pmullw mm0, mm5 // 00* 51 01*102 02*154 03*205 -+ punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 03 xx 04 xx -+ pmullw mm0, mm5 // 00* 51 01*102 02*154 03*205 - -- pmullw mm1, mm6 // 01*205 02*154 03*102 04* 51 -- punpckhbw mm2, mm7 // mm2 = 04 xx 05 xx 06 xx 07 xx -+ pmullw mm1, mm6 // 01*205 02*154 03*102 04* 51 -+ punpckhbw mm2, mm7 // mm2 = 04 xx 05 xx 06 xx 07 xx - -- movd DWORD PTR [edi+5], mm2 // write ouput 05 xx xx xx -- pmullw mm2, mm5 // 04* 51 05*102 06*154 07*205 -+ movd DWORD PTR [edi+5], mm2 // write ouput 05 xx xx xx -+ pmullw mm2, mm5 // 04* 51 05*102 06*154 07*205 - -- punpckhbw mm3, mm7 // mm3 = 05 xx 06 xx 07 xx 08 xx -- pmullw mm3, mm6 // 05*205 06*154 07*102 08* 51 -+ punpckhbw mm3, mm7 // mm3 = 05 xx 06 xx 07 xx 08 xx -+ pmullw mm3, mm6 // 05*205 06*154 07*102 08* 51 - -- paddw mm0, mm1 // added round values -- paddw mm0, mm4 -+ paddw mm0, mm1 // added round values -+ paddw mm0, mm4 - -- psrlw mm0, 8 // output: 01 xx 02 xx 03 xx 04 xx -- packuswb mm0, mm7 -+ psrlw mm0, 8 // output: 01 xx 02 xx 03 xx 04 xx -+ packuswb mm0, mm7 - -- movd DWORD PTR [edi+1], mm0 // write output 01 02 03 04 -- add edi, 10 -+ movd DWORD PTR [edi+1], mm0 // write output 01 02 03 04 -+ add edi, 10 - -- add esi, 8 -- paddw mm2, mm3 // -+ add esi, 8 -+ paddw mm2, mm3 // - -- paddw mm2, mm4 // added round values -- cmp esi, edx -+ paddw mm2, mm4 // added round values 
-+ cmp esi, edx - -- psrlw mm2, 8 -- packuswb mm2, mm7 -+ psrlw mm2, 8 -+ packuswb mm2, mm7 - -- movd DWORD PTR [edi-4], mm2 // writeoutput 06 07 08 09 -- jl horiz_line_4_5_loop -+ movd DWORD PTR [edi-4], mm2 // writeoutput 06 07 08 09 -+ jl horiz_line_4_5_loop - --//Exit: -- movq mm0, [esi] // mm0 = 00 01 02 03 04 05 06 07 -- movq mm1, mm0 // mm1 = 00 01 02 03 04 05 06 07 -+// Exit: -+ movq mm0, [esi] // mm0 = 00 01 02 03 04 05 06 07 -+ movq mm1, mm0 // mm1 = 00 01 02 03 04 05 06 07 - -- movq mm2, mm0 // mm2 = 00 01 02 03 04 05 06 07 -- psrlq mm1, 8 // mm1 = 01 02 03 04 05 06 07 00 -+ movq mm2, mm0 // mm2 = 00 01 02 03 04 05 06 07 -+ psrlq mm1, 8 // mm1 = 01 02 03 04 05 06 07 00 - -- movq mm3, mask45 // mm3 = 00 00 00 00 00 00 ff 00 -- pand mm3, mm1 // mm3 = 00 00 00 00 00 00 07 00 -+ movq mm3, mask45 // mm3 = 00 00 00 00 00 00 ff 00 -+ pand mm3, mm1 // mm3 = 00 00 00 00 00 00 07 00 - -- psllq mm3, 8 // mm3 = 00 00 00 00 00 00 00 07 -- por mm1, mm3 // mm1 = 01 02 03 04 05 06 07 07 -+ psllq mm3, 8 // mm3 = 00 00 00 00 00 00 00 07 -+ por mm1, mm3 // mm1 = 01 02 03 04 05 06 07 07 - -- movq mm3, mm1 -+ movq mm3, mm1 - -- movd DWORD PTR [edi], mm0 // write output 00 xx xx xx -- punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 02 xx 03 xx -+ movd DWORD PTR [edi], mm0 // write output 00 xx xx xx -+ punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 02 xx 03 xx - -- punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 03 xx 04 xx -- pmullw mm0, mm5 // 00* 51 01*102 02*154 03*205 -+ punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 03 xx 04 xx -+ pmullw mm0, mm5 // 00* 51 01*102 02*154 03*205 - -- pmullw mm1, mm6 // 01*205 02*154 03*102 04* 51 -- punpckhbw mm2, mm7 // mm2 = 04 xx 05 xx 06 xx 07 xx -+ pmullw mm1, mm6 // 01*205 02*154 03*102 04* 51 -+ punpckhbw mm2, mm7 // mm2 = 04 xx 05 xx 06 xx 07 xx - -- movd DWORD PTR [edi+5], mm2 // write ouput 05 xx xx xx -- pmullw mm2, mm5 // 04* 51 05*102 06*154 07*205 -+ movd DWORD PTR [edi+5], mm2 // write ouput 05 xx xx xx -+ pmullw mm2, mm5 // 04* 51 05*102 06*154 07*205 - -- punpckhbw mm3, mm7 // mm3 = 05 xx 06 xx 07 xx 08 xx -- pmullw mm3, mm6 // 05*205 06*154 07*102 07* 51 -+ punpckhbw mm3, mm7 // mm3 = 05 xx 06 xx 07 xx 08 xx -+ pmullw mm3, mm6 // 05*205 06*154 07*102 07* 51 - -- paddw mm0, mm1 // added round values -- paddw mm0, mm4 -+ paddw mm0, mm1 // added round values -+ paddw mm0, mm4 - -- psrlw mm0, 8 // output: 01 xx 02 xx 03 xx 04 xx -- packuswb mm0, mm7 // 01 02 03 04 xx xx xx xx -+ psrlw mm0, 8 // output: 01 xx 02 xx 03 xx 04 xx -+ packuswb mm0, mm7 // 01 02 03 04 xx xx xx xx - -- movd DWORD PTR [edi+1], mm0 // write output 01 02 03 04 -- paddw mm2, mm3 // -+ movd DWORD PTR [edi+1], mm0 // write output 01 02 03 04 -+ paddw mm2, mm3 // - -- paddw mm2, mm4 // added round values -- psrlw mm2, 8 -+ paddw mm2, mm4 // added round values -+ psrlw mm2, 8 - -- packuswb mm2, mm7 -- movd DWORD PTR [edi+6], mm2 // writeoutput 06 07 08 09 -+ packuswb mm2, mm7 -+ movd DWORD PTR [edi+6], mm2 // writeoutput 06 07 08 09 - - -- } -+ } - } - - /**************************************************************************** -@@ -332,167 +328,165 @@ void horizontal_line_4_5_scale_mmx - static - void vertical_band_4_5_scale_mmx - ( -- unsigned char *dest, -- unsigned int dest_pitch, -- unsigned int dest_width --) --{ -- __asm -- { -+ unsigned char *dest, -+ unsigned int dest_pitch, -+ unsigned int dest_width -+) { -+ __asm { - -- mov esi, dest // Get the source and destination pointer -- mov ecx, dest_pitch // Get the pitch size -+ mov esi, dest // Get the source and destination pointer -+ mov ecx, 
dest_pitch // Get the pitch size - -- lea edi, [esi+ecx*2] // tow lines below -- add edi, ecx // three lines below -+ lea edi, [esi+ecx*2] // tow lines below -+ add edi, ecx // three lines below - -- pxor mm7, mm7 // clear out mm7 -- mov edx, dest_width // Loop counter -+ pxor mm7, mm7 // clear out mm7 -+ mov edx, dest_width // Loop counter - -- vs_4_5_loop: -+ vs_4_5_loop: - -- movq mm0, QWORD ptr [esi] // src[0]; -- movq mm1, QWORD ptr [esi+ecx] // src[1]; -+ movq mm0, QWORD ptr [esi] // src[0]; -+ movq mm1, QWORD ptr [esi+ecx] // src[1]; - -- movq mm2, mm0 // Make a copy -- punpcklbw mm0, mm7 // unpack low to word -+ movq mm2, mm0 // Make a copy -+ punpcklbw mm0, mm7 // unpack low to word - -- movq mm5, one_fifth -- punpckhbw mm2, mm7 // unpack high to word -+ movq mm5, one_fifth -+ punpckhbw mm2, mm7 // unpack high to word - -- pmullw mm0, mm5 // a * 1/5 -+ pmullw mm0, mm5 // a * 1/5 - -- movq mm3, mm1 // make a copy -- punpcklbw mm1, mm7 // unpack low to word -+ movq mm3, mm1 // make a copy -+ punpcklbw mm1, mm7 // unpack low to word - -- pmullw mm2, mm5 // a * 1/5 -- movq mm6, four_fifths // constan -+ pmullw mm2, mm5 // a * 1/5 -+ movq mm6, four_fifths // constan - -- movq mm4, mm1 // copy of low b -- pmullw mm4, mm6 // b * 4/5 -+ movq mm4, mm1 // copy of low b -+ pmullw mm4, mm6 // b * 4/5 - -- punpckhbw mm3, mm7 // unpack high to word -- movq mm5, mm3 // copy of high b -+ punpckhbw mm3, mm7 // unpack high to word -+ movq mm5, mm3 // copy of high b - -- pmullw mm5, mm6 // b * 4/5 -- paddw mm0, mm4 // a * 1/5 + b * 4/5 -+ pmullw mm5, mm6 // b * 4/5 -+ paddw mm0, mm4 // a * 1/5 + b * 4/5 - -- paddw mm2, mm5 // a * 1/5 + b * 4/5 -- paddw mm0, round_values // + 128 -+ paddw mm2, mm5 // a * 1/5 + b * 4/5 -+ paddw mm0, round_values // + 128 - -- paddw mm2, round_values // + 128 -- psrlw mm0, 8 -+ paddw mm2, round_values // + 128 -+ psrlw mm0, 8 - -- psrlw mm2, 8 -- packuswb mm0, mm2 // des [1] -+ psrlw mm2, 8 -+ packuswb mm0, mm2 // des [1] - -- movq QWORD ptr [esi+ecx], mm0 // write des[1] -- movq mm0, [esi+ecx*2] // mm0 = src[2] -+ movq QWORD ptr [esi+ecx], mm0 // write des[1] -+ movq mm0, [esi+ecx*2] // mm0 = src[2] - -- // mm1, mm3 --- Src[1] -- // mm0 --- Src[2] -- // mm7 for unpacking -+ // mm1, mm3 --- Src[1] -+ // mm0 --- Src[2] -+ // mm7 for unpacking - -- movq mm5, two_fifths -- movq mm2, mm0 // make a copy -+ movq mm5, two_fifths -+ movq mm2, mm0 // make a copy - -- pmullw mm1, mm5 // b * 2/5 -- movq mm6, three_fifths -+ pmullw mm1, mm5 // b * 2/5 -+ movq mm6, three_fifths - - -- punpcklbw mm0, mm7 // unpack low to word -- pmullw mm3, mm5 // b * 2/5 -+ punpcklbw mm0, mm7 // unpack low to word -+ pmullw mm3, mm5 // b * 2/5 - -- movq mm4, mm0 // make copy of c -- punpckhbw mm2, mm7 // unpack high to word -+ movq mm4, mm0 // make copy of c -+ punpckhbw mm2, mm7 // unpack high to word - -- pmullw mm4, mm6 // c * 3/5 -- movq mm5, mm2 -+ pmullw mm4, mm6 // c * 3/5 -+ movq mm5, mm2 - -- pmullw mm5, mm6 // c * 3/5 -- paddw mm1, mm4 // b * 2/5 + c * 3/5 -+ pmullw mm5, mm6 // c * 3/5 -+ paddw mm1, mm4 // b * 2/5 + c * 3/5 - -- paddw mm3, mm5 // b * 2/5 + c * 3/5 -- paddw mm1, round_values // + 128 -+ paddw mm3, mm5 // b * 2/5 + c * 3/5 -+ paddw mm1, round_values // + 128 - -- paddw mm3, round_values // + 128 -- psrlw mm1, 8 -+ paddw mm3, round_values // + 128 -+ psrlw mm1, 8 - -- psrlw mm3, 8 -- packuswb mm1, mm3 // des[2] -+ psrlw mm3, 8 -+ packuswb mm1, mm3 // des[2] - -- movq QWORD ptr [esi+ecx*2], mm1 // write des[2] -- movq mm1, [edi] // mm1=Src[3]; -+ movq QWORD ptr [esi+ecx*2], 
mm1 // write des[2] -+ movq mm1, [edi] // mm1=Src[3]; - -- // mm0, mm2 --- Src[2] -- // mm1 --- Src[3] -- // mm6 --- 3/5 -- // mm7 for unpacking -+ // mm0, mm2 --- Src[2] -+ // mm1 --- Src[3] -+ // mm6 --- 3/5 -+ // mm7 for unpacking - -- pmullw mm0, mm6 // c * 3/5 -- movq mm5, two_fifths // mm5 = 2/5 -+ pmullw mm0, mm6 // c * 3/5 -+ movq mm5, two_fifths // mm5 = 2/5 - -- movq mm3, mm1 // make a copy -- pmullw mm2, mm6 // c * 3/5 -+ movq mm3, mm1 // make a copy -+ pmullw mm2, mm6 // c * 3/5 - -- punpcklbw mm1, mm7 // unpack low -- movq mm4, mm1 // make a copy -+ punpcklbw mm1, mm7 // unpack low -+ movq mm4, mm1 // make a copy - -- punpckhbw mm3, mm7 // unpack high -- pmullw mm4, mm5 // d * 2/5 -+ punpckhbw mm3, mm7 // unpack high -+ pmullw mm4, mm5 // d * 2/5 - -- movq mm6, mm3 // make a copy -- pmullw mm6, mm5 // d * 2/5 -+ movq mm6, mm3 // make a copy -+ pmullw mm6, mm5 // d * 2/5 - -- paddw mm0, mm4 // c * 3/5 + d * 2/5 -- paddw mm2, mm6 // c * 3/5 + d * 2/5 -+ paddw mm0, mm4 // c * 3/5 + d * 2/5 -+ paddw mm2, mm6 // c * 3/5 + d * 2/5 - -- paddw mm0, round_values // + 128 -- paddw mm2, round_values // + 128 -+ paddw mm0, round_values // + 128 -+ paddw mm2, round_values // + 128 - -- psrlw mm0, 8 -- psrlw mm2, 8 -+ psrlw mm0, 8 -+ psrlw mm2, 8 - -- packuswb mm0, mm2 // des[3] -- movq QWORD ptr [edi], mm0 // write des[3] -+ packuswb mm0, mm2 // des[3] -+ movq QWORD ptr [edi], mm0 // write des[3] - -- // mm1, mm3 --- Src[3] -- // mm7 -- cleared for unpacking -+ // mm1, mm3 --- Src[3] -+ // mm7 -- cleared for unpacking - -- movq mm0, [edi+ecx*2] // mm0, Src[0] of the next group -+ movq mm0, [edi+ecx*2] // mm0, Src[0] of the next group - -- movq mm5, four_fifths // mm5 = 4/5 -- pmullw mm1, mm5 // d * 4/5 -+ movq mm5, four_fifths // mm5 = 4/5 -+ pmullw mm1, mm5 // d * 4/5 - -- movq mm6, one_fifth // mm6 = 1/5 -- movq mm2, mm0 // make a copy -+ movq mm6, one_fifth // mm6 = 1/5 -+ movq mm2, mm0 // make a copy - -- pmullw mm3, mm5 // d * 4/5 -- punpcklbw mm0, mm7 // unpack low -+ pmullw mm3, mm5 // d * 4/5 -+ punpcklbw mm0, mm7 // unpack low - -- pmullw mm0, mm6 // an * 1/5 -- punpckhbw mm2, mm7 // unpack high -+ pmullw mm0, mm6 // an * 1/5 -+ punpckhbw mm2, mm7 // unpack high - -- paddw mm1, mm0 // d * 4/5 + an * 1/5 -- pmullw mm2, mm6 // an * 1/5 -+ paddw mm1, mm0 // d * 4/5 + an * 1/5 -+ pmullw mm2, mm6 // an * 1/5 - -- paddw mm3, mm2 // d * 4/5 + an * 1/5 -- paddw mm1, round_values // + 128 -+ paddw mm3, mm2 // d * 4/5 + an * 1/5 -+ paddw mm1, round_values // + 128 - -- paddw mm3, round_values // + 128 -- psrlw mm1, 8 -+ paddw mm3, round_values // + 128 -+ psrlw mm1, 8 - -- psrlw mm3, 8 -- packuswb mm1, mm3 // des[4] -+ psrlw mm3, 8 -+ packuswb mm1, mm3 // des[4] - -- movq QWORD ptr [edi+ecx], mm1 // write des[4] -+ movq QWORD ptr [edi+ecx], mm1 // write des[4] - -- add edi, 8 -- add esi, 8 -+ add edi, 8 -+ add esi, 8 - -- sub edx, 8 -- jg vs_4_5_loop -- } -+ sub edx, 8 -+ jg vs_4_5_loop -+ } - } - - /**************************************************************************** -@@ -517,139 +511,137 @@ void vertical_band_4_5_scale_mmx - static - void last_vertical_band_4_5_scale_mmx - ( -- unsigned char *dest, -- unsigned int dest_pitch, -- unsigned int dest_width --) --{ -- __asm -- { -- mov esi, dest // Get the source and destination pointer -- mov ecx, dest_pitch // Get the pitch size -+ unsigned char *dest, -+ unsigned int dest_pitch, -+ unsigned int dest_width -+) { -+ __asm { -+ mov esi, dest // Get the source and destination pointer -+ mov ecx, dest_pitch // Get the pitch size - -- lea 
edi, [esi+ecx*2] // tow lines below -- add edi, ecx // three lines below -+ lea edi, [esi+ecx*2] // tow lines below -+ add edi, ecx // three lines below - -- pxor mm7, mm7 // clear out mm7 -- mov edx, dest_width // Loop counter -+ pxor mm7, mm7 // clear out mm7 -+ mov edx, dest_width // Loop counter - -- last_vs_4_5_loop: -+ last_vs_4_5_loop: - -- movq mm0, QWORD ptr [esi] // src[0]; -- movq mm1, QWORD ptr [esi+ecx] // src[1]; -+ movq mm0, QWORD ptr [esi] // src[0]; -+ movq mm1, QWORD ptr [esi+ecx] // src[1]; - -- movq mm2, mm0 // Make a copy -- punpcklbw mm0, mm7 // unpack low to word -+ movq mm2, mm0 // Make a copy -+ punpcklbw mm0, mm7 // unpack low to word - -- movq mm5, one_fifth -- punpckhbw mm2, mm7 // unpack high to word -+ movq mm5, one_fifth -+ punpckhbw mm2, mm7 // unpack high to word - -- pmullw mm0, mm5 // a * 1/5 -+ pmullw mm0, mm5 // a * 1/5 - -- movq mm3, mm1 // make a copy -- punpcklbw mm1, mm7 // unpack low to word -+ movq mm3, mm1 // make a copy -+ punpcklbw mm1, mm7 // unpack low to word - -- pmullw mm2, mm5 // a * 1/5 -- movq mm6, four_fifths // constan -+ pmullw mm2, mm5 // a * 1/5 -+ movq mm6, four_fifths // constan - -- movq mm4, mm1 // copy of low b -- pmullw mm4, mm6 // b * 4/5 -+ movq mm4, mm1 // copy of low b -+ pmullw mm4, mm6 // b * 4/5 - -- punpckhbw mm3, mm7 // unpack high to word -- movq mm5, mm3 // copy of high b -+ punpckhbw mm3, mm7 // unpack high to word -+ movq mm5, mm3 // copy of high b - -- pmullw mm5, mm6 // b * 4/5 -- paddw mm0, mm4 // a * 1/5 + b * 4/5 -+ pmullw mm5, mm6 // b * 4/5 -+ paddw mm0, mm4 // a * 1/5 + b * 4/5 - -- paddw mm2, mm5 // a * 1/5 + b * 4/5 -- paddw mm0, round_values // + 128 -+ paddw mm2, mm5 // a * 1/5 + b * 4/5 -+ paddw mm0, round_values // + 128 - -- paddw mm2, round_values // + 128 -- psrlw mm0, 8 -+ paddw mm2, round_values // + 128 -+ psrlw mm0, 8 - -- psrlw mm2, 8 -- packuswb mm0, mm2 // des [1] -+ psrlw mm2, 8 -+ packuswb mm0, mm2 // des [1] - -- movq QWORD ptr [esi+ecx], mm0 // write des[1] -- movq mm0, [esi+ecx*2] // mm0 = src[2] -+ movq QWORD ptr [esi+ecx], mm0 // write des[1] -+ movq mm0, [esi+ecx*2] // mm0 = src[2] - -- // mm1, mm3 --- Src[1] -- // mm0 --- Src[2] -- // mm7 for unpacking -+ // mm1, mm3 --- Src[1] -+ // mm0 --- Src[2] -+ // mm7 for unpacking - -- movq mm5, two_fifths -- movq mm2, mm0 // make a copy -+ movq mm5, two_fifths -+ movq mm2, mm0 // make a copy - -- pmullw mm1, mm5 // b * 2/5 -- movq mm6, three_fifths -+ pmullw mm1, mm5 // b * 2/5 -+ movq mm6, three_fifths - - -- punpcklbw mm0, mm7 // unpack low to word -- pmullw mm3, mm5 // b * 2/5 -+ punpcklbw mm0, mm7 // unpack low to word -+ pmullw mm3, mm5 // b * 2/5 - -- movq mm4, mm0 // make copy of c -- punpckhbw mm2, mm7 // unpack high to word -+ movq mm4, mm0 // make copy of c -+ punpckhbw mm2, mm7 // unpack high to word - -- pmullw mm4, mm6 // c * 3/5 -- movq mm5, mm2 -+ pmullw mm4, mm6 // c * 3/5 -+ movq mm5, mm2 - -- pmullw mm5, mm6 // c * 3/5 -- paddw mm1, mm4 // b * 2/5 + c * 3/5 -+ pmullw mm5, mm6 // c * 3/5 -+ paddw mm1, mm4 // b * 2/5 + c * 3/5 - -- paddw mm3, mm5 // b * 2/5 + c * 3/5 -- paddw mm1, round_values // + 128 -+ paddw mm3, mm5 // b * 2/5 + c * 3/5 -+ paddw mm1, round_values // + 128 - -- paddw mm3, round_values // + 128 -- psrlw mm1, 8 -+ paddw mm3, round_values // + 128 -+ psrlw mm1, 8 - -- psrlw mm3, 8 -- packuswb mm1, mm3 // des[2] -+ psrlw mm3, 8 -+ packuswb mm1, mm3 // des[2] - -- movq QWORD ptr [esi+ecx*2], mm1 // write des[2] -- movq mm1, [edi] // mm1=Src[3]; -+ movq QWORD ptr [esi+ecx*2], mm1 // write des[2] -+ movq mm1, 
[edi] // mm1=Src[3]; - -- movq QWORD ptr [edi+ecx], mm1 // write des[4]; -+ movq QWORD ptr [edi+ecx], mm1 // write des[4]; - -- // mm0, mm2 --- Src[2] -- // mm1 --- Src[3] -- // mm6 --- 3/5 -- // mm7 for unpacking -+ // mm0, mm2 --- Src[2] -+ // mm1 --- Src[3] -+ // mm6 --- 3/5 -+ // mm7 for unpacking - -- pmullw mm0, mm6 // c * 3/5 -- movq mm5, two_fifths // mm5 = 2/5 -+ pmullw mm0, mm6 // c * 3/5 -+ movq mm5, two_fifths // mm5 = 2/5 - -- movq mm3, mm1 // make a copy -- pmullw mm2, mm6 // c * 3/5 -+ movq mm3, mm1 // make a copy -+ pmullw mm2, mm6 // c * 3/5 - -- punpcklbw mm1, mm7 // unpack low -- movq mm4, mm1 // make a copy -+ punpcklbw mm1, mm7 // unpack low -+ movq mm4, mm1 // make a copy - -- punpckhbw mm3, mm7 // unpack high -- pmullw mm4, mm5 // d * 2/5 -+ punpckhbw mm3, mm7 // unpack high -+ pmullw mm4, mm5 // d * 2/5 - -- movq mm6, mm3 // make a copy -- pmullw mm6, mm5 // d * 2/5 -+ movq mm6, mm3 // make a copy -+ pmullw mm6, mm5 // d * 2/5 - -- paddw mm0, mm4 // c * 3/5 + d * 2/5 -- paddw mm2, mm6 // c * 3/5 + d * 2/5 -+ paddw mm0, mm4 // c * 3/5 + d * 2/5 -+ paddw mm2, mm6 // c * 3/5 + d * 2/5 - -- paddw mm0, round_values // + 128 -- paddw mm2, round_values // + 128 -+ paddw mm0, round_values // + 128 -+ paddw mm2, round_values // + 128 - -- psrlw mm0, 8 -- psrlw mm2, 8 -+ psrlw mm0, 8 -+ psrlw mm2, 8 - -- packuswb mm0, mm2 // des[3] -- movq QWORD ptr [edi], mm0 // write des[3] -+ packuswb mm0, mm2 // des[3] -+ movq QWORD ptr [edi], mm0 // write des[3] - -- // mm1, mm3 --- Src[3] -- // mm7 -- cleared for unpacking -- add edi, 8 -- add esi, 8 -+ // mm1, mm3 --- Src[3] -+ // mm7 -- cleared for unpacking -+ add edi, 8 -+ add esi, 8 - -- sub edx, 8 -- jg last_vs_4_5_loop -- } -+ sub edx, 8 -+ jg last_vs_4_5_loop -+ } - } - - /**************************************************************************** -@@ -674,153 +666,151 @@ void last_vertical_band_4_5_scale_mmx - static - void vertical_band_3_5_scale_mmx - ( -- unsigned char *dest, -- unsigned int dest_pitch, -- unsigned int dest_width --) --{ -- __asm -- { -- mov esi, dest // Get the source and destination pointer -- mov ecx, dest_pitch // Get the pitch size -+ unsigned char *dest, -+ unsigned int dest_pitch, -+ unsigned int dest_width -+) { -+ __asm { -+ mov esi, dest // Get the source and destination pointer -+ mov ecx, dest_pitch // Get the pitch size - -- lea edi, [esi+ecx*2] // tow lines below -- add edi, ecx // three lines below -+ lea edi, [esi+ecx*2] // tow lines below -+ add edi, ecx // three lines below - -- pxor mm7, mm7 // clear out mm7 -- mov edx, dest_width // Loop counter -+ pxor mm7, mm7 // clear out mm7 -+ mov edx, dest_width // Loop counter - -- vs_3_5_loop: -+ vs_3_5_loop: - -- movq mm0, QWORD ptr [esi] // src[0]; -- movq mm1, QWORD ptr [esi+ecx] // src[1]; -+ movq mm0, QWORD ptr [esi] // src[0]; -+ movq mm1, QWORD ptr [esi+ecx] // src[1]; - -- movq mm2, mm0 // Make a copy -- punpcklbw mm0, mm7 // unpack low to word -+ movq mm2, mm0 // Make a copy -+ punpcklbw mm0, mm7 // unpack low to word - -- movq mm5, two_fifths // mm5 = 2/5 -- punpckhbw mm2, mm7 // unpack high to word -+ movq mm5, two_fifths // mm5 = 2/5 -+ punpckhbw mm2, mm7 // unpack high to word - -- pmullw mm0, mm5 // a * 2/5 -+ pmullw mm0, mm5 // a * 2/5 - -- movq mm3, mm1 // make a copy -- punpcklbw mm1, mm7 // unpack low to word -+ movq mm3, mm1 // make a copy -+ punpcklbw mm1, mm7 // unpack low to word - -- pmullw mm2, mm5 // a * 2/5 -- movq mm6, three_fifths // mm6 = 3/5 -+ pmullw mm2, mm5 // a * 2/5 -+ movq mm6, three_fifths // mm6 = 3/5 - -- 
movq mm4, mm1 // copy of low b -- pmullw mm4, mm6 // b * 3/5 -+ movq mm4, mm1 // copy of low b -+ pmullw mm4, mm6 // b * 3/5 - -- punpckhbw mm3, mm7 // unpack high to word -- movq mm5, mm3 // copy of high b -+ punpckhbw mm3, mm7 // unpack high to word -+ movq mm5, mm3 // copy of high b - -- pmullw mm5, mm6 // b * 3/5 -- paddw mm0, mm4 // a * 2/5 + b * 3/5 -+ pmullw mm5, mm6 // b * 3/5 -+ paddw mm0, mm4 // a * 2/5 + b * 3/5 - -- paddw mm2, mm5 // a * 2/5 + b * 3/5 -- paddw mm0, round_values // + 128 -+ paddw mm2, mm5 // a * 2/5 + b * 3/5 -+ paddw mm0, round_values // + 128 - -- paddw mm2, round_values // + 128 -- psrlw mm0, 8 -+ paddw mm2, round_values // + 128 -+ psrlw mm0, 8 - -- psrlw mm2, 8 -- packuswb mm0, mm2 // des [1] -+ psrlw mm2, 8 -+ packuswb mm0, mm2 // des [1] - -- movq QWORD ptr [esi+ecx], mm0 // write des[1] -- movq mm0, [esi+ecx*2] // mm0 = src[2] -+ movq QWORD ptr [esi+ecx], mm0 // write des[1] -+ movq mm0, [esi+ecx*2] // mm0 = src[2] - -- // mm1, mm3 --- Src[1] -- // mm0 --- Src[2] -- // mm7 for unpacking -+ // mm1, mm3 --- Src[1] -+ // mm0 --- Src[2] -+ // mm7 for unpacking - -- movq mm4, mm1 // b low -- pmullw mm1, four_fifths // b * 4/5 low -+ movq mm4, mm1 // b low -+ pmullw mm1, four_fifths // b * 4/5 low - -- movq mm5, mm3 // b high -- pmullw mm3, four_fifths // b * 4/5 high -+ movq mm5, mm3 // b high -+ pmullw mm3, four_fifths // b * 4/5 high - -- movq mm2, mm0 // c -- pmullw mm4, one_fifth // b * 1/5 -+ movq mm2, mm0 // c -+ pmullw mm4, one_fifth // b * 1/5 - -- punpcklbw mm0, mm7 // c low -- pmullw mm5, one_fifth // b * 1/5 -+ punpcklbw mm0, mm7 // c low -+ pmullw mm5, one_fifth // b * 1/5 - -- movq mm6, mm0 // make copy of c low -- punpckhbw mm2, mm7 // c high -+ movq mm6, mm0 // make copy of c low -+ punpckhbw mm2, mm7 // c high - -- pmullw mm6, one_fifth // c * 1/5 low -- movq mm7, mm2 // make copy of c high -+ pmullw mm6, one_fifth // c * 1/5 low -+ movq mm7, mm2 // make copy of c high - -- pmullw mm7, one_fifth // c * 1/5 high -- paddw mm1, mm6 // b * 4/5 + c * 1/5 low -+ pmullw mm7, one_fifth // c * 1/5 high -+ paddw mm1, mm6 // b * 4/5 + c * 1/5 low - -- paddw mm3, mm7 // b * 4/5 + c * 1/5 high -- movq mm6, mm0 // make copy of c low -+ paddw mm3, mm7 // b * 4/5 + c * 1/5 high -+ movq mm6, mm0 // make copy of c low - -- pmullw mm6, four_fifths // c * 4/5 low -- movq mm7, mm2 // make copy of c high -+ pmullw mm6, four_fifths // c * 4/5 low -+ movq mm7, mm2 // make copy of c high - -- pmullw mm7, four_fifths // c * 4/5 high -+ pmullw mm7, four_fifths // c * 4/5 high - -- paddw mm4, mm6 // b * 1/5 + c * 4/5 low -- paddw mm5, mm7 // b * 1/5 + c * 4/5 high -+ paddw mm4, mm6 // b * 1/5 + c * 4/5 low -+ paddw mm5, mm7 // b * 1/5 + c * 4/5 high - -- paddw mm1, round_values // + 128 -- paddw mm3, round_values // + 128 -+ paddw mm1, round_values // + 128 -+ paddw mm3, round_values // + 128 - -- psrlw mm1, 8 -- psrlw mm3, 8 -+ psrlw mm1, 8 -+ psrlw mm3, 8 - -- packuswb mm1, mm3 // des[2] -- movq QWORD ptr [esi+ecx*2], mm1 // write des[2] -+ packuswb mm1, mm3 // des[2] -+ movq QWORD ptr [esi+ecx*2], mm1 // write des[2] - -- paddw mm4, round_values // + 128 -- paddw mm5, round_values // + 128 -+ paddw mm4, round_values // + 128 -+ paddw mm5, round_values // + 128 - -- psrlw mm4, 8 -- psrlw mm5, 8 -+ psrlw mm4, 8 -+ psrlw mm5, 8 - -- packuswb mm4, mm5 // des[3] -- movq QWORD ptr [edi], mm4 // write des[3] -+ packuswb mm4, mm5 // des[3] -+ movq QWORD ptr [edi], mm4 // write des[3] - -- // mm0, mm2 --- Src[3] -+ // mm0, mm2 --- Src[3] - -- pxor mm7, mm7 // clear mm7 for 
unpacking -- movq mm1, [edi+ecx*2] // mm1 = Src[0] of the next group -+ pxor mm7, mm7 // clear mm7 for unpacking -+ movq mm1, [edi+ecx*2] // mm1 = Src[0] of the next group - -- movq mm5, three_fifths // mm5 = 3/5 -- pmullw mm0, mm5 // d * 3/5 -+ movq mm5, three_fifths // mm5 = 3/5 -+ pmullw mm0, mm5 // d * 3/5 - -- movq mm6, two_fifths // mm6 = 2/5 -- movq mm3, mm1 // make a copy -+ movq mm6, two_fifths // mm6 = 2/5 -+ movq mm3, mm1 // make a copy - -- pmullw mm2, mm5 // d * 3/5 -- punpcklbw mm1, mm7 // unpack low -+ pmullw mm2, mm5 // d * 3/5 -+ punpcklbw mm1, mm7 // unpack low - -- pmullw mm1, mm6 // an * 2/5 -- punpckhbw mm3, mm7 // unpack high -+ pmullw mm1, mm6 // an * 2/5 -+ punpckhbw mm3, mm7 // unpack high - -- paddw mm0, mm1 // d * 3/5 + an * 2/5 -- pmullw mm3, mm6 // an * 2/5 -+ paddw mm0, mm1 // d * 3/5 + an * 2/5 -+ pmullw mm3, mm6 // an * 2/5 - -- paddw mm2, mm3 // d * 3/5 + an * 2/5 -- paddw mm0, round_values // + 128 -+ paddw mm2, mm3 // d * 3/5 + an * 2/5 -+ paddw mm0, round_values // + 128 - -- paddw mm2, round_values // + 128 -- psrlw mm0, 8 -+ paddw mm2, round_values // + 128 -+ psrlw mm0, 8 - -- psrlw mm2, 8 -- packuswb mm0, mm2 // des[4] -+ psrlw mm2, 8 -+ packuswb mm0, mm2 // des[4] - -- movq QWORD ptr [edi+ecx], mm0 // write des[4] -+ movq QWORD ptr [edi+ecx], mm0 // write des[4] - -- add edi, 8 -- add esi, 8 -+ add edi, 8 -+ add esi, 8 - -- sub edx, 8 -- jg vs_3_5_loop -- } -+ sub edx, 8 -+ jg vs_3_5_loop -+ } - } - - /**************************************************************************** -@@ -845,129 +835,127 @@ void vertical_band_3_5_scale_mmx - static - void last_vertical_band_3_5_scale_mmx - ( -- unsigned char *dest, -- unsigned int dest_pitch, -- unsigned int dest_width --) --{ -- __asm -- { -- mov esi, dest // Get the source and destination pointer -- mov ecx, dest_pitch // Get the pitch size -+ unsigned char *dest, -+ unsigned int dest_pitch, -+ unsigned int dest_width -+) { -+ __asm { -+ mov esi, dest // Get the source and destination pointer -+ mov ecx, dest_pitch // Get the pitch size - -- lea edi, [esi+ecx*2] // tow lines below -- add edi, ecx // three lines below -+ lea edi, [esi+ecx*2] // tow lines below -+ add edi, ecx // three lines below - -- pxor mm7, mm7 // clear out mm7 -- mov edx, dest_width // Loop counter -+ pxor mm7, mm7 // clear out mm7 -+ mov edx, dest_width // Loop counter - - -- last_vs_3_5_loop: -+ last_vs_3_5_loop: - -- movq mm0, QWORD ptr [esi] // src[0]; -- movq mm1, QWORD ptr [esi+ecx] // src[1]; -+ movq mm0, QWORD ptr [esi] // src[0]; -+ movq mm1, QWORD ptr [esi+ecx] // src[1]; - -- movq mm2, mm0 // Make a copy -- punpcklbw mm0, mm7 // unpack low to word -+ movq mm2, mm0 // Make a copy -+ punpcklbw mm0, mm7 // unpack low to word - -- movq mm5, two_fifths // mm5 = 2/5 -- punpckhbw mm2, mm7 // unpack high to word -+ movq mm5, two_fifths // mm5 = 2/5 -+ punpckhbw mm2, mm7 // unpack high to word - -- pmullw mm0, mm5 // a * 2/5 -+ pmullw mm0, mm5 // a * 2/5 - -- movq mm3, mm1 // make a copy -- punpcklbw mm1, mm7 // unpack low to word -+ movq mm3, mm1 // make a copy -+ punpcklbw mm1, mm7 // unpack low to word - -- pmullw mm2, mm5 // a * 2/5 -- movq mm6, three_fifths // mm6 = 3/5 -+ pmullw mm2, mm5 // a * 2/5 -+ movq mm6, three_fifths // mm6 = 3/5 - -- movq mm4, mm1 // copy of low b -- pmullw mm4, mm6 // b * 3/5 -+ movq mm4, mm1 // copy of low b -+ pmullw mm4, mm6 // b * 3/5 - -- punpckhbw mm3, mm7 // unpack high to word -- movq mm5, mm3 // copy of high b -+ punpckhbw mm3, mm7 // unpack high to word -+ movq mm5, mm3 // copy of high b 
- -- pmullw mm5, mm6 // b * 3/5 -- paddw mm0, mm4 // a * 2/5 + b * 3/5 -+ pmullw mm5, mm6 // b * 3/5 -+ paddw mm0, mm4 // a * 2/5 + b * 3/5 - -- paddw mm2, mm5 // a * 2/5 + b * 3/5 -- paddw mm0, round_values // + 128 -+ paddw mm2, mm5 // a * 2/5 + b * 3/5 -+ paddw mm0, round_values // + 128 - -- paddw mm2, round_values // + 128 -- psrlw mm0, 8 -+ paddw mm2, round_values // + 128 -+ psrlw mm0, 8 - -- psrlw mm2, 8 -- packuswb mm0, mm2 // des [1] -+ psrlw mm2, 8 -+ packuswb mm0, mm2 // des [1] - -- movq QWORD ptr [esi+ecx], mm0 // write des[1] -- movq mm0, [esi+ecx*2] // mm0 = src[2] -+ movq QWORD ptr [esi+ecx], mm0 // write des[1] -+ movq mm0, [esi+ecx*2] // mm0 = src[2] - - - -- // mm1, mm3 --- Src[1] -- // mm0 --- Src[2] -- // mm7 for unpacking -+ // mm1, mm3 --- Src[1] -+ // mm0 --- Src[2] -+ // mm7 for unpacking - -- movq mm4, mm1 // b low -- pmullw mm1, four_fifths // b * 4/5 low -+ movq mm4, mm1 // b low -+ pmullw mm1, four_fifths // b * 4/5 low - -- movq QWORD ptr [edi+ecx], mm0 // write des[4] -+ movq QWORD ptr [edi+ecx], mm0 // write des[4] - -- movq mm5, mm3 // b high -- pmullw mm3, four_fifths // b * 4/5 high -+ movq mm5, mm3 // b high -+ pmullw mm3, four_fifths // b * 4/5 high - -- movq mm2, mm0 // c -- pmullw mm4, one_fifth // b * 1/5 -+ movq mm2, mm0 // c -+ pmullw mm4, one_fifth // b * 1/5 - -- punpcklbw mm0, mm7 // c low -- pmullw mm5, one_fifth // b * 1/5 -+ punpcklbw mm0, mm7 // c low -+ pmullw mm5, one_fifth // b * 1/5 - -- movq mm6, mm0 // make copy of c low -- punpckhbw mm2, mm7 // c high -+ movq mm6, mm0 // make copy of c low -+ punpckhbw mm2, mm7 // c high - -- pmullw mm6, one_fifth // c * 1/5 low -- movq mm7, mm2 // make copy of c high -+ pmullw mm6, one_fifth // c * 1/5 low -+ movq mm7, mm2 // make copy of c high - -- pmullw mm7, one_fifth // c * 1/5 high -- paddw mm1, mm6 // b * 4/5 + c * 1/5 low -+ pmullw mm7, one_fifth // c * 1/5 high -+ paddw mm1, mm6 // b * 4/5 + c * 1/5 low - -- paddw mm3, mm7 // b * 4/5 + c * 1/5 high -- movq mm6, mm0 // make copy of c low -+ paddw mm3, mm7 // b * 4/5 + c * 1/5 high -+ movq mm6, mm0 // make copy of c low - -- pmullw mm6, four_fifths // c * 4/5 low -- movq mm7, mm2 // make copy of c high -+ pmullw mm6, four_fifths // c * 4/5 low -+ movq mm7, mm2 // make copy of c high - -- pmullw mm7, four_fifths // c * 4/5 high -+ pmullw mm7, four_fifths // c * 4/5 high - -- paddw mm4, mm6 // b * 1/5 + c * 4/5 low -- paddw mm5, mm7 // b * 1/5 + c * 4/5 high -+ paddw mm4, mm6 // b * 1/5 + c * 4/5 low -+ paddw mm5, mm7 // b * 1/5 + c * 4/5 high - -- paddw mm1, round_values // + 128 -- paddw mm3, round_values // + 128 -+ paddw mm1, round_values // + 128 -+ paddw mm3, round_values // + 128 - -- psrlw mm1, 8 -- psrlw mm3, 8 -+ psrlw mm1, 8 -+ psrlw mm3, 8 - -- packuswb mm1, mm3 // des[2] -- movq QWORD ptr [esi+ecx*2], mm1 // write des[2] -+ packuswb mm1, mm3 // des[2] -+ movq QWORD ptr [esi+ecx*2], mm1 // write des[2] - -- paddw mm4, round_values // + 128 -- paddw mm5, round_values // + 128 -+ paddw mm4, round_values // + 128 -+ paddw mm5, round_values // + 128 - -- psrlw mm4, 8 -- psrlw mm5, 8 -+ psrlw mm4, 8 -+ psrlw mm5, 8 - -- packuswb mm4, mm5 // des[3] -- movq QWORD ptr [edi], mm4 // write des[3] -+ packuswb mm4, mm5 // des[3] -+ movq QWORD ptr [edi], mm4 // write des[3] - -- // mm0, mm2 --- Src[3] -+ // mm0, mm2 --- Src[3] - -- add edi, 8 -- add esi, 8 -+ add edi, 8 -+ add esi, 8 - -- sub edx, 8 -- jg last_vs_3_5_loop -- } -+ sub edx, 8 -+ jg last_vs_3_5_loop -+ } - } - - 
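The vertical-band kernels above (the 4-to-5 and 3-to-5 cases) build each new row as a weighted blend of two adjacent source rows, using fixed-point weights that sum to 256 and rounding with +128 followed by a right shift of 8, as the pmullw/paddw/psrlw comments indicate. As a rough scalar model of that per-pixel arithmetic — illustrative only, not part of the patch, with hypothetical names and example weights — the blend reduces to:

    /* dest[i] = (a[i]*wa + b[i]*wb + 128) >> 8, with wa + wb == 256,
     * e.g. 51/205 for a 1/5 + 4/5 tap or 102/154 for 2/5 + 3/5. */
    static void blend_rows_c(const unsigned char *a, const unsigned char *b,
                             unsigned char *dest, unsigned int width,
                             unsigned int wa, unsigned int wb) {
      unsigned int i;
      for (i = 0; i < width; i++)
        dest[i] = (unsigned char)((a[i] * wa + b[i] * wb + 128) >> 8);
    }

Each MMX iteration performs the same computation on eight pixels at once by unpacking bytes to words with punpcklbw/punpckhbw, multiplying with pmullw, and repacking with packuswb.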
/**************************************************************************** -@@ -992,52 +980,50 @@ void last_vertical_band_3_5_scale_mmx - static - void vertical_band_1_2_scale_mmx - ( -- unsigned char *dest, -- unsigned int dest_pitch, -- unsigned int dest_width --) --{ -- __asm -- { -+ unsigned char *dest, -+ unsigned int dest_pitch, -+ unsigned int dest_width -+) { -+ __asm { - -- mov esi, dest // Get the source and destination pointer -- mov ecx, dest_pitch // Get the pitch size -+ mov esi, dest // Get the source and destination pointer -+ mov ecx, dest_pitch // Get the pitch size - -- pxor mm7, mm7 // clear out mm7 -- mov edx, dest_width // Loop counter -+ pxor mm7, mm7 // clear out mm7 -+ mov edx, dest_width // Loop counter - -- vs_1_2_loop: -+ vs_1_2_loop: - -- movq mm0, [esi] // get Src[0] -- movq mm1, [esi + ecx * 2] // get Src[1] -+ movq mm0, [esi] // get Src[0] -+ movq mm1, [esi + ecx * 2] // get Src[1] - -- movq mm2, mm0 // make copy before unpack -- movq mm3, mm1 // make copy before unpack -+ movq mm2, mm0 // make copy before unpack -+ movq mm3, mm1 // make copy before unpack - -- punpcklbw mm0, mm7 // low Src[0] -- movq mm6, four_ones // mm6= 1, 1, 1, 1 -+ punpcklbw mm0, mm7 // low Src[0] -+ movq mm6, four_ones // mm6= 1, 1, 1, 1 - -- punpcklbw mm1, mm7 // low Src[1] -- paddw mm0, mm1 // low (a + b) -+ punpcklbw mm1, mm7 // low Src[1] -+ paddw mm0, mm1 // low (a + b) - -- punpckhbw mm2, mm7 // high Src[0] -- paddw mm0, mm6 // low (a + b + 1) -+ punpckhbw mm2, mm7 // high Src[0] -+ paddw mm0, mm6 // low (a + b + 1) - -- punpckhbw mm3, mm7 -- paddw mm2, mm3 // high (a + b ) -+ punpckhbw mm3, mm7 -+ paddw mm2, mm3 // high (a + b ) - -- psraw mm0, 1 // low (a + b +1 )/2 -- paddw mm2, mm6 // high (a + b + 1) -+ psraw mm0, 1 // low (a + b +1 )/2 -+ paddw mm2, mm6 // high (a + b + 1) - -- psraw mm2, 1 // high (a + b + 1)/2 -- packuswb mm0, mm2 // pack results -+ psraw mm2, 1 // high (a + b + 1)/2 -+ packuswb mm0, mm2 // pack results - -- movq [esi+ecx], mm0 // write out eight bytes -- add esi, 8 -+ movq [esi+ecx], mm0 // write out eight bytes -+ add esi, 8 - -- sub edx, 8 -- jg vs_1_2_loop -- } -+ sub edx, 8 -+ jg vs_1_2_loop -+ } - - } - -@@ -1063,28 +1049,26 @@ void vertical_band_1_2_scale_mmx - static - void last_vertical_band_1_2_scale_mmx - ( -- unsigned char *dest, -- unsigned int dest_pitch, -- unsigned int dest_width --) --{ -- __asm -- { -- mov esi, dest // Get the source and destination pointer -- mov ecx, dest_pitch // Get the pitch size -+ unsigned char *dest, -+ unsigned int dest_pitch, -+ unsigned int dest_width -+) { -+ __asm { -+ mov esi, dest // Get the source and destination pointer -+ mov ecx, dest_pitch // Get the pitch size - -- mov edx, dest_width // Loop counter -+ mov edx, dest_width // Loop counter - -- last_vs_1_2_loop: -+ last_vs_1_2_loop: - -- movq mm0, [esi] // get Src[0] -- movq [esi+ecx], mm0 // write out eight bytes -+ movq mm0, [esi] // get Src[0] -+ movq [esi+ecx], mm0 // write out eight bytes - -- add esi, 8 -- sub edx, 8 -+ add esi, 8 -+ sub edx, 8 - -- jg last_vs_1_2_loop -- } -+ jg last_vs_1_2_loop -+ } - } - - /**************************************************************************** -@@ -1108,106 +1092,104 @@ void last_vertical_band_1_2_scale_mmx - static - void horizontal_line_1_2_scale_mmx - ( -- const unsigned char *source, -- unsigned int source_width, -- unsigned char *dest, -- unsigned int dest_width --) --{ -- (void) dest_width; -+ const unsigned char *source, -+ unsigned int source_width, -+ unsigned char *dest, -+ unsigned int 
dest_width -+) { -+ (void) dest_width; - -- __asm -- { -- mov esi, source -- mov edi, dest -+ __asm { -+ mov esi, source -+ mov edi, dest - -- pxor mm7, mm7 -- movq mm6, four_ones -+ pxor mm7, mm7 -+ movq mm6, four_ones - -- mov ecx, source_width -+ mov ecx, source_width - -- hs_1_2_loop: -+ hs_1_2_loop: - -- movq mm0, [esi] -- movq mm1, [esi+1] -+ movq mm0, [esi] -+ movq mm1, [esi+1] - -- movq mm2, mm0 -- movq mm3, mm1 -+ movq mm2, mm0 -+ movq mm3, mm1 - -- movq mm4, mm0 -- punpcklbw mm0, mm7 -+ movq mm4, mm0 -+ punpcklbw mm0, mm7 - -- punpcklbw mm1, mm7 -- paddw mm0, mm1 -+ punpcklbw mm1, mm7 -+ paddw mm0, mm1 - -- paddw mm0, mm6 -- punpckhbw mm2, mm7 -+ paddw mm0, mm6 -+ punpckhbw mm2, mm7 - -- punpckhbw mm3, mm7 -- paddw mm2, mm3 -+ punpckhbw mm3, mm7 -+ paddw mm2, mm3 - -- paddw mm2, mm6 -- psraw mm0, 1 -+ paddw mm2, mm6 -+ psraw mm0, 1 - -- psraw mm2, 1 -- packuswb mm0, mm2 -+ psraw mm2, 1 -+ packuswb mm0, mm2 - -- movq mm2, mm4 -- punpcklbw mm2, mm0 -+ movq mm2, mm4 -+ punpcklbw mm2, mm0 - -- movq [edi], mm2 -- punpckhbw mm4, mm0 -+ movq [edi], mm2 -+ punpckhbw mm4, mm0 - -- movq [edi+8], mm4 -- add esi, 8 -+ movq [edi+8], mm4 -+ add esi, 8 - -- add edi, 16 -- sub ecx, 8 -+ add edi, 16 -+ sub ecx, 8 - -- cmp ecx, 8 -- jg hs_1_2_loop -+ cmp ecx, 8 -+ jg hs_1_2_loop - - // last eight pixel - -- movq mm0, [esi] -- movq mm1, mm0 -+ movq mm0, [esi] -+ movq mm1, mm0 - -- movq mm2, mm0 -- movq mm3, mm1 -+ movq mm2, mm0 -+ movq mm3, mm1 - -- psrlq mm1, 8 -- psrlq mm3, 56 -+ psrlq mm1, 8 -+ psrlq mm3, 56 - -- psllq mm3, 56 -- por mm1, mm3 -+ psllq mm3, 56 -+ por mm1, mm3 - -- movq mm3, mm1 -- movq mm4, mm0 -+ movq mm3, mm1 -+ movq mm4, mm0 - -- punpcklbw mm0, mm7 -- punpcklbw mm1, mm7 -+ punpcklbw mm0, mm7 -+ punpcklbw mm1, mm7 - -- paddw mm0, mm1 -- paddw mm0, mm6 -+ paddw mm0, mm1 -+ paddw mm0, mm6 - -- punpckhbw mm2, mm7 -- punpckhbw mm3, mm7 -+ punpckhbw mm2, mm7 -+ punpckhbw mm3, mm7 - -- paddw mm2, mm3 -- paddw mm2, mm6 -+ paddw mm2, mm3 -+ paddw mm2, mm6 - -- psraw mm0, 1 -- psraw mm2, 1 -+ psraw mm0, 1 -+ psraw mm2, 1 - -- packuswb mm0, mm2 -- movq mm2, mm4 -+ packuswb mm0, mm2 -+ movq mm2, mm4 - -- punpcklbw mm2, mm0 -- movq [edi], mm2 -+ punpcklbw mm2, mm0 -+ movq [edi], mm2 - -- punpckhbw mm4, mm0 -- movq [edi+8], mm4 -- } -+ punpckhbw mm4, mm0 -+ movq [edi+8], mm4 -+ } - } - - -@@ -1240,86 +1222,84 @@ __declspec(align(16)) const static unsigned short const54_1[] = {256, 192, 128, - static - void horizontal_line_5_4_scale_mmx - ( -- const unsigned char *source, -- unsigned int source_width, -- unsigned char *dest, -- unsigned int dest_width --) --{ -- /* -- unsigned i; -- unsigned int a, b, c, d, e; -- unsigned char *des = dest; -- const unsigned char *src = source; -+ const unsigned char *source, -+ unsigned int source_width, -+ unsigned char *dest, -+ unsigned int dest_width -+) { -+ /* -+ unsigned i; -+ unsigned int a, b, c, d, e; -+ unsigned char *des = dest; -+ const unsigned char *src = source; - -- (void) dest_width; -+ (void) dest_width; - -- for ( i=0; i>8); -- des[2] = ((c*128 + d*128 + 128)>>8); -- des[3] = ((d* 64 + e*192 + 128)>>8); -+ des[0] = a; -+ des[1] = ((b*192 + c* 64 + 128)>>8); -+ des[2] = ((c*128 + d*128 + 128)>>8); -+ des[3] = ((d* 64 + e*192 + 128)>>8); - -- src += 5; -- des += 4; -- } -- */ -- (void) dest_width; -+ src += 5; -+ des += 4; -+ } -+ */ -+ (void) dest_width; - -- __asm -- { -+ __asm { - -- mov esi, source ; -- mov edi, dest ; -+ mov esi, source; -+ mov edi, dest; - -- mov ecx, source_width ; -- movq mm5, const54_1 ; -+ mov ecx, source_width; 
-+ movq mm5, const54_1; - -- pxor mm7, mm7 ; -- movq mm6, const54_2 ; -+ pxor mm7, mm7; -+ movq mm6, const54_2; - -- movq mm4, round_values ; -- lea edx, [esi+ecx] ; -- horizontal_line_5_4_loop: -+ movq mm4, round_values; -+ lea edx, [esi+ecx]; -+ horizontal_line_5_4_loop: - -- movq mm0, QWORD PTR [esi] ; -- 00 01 02 03 04 05 06 07 -- movq mm1, mm0 ; -- 00 01 02 03 04 05 06 07 -+ movq mm0, QWORD PTR [esi]; -+ 00 01 02 03 04 05 06 07 -+ movq mm1, mm0; -+ 00 01 02 03 04 05 06 07 - -- psrlq mm0, 8 ; -- 01 02 03 04 05 06 07 xx -- punpcklbw mm1, mm7 ; -- xx 00 xx 01 xx 02 xx 03 -+ psrlq mm0, 8; -+ 01 02 03 04 05 06 07 xx -+ punpcklbw mm1, mm7; -+ xx 00 xx 01 xx 02 xx 03 - -- punpcklbw mm0, mm7 ; -- xx 01 xx 02 xx 03 xx 04 -- pmullw mm1, mm5 -+ punpcklbw mm0, mm7; -+ xx 01 xx 02 xx 03 xx 04 -+ pmullw mm1, mm5 - -- pmullw mm0, mm6 -- add esi, 5 -+ pmullw mm0, mm6 -+ add esi, 5 - -- add edi, 4 -- paddw mm1, mm0 -+ add edi, 4 -+ paddw mm1, mm0 - -- paddw mm1, mm4 -- psrlw mm1, 8 -+ paddw mm1, mm4 -+ psrlw mm1, 8 - -- cmp esi, edx -- packuswb mm1, mm7 -+ cmp esi, edx -+ packuswb mm1, mm7 - -- movd DWORD PTR [edi-4], mm1 -+ movd DWORD PTR [edi-4], mm1 - -- jl horizontal_line_5_4_loop -+ jl horizontal_line_5_4_loop - -- } -+ } - - } - __declspec(align(16)) const static unsigned short one_fourths[] = { 64, 64, 64, 64 }; -@@ -1327,86 +1307,84 @@ __declspec(align(16)) const static unsigned short two_fourths[] = { 128, 128, - __declspec(align(16)) const static unsigned short three_fourths[] = { 192, 192, 192, 192 }; - - static --void vertical_band_5_4_scale_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) --{ -+void vertical_band_5_4_scale_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) { - -- __asm -- { -- push ebx -+ __asm { -+ push ebx - -- mov esi, source // Get the source and destination pointer -- mov ecx, src_pitch // Get the pitch size -+ mov esi, source // Get the source and destination pointer -+ mov ecx, src_pitch // Get the pitch size - -- mov edi, dest // tow lines below -- pxor mm7, mm7 // clear out mm7 -+ mov edi, dest // tow lines below -+ pxor mm7, mm7 // clear out mm7 - -- mov edx, dest_pitch // Loop counter -- mov ebx, dest_width -+ mov edx, dest_pitch // Loop counter -+ mov ebx, dest_width - -- vs_5_4_loop: -+ vs_5_4_loop: - -- movd mm0, DWORD ptr [esi] // src[0]; -- movd mm1, DWORD ptr [esi+ecx] // src[1]; -+ movd mm0, DWORD ptr [esi] // src[0]; -+ movd mm1, DWORD ptr [esi+ecx] // src[1]; - -- movd mm2, DWORD ptr [esi+ecx*2] -- lea eax, [esi+ecx*2] // -+ movd mm2, DWORD ptr [esi+ecx*2] -+ lea eax, [esi+ecx*2] // - -- punpcklbw mm1, mm7 -- punpcklbw mm2, mm7 -+ punpcklbw mm1, mm7 -+ punpcklbw mm2, mm7 - -- movq mm3, mm2 -- pmullw mm1, three_fourths -+ movq mm3, mm2 -+ pmullw mm1, three_fourths - -- pmullw mm2, one_fourths -- movd mm4, [eax+ecx] -+ pmullw mm2, one_fourths -+ movd mm4, [eax+ecx] - -- pmullw mm3, two_fourths -- punpcklbw mm4, mm7 -+ pmullw mm3, two_fourths -+ punpcklbw mm4, mm7 - -- movq mm5, mm4 -- pmullw mm4, two_fourths -+ movq mm5, mm4 -+ pmullw mm4, two_fourths - -- paddw mm1, mm2 -- movd mm6, [eax+ecx*2] -+ paddw mm1, mm2 -+ movd mm6, [eax+ecx*2] - -- pmullw mm5, one_fourths -- paddw mm1, round_values; -+ pmullw mm5, one_fourths -+ paddw mm1, round_values; - -- paddw mm3, mm4 -- psrlw mm1, 8 -+ paddw mm3, mm4 -+ psrlw mm1, 8 - -- punpcklbw mm6, mm7 -- paddw mm3, round_values -+ punpcklbw mm6, mm7 -+ paddw mm3, 
round_values - -- pmullw mm6, three_fourths -- psrlw mm3, 8 -+ pmullw mm6, three_fourths -+ psrlw mm3, 8 - -- packuswb mm1, mm7 -- packuswb mm3, mm7 -+ packuswb mm1, mm7 -+ packuswb mm3, mm7 - -- movd DWORD PTR [edi], mm0 -- movd DWORD PTR [edi+edx], mm1 -+ movd DWORD PTR [edi], mm0 -+ movd DWORD PTR [edi+edx], mm1 - - -- paddw mm5, mm6 -- movd DWORD PTR [edi+edx*2], mm3 -+ paddw mm5, mm6 -+ movd DWORD PTR [edi+edx*2], mm3 - -- lea eax, [edi+edx*2] -- paddw mm5, round_values -+ lea eax, [edi+edx*2] -+ paddw mm5, round_values - -- psrlw mm5, 8 -- add edi, 4 -+ psrlw mm5, 8 -+ add edi, 4 - -- packuswb mm5, mm7 -- movd DWORD PTR [eax+edx], mm5 -+ packuswb mm5, mm7 -+ movd DWORD PTR [eax+edx], mm5 - -- add esi, 4 -- sub ebx, 4 -+ add esi, 4 -+ sub ebx, 4 - -- jg vs_5_4_loop -+ jg vs_5_4_loop - -- pop ebx -- } -+ pop ebx -+ } - } - - -@@ -1417,96 +1395,94 @@ __declspec(align(16)) const static unsigned short const53_2[] = {256, 171, 85, - static - void horizontal_line_5_3_scale_mmx - ( -- const unsigned char *source, -- unsigned int source_width, -- unsigned char *dest, -- unsigned int dest_width --) --{ -+ const unsigned char *source, -+ unsigned int source_width, -+ unsigned char *dest, -+ unsigned int dest_width -+) { - -- (void) dest_width; -- __asm -- { -+ (void) dest_width; -+ __asm { - -- mov esi, source ; -- mov edi, dest ; -+ mov esi, source; -+ mov edi, dest; - -- mov ecx, source_width ; -- movq mm5, const53_1 ; -+ mov ecx, source_width; -+ movq mm5, const53_1; - -- pxor mm7, mm7 ; -- movq mm6, const53_2 ; -+ pxor mm7, mm7; -+ movq mm6, const53_2; - -- movq mm4, round_values ; -- lea edx, [esi+ecx-5] ; -- horizontal_line_5_3_loop: -+ movq mm4, round_values; -+ lea edx, [esi+ecx-5]; -+ horizontal_line_5_3_loop: - -- movq mm0, QWORD PTR [esi] ; -- 00 01 02 03 04 05 06 07 -- movq mm1, mm0 ; -- 00 01 02 03 04 05 06 07 -+ movq mm0, QWORD PTR [esi]; -+ 00 01 02 03 04 05 06 07 -+ movq mm1, mm0; -+ 00 01 02 03 04 05 06 07 - -- psllw mm0, 8 ; -- xx 00 xx 02 xx 04 xx 06 -- psrlw mm1, 8 ; -- 01 xx 03 xx 05 xx 07 xx -+ psllw mm0, 8; -+ xx 00 xx 02 xx 04 xx 06 -+ psrlw mm1, 8; -+ 01 xx 03 xx 05 xx 07 xx - -- psrlw mm0, 8 ; -- 00 xx 02 xx 04 xx 06 xx -- psllq mm1, 16 ; -- xx xx 01 xx 03 xx 05 xx -+ psrlw mm0, 8; -+ 00 xx 02 xx 04 xx 06 xx -+ psllq mm1, 16; -+ xx xx 01 xx 03 xx 05 xx - -- pmullw mm0, mm6 -+ pmullw mm0, mm6 - -- pmullw mm1, mm5 -- add esi, 5 -+ pmullw mm1, mm5 -+ add esi, 5 - -- add edi, 3 -- paddw mm1, mm0 -+ add edi, 3 -+ paddw mm1, mm0 - -- paddw mm1, mm4 -- psrlw mm1, 8 -+ paddw mm1, mm4 -+ psrlw mm1, 8 - -- cmp esi, edx -- packuswb mm1, mm7 -+ cmp esi, edx -+ packuswb mm1, mm7 - -- movd DWORD PTR [edi-3], mm1 -- jl horizontal_line_5_3_loop -+ movd DWORD PTR [edi-3], mm1 -+ jl horizontal_line_5_3_loop - --//exit condition -- movq mm0, QWORD PTR [esi] ; -- 00 01 02 03 04 05 06 07 -- movq mm1, mm0 ; -- 00 01 02 03 04 05 06 07 -+// exit condition -+ movq mm0, QWORD PTR [esi]; -+ 00 01 02 03 04 05 06 07 -+ movq mm1, mm0; -+ 00 01 02 03 04 05 06 07 - -- psllw mm0, 8 ; -- xx 00 xx 02 xx 04 xx 06 -- psrlw mm1, 8 ; -- 01 xx 03 xx 05 xx 07 xx -+ psllw mm0, 8; -+ xx 00 xx 02 xx 04 xx 06 -+ psrlw mm1, 8; -+ 01 xx 03 xx 05 xx 07 xx - -- psrlw mm0, 8 ; -- 00 xx 02 xx 04 xx 06 xx -- psllq mm1, 16 ; -- xx xx 01 xx 03 xx 05 xx -+ psrlw mm0, 8; -+ 00 xx 02 xx 04 xx 06 xx -+ psllq mm1, 16; -+ xx xx 01 xx 03 xx 05 xx - -- pmullw mm0, mm6 -+ pmullw mm0, mm6 - -- pmullw mm1, mm5 -- paddw mm1, mm0 -+ pmullw mm1, mm5 -+ paddw mm1, mm0 - -- paddw mm1, mm4 -- psrlw mm1, 8 -+ paddw mm1, mm4 -+ psrlw mm1, 8 
- -- packuswb mm1, mm7 -- movd eax, mm1 -+ packuswb mm1, mm7 -+ movd eax, mm1 - -- mov edx, eax -- shr edx, 16 -+ mov edx, eax -+ shr edx, 16 - -- mov WORD PTR[edi], ax -- mov BYTE PTR[edi+2], dl -+ mov WORD PTR[edi], ax -+ mov BYTE PTR[edi+2], dl - -- } -+ } - - } - -@@ -1514,75 +1490,73 @@ __declspec(align(16)) const static unsigned short one_thirds[] = { 85, 85, 85 - __declspec(align(16)) const static unsigned short two_thirds[] = { 171, 171, 171, 171 }; - - static --void vertical_band_5_3_scale_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) --{ -+void vertical_band_5_3_scale_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) { - -- __asm -- { -- push ebx -+ __asm { -+ push ebx - -- mov esi, source // Get the source and destination pointer -- mov ecx, src_pitch // Get the pitch size -+ mov esi, source // Get the source and destination pointer -+ mov ecx, src_pitch // Get the pitch size - -- mov edi, dest // tow lines below -- pxor mm7, mm7 // clear out mm7 -+ mov edi, dest // tow lines below -+ pxor mm7, mm7 // clear out mm7 - -- mov edx, dest_pitch // Loop counter -- movq mm5, one_thirds -+ mov edx, dest_pitch // Loop counter -+ movq mm5, one_thirds - -- movq mm6, two_thirds -- mov ebx, dest_width; -+ movq mm6, two_thirds -+ mov ebx, dest_width; - -- vs_5_3_loop: -+ vs_5_3_loop: - -- movd mm0, DWORD ptr [esi] // src[0]; -- movd mm1, DWORD ptr [esi+ecx] // src[1]; -+ movd mm0, DWORD ptr [esi] // src[0]; -+ movd mm1, DWORD ptr [esi+ecx] // src[1]; - -- movd mm2, DWORD ptr [esi+ecx*2] -- lea eax, [esi+ecx*2] // -+ movd mm2, DWORD ptr [esi+ecx*2] -+ lea eax, [esi+ecx*2] // - -- punpcklbw mm1, mm7 -- punpcklbw mm2, mm7 -+ punpcklbw mm1, mm7 -+ punpcklbw mm2, mm7 - -- pmullw mm1, mm5 -- pmullw mm2, mm6 -+ pmullw mm1, mm5 -+ pmullw mm2, mm6 - -- movd mm3, DWORD ptr [eax+ecx] -- movd mm4, DWORD ptr [eax+ecx*2] -+ movd mm3, DWORD ptr [eax+ecx] -+ movd mm4, DWORD ptr [eax+ecx*2] - -- punpcklbw mm3, mm7 -- punpcklbw mm4, mm7 -+ punpcklbw mm3, mm7 -+ punpcklbw mm4, mm7 - -- pmullw mm3, mm6 -- pmullw mm4, mm5 -+ pmullw mm3, mm6 -+ pmullw mm4, mm5 - - -- movd DWORD PTR [edi], mm0 -- paddw mm1, mm2 -+ movd DWORD PTR [edi], mm0 -+ paddw mm1, mm2 - -- paddw mm1, round_values -- psrlw mm1, 8 -+ paddw mm1, round_values -+ psrlw mm1, 8 - -- packuswb mm1, mm7 -- paddw mm3, mm4 -+ packuswb mm1, mm7 -+ paddw mm3, mm4 - -- paddw mm3, round_values -- movd DWORD PTR [edi+edx], mm1 -+ paddw mm3, round_values -+ movd DWORD PTR [edi+edx], mm1 - -- psrlw mm3, 8 -- packuswb mm3, mm7 -+ psrlw mm3, 8 -+ packuswb mm3, mm7 - -- movd DWORD PTR [edi+edx*2], mm3 -+ movd DWORD PTR [edi+edx*2], mm3 - - -- add edi, 4 -- add esi, 4 -+ add edi, 4 -+ add esi, 4 - -- sub ebx, 4 -- jg vs_5_3_loop -+ sub ebx, 4 -+ jg vs_5_3_loop - -- pop ebx -- } -+ pop ebx -+ } - } - - -@@ -1609,48 +1583,45 @@ void vertical_band_5_3_scale_mmx(unsigned char *source, unsigned int src_pitch, - static - void horizontal_line_2_1_scale_mmx - ( -- const unsigned char *source, -- unsigned int source_width, -- unsigned char *dest, -- unsigned int dest_width --) --{ -- (void) dest_width; -- (void) source_width; -- __asm -- { -- mov esi, source -- mov edi, dest -- -- pxor mm7, mm7 -- mov ecx, dest_width -- -- xor edx, edx -- hs_2_1_loop: -- -- movq mm0, [esi+edx*2] -- psllw mm0, 8 -- -- psrlw mm0, 8 -- packuswb mm0, mm7 -- -- movd DWORD Ptr [edi+edx], mm0; -- add edx, 4 -- -- cmp edx, ecx -- jl hs_2_1_loop -- -- } 
-+ const unsigned char *source, -+ unsigned int source_width, -+ unsigned char *dest, -+ unsigned int dest_width -+) { -+ (void) dest_width; -+ (void) source_width; -+ __asm { -+ mov esi, source -+ mov edi, dest -+ -+ pxor mm7, mm7 -+ mov ecx, dest_width -+ -+ xor edx, edx -+ hs_2_1_loop: -+ -+ movq mm0, [esi+edx*2] -+ psllw mm0, 8 -+ -+ psrlw mm0, 8 -+ packuswb mm0, mm7 -+ -+ movd DWORD Ptr [edi+edx], mm0; -+ add edx, 4 -+ -+ cmp edx, ecx -+ jl hs_2_1_loop -+ -+ } - } - - - - static --void vertical_band_2_1_scale_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) --{ -- (void) dest_pitch; -- (void) src_pitch; -- vpx_memcpy(dest, source, dest_width); -+void vertical_band_2_1_scale_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) { -+ (void) dest_pitch; -+ (void) src_pitch; -+ vpx_memcpy(dest, source, dest_width); - } - - -@@ -1658,91 +1629,88 @@ __declspec(align(16)) const static unsigned short three_sixteenths[] = { 48, 4 - __declspec(align(16)) const static unsigned short ten_sixteenths[] = { 160, 160, 160, 160 }; - - static --void vertical_band_2_1_scale_i_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) --{ -+void vertical_band_2_1_scale_i_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) { - -- (void) dest_pitch; -- __asm -- { -- mov esi, source -- mov edi, dest -+ (void) dest_pitch; -+ __asm { -+ mov esi, source -+ mov edi, dest - -- mov eax, src_pitch -- mov edx, dest_width -+ mov eax, src_pitch -+ mov edx, dest_width - -- pxor mm7, mm7 -- sub esi, eax //back one line -+ pxor mm7, mm7 -+ sub esi, eax // back one line - - -- lea ecx, [esi+edx]; -- movq mm6, round_values; -+ lea ecx, [esi+edx]; -+ movq mm6, round_values; - -- movq mm5, three_sixteenths; -- movq mm4, ten_sixteenths; -+ movq mm5, three_sixteenths; -+ movq mm4, ten_sixteenths; - -- vs_2_1_i_loop: -- movd mm0, [esi] // -- movd mm1, [esi+eax] // -+ vs_2_1_i_loop: -+ movd mm0, [esi] // -+ movd mm1, [esi+eax] // - -- movd mm2, [esi+eax*2] // -- punpcklbw mm0, mm7 -+ movd mm2, [esi+eax*2] // -+ punpcklbw mm0, mm7 - -- pmullw mm0, mm5 -- punpcklbw mm1, mm7 -+ pmullw mm0, mm5 -+ punpcklbw mm1, mm7 - -- pmullw mm1, mm4 -- punpcklbw mm2, mm7 -+ pmullw mm1, mm4 -+ punpcklbw mm2, mm7 - -- pmullw mm2, mm5 -- paddw mm0, round_values -+ pmullw mm2, mm5 -+ paddw mm0, round_values - -- paddw mm1, mm2 -- paddw mm0, mm1 -+ paddw mm1, mm2 -+ paddw mm0, mm1 - -- psrlw mm0, 8 -- packuswb mm0, mm7 -+ psrlw mm0, 8 -+ packuswb mm0, mm7 - -- movd DWORD PTR [edi], mm0 -- add esi, 4 -+ movd DWORD PTR [edi], mm0 -+ add esi, 4 - -- add edi, 4; -- cmp esi, ecx -- jl vs_2_1_i_loop -+ add edi, 4; -+ cmp esi, ecx -+ jl vs_2_1_i_loop - -- } -+ } - } - - - - void --register_mmxscalers(void) --{ -- vp8_horizontal_line_1_2_scale = horizontal_line_1_2_scale_mmx; -- vp8_vertical_band_1_2_scale = vertical_band_1_2_scale_mmx; -- vp8_last_vertical_band_1_2_scale = last_vertical_band_1_2_scale_mmx; -- vp8_horizontal_line_3_5_scale = horizontal_line_3_5_scale_mmx; -- vp8_vertical_band_3_5_scale = vertical_band_3_5_scale_mmx; -- vp8_last_vertical_band_3_5_scale = last_vertical_band_3_5_scale_mmx; -- vp8_horizontal_line_4_5_scale = horizontal_line_4_5_scale_mmx; -- vp8_vertical_band_4_5_scale = vertical_band_4_5_scale_mmx; -- vp8_last_vertical_band_4_5_scale = 
last_vertical_band_4_5_scale_mmx; -- -- vp8_horizontal_line_3_4_scale = vp8cx_horizontal_line_3_4_scale_c; -- vp8_vertical_band_3_4_scale = vp8cx_vertical_band_3_4_scale_c; -- vp8_last_vertical_band_3_4_scale = vp8cx_last_vertical_band_3_4_scale_c; -- vp8_horizontal_line_2_3_scale = vp8cx_horizontal_line_2_3_scale_c; -- vp8_vertical_band_2_3_scale = vp8cx_vertical_band_2_3_scale_c; -- vp8_last_vertical_band_2_3_scale = vp8cx_last_vertical_band_2_3_scale_c; -- -- -- -- vp8_vertical_band_5_4_scale = vertical_band_5_4_scale_mmx; -- vp8_vertical_band_5_3_scale = vertical_band_5_3_scale_mmx; -- vp8_vertical_band_2_1_scale = vertical_band_2_1_scale_mmx; -- vp8_vertical_band_2_1_scale_i = vertical_band_2_1_scale_i_mmx; -- vp8_horizontal_line_2_1_scale = horizontal_line_2_1_scale_mmx; -- vp8_horizontal_line_5_3_scale = horizontal_line_5_3_scale_mmx; -- vp8_horizontal_line_5_4_scale = horizontal_line_5_4_scale_mmx; -+register_mmxscalers(void) { -+ vp8_horizontal_line_1_2_scale = horizontal_line_1_2_scale_mmx; -+ vp8_vertical_band_1_2_scale = vertical_band_1_2_scale_mmx; -+ vp8_last_vertical_band_1_2_scale = last_vertical_band_1_2_scale_mmx; -+ vp8_horizontal_line_3_5_scale = horizontal_line_3_5_scale_mmx; -+ vp8_vertical_band_3_5_scale = vertical_band_3_5_scale_mmx; -+ vp8_last_vertical_band_3_5_scale = last_vertical_band_3_5_scale_mmx; -+ vp8_horizontal_line_4_5_scale = horizontal_line_4_5_scale_mmx; -+ vp8_vertical_band_4_5_scale = vertical_band_4_5_scale_mmx; -+ vp8_last_vertical_band_4_5_scale = last_vertical_band_4_5_scale_mmx; -+ -+ vp8_horizontal_line_3_4_scale = vp8cx_horizontal_line_3_4_scale_c; -+ vp8_vertical_band_3_4_scale = vp8cx_vertical_band_3_4_scale_c; -+ vp8_last_vertical_band_3_4_scale = vp8cx_last_vertical_band_3_4_scale_c; -+ vp8_horizontal_line_2_3_scale = vp8cx_horizontal_line_2_3_scale_c; -+ vp8_vertical_band_2_3_scale = vp8cx_vertical_band_2_3_scale_c; -+ vp8_last_vertical_band_2_3_scale = vp8cx_last_vertical_band_2_3_scale_c; -+ -+ -+ -+ vp8_vertical_band_5_4_scale = vertical_band_5_4_scale_mmx; -+ vp8_vertical_band_5_3_scale = vertical_band_5_3_scale_mmx; -+ vp8_vertical_band_2_1_scale = vertical_band_2_1_scale_mmx; -+ vp8_vertical_band_2_1_scale_i = vertical_band_2_1_scale_i_mmx; -+ vp8_horizontal_line_2_1_scale = horizontal_line_2_1_scale_mmx; -+ vp8_horizontal_line_5_3_scale = horizontal_line_5_3_scale_mmx; -+ vp8_horizontal_line_5_4_scale = horizontal_line_5_4_scale_mmx; - - - -diff --git a/vpx_scale/win32/scalesystemdependent.c b/vpx_scale/win32/scalesystemdependent.c -index 19e61c3..98913d1 100644 ---- a/vpx_scale/win32/scalesystemdependent.c -+++ b/vpx_scale/win32/scalesystemdependent.c -@@ -46,46 +46,42 @@ extern void register_mmxscalers(void); - * - ****************************************************************************/ - void --vp8_scale_machine_specific_config(void) --{ -- // If MMX supported then set to use MMX versions of functions else -- // use original 'C' versions. -- int mmx_enabled; -- int xmm_enabled; -- int wmt_enabled; -+vp8_scale_machine_specific_config(void) { -+ // If MMX supported then set to use MMX versions of functions else -+ // use original 'C' versions. 
-+ int mmx_enabled; -+ int xmm_enabled; -+ int wmt_enabled; - -- vpx_get_processor_flags(&mmx_enabled, &xmm_enabled, &wmt_enabled); -+ vpx_get_processor_flags(&mmx_enabled, &xmm_enabled, &wmt_enabled); - -- if (mmx_enabled || xmm_enabled || wmt_enabled) -- { -- register_mmxscalers(); -- } -- else -- { -- vp8_horizontal_line_1_2_scale = vp8cx_horizontal_line_1_2_scale_c; -- vp8_vertical_band_1_2_scale = vp8cx_vertical_band_1_2_scale_c; -- vp8_last_vertical_band_1_2_scale = vp8cx_last_vertical_band_1_2_scale_c; -- vp8_horizontal_line_3_5_scale = vp8cx_horizontal_line_3_5_scale_c; -- vp8_vertical_band_3_5_scale = vp8cx_vertical_band_3_5_scale_c; -- vp8_last_vertical_band_3_5_scale = vp8cx_last_vertical_band_3_5_scale_c; -- vp8_horizontal_line_3_4_scale = vp8cx_horizontal_line_3_4_scale_c; -- vp8_vertical_band_3_4_scale = vp8cx_vertical_band_3_4_scale_c; -- vp8_last_vertical_band_3_4_scale = vp8cx_last_vertical_band_3_4_scale_c; -- vp8_horizontal_line_2_3_scale = vp8cx_horizontal_line_2_3_scale_c; -- vp8_vertical_band_2_3_scale = vp8cx_vertical_band_2_3_scale_c; -- vp8_last_vertical_band_2_3_scale = vp8cx_last_vertical_band_2_3_scale_c; -- vp8_horizontal_line_4_5_scale = vp8cx_horizontal_line_4_5_scale_c; -- vp8_vertical_band_4_5_scale = vp8cx_vertical_band_4_5_scale_c; -- vp8_last_vertical_band_4_5_scale = vp8cx_last_vertical_band_4_5_scale_c; -+ if (mmx_enabled || xmm_enabled || wmt_enabled) { -+ register_mmxscalers(); -+ } else { -+ vp8_horizontal_line_1_2_scale = vp8cx_horizontal_line_1_2_scale_c; -+ vp8_vertical_band_1_2_scale = vp8cx_vertical_band_1_2_scale_c; -+ vp8_last_vertical_band_1_2_scale = vp8cx_last_vertical_band_1_2_scale_c; -+ vp8_horizontal_line_3_5_scale = vp8cx_horizontal_line_3_5_scale_c; -+ vp8_vertical_band_3_5_scale = vp8cx_vertical_band_3_5_scale_c; -+ vp8_last_vertical_band_3_5_scale = vp8cx_last_vertical_band_3_5_scale_c; -+ vp8_horizontal_line_3_4_scale = vp8cx_horizontal_line_3_4_scale_c; -+ vp8_vertical_band_3_4_scale = vp8cx_vertical_band_3_4_scale_c; -+ vp8_last_vertical_band_3_4_scale = vp8cx_last_vertical_band_3_4_scale_c; -+ vp8_horizontal_line_2_3_scale = vp8cx_horizontal_line_2_3_scale_c; -+ vp8_vertical_band_2_3_scale = vp8cx_vertical_band_2_3_scale_c; -+ vp8_last_vertical_band_2_3_scale = vp8cx_last_vertical_band_2_3_scale_c; -+ vp8_horizontal_line_4_5_scale = vp8cx_horizontal_line_4_5_scale_c; -+ vp8_vertical_band_4_5_scale = vp8cx_vertical_band_4_5_scale_c; -+ vp8_last_vertical_band_4_5_scale = vp8cx_last_vertical_band_4_5_scale_c; - - -- vp8_vertical_band_5_4_scale = vp8cx_vertical_band_5_4_scale_c; -- vp8_vertical_band_5_3_scale = vp8cx_vertical_band_5_3_scale_c; -- vp8_vertical_band_2_1_scale = vp8cx_vertical_band_2_1_scale_c; -- vp8_vertical_band_2_1_scale_i = vp8cx_vertical_band_2_1_scale_i_c; -- vp8_horizontal_line_2_1_scale = vp8cx_horizontal_line_2_1_scale_c; -- vp8_horizontal_line_5_3_scale = vp8cx_horizontal_line_5_3_scale_c; -- vp8_horizontal_line_5_4_scale = vp8cx_horizontal_line_5_4_scale_c; -+ vp8_vertical_band_5_4_scale = vp8cx_vertical_band_5_4_scale_c; -+ vp8_vertical_band_5_3_scale = vp8cx_vertical_band_5_3_scale_c; -+ vp8_vertical_band_2_1_scale = vp8cx_vertical_band_2_1_scale_c; -+ vp8_vertical_band_2_1_scale_i = vp8cx_vertical_band_2_1_scale_i_c; -+ vp8_horizontal_line_2_1_scale = vp8cx_horizontal_line_2_1_scale_c; -+ vp8_horizontal_line_5_3_scale = vp8cx_horizontal_line_5_3_scale_c; -+ vp8_horizontal_line_5_4_scale = vp8cx_horizontal_line_5_4_scale_c; - -- } -+ } - } -diff --git a/vpx_scale/yv12config.h b/vpx_scale/yv12config.h 
-index 800f700..6a8a1fc 100644 ---- a/vpx_scale/yv12config.h -+++ b/vpx_scale/yv12config.h -@@ -16,54 +16,54 @@ extern "C" - { - #endif - --#define VP7BORDERINPIXELS 48 - #define VP8BORDERINPIXELS 32 -+#define VP9BORDERINPIXELS 64 -+#define VP9_INTERP_EXTEND 4 - -- /************************************* -- For INT_YUV: -+ /************************************* -+ For INT_YUV: - -- Y = (R+G*2+B)/4; -- U = (R-B)/2; -- V = (G*2 - R - B)/4; -- And -- R = Y+U-V; -- G = Y+V; -- B = Y-U-V; -- ************************************/ -- typedef enum -- { -- REG_YUV = 0, /* Regular yuv */ -- INT_YUV = 1 /* The type of yuv that can be tranfer to and from RGB through integer transform */ -- } -- YUV_TYPE; -+ Y = (R+G*2+B)/4; -+ U = (R-B)/2; -+ V = (G*2 - R - B)/4; -+ And -+ R = Y+U-V; -+ G = Y+V; -+ B = Y-U-V; -+ ************************************/ -+ typedef enum -+ { -+ REG_YUV = 0, /* Regular yuv */ -+ INT_YUV = 1 /* The type of yuv that can be tranfer to and from RGB through integer transform */ -+ } -+ YUV_TYPE; - -- typedef struct yv12_buffer_config -- { -- int y_width; -- int y_height; -- int y_stride; --/* int yinternal_width; */ -+ typedef struct yv12_buffer_config { -+ int y_width; -+ int y_height; -+ int y_stride; -+ /* int yinternal_width; */ - -- int uv_width; -- int uv_height; -- int uv_stride; --/* int uvinternal_width; */ -+ int uv_width; -+ int uv_height; -+ int uv_stride; -+ /* int uvinternal_width; */ - -- unsigned char *y_buffer; -- unsigned char *u_buffer; -- unsigned char *v_buffer; -+ unsigned char *y_buffer; -+ unsigned char *u_buffer; -+ unsigned char *v_buffer; - -- unsigned char *buffer_alloc; -- int border; -- int frame_size; -- YUV_TYPE clrtype; -+ unsigned char *buffer_alloc; -+ int border; -+ int frame_size; -+ YUV_TYPE clrtype; - -- int corrupted; -- int flags; -- } YV12_BUFFER_CONFIG; -+ int corrupted; -+ int flags; -+ } YV12_BUFFER_CONFIG; - -- int vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height, int border); -- int vp8_yv12_de_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf); -+ int vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height, int border); -+ int vp8_yv12_de_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf); - - #ifdef __cplusplus - } -diff --git a/vpxdec.c b/vpxdec.c -index 4482f3d..9b728bf 100644 ---- a/vpxdec.c -+++ b/vpxdec.c -@@ -52,7 +52,7 @@ static const char *exec_name; - static const struct - { - char const *name; -- const vpx_codec_iface_t *iface; -+ vpx_codec_iface_t *iface; - unsigned int fourcc; - unsigned int fourcc_mask; - } ifaces[] = -@@ -152,7 +152,8 @@ static void usage_exit() - "write to. If the\n argument does not include any escape " - "characters, the output will be\n written to a single file. " - "Otherwise, the filename will be calculated by\n expanding " -- "the following escape characters:\n" -+ "the following escape characters:\n"); -+ fprintf(stderr, - "\n\t%%w - Frame width" - "\n\t%%h - Frame height" - "\n\t%% - Frame number, zero padded to places (1..9)" -@@ -356,7 +357,7 @@ void out_put(void *out, const uint8_t *buf, unsigned int len, int do_md5) - } - else - { -- if(fwrite(buf, 1, len, out)); -+ (void) fwrite(buf, 1, len, out); - } - } - -@@ -502,7 +503,7 @@ nestegg_seek_cb(int64_t offset, int whence, void * userdata) - case NESTEGG_SEEK_CUR: whence = SEEK_CUR; break; - case NESTEGG_SEEK_END: whence = SEEK_END; break; - }; -- return fseek(userdata, offset, whence)? -1 : 0; -+ return fseek(userdata, (long)offset, whence)? 
-1 : 0; - } - - -@@ -559,7 +560,7 @@ webm_guess_framerate(struct input_ctx *input, - goto fail; - - *fps_num = (i - 1) * 1000000; -- *fps_den = tstamp / 1000; -+ *fps_den = (unsigned int)(tstamp / 1000); - return 0; - fail: - nestegg_destroy(input->nestegg_ctx); -@@ -580,10 +581,10 @@ file_is_webm(struct input_ctx *input, - unsigned int i, n; - int track_type = -1; - -- nestegg_io io = {nestegg_read_cb, nestegg_seek_cb, nestegg_tell_cb, -- input->infile}; -+ nestegg_io io = {nestegg_read_cb, nestegg_seek_cb, nestegg_tell_cb, 0}; - nestegg_video_params params; - -+ io.userdata = input->infile; - if(nestegg_init(&input->nestegg_ctx, io, NULL)) - goto fail; - -@@ -647,7 +648,7 @@ void generate_filename(const char *pattern, char *out, size_t q_len, - { - size_t pat_len; - -- // parse the pattern -+ /* parse the pattern */ - q[q_len - 1] = '\0'; - switch(p[1]) - { -@@ -677,7 +678,7 @@ void generate_filename(const char *pattern, char *out, size_t q_len, - { - size_t copy_len; - -- // copy the next segment -+ /* copy the next segment */ - if(!next_pat) - copy_len = strlen(p); - else -@@ -922,7 +923,7 @@ int main(int argc, const char **argv_) - p = strchr(p, '%'); - if(p && p[1] >= '1' && p[1] <= '9') - { -- // pattern contains sequence number, so it's not unique. -+ /* pattern contains sequence number, so it's not unique. */ - single_file = 0; - break; - } -@@ -962,7 +963,8 @@ int main(int argc, const char **argv_) - That will have to wait until these tools support WebM natively.*/ - sprintf(buffer, "YUV4MPEG2 C%s W%u H%u F%u:%u I%c\n", - "420jpeg", width, height, fps_num, fps_den, 'p'); -- out_put(out, (unsigned char *)buffer, strlen(buffer), do_md5); -+ out_put(out, (unsigned char *)buffer, -+ (unsigned int)strlen(buffer), do_md5); - } - - /* Try to determine the codec from the fourcc. 
*/ -@@ -1040,7 +1042,7 @@ int main(int argc, const char **argv_) - - vpx_usec_timer_start(&timer); - -- if (vpx_codec_decode(&decoder, buf, buf_sz, NULL, 0)) -+ if (vpx_codec_decode(&decoder, buf, (unsigned int)buf_sz, NULL, 0)) - { - const char *detail = vpx_codec_error_detail(&decoder); - fprintf(stderr, "Failed to decode frame: %s\n", vpx_codec_error(&decoder)); -@@ -1052,7 +1054,7 @@ int main(int argc, const char **argv_) - } - - vpx_usec_timer_mark(&timer); -- dx_time += vpx_usec_timer_elapsed(&timer); -+ dx_time += (unsigned int)vpx_usec_timer_elapsed(&timer); - - ++frame_in; - -@@ -1064,9 +1066,14 @@ int main(int argc, const char **argv_) - } - frames_corrupted += corrupted; - -+ vpx_usec_timer_start(&timer); -+ - if ((img = vpx_codec_get_frame(&decoder, &iter))) - ++frame_out; - -+ vpx_usec_timer_mark(&timer); -+ dx_time += (unsigned int)vpx_usec_timer_elapsed(&timer); -+ - if (progress) - show_progress(frame_in, frame_out, dx_time); - -diff --git a/vpxenc.c b/vpxenc.c -index d32b21b..c9547ea 100644 ---- a/vpxenc.c -+++ b/vpxenc.c -@@ -54,11 +54,7 @@ typedef __int64 off_t; - #define off_t off64_t - #endif - --#if defined(_MSC_VER) --#define LITERALU64(n) n --#else --#define LITERALU64(n) n##LLU --#endif -+#define LITERALU64(hi,lo) ((((uint64_t)hi)<<32)|lo) - - /* We should use 32-bit file operations in WebM file format - * when building ARM executable file (.axf) with RVCT */ -@@ -68,12 +64,28 @@ typedef long off_t; - #define ftello ftell - #endif - -+/* Swallow warnings about unused results of fread/fwrite */ -+static size_t wrap_fread(void *ptr, size_t size, size_t nmemb, -+ FILE *stream) -+{ -+ return fread(ptr, size, nmemb, stream); -+} -+#define fread wrap_fread -+ -+static size_t wrap_fwrite(const void *ptr, size_t size, size_t nmemb, -+ FILE *stream) -+{ -+ return fwrite(ptr, size, nmemb, stream); -+} -+#define fwrite wrap_fwrite -+ -+ - static const char *exec_name; - - static const struct codec_item - { - char const *name; -- const vpx_codec_iface_t *iface; -+ vpx_codec_iface_t *iface; - unsigned int fourcc; - } codecs[] = - { -@@ -245,7 +257,7 @@ void stats_write(stats_io_t *stats, const void *pkt, size_t len) - { - if (stats->file) - { -- if(fwrite(pkt, 1, len, stats->file)); -+ (void) fwrite(pkt, 1, len, stats->file); - } - else - { -@@ -338,7 +350,7 @@ static int read_frame(struct input_state *input, vpx_image_t *img) - * write_ivf_frame_header() for documentation on the frame header - * layout. 
- */ -- if(fread(junk, 1, IVF_FRAME_HDR_SZ, f)); -+ (void) fread(junk, 1, IVF_FRAME_HDR_SZ, f); - } - - for (plane = 0; plane < 3; plane++) -@@ -468,7 +480,7 @@ static void write_ivf_file_header(FILE *outfile, - mem_put_le32(header + 24, frame_cnt); /* length */ - mem_put_le32(header + 28, 0); /* unused */ - -- if(fwrite(header, 1, 32, outfile)); -+ (void) fwrite(header, 1, 32, outfile); - } - - -@@ -482,18 +494,18 @@ static void write_ivf_frame_header(FILE *outfile, - return; - - pts = pkt->data.frame.pts; -- mem_put_le32(header, pkt->data.frame.sz); -+ mem_put_le32(header, (int)pkt->data.frame.sz); - mem_put_le32(header + 4, pts & 0xFFFFFFFF); - mem_put_le32(header + 8, pts >> 32); - -- if(fwrite(header, 1, 12, outfile)); -+ (void) fwrite(header, 1, 12, outfile); - } - - static void write_ivf_frame_size(FILE *outfile, size_t size) - { - char header[4]; -- mem_put_le32(header, size); -- fwrite(header, 1, 4, outfile); -+ mem_put_le32(header, (int)size); -+ (void) fwrite(header, 1, 4, outfile); - } - - -@@ -541,13 +553,13 @@ struct EbmlGlobal - - void Ebml_Write(EbmlGlobal *glob, const void *buffer_in, unsigned long len) - { -- if(fwrite(buffer_in, 1, len, glob->stream)); -+ (void) fwrite(buffer_in, 1, len, glob->stream); - } - - #define WRITE_BUFFER(s) \ - for(i = len-1; i>=0; i--)\ - { \ -- x = *(const s *)buffer_in >> (i * CHAR_BIT); \ -+ x = (char)(*(const s *)buffer_in >> (i * CHAR_BIT)); \ - Ebml_Write(glob, &x, 1); \ - } - void Ebml_Serialize(EbmlGlobal *glob, const void *buffer_in, int buffer_size, unsigned long len) -@@ -597,9 +609,9 @@ static void - Ebml_StartSubElement(EbmlGlobal *glob, EbmlLoc *ebmlLoc, - unsigned long class_id) - { -- //todo this is always taking 8 bytes, this may need later optimization -- //this is a key that says length unknown -- uint64_t unknownLen = LITERALU64(0x01FFFFFFFFFFFFFF); -+ /* todo this is always taking 8 bytes, this may need later optimization */ -+ /* this is a key that says length unknown */ -+ uint64_t unknownLen = LITERALU64(0x01FFFFFF, 0xFFFFFFFF); - - Ebml_WriteID(glob, class_id); - *ebmlLoc = ftello(glob->stream); -@@ -617,7 +629,7 @@ Ebml_EndSubElement(EbmlGlobal *glob, EbmlLoc *ebmlLoc) - - /* Calculate the size of this element */ - size = pos - *ebmlLoc - 8; -- size |= LITERALU64(0x0100000000000000); -+ size |= LITERALU64(0x01000000,0x00000000); - - /* Seek back to the beginning of the element and write the new size */ - fseeko(glob->stream, *ebmlLoc, SEEK_SET); -@@ -664,7 +676,7 @@ write_webm_seek_info(EbmlGlobal *ebml) - Ebml_EndSubElement(ebml, &start); - } - { -- //segment info -+ /* segment info */ - EbmlLoc startInfo; - uint64_t frame_time; - char version_string[64]; -@@ -686,7 +698,7 @@ write_webm_seek_info(EbmlGlobal *ebml) - Ebml_StartSubElement(ebml, &startInfo, Info); - Ebml_SerializeUnsigned(ebml, TimecodeScale, 1000000); - Ebml_SerializeFloat(ebml, Segment_Duration, -- ebml->last_pts_ms + frame_time); -+ (double)(ebml->last_pts_ms + frame_time)); - Ebml_SerializeString(ebml, 0x4D80, version_string); - Ebml_SerializeString(ebml, 0x5741, version_string); - Ebml_EndSubElement(ebml, &startInfo); -@@ -704,16 +716,16 @@ write_webm_file_header(EbmlGlobal *glob, - EbmlLoc start; - Ebml_StartSubElement(glob, &start, EBML); - Ebml_SerializeUnsigned(glob, EBMLVersion, 1); -- Ebml_SerializeUnsigned(glob, EBMLReadVersion, 1); //EBML Read Version -- Ebml_SerializeUnsigned(glob, EBMLMaxIDLength, 4); //EBML Max ID Length -- Ebml_SerializeUnsigned(glob, EBMLMaxSizeLength, 8); //EBML Max Size Length -- Ebml_SerializeString(glob, DocType, 
"webm"); //Doc Type -- Ebml_SerializeUnsigned(glob, DocTypeVersion, 2); //Doc Type Version -- Ebml_SerializeUnsigned(glob, DocTypeReadVersion, 2); //Doc Type Read Version -+ Ebml_SerializeUnsigned(glob, EBMLReadVersion, 1); -+ Ebml_SerializeUnsigned(glob, EBMLMaxIDLength, 4); -+ Ebml_SerializeUnsigned(glob, EBMLMaxSizeLength, 8); -+ Ebml_SerializeString(glob, DocType, "webm"); -+ Ebml_SerializeUnsigned(glob, DocTypeVersion, 2); -+ Ebml_SerializeUnsigned(glob, DocTypeReadVersion, 2); - Ebml_EndSubElement(glob, &start); - } - { -- Ebml_StartSubElement(glob, &glob->startSegment, Segment); //segment -+ Ebml_StartSubElement(glob, &glob->startSegment, Segment); - glob->position_reference = ftello(glob->stream); - glob->framerate = *fps; - write_webm_seek_info(glob); -@@ -731,7 +743,7 @@ write_webm_file_header(EbmlGlobal *glob, - Ebml_SerializeUnsigned(glob, TrackNumber, trackNumber); - glob->track_id_pos = ftello(glob->stream); - Ebml_SerializeUnsigned32(glob, TrackUID, trackID); -- Ebml_SerializeUnsigned(glob, TrackType, 1); //video is always 1 -+ Ebml_SerializeUnsigned(glob, TrackType, 1); - Ebml_SerializeString(glob, CodecID, "V_VP8"); - { - unsigned int pixelWidth = cfg->g_w; -@@ -744,13 +756,13 @@ write_webm_file_header(EbmlGlobal *glob, - Ebml_SerializeUnsigned(glob, PixelHeight, pixelHeight); - Ebml_SerializeUnsigned(glob, StereoMode, stereo_fmt); - Ebml_SerializeFloat(glob, FrameRate, frameRate); -- Ebml_EndSubElement(glob, &videoStart); //Video -+ Ebml_EndSubElement(glob, &videoStart); - } -- Ebml_EndSubElement(glob, &start); //Track Entry -+ Ebml_EndSubElement(glob, &start); /* Track Entry */ - } - Ebml_EndSubElement(glob, &trackStart); - } -- // segment element is open -+ /* segment element is open */ - } - } - -@@ -778,7 +790,7 @@ write_webm_block(EbmlGlobal *glob, - if(pts_ms - glob->cluster_timecode > SHRT_MAX) - start_cluster = 1; - else -- block_timecode = pts_ms - glob->cluster_timecode; -+ block_timecode = (unsigned short)pts_ms - glob->cluster_timecode; - - is_keyframe = (pkt->data.frame.flags & VPX_FRAME_IS_KEY); - if(start_cluster || is_keyframe) -@@ -789,9 +801,9 @@ write_webm_block(EbmlGlobal *glob, - /* Open the new cluster */ - block_timecode = 0; - glob->cluster_open = 1; -- glob->cluster_timecode = pts_ms; -+ glob->cluster_timecode = (uint32_t)pts_ms; - glob->cluster_pos = ftello(glob->stream); -- Ebml_StartSubElement(glob, &glob->startCluster, Cluster); //cluster -+ Ebml_StartSubElement(glob, &glob->startCluster, Cluster); /* cluster */ - Ebml_SerializeUnsigned(glob, Timecode, glob->cluster_timecode); - - /* Save a cue point if this is a keyframe. 
*/ -@@ -816,7 +828,7 @@ write_webm_block(EbmlGlobal *glob, - /* Write the Simple Block */ - Ebml_WriteID(glob, SimpleBlock); - -- block_length = pkt->data.frame.sz + 4; -+ block_length = (unsigned long)pkt->data.frame.sz + 4; - block_length |= 0x10000000; - Ebml_Serialize(glob, &block_length, sizeof(block_length), 4); - -@@ -833,7 +845,7 @@ write_webm_block(EbmlGlobal *glob, - flags |= 0x08; - Ebml_Write(glob, &flags, 1); - -- Ebml_Write(glob, pkt->data.frame.buf, pkt->data.frame.sz); -+ Ebml_Write(glob, pkt->data.frame.buf, (unsigned long)pkt->data.frame.sz); - } - - -@@ -865,7 +877,6 @@ write_webm_file_footer(EbmlGlobal *glob, long hash) - Ebml_SerializeUnsigned(glob, CueTrack, 1); - Ebml_SerializeUnsigned64(glob, CueClusterPosition, - cue->loc - glob->position_reference); -- //Ebml_SerializeUnsigned(glob, CueBlockNumber, cue->blockNumber); - Ebml_EndSubElement(glob, &start); - } - Ebml_EndSubElement(glob, &start); -@@ -942,7 +953,7 @@ static double vp8_mse2psnr(double Samples, double Peak, double Mse) - if ((double)Mse > 0.0) - psnr = 10.0 * log10(Peak * Peak * Samples / Mse); - else -- psnr = 60; // Limit to prevent / 0 -+ psnr = 60; /* Limit to prevent / 0 */ - - if (psnr > 60) - psnr = 60; -@@ -978,6 +989,8 @@ static const arg_def_t good_dl = ARG_DEF(NULL, "good", 0, - "Use Good Quality Deadline"); - static const arg_def_t rt_dl = ARG_DEF(NULL, "rt", 0, - "Use Realtime Quality Deadline"); -+static const arg_def_t quietarg = ARG_DEF("q", "quiet", 0, -+ "Do not print encode progress"); - static const arg_def_t verbosearg = ARG_DEF("v", "verbose", 0, - "Show encoder parameters"); - static const arg_def_t psnrarg = ARG_DEF(NULL, "psnr", 0, -@@ -997,7 +1010,7 @@ static const arg_def_t *main_args[] = - &debugmode, - &outputfile, &codecarg, &passes, &pass_arg, &fpf_name, &limit, &deadline, - &best_dl, &good_dl, &rt_dl, -- &verbosearg, &psnrarg, &use_ivf, &out_part, &q_hist_n, &rate_hist_n, -+ &quietarg, &verbosearg, &psnrarg, &use_ivf, &out_part, &q_hist_n, &rate_hist_n, - NULL - }; - -@@ -1225,7 +1238,7 @@ static int merge_hist_buckets(struct hist_bucket *bucket, - { - int last_bucket = buckets - 1; - -- // merge the small bucket with an adjacent one. -+ /* merge the small bucket with an adjacent one. 
*/ - if(small_bucket == 0) - merge_bucket = 1; - else if(small_bucket == last_bucket) -@@ -1325,7 +1338,7 @@ static void show_histogram(const struct hist_bucket *bucket, - int j; - float pct; - -- pct = 100.0 * (float)bucket[i].count / (float)total; -+ pct = (float)(100.0 * bucket[i].count / total); - len = HIST_BAR_MAX * bucket[i].count / scale; - if(len < 1) - len = 1; -@@ -1393,7 +1406,7 @@ static void init_rate_histogram(struct rate_hist *hist, - */ - hist->samples = cfg->rc_buf_sz * 5 / 4 * fps->num / fps->den / 1000; - -- // prevent division by zero -+ /* prevent division by zero */ - if (hist->samples == 0) - hist->samples=1; - -@@ -1427,7 +1440,7 @@ static void update_rate_histogram(struct rate_hist *hist, - - idx = hist->frames++ % hist->samples; - hist->pts[idx] = now; -- hist->sz[idx] = pkt->data.frame.sz; -+ hist->sz[idx] = (int)pkt->data.frame.sz; - - if(now < cfg->rc_buf_initial_sz) - return; -@@ -1449,15 +1462,15 @@ static void update_rate_histogram(struct rate_hist *hist, - return; - - avg_bitrate = sum_sz * 8 * 1000 / (now - then); -- idx = avg_bitrate * (RATE_BINS/2) / (cfg->rc_target_bitrate * 1000); -+ idx = (int)(avg_bitrate * (RATE_BINS/2) / (cfg->rc_target_bitrate * 1000)); - if(idx < 0) - idx = 0; - if(idx > RATE_BINS-1) - idx = RATE_BINS-1; - if(hist->bucket[idx].low > avg_bitrate) -- hist->bucket[idx].low = avg_bitrate; -+ hist->bucket[idx].low = (int)avg_bitrate; - if(hist->bucket[idx].high < avg_bitrate) -- hist->bucket[idx].high = avg_bitrate; -+ hist->bucket[idx].high = (int)avg_bitrate; - hist->bucket[idx].count++; - hist->total++; - } -@@ -1495,6 +1508,7 @@ struct global_config - int usage; - int deadline; - int use_i420; -+ int quiet; - int verbose; - int limit; - int show_psnr; -@@ -1619,6 +1633,8 @@ static void parse_global_config(struct global_config *global, char **argv) - global->use_i420 = 0; - else if (arg_match(&arg, &use_i420, argi)) - global->use_i420 = 1; -+ else if (arg_match(&arg, &quietarg, argi)) -+ global->quiet = 1; - else if (arg_match(&arg, &verbosearg, argi)) - global->verbose = 1; - else if (arg_match(&arg, &limit, argi)) -@@ -2000,7 +2016,7 @@ static void set_default_kf_interval(struct stream_state *stream, - { - double framerate = (double)global->framerate.num/global->framerate.den; - if (framerate > 0.0) -- stream->config.cfg.kf_max_dist = 5.0*framerate; -+ stream->config.cfg.kf_max_dist = (unsigned int)(5.0*framerate); - } - } - -@@ -2180,7 +2196,7 @@ static void encode_frame(struct stream_state *stream, - / cfg->g_timebase.num / global->framerate.num; - vpx_usec_timer_start(&timer); - vpx_codec_encode(&stream->encoder, img, frame_start, -- next_frame_start - frame_start, -+ (unsigned long)(next_frame_start - frame_start), - 0, global->deadline); - vpx_usec_timer_mark(&timer); - stream->cx_time += vpx_usec_timer_elapsed(&timer); -@@ -2224,8 +2240,9 @@ static void get_cx_data(struct stream_state *stream, - { - stream->frames_out++; - } -- fprintf(stderr, " %6luF", -- (unsigned long)pkt->data.frame.sz); -+ if (!global->quiet) -+ fprintf(stderr, " %6luF", -+ (unsigned long)pkt->data.frame.sz); - - update_rate_histogram(&stream->rate_hist, cfg, pkt); - if(stream->config.write_webm) -@@ -2233,7 +2250,8 @@ static void get_cx_data(struct stream_state *stream, - /* Update the hash */ - if(!stream->ebml.debug) - stream->hash = murmur(pkt->data.frame.buf, -- pkt->data.frame.sz, stream->hash); -+ (int)pkt->data.frame.sz, -+ stream->hash); - - write_webm_block(&stream->ebml, cfg, pkt); - } -@@ -2259,15 +2277,16 @@ static void get_cx_data(struct 
stream_state *stream, - } - } - -- fwrite(pkt->data.frame.buf, 1, -- pkt->data.frame.sz, stream->file); -+ (void) fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, -+ stream->file); - } - stream->nbytes += pkt->data.raw.sz; - break; - case VPX_CODEC_STATS_PKT: - stream->frames_out++; -- fprintf(stderr, " %6luS", -- (unsigned long)pkt->data.twopass_stats.sz); -+ if (!global->quiet) -+ fprintf(stderr, " %6luS", -+ (unsigned long)pkt->data.twopass_stats.sz); - stats_write(&stream->stats, - pkt->data.twopass_stats.buf, - pkt->data.twopass_stats.sz); -@@ -2283,7 +2302,8 @@ static void get_cx_data(struct stream_state *stream, - stream->psnr_samples_total += pkt->data.psnr.samples[0]; - for (i = 0; i < 4; i++) - { -- fprintf(stderr, "%.3lf ", pkt->data.psnr.psnr[i]); -+ if (!global->quiet) -+ fprintf(stderr, "%.3f ", pkt->data.psnr.psnr[i]); - stream->psnr_totals[i] += pkt->data.psnr.psnr[i]; - } - stream->psnr_count++; -@@ -2306,13 +2326,13 @@ static void show_psnr(struct stream_state *stream) - return; - - fprintf(stderr, "Stream %d PSNR (Overall/Avg/Y/U/V)", stream->index); -- ovpsnr = vp8_mse2psnr(stream->psnr_samples_total, 255.0, -- stream->psnr_sse_total); -- fprintf(stderr, " %.3lf", ovpsnr); -+ ovpsnr = vp8_mse2psnr((double)stream->psnr_samples_total, 255.0, -+ (double)stream->psnr_sse_total); -+ fprintf(stderr, " %.3f", ovpsnr); - - for (i = 0; i < 4; i++) - { -- fprintf(stderr, " %.3lf", stream->psnr_totals[i]/stream->psnr_count); -+ fprintf(stderr, " %.3f", stream->psnr_totals[i]/stream->psnr_count); - } - fprintf(stderr, "\n"); - } -@@ -2320,7 +2340,7 @@ static void show_psnr(struct stream_state *stream) - - float usec_to_fps(uint64_t usec, unsigned int frames) - { -- return usec > 0 ? (float)frames * 1000000.0 / (float)usec : 0; -+ return (float)(usec > 0 ? frames * 1000000.0 / (float)usec : 0); - } - - -@@ -2437,7 +2457,7 @@ int main(int argc, const char **argv_) - vpx_img_alloc(&raw, - input.use_i420 ? VPX_IMG_FMT_I420 - : VPX_IMG_FMT_YV12, -- input.w, input.h, 1); -+ input.w, input.h, 32); - - FOREACH_STREAM(init_rate_histogram(&stream->rate_hist, - &stream->config.cfg, -@@ -2462,18 +2482,21 @@ int main(int argc, const char **argv_) - if (frame_avail) - frames_in++; - -- if(stream_cnt == 1) -- fprintf(stderr, -- "\rPass %d/%d frame %4d/%-4d %7"PRId64"B \033[K", -- pass + 1, global.passes, frames_in, -- streams->frames_out, (int64_t)streams->nbytes); -- else -- fprintf(stderr, -- "\rPass %d/%d frame %4d %7lu %s (%.2f fps)\033[K", -- pass + 1, global.passes, frames_in, -- cx_time > 9999999 ? cx_time / 1000 : cx_time, -- cx_time > 9999999 ? "ms" : "us", -- usec_to_fps(cx_time, frames_in)); -+ if (!global.quiet) -+ { -+ if(stream_cnt == 1) -+ fprintf(stderr, -+ "\rPass %d/%d frame %4d/%-4d %7"PRId64"B \033[K", -+ pass + 1, global.passes, frames_in, -+ streams->frames_out, (int64_t)streams->nbytes); -+ else -+ fprintf(stderr, -+ "\rPass %d/%d frame %4d %7lu %s (%.2f fps)\033[K", -+ pass + 1, global.passes, frames_in, -+ cx_time > 9999999 ? cx_time / 1000 : cx_time, -+ cx_time > 9999999 ? "ms" : "us", -+ usec_to_fps(cx_time, frames_in)); -+ } - - } - else -@@ -2484,7 +2507,7 @@ int main(int argc, const char **argv_) - frame_avail ? 
&raw : NULL, - frames_in)); - vpx_usec_timer_mark(&timer); -- cx_time += vpx_usec_timer_elapsed(&timer); -+ cx_time += (unsigned long)vpx_usec_timer_elapsed(&timer); - - FOREACH_STREAM(update_quantizer_histogram(stream)); - -@@ -2497,20 +2520,21 @@ int main(int argc, const char **argv_) - if(stream_cnt > 1) - fprintf(stderr, "\n"); - -- FOREACH_STREAM(fprintf( -- stderr, -- "\rPass %d/%d frame %4d/%-4d %7"PRId64"B %7lub/f %7"PRId64"b/s" -- " %7"PRId64" %s (%.2f fps)\033[K\n", pass + 1, -- global.passes, frames_in, stream->frames_out, (int64_t)stream->nbytes, -- frames_in ? (unsigned long)(stream->nbytes * 8 / frames_in) : 0, -- frames_in ? (int64_t)stream->nbytes * 8 -- * (int64_t)global.framerate.num / global.framerate.den -- / frames_in -- : 0, -- stream->cx_time > 9999999 ? stream->cx_time / 1000 : stream->cx_time, -- stream->cx_time > 9999999 ? "ms" : "us", -- usec_to_fps(stream->cx_time, frames_in)); -- ); -+ if (!global.quiet) -+ FOREACH_STREAM(fprintf( -+ stderr, -+ "\rPass %d/%d frame %4d/%-4d %7"PRId64"B %7lub/f %7"PRId64"b/s" -+ " %7"PRId64" %s (%.2f fps)\033[K\n", pass + 1, -+ global.passes, frames_in, stream->frames_out, (int64_t)stream->nbytes, -+ frames_in ? (unsigned long)(stream->nbytes * 8 / frames_in) : 0, -+ frames_in ? (int64_t)stream->nbytes * 8 -+ * (int64_t)global.framerate.num / global.framerate.den -+ / frames_in -+ : 0, -+ stream->cx_time > 9999999 ? stream->cx_time / 1000 : stream->cx_time, -+ stream->cx_time > 9999999 ? "ms" : "us", -+ usec_to_fps(stream->cx_time, frames_in)); -+ ); - - if (global.show_psnr) - FOREACH_STREAM(show_psnr(stream)); -diff --git a/y4minput.c b/y4minput.c -index dd51421..ff9ffbc 100644 ---- a/y4minput.c -+++ b/y4minput.c -@@ -662,7 +662,7 @@ int y4m_input_open(y4m_input *_y4m,FILE *_fin,char *_skip,int _nskip){ - _nskip--; - } - else{ -- ret=fread(buffer+i,1,1,_fin); -+ ret=(int)fread(buffer+i,1,1,_fin); - if(ret<1)return -1; - } - if(buffer[i]=='\n')break; -@@ -818,7 +818,7 @@ int y4m_input_fetch_frame(y4m_input *_y4m,FILE *_fin,vpx_image_t *_img){ - int c_sz; - int ret; - /*Read and skip the frame header.*/ -- ret=fread(frame,1,6,_fin); -+ ret=(int)fread(frame,1,6,_fin); - if(ret<6)return 0; - if(memcmp(frame,"FRAME",5)){ - fprintf(stderr,"Loss of framing in Y4M input data\n");