diff --git a/.gitignore b/.gitignore index 110146d..4074b0b 100644 --- a/.gitignore +++ b/.gitignore @@ -32,6 +32,8 @@ /ivfdec.dox /ivfenc /ivfenc.dox +/libvpx.so* +/libvpx.ver /obj_int_extract /postproc /postproc.c @@ -43,6 +45,7 @@ /simple_encoder /simple_encoder.c /simple_encoder.dox +/test_libvpx /twopass_encoder /twopass_encoder.c /twopass_encoder.dox @@ -55,7 +58,14 @@ /vp8cx_set_ref /vp8cx_set_ref.c /vp8cx_set_ref.dox +/vpx.pc /vpx_config.c /vpx_config.h +/vpx_rtcd.h /vpx_version.h +/vpxdec +/vpxenc TAGS +.cproject +.project +.settings diff --git a/CHANGELOG b/CHANGELOG index dcb9f73..ef64a96 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,32 @@ +2012-12-21 v1.2.0 + This release acts as a checkpoint for a large amount of internal refactoring + and testing. It also contains a number of small bugfixes, so all users are + encouraged to upgrade. + + - Upgrading: + This release is ABI and API compatible with Duclair (v1.0.0). Users + of older releases should refer to the Upgrading notes in this + document for that release. + + - Enhancements: + VP8 optimizations for MIPS dspr2 + vpxenc: add -quiet option + + - Speed: + Encoder and decoder speed is consistent with the Eider release. + + - Quality: + In general, quality is consistent with the Eider release. + + Minor tweaks to ARNR filtering + Minor improvements to real time encoding with multiple temporal layers + + - Bug Fixes: + Fixes multithreaded encoder race condition in loopfilter + Fixes multi-resolution threaded encoding + Fix potential encoder dead-lock after picture resize + + 2012-05-09 v1.1.0 "Eider" This introduces a number of enhancements, mostly focused on real-time encoding. In addition, it fixes a decoder bug (first introduced in diff --git a/README b/README index 0dfb0fe..0475dad 100644 --- a/README +++ b/README @@ -1,5 +1,5 @@ vpx Multi-Format Codec SDK -README - 19 May 2010 +README - 21 June 2012 Welcome to the WebM VP8 Codec SDK! 
@@ -15,11 +15,19 @@ COMPILING THE APPLICATIONS/LIBRARIES: * Building the documentation requires PHP[3] and Doxygen[4]. If you do not have these packages, you must pass --disable-install-docs to the configure script. + * Downloading the data for the unit tests requires curl[5] and sha1sum. + sha1sum is provided via the GNU coreutils, installed by default on + many *nix platforms, as well as MinGW and Cygwin. If coreutils is not + available, a compatible version of sha1sum can be built from + source[6]. These requirements are optional if not running the unit + tests. [1]: http://www.tortall.net/projects/yasm [2]: http://www.cygwin.com [3]: http://php.net [4]: http://www.doxygen.org + [5]: http://curl.haxx.se + [6]: http://www.microbrew.org/tools/md5sha1sum/ 2. Out-of-tree builds Out of tree builds are a supported method of building the application. For @@ -94,5 +102,5 @@ COMPILING THE APPLICATIONS/LIBRARIES: SUPPORT This library is an open source project supported by its community. Please - please email webm-users@webmproject.org for help. + please email webm-discuss@webmproject.org for help. diff --git a/build/make/Android.mk b/build/make/Android.mk index 6fcd4ae..c6b9cf9 100644 --- a/build/make/Android.mk +++ b/build/make/Android.mk @@ -27,15 +27,22 @@ # Android.mk file in the libvpx directory: # LOCAL_PATH := $(call my-dir) # include $(CLEAR_VARS) -# include libvpx/build/make/Android.mk +# include jni/libvpx/build/make/Android.mk # # There are currently two TARGET_ARCH_ABI targets for ARM. # armeabi and armeabi-v7a. armeabi-v7a is selected by creating an # Application.mk in the jni directory that contains: # APP_ABI := armeabi-v7a # +# By default libvpx will detect at runtime the existance of NEON extension. +# For this we import the 'cpufeatures' module from the NDK sources. +# libvpx can also be configured without this runtime detection method. +# Configuring with --disable-runtime-cpu-detect will assume presence of NEON. 
+# Configuring with --disable-runtime-cpu-detect --disable-neon will remove any +# NEON dependency. + # To change to building armeabi, run ./libvpx/configure again, but with -# --target=arm5te-android-gcc and and modify the Application.mk file to +# --target=arm5te-android-gcc and modify the Application.mk file to # set APP_ABI := armeabi # # Running ndk-build will build libvpx and include it in your project. @@ -166,7 +173,9 @@ LOCAL_MODULE := libvpx LOCAL_LDLIBS := -llog -LOCAL_STATIC_LIBRARIES := cpufeatures +ifeq ($(CONFIG_RUNTIME_CPU_DETECT),yes) + LOCAL_STATIC_LIBRARIES := cpufeatures +endif $(foreach file, $(LOCAL_SRC_FILES), $(LOCAL_PATH)/$(file)): vpx_rtcd.h @@ -196,4 +205,7 @@ ifeq ($(CONFIG_VP8_ENCODER), yes) $(LIBVPX_PATH)/vp8/encoder/asm_enc_offsets.c)) endif +ifeq ($(CONFIG_RUNTIME_CPU_DETECT),yes) $(call import-module,cpufeatures) +endif + diff --git a/build/make/Makefile b/build/make/Makefile index b6cf320..1088c84 100644 --- a/build/make/Makefile +++ b/build/make/Makefile @@ -21,6 +21,7 @@ all: .DEFAULT clean:: .DEFAULT install:: .DEFAULT test:: .DEFAULT +testdata:: .DEFAULT # Note: md5sum is not installed on OS X, but openssl is. Openssl may not be @@ -66,6 +67,7 @@ endif BUILD_ROOT?=. 
VPATH=$(SRC_PATH_BARE) CFLAGS+=-I$(BUILD_PFX)$(BUILD_ROOT) -I$(SRC_PATH) +CXXFLAGS+=-I$(BUILD_PFX)$(BUILD_ROOT) -I$(SRC_PATH) ASFLAGS+=-I$(BUILD_PFX)$(BUILD_ROOT)/ -I$(SRC_PATH)/ DIST_DIR?=dist HOSTCC?=gcc @@ -98,6 +100,8 @@ dist: install:: .PHONY: test test:: +.PHONY: testdata +testdata:: $(BUILD_PFX)%.c.d: %.c $(if $(quiet),@echo " [DEP] $@") @@ -111,11 +115,11 @@ $(BUILD_PFX)%.c.o: %.c $(BUILD_PFX)%.cc.d: %.cc $(if $(quiet),@echo " [DEP] $@") $(qexec)mkdir -p $(dir $@) - $(qexec)g++ $(INTERNAL_CFLAGS) $(CFLAGS) -M $< | $(fmt_deps) > $@ + $(qexec)$(CXX) $(INTERNAL_CFLAGS) $(CXXFLAGS) -M $< | $(fmt_deps) > $@ $(BUILD_PFX)%.cc.o: %.cc $(if $(quiet),@echo " [CXX] $@") - $(qexec)g++ $(INTERNAL_CFLAGS) $(CFLAGS) -c -o $@ $< + $(qexec)$(CXX) $(INTERNAL_CFLAGS) $(CXXFLAGS) -c -o $@ $< $(BUILD_PFX)%.asm.d: %.asm $(if $(quiet),@echo " [DEP] $@") @@ -213,7 +217,7 @@ define linkerxx_template $(1): $(filter-out -%,$(2)) $(1): $(if $(quiet),@echo " [LD] $$@") - $(qexec)g++ $$(strip $$(INTERNAL_LDFLAGS) $$(LDFLAGS) -o $$@ $(2) $(3) $$(extralibs)) + $(qexec)$$(CXX) $$(strip $$(INTERNAL_LDFLAGS) $$(LDFLAGS) -o $$@ $(2) $(3) $$(extralibs)) endef # make-3.80 has a bug with expanding large input strings to the eval function, # which was triggered in some cases by the following component of diff --git a/build/make/ads2gas.pl b/build/make/ads2gas.pl index c55ed0f..95be467 100755 --- a/build/make/ads2gas.pl +++ b/build/make/ads2gas.pl @@ -26,12 +26,22 @@ print "\t.equ DO1STROUNDING, 0\n"; while () { + undef $comment; + undef $line; + $comment_char = ";"; + $comment_sub = "@"; + + # Handle comments. 
+ if (/$comment_char/) + { + $comment = ""; + ($line, $comment) = /(.*?)$comment_char(.*)/; + $_ = $line; + } + # Load and store alignment s/@/,:/g; - # Comment character - s/;/@/g; - # Hexadecimal constants prefaced by 0x s/#&/#0x/g; @@ -51,16 +61,27 @@ while () s/:SHR:/ >> /g; # Convert ELSE to .else - s/ELSE/.else/g; + s/\bELSE\b/.else/g; # Convert ENDIF to .endif - s/ENDIF/.endif/g; + s/\bENDIF\b/.endif/g; # Convert ELSEIF to .elseif - s/ELSEIF/.elseif/g; + s/\bELSEIF\b/.elseif/g; # Convert LTORG to .ltorg - s/LTORG/.ltorg/g; + s/\bLTORG\b/.ltorg/g; + + # Convert endfunc to nothing. + s/\bendfunc\b//ig; + + # Convert FUNCTION to nothing. + s/\bFUNCTION\b//g; + s/\bfunction\b//g; + + s/\bENTRY\b//g; + s/\bMSARMASM\b/0/g; + s/^\s+end\s+$//g; # Convert IF :DEF:to .if # gcc doesn't have the ability to do a conditional @@ -106,6 +127,7 @@ while () if (s/RN\s+([Rr]\d+|lr)/.req $1/) { print; + print "$comment_sub$comment\n" if defined $comment; next; } @@ -114,6 +136,9 @@ while () s/EXPORT\s+\|([\$\w]*)\|/.global $1 \n\t.type $1, function/; s/IMPORT\s+\|([\$\w]*)\|/.global $1/; + s/EXPORT\s+([\$\w]*)/.global $1/; + s/export\s+([\$\w]*)/.global $1/; + # No vertical bars required; make additional symbol with prepended # underscore s/^\|(\$?\w+)\|/_$1\n\t$1:/g; @@ -124,11 +149,19 @@ while () s/^([a-zA-Z_0-9\$]+)/$1:/ if !/EQU/; # ALIGN directive - s/ALIGN/.balign/g; + s/\bALIGN\b/.balign/g; # ARM code s/\sARM/.arm/g; + # push/pop + s/(push\s+)(r\d+)/stmdb sp\!, \{$2\}/g; + s/(pop\s+)(r\d+)/ldmia sp\!, \{$2\}/g; + + # NEON code + s/(vld1.\d+\s+)(q\d+)/$1\{$2\}/g; + s/(vtbl.\d+\s+[^,]+),([^,]+)/$1,\{$2\}/g; + # eabi_attributes numerical equivalents can be found in the # "ARM IHI 0045C" document. 
@@ -157,10 +190,10 @@ while () } # EQU directive - s/(.*)EQU(.*)/.equ $1, $2/; + s/(\S+\s+)EQU(\s+\S+)/.equ $1, $2/; # Begin macro definition - if (/MACRO/) { + if (/\bMACRO\b/) { $_ = ; s/^/.macro/; s/\$//g; # remove formal param reference @@ -169,9 +202,10 @@ while () # For macros, use \ to reference formal params s/\$/\\/g; # End macro definition - s/MEND/.endm/; # No need to tell it where to stop assembling + s/\bMEND\b/.endm/; # No need to tell it where to stop assembling next if /^\s*END\s*$/; print; + print "$comment_sub$comment\n" if defined $comment; } # Mark that this object doesn't need an executable stack. diff --git a/build/make/configure.sh b/build/make/configure.sh index 3c772e5..c99a01c 100755 --- a/build/make/configure.sh +++ b/build/make/configure.sh @@ -166,6 +166,17 @@ is_in(){ add_cflags() { CFLAGS="${CFLAGS} $@" + CXXFLAGS="${CXXFLAGS} $@" +} + + +add_cflags_only() { + CFLAGS="${CFLAGS} $@" +} + + +add_cxxflags_only() { + CXXFLAGS="${CXXFLAGS} $@" } @@ -277,6 +288,13 @@ check_cc() { check_cmd ${CC} ${CFLAGS} "$@" -c -o ${TMP_O} ${TMP_C} } +check_cxx() { + log check_cxx "$@" + cat >${TMP_C} + log_file ${TMP_C} + check_cmd ${CXX} ${CXXFLAGS} "$@" -c -o ${TMP_O} ${TMP_C} +} + check_cpp() { log check_cpp "$@" cat > ${TMP_C} @@ -310,8 +328,25 @@ int x; EOF } +check_cxxflags() { + log check_cxxflags "$@" + + # Catch CFLAGS that trigger CXX warnings + case "$CXX" in + *g++*) check_cxx -Werror "$@" <> $1 << EOF @@ -379,6 +416,7 @@ TOOLCHAIN=${toolchain} ASM_CONVERSION=${asm_conversion_cmd:-${source_path}/build/make/ads2gas.pl} CC=${CC} +CXX=${CXX} AR=${AR} LD=${LD} AS=${AS} @@ -386,6 +424,7 @@ STRIP=${STRIP} NM=${NM} CFLAGS = ${CFLAGS} +CXXFLAGS = ${CXXFLAGS} ARFLAGS = -rus\$(if \$(quiet),c,v) LDFLAGS = ${LDFLAGS} ASFLAGS = ${ASFLAGS} @@ -538,6 +577,7 @@ post_process_cmdline() { setup_gnu_toolchain() { CC=${CC:-${CROSS}gcc} + CXX=${CXX:-${CROSS}g++} AR=${AR:-${CROSS}ar} LD=${LD:-${CROSS}${link_with_cc:-ld}} AS=${AS:-${CROSS}as} @@ -549,10 +589,19 @@ 
setup_gnu_toolchain() { process_common_toolchain() { if [ -z "$toolchain" ]; then - gcctarget="$(gcc -dumpmachine 2> /dev/null)" + gcctarget="${CHOST:-$(gcc -dumpmachine 2> /dev/null)}" # detect tgt_isa case "$gcctarget" in + armv6*) + tgt_isa=armv6 + ;; + armv7*) + tgt_isa=armv7 + ;; + armv5te*) + tgt_isa=armv5te + ;; *x86_64*|*amd64*) tgt_isa=x86_64 ;; @@ -718,6 +767,7 @@ process_common_toolchain() { ;; armv5te) soft_enable edsp + disable fast_unaligned ;; esac @@ -733,17 +783,23 @@ process_common_toolchain() { check_add_asflags --defsym ARCHITECTURE=${arch_int} tune_cflags="-mtune=" if [ ${tgt_isa} == "armv7" ]; then + check_add_cflags -march=armv7-a -mfloat-abi=softfp + check_add_asflags -march=armv7-a -mfloat-abi=softfp + if enabled neon then check_add_cflags -mfpu=neon #-ftree-vectorize check_add_asflags -mfpu=neon fi - check_add_cflags -march=armv7-a -mcpu=cortex-a8 -mfloat-abi=softfp - check_add_asflags -mcpu=cortex-a8 -mfloat-abi=softfp #-march=armv7-a + + if [ -z "${tune_cpu}" ]; then + tune_cpu=cortex-a8 + fi else check_add_cflags -march=${tgt_isa} check_add_asflags -march=${tgt_isa} fi + enabled debug && add_asflags -g asm_conversion_cmd="${source_path}/build/make/ads2gas.pl" ;; @@ -792,6 +848,7 @@ process_common_toolchain() { -name "arm-linux-androideabi-gcc*" -print -quit` TOOLCHAIN_PATH=${COMPILER_LOCATION%/*}/arm-linux-androideabi- CC=${TOOLCHAIN_PATH}gcc + CXX=${TOOLCHAIN_PATH}g++ AR=${TOOLCHAIN_PATH}ar LD=${TOOLCHAIN_PATH}gcc AS=${TOOLCHAIN_PATH}as @@ -810,12 +867,17 @@ process_common_toolchain() { add_cflags "--sysroot=${alt_libc}" add_ldflags "--sysroot=${alt_libc}" - add_cflags "-I${SDK_PATH}/sources/android/cpufeatures/" + # linker flag that routes around a CPU bug in some + # Cortex-A8 implementations (NDK Dev Guide) + add_ldflags "-Wl,--fix-cortex-a8" enable pic soft_enable realtime_only if [ ${tgt_isa} == "armv7" ]; then - enable runtime_cpu_detect + soft_enable runtime_cpu_detect + fi + if enabled runtime_cpu_detect; then + add_cflags 
"-I${SDK_PATH}/sources/android/cpufeatures" fi ;; @@ -827,6 +889,7 @@ process_common_toolchain() { SDK_PATH=${sdk_path} fi TOOLCHAIN_PATH=${SDK_PATH}/usr/bin + CXX=${TOOLCHAIN_PATH}/g++ CC=${TOOLCHAIN_PATH}/gcc AR=${TOOLCHAIN_PATH}/ar LD=${TOOLCHAIN_PATH}/arm-apple-darwin10-llvm-gcc-4.2 @@ -890,13 +953,16 @@ process_common_toolchain() { esac ;; mips*) - CROSS=${CROSS:-mipsel-linux-uclibc-} link_with_cc=gcc setup_gnu_toolchain tune_cflags="-mtune=" + if enabled dspr2; then + check_add_cflags -mips32r2 -mdspr2 + disable fast_unaligned + fi check_add_cflags -march=${tgt_isa} - check_add_asflags -march=${tgt_isa} - check_add_asflags -KPIC + check_add_asflags -march=${tgt_isa} + check_add_asflags -KPIC ;; ppc*) enable ppc @@ -924,6 +990,11 @@ process_common_toolchain() { x86*) bits=32 enabled x86_64 && bits=64 + check_cpp </dev/null 2>&1 && AS=yasm [ "${AS}" = auto -o -z "${AS}" ] \ && die "Neither yasm nor nasm have been found" - ;; + ;; esac log_echo " using $AS" [ "${AS##*/}" = nasm ] && add_asflags -Ox @@ -1065,7 +1143,7 @@ process_common_toolchain() { # Work around longjmp interception on glibc >= 2.11, to improve binary # compatibility. See http://code.google.com/p/webm/issues/detail?id=166 - enabled linux && check_add_cflags -D_FORTIFY_SOURCE=0 + enabled linux && check_add_cflags -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0 # Check for strip utility variant ${STRIP} -V 2>/dev/null | grep GNU >/dev/null && enable gnu_strip @@ -1080,12 +1158,24 @@ EOF # Almost every platform uses pthreads. if enabled multithread; then case ${toolchain} in - *-win*);; + *-win*-vs*);; *-android-gcc);; *) check_header pthread.h && add_extralibs -lpthread esac fi + # only for MIPS platforms + case ${toolchain} in + mips*) + if enabled dspr2; then + if enabled big_endian; then + echo "dspr2 optimizations are available only for little endian platforms" + disable dspr2 + fi + fi + ;; + esac + # for sysconf(3) and friends. 
check_header unistd.h diff --git a/build/make/gen_asm_deps.sh b/build/make/gen_asm_deps.sh index 717f870..0b4e3aa 100755 --- a/build/make/gen_asm_deps.sh +++ b/build/make/gen_asm_deps.sh @@ -42,7 +42,7 @@ done [ -n "$srcfile" ] || show_help sfx=${sfx:-asm} -includes=$(LC_ALL=C egrep -i "include +\"?+[a-z0-9_/]+\.${sfx}" $srcfile | +includes=$(LC_ALL=C egrep -i "include +\"?[a-z0-9_/]+\.${sfx}" $srcfile | perl -p -e "s;.*?([a-z0-9_/]+.${sfx}).*;\1;") #" restore editor state for inc in ${includes}; do diff --git a/build/make/obj_int_extract.c b/build/make/obj_int_extract.c index 04e14a6..bf317bd 100644 --- a/build/make/obj_int_extract.c +++ b/build/make/obj_int_extract.c @@ -680,7 +680,7 @@ int parse_coff(uint8_t *buf, size_t sz) uint32_t symoffset; char **sectionlist; //this array holds all section names in their correct order. - //it is used to check if the symbol is in .bss or .data section. + //it is used to check if the symbol is in .bss or .rdata section. nsections = get_le16(buf + 2); symtab_ptr = get_le32(buf + 8); @@ -725,15 +725,15 @@ int parse_coff(uint8_t *buf, size_t sz) } strcpy(sectionlist[i], sectionname); - if (!strcmp(sectionname, ".data")) sectionrawdata_ptr = get_le32(ptr + 20); + if (!strcmp(sectionname, ".rdata")) sectionrawdata_ptr = get_le32(ptr + 20); ptr += 40; } //log_msg("COFF: Symbol table at offset %u\n", symtab_ptr); - //log_msg("COFF: raw data pointer ofset for section .data is %u\n", sectionrawdata_ptr); + //log_msg("COFF: raw data pointer ofset for section .rdata is %u\n", sectionrawdata_ptr); - /* The compiler puts the data with non-zero offset in .data section, but puts the data with + /* The compiler puts the data with non-zero offset in .rdata section, but puts the data with zero offset in .bss section. So, if the data in in .bss section, set offset=0. 
Note from Wiki: In an object module compiled from C, the bss section contains the local variables (but not functions) that were declared with the static keyword, diff --git a/build/make/rtcd.sh b/build/make/rtcd.sh index 1dffde5..ddf9e09 100755 --- a/build/make/rtcd.sh +++ b/build/make/rtcd.sh @@ -211,6 +211,8 @@ common_top() { $(process_forward_decls) $(declare_function_pointers c $ALL_ARCHS) + +void ${symbol:-rtcd}(void); EOF } @@ -231,11 +233,10 @@ x86() { cat <planes[plane]; for(y=0; y < (plane ? (img->d_h + 1) >> 1 : img->d_h); y++) { - if(fwrite(buf, 1, (plane ? (img->d_w + 1) >> 1 : img->d_w), - outfile)); + (void) fwrite(buf, 1, (plane ? (img->d_w + 1) >> 1 : img->d_w), + outfile); buf += img->stride[plane]; } } diff --git a/examples/encoder_tmpl.c b/examples/encoder_tmpl.c index cc70b00..e2b65ec 100644 --- a/examples/encoder_tmpl.c +++ b/examples/encoder_tmpl.c @@ -85,7 +85,7 @@ static void write_ivf_file_header(FILE *outfile, mem_put_le32(header+24, frame_cnt); /* length */ mem_put_le32(header+28, 0); /* unused */ - if(fwrite(header, 1, 32, outfile)); + (void) fwrite(header, 1, 32, outfile); } @@ -103,7 +103,7 @@ static void write_ivf_frame_header(FILE *outfile, mem_put_le32(header+4, pts&0xFFFFFFFF); mem_put_le32(header+8, pts >> 32); - if(fwrite(header, 1, 12, outfile)); + (void) fwrite(header, 1, 12, outfile); } int main(int argc, char **argv) { diff --git a/examples/encoder_tmpl.txt b/examples/encoder_tmpl.txt index 0042071..1afbd8b 100644 --- a/examples/encoder_tmpl.txt +++ b/examples/encoder_tmpl.txt @@ -61,13 +61,14 @@ if(vpx_codec_encode(&codec, frame_avail? 
&raw : NULL, frame_cnt, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PROCESS_FRAME case VPX_CODEC_CX_FRAME_PKT: write_ivf_frame_header(outfile, pkt); - if(fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, - outfile)); + (void) fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, + outfile); break; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PROCESS_FRAME ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DESTROY +vpx_img_free(&raw); if(vpx_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DESTROY diff --git a/examples/twopass_encoder.txt b/examples/twopass_encoder.txt index 4683bc7..2f81a90 100644 --- a/examples/twopass_encoder.txt +++ b/examples/twopass_encoder.txt @@ -71,5 +71,17 @@ Pass Progress Reporting It's sometimes helpful to see when each pass completes. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ TWOPASS_LOOP_END printf("Pass %d complete.\n", pass+1); + if(vpx_codec_destroy(&codec)) + die_codec(&codec, "Failed to destroy codec"); } ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ TWOPASS_LOOP_END + + +Clean-up +----------------------------- +Destruction of the encoder instance must be done on each pass. The +raw image should be destroyed at the end as usual. +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DESTROY +vpx_img_free(&raw); +free(stats.buf); +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DESTROY diff --git a/libmkv/EbmlIDs.h b/libmkv/EbmlIDs.h index 3418e36..e3ce585 100644 --- a/libmkv/EbmlIDs.h +++ b/libmkv/EbmlIDs.h @@ -1,16 +1,16 @@ -// Copyright (c) 2010 The WebM project authors. All Rights Reserved. -// -// Use of this source code is governed by a BSD-style license -// that can be found in the LICENSE file in the root of the source -// tree. An additional intellectual property rights grant can be found -// in the file PATENTS. 
All contributing project authors may -// be found in the AUTHORS file in the root of the source tree. - - +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ #ifndef MKV_DEFS_HPP #define MKV_DEFS_HPP 1 -//Commenting out values not available in webm, but available in matroska +/* Commenting out values not available in webm, but available in matroska */ enum mkv { @@ -22,7 +22,7 @@ enum mkv DocType = 0x4282, DocTypeVersion = 0x4287, DocTypeReadVersion = 0x4285, -// CRC_32 = 0xBF, +/* CRC_32 = 0xBF, */ Void = 0xEC, SignatureSlot = 0x1B538667, SignatureAlgo = 0x7E8A, @@ -32,61 +32,61 @@ enum mkv SignatureElements = 0x7E5B, SignatureElementList = 0x7E7B, SignedElement = 0x6532, - //segment + /* segment */ Segment = 0x18538067, - //Meta Seek Information + /* Meta Seek Information */ SeekHead = 0x114D9B74, Seek = 0x4DBB, SeekID = 0x53AB, SeekPosition = 0x53AC, - //Segment Information + /* Segment Information */ Info = 0x1549A966, -// SegmentUID = 0x73A4, -// SegmentFilename = 0x7384, -// PrevUID = 0x3CB923, -// PrevFilename = 0x3C83AB, -// NextUID = 0x3EB923, -// NextFilename = 0x3E83BB, -// SegmentFamily = 0x4444, -// ChapterTranslate = 0x6924, -// ChapterTranslateEditionUID = 0x69FC, -// ChapterTranslateCodec = 0x69BF, -// ChapterTranslateID = 0x69A5, +/* SegmentUID = 0x73A4, */ +/* SegmentFilename = 0x7384, */ +/* PrevUID = 0x3CB923, */ +/* PrevFilename = 0x3C83AB, */ +/* NextUID = 0x3EB923, */ +/* NextFilename = 0x3E83BB, */ +/* SegmentFamily = 0x4444, */ +/* ChapterTranslate = 0x6924, */ +/* ChapterTranslateEditionUID = 0x69FC, */ +/* ChapterTranslateCodec = 0x69BF, */ +/* ChapterTranslateID = 0x69A5, */ 
TimecodeScale = 0x2AD7B1, Segment_Duration = 0x4489, DateUTC = 0x4461, -// Title = 0x7BA9, +/* Title = 0x7BA9, */ MuxingApp = 0x4D80, WritingApp = 0x5741, - //Cluster + /* Cluster */ Cluster = 0x1F43B675, Timecode = 0xE7, -// SilentTracks = 0x5854, -// SilentTrackNumber = 0x58D7, -// Position = 0xA7, +/* SilentTracks = 0x5854, */ +/* SilentTrackNumber = 0x58D7, */ +/* Position = 0xA7, */ PrevSize = 0xAB, BlockGroup = 0xA0, Block = 0xA1, -// BlockVirtual = 0xA2, -// BlockAdditions = 0x75A1, -// BlockMore = 0xA6, -// BlockAddID = 0xEE, -// BlockAdditional = 0xA5, +/* BlockVirtual = 0xA2, */ +/* BlockAdditions = 0x75A1, */ +/* BlockMore = 0xA6, */ +/* BlockAddID = 0xEE, */ +/* BlockAdditional = 0xA5, */ BlockDuration = 0x9B, -// ReferencePriority = 0xFA, +/* ReferencePriority = 0xFA, */ ReferenceBlock = 0xFB, -// ReferenceVirtual = 0xFD, -// CodecState = 0xA4, -// Slices = 0x8E, -// TimeSlice = 0xE8, +/* ReferenceVirtual = 0xFD, */ +/* CodecState = 0xA4, */ +/* Slices = 0x8E, */ +/* TimeSlice = 0xE8, */ LaceNumber = 0xCC, -// FrameNumber = 0xCD, -// BlockAdditionID = 0xCB, -// MkvDelay = 0xCE, -// Cluster_Duration = 0xCF, +/* FrameNumber = 0xCD, */ +/* BlockAdditionID = 0xCB, */ +/* MkvDelay = 0xCE, */ +/* Cluster_Duration = 0xCF, */ SimpleBlock = 0xA3, -// EncryptedBlock = 0xAF, - //Track +/* EncryptedBlock = 0xAF, */ + /* Track */ Tracks = 0x1654AE6B, TrackEntry = 0xAE, TrackNumber = 0xD7, @@ -96,28 +96,28 @@ enum mkv FlagDefault = 0x88, FlagForced = 0x55AA, FlagLacing = 0x9C, -// MinCache = 0x6DE7, -// MaxCache = 0x6DF8, +/* MinCache = 0x6DE7, */ +/* MaxCache = 0x6DF8, */ DefaultDuration = 0x23E383, -// TrackTimecodeScale = 0x23314F, -// TrackOffset = 0x537F, -// MaxBlockAdditionID = 0x55EE, +/* TrackTimecodeScale = 0x23314F, */ +/* TrackOffset = 0x537F, */ +/* MaxBlockAdditionID = 0x55EE, */ Name = 0x536E, Language = 0x22B59C, CodecID = 0x86, CodecPrivate = 0x63A2, CodecName = 0x258688, -// AttachmentLink = 0x7446, -// CodecSettings = 0x3A9697, -// CodecInfoURL = 
0x3B4040, -// CodecDownloadURL = 0x26B240, -// CodecDecodeAll = 0xAA, -// TrackOverlay = 0x6FAB, -// TrackTranslate = 0x6624, -// TrackTranslateEditionUID = 0x66FC, -// TrackTranslateCodec = 0x66BF, -// TrackTranslateTrackID = 0x66A5, - //video +/* AttachmentLink = 0x7446, */ +/* CodecSettings = 0x3A9697, */ +/* CodecInfoURL = 0x3B4040, */ +/* CodecDownloadURL = 0x26B240, */ +/* CodecDecodeAll = 0xAA, */ +/* TrackOverlay = 0x6FAB, */ +/* TrackTranslate = 0x6624, */ +/* TrackTranslateEditionUID = 0x66FC, */ +/* TrackTranslateCodec = 0x66BF, */ +/* TrackTranslateTrackID = 0x66A5, */ + /* video */ Video = 0xE0, FlagInterlaced = 0x9A, StereoMode = 0x53B8, @@ -131,101 +131,101 @@ enum mkv DisplayHeight = 0x54BA, DisplayUnit = 0x54B2, AspectRatioType = 0x54B3, -// ColourSpace = 0x2EB524, -// GammaValue = 0x2FB523, +/* ColourSpace = 0x2EB524, */ +/* GammaValue = 0x2FB523, */ FrameRate = 0x2383E3, - //end video - //audio + /* end video */ + /* audio */ Audio = 0xE1, SamplingFrequency = 0xB5, OutputSamplingFrequency = 0x78B5, Channels = 0x9F, -// ChannelPositions = 0x7D7B, +/* ChannelPositions = 0x7D7B, */ BitDepth = 0x6264, - //end audio - //content encoding -// ContentEncodings = 0x6d80, -// ContentEncoding = 0x6240, -// ContentEncodingOrder = 0x5031, -// ContentEncodingScope = 0x5032, -// ContentEncodingType = 0x5033, -// ContentCompression = 0x5034, -// ContentCompAlgo = 0x4254, -// ContentCompSettings = 0x4255, -// ContentEncryption = 0x5035, -// ContentEncAlgo = 0x47e1, -// ContentEncKeyID = 0x47e2, -// ContentSignature = 0x47e3, -// ContentSigKeyID = 0x47e4, -// ContentSigAlgo = 0x47e5, -// ContentSigHashAlgo = 0x47e6, - //end content encoding - //Cueing Data + /* end audio */ + /* content encoding */ +/* ContentEncodings = 0x6d80, */ +/* ContentEncoding = 0x6240, */ +/* ContentEncodingOrder = 0x5031, */ +/* ContentEncodingScope = 0x5032, */ +/* ContentEncodingType = 0x5033, */ +/* ContentCompression = 0x5034, */ +/* ContentCompAlgo = 0x4254, */ +/* 
ContentCompSettings = 0x4255, */ +/* ContentEncryption = 0x5035, */ +/* ContentEncAlgo = 0x47e1, */ +/* ContentEncKeyID = 0x47e2, */ +/* ContentSignature = 0x47e3, */ +/* ContentSigKeyID = 0x47e4, */ +/* ContentSigAlgo = 0x47e5, */ +/* ContentSigHashAlgo = 0x47e6, */ + /* end content encoding */ + /* Cueing Data */ Cues = 0x1C53BB6B, CuePoint = 0xBB, CueTime = 0xB3, CueTrackPositions = 0xB7, CueTrack = 0xF7, CueClusterPosition = 0xF1, - CueBlockNumber = 0x5378, -// CueCodecState = 0xEA, -// CueReference = 0xDB, -// CueRefTime = 0x96, -// CueRefCluster = 0x97, -// CueRefNumber = 0x535F, -// CueRefCodecState = 0xEB, - //Attachment -// Attachments = 0x1941A469, -// AttachedFile = 0x61A7, -// FileDescription = 0x467E, -// FileName = 0x466E, -// FileMimeType = 0x4660, -// FileData = 0x465C, -// FileUID = 0x46AE, -// FileReferral = 0x4675, - //Chapters -// Chapters = 0x1043A770, -// EditionEntry = 0x45B9, -// EditionUID = 0x45BC, -// EditionFlagHidden = 0x45BD, -// EditionFlagDefault = 0x45DB, -// EditionFlagOrdered = 0x45DD, -// ChapterAtom = 0xB6, -// ChapterUID = 0x73C4, -// ChapterTimeStart = 0x91, -// ChapterTimeEnd = 0x92, -// ChapterFlagHidden = 0x98, -// ChapterFlagEnabled = 0x4598, -// ChapterSegmentUID = 0x6E67, -// ChapterSegmentEditionUID = 0x6EBC, -// ChapterPhysicalEquiv = 0x63C3, -// ChapterTrack = 0x8F, -// ChapterTrackNumber = 0x89, -// ChapterDisplay = 0x80, -// ChapString = 0x85, -// ChapLanguage = 0x437C, -// ChapCountry = 0x437E, -// ChapProcess = 0x6944, -// ChapProcessCodecID = 0x6955, -// ChapProcessPrivate = 0x450D, -// ChapProcessCommand = 0x6911, -// ChapProcessTime = 0x6922, -// ChapProcessData = 0x6933, - //Tagging -// Tags = 0x1254C367, -// Tag = 0x7373, -// Targets = 0x63C0, -// TargetTypeValue = 0x68CA, -// TargetType = 0x63CA, -// Tagging_TrackUID = 0x63C5, -// Tagging_EditionUID = 0x63C9, -// Tagging_ChapterUID = 0x63C4, -// AttachmentUID = 0x63C6, -// SimpleTag = 0x67C8, -// TagName = 0x45A3, -// TagLanguage = 0x447A, -// TagDefault = 
0x4484, -// TagString = 0x4487, -// TagBinary = 0x4485, + CueBlockNumber = 0x5378 +/* CueCodecState = 0xEA, */ +/* CueReference = 0xDB, */ +/* CueRefTime = 0x96, */ +/* CueRefCluster = 0x97, */ +/* CueRefNumber = 0x535F, */ +/* CueRefCodecState = 0xEB, */ + /* Attachment */ +/* Attachments = 0x1941A469, */ +/* AttachedFile = 0x61A7, */ +/* FileDescription = 0x467E, */ +/* FileName = 0x466E, */ +/* FileMimeType = 0x4660, */ +/* FileData = 0x465C, */ +/* FileUID = 0x46AE, */ +/* FileReferral = 0x4675, */ + /* Chapters */ +/* Chapters = 0x1043A770, */ +/* EditionEntry = 0x45B9, */ +/* EditionUID = 0x45BC, */ +/* EditionFlagHidden = 0x45BD, */ +/* EditionFlagDefault = 0x45DB, */ +/* EditionFlagOrdered = 0x45DD, */ +/* ChapterAtom = 0xB6, */ +/* ChapterUID = 0x73C4, */ +/* ChapterTimeStart = 0x91, */ +/* ChapterTimeEnd = 0x92, */ +/* ChapterFlagHidden = 0x98, */ +/* ChapterFlagEnabled = 0x4598, */ +/* ChapterSegmentUID = 0x6E67, */ +/* ChapterSegmentEditionUID = 0x6EBC, */ +/* ChapterPhysicalEquiv = 0x63C3, */ +/* ChapterTrack = 0x8F, */ +/* ChapterTrackNumber = 0x89, */ +/* ChapterDisplay = 0x80, */ +/* ChapString = 0x85, */ +/* ChapLanguage = 0x437C, */ +/* ChapCountry = 0x437E, */ +/* ChapProcess = 0x6944, */ +/* ChapProcessCodecID = 0x6955, */ +/* ChapProcessPrivate = 0x450D, */ +/* ChapProcessCommand = 0x6911, */ +/* ChapProcessTime = 0x6922, */ +/* ChapProcessData = 0x6933, */ + /* Tagging */ +/* Tags = 0x1254C367, */ +/* Tag = 0x7373, */ +/* Targets = 0x63C0, */ +/* TargetTypeValue = 0x68CA, */ +/* TargetType = 0x63CA, */ +/* Tagging_TrackUID = 0x63C5, */ +/* Tagging_EditionUID = 0x63C9, */ +/* Tagging_ChapterUID = 0x63C4, */ +/* AttachmentUID = 0x63C6, */ +/* SimpleTag = 0x67C8, */ +/* TagName = 0x45A3, */ +/* TagLanguage = 0x447A, */ +/* TagDefault = 0x4484, */ +/* TagString = 0x4487, */ +/* TagBinary = 0x4485, */ }; #endif diff --git a/libmkv/EbmlWriter.c b/libmkv/EbmlWriter.c index fbf2c66..d70f06e 100644 --- a/libmkv/EbmlWriter.c +++ b/libmkv/EbmlWriter.c @@ 
-1,12 +1,12 @@ -// Copyright (c) 2010 The WebM project authors. All Rights Reserved. -// -// Use of this source code is governed by a BSD-style license -// that can be found in the LICENSE file in the root of the source -// tree. An additional intellectual property rights grant can be found -// in the file PATENTS. All contributing project authors may -// be found in the AUTHORS file in the root of the source tree. - - +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ #include "EbmlWriter.h" #include #include @@ -18,11 +18,13 @@ #define LITERALU64(n) n##LLU #endif -void Ebml_WriteLen(EbmlGlobal *glob, long long val) +void Ebml_WriteLen(EbmlGlobal *glob, int64_t val) { - //TODO check and make sure we are not > than 0x0100000000000000LLU - unsigned char size = 8; //size in bytes to output - unsigned long long minVal = LITERALU64(0x00000000000000ff); //mask to compare for byte size + /* TODO check and make sure we are not > than 0x0100000000000000LLU */ + unsigned char size = 8; /* size in bytes to output */ + + /* mask to compare for byte size */ + int64_t minVal = 0xff; for (size = 1; size < 8; size ++) { @@ -32,7 +34,7 @@ void Ebml_WriteLen(EbmlGlobal *glob, long long val) minVal = (minVal << 7); } - val |= (LITERALU64(0x000000000000080) << ((size - 1) * 7)); + val |= (((uint64_t)0x80) << ((size - 1) * 7)); Ebml_Serialize(glob, (void *) &val, sizeof(val), size); } @@ -40,23 +42,25 @@ void Ebml_WriteLen(EbmlGlobal *glob, long long val) void Ebml_WriteString(EbmlGlobal *glob, const char *str) { const size_t size_ = strlen(str); - const unsigned long long size = size_; + const uint64_t size = size_; 
Ebml_WriteLen(glob, size); - //TODO: it's not clear from the spec whether the nul terminator - //should be serialized too. For now we omit the null terminator. - Ebml_Write(glob, str, size); + /* TODO: it's not clear from the spec whether the nul terminator + * should be serialized too. For now we omit the null terminator. + */ + Ebml_Write(glob, str, (unsigned long)size); } void Ebml_WriteUTF8(EbmlGlobal *glob, const wchar_t *wstr) { const size_t strlen = wcslen(wstr); - //TODO: it's not clear from the spec whether the nul terminator - //should be serialized too. For now we include it. - const unsigned long long size = strlen; + /* TODO: it's not clear from the spec whether the nul terminator + * should be serialized too. For now we include it. + */ + const uint64_t size = strlen; Ebml_WriteLen(glob, size); - Ebml_Write(glob, wstr, size); + Ebml_Write(glob, wstr, (unsigned long)size); } void Ebml_WriteID(EbmlGlobal *glob, unsigned long class_id) @@ -85,12 +89,12 @@ void Ebml_SerializeUnsigned64(EbmlGlobal *glob, unsigned long class_id, uint64_t void Ebml_SerializeUnsigned(EbmlGlobal *glob, unsigned long class_id, unsigned long ui) { - unsigned char size = 8; //size in bytes to output + unsigned char size = 8; /* size in bytes to output */ unsigned char sizeSerialized = 0; unsigned long minVal; Ebml_WriteID(glob, class_id); - minVal = 0x7fLU; //mask to compare for byte size + minVal = 0x7fLU; /* mask to compare for byte size */ for (size = 1; size < 4; size ++) { @@ -106,7 +110,7 @@ void Ebml_SerializeUnsigned(EbmlGlobal *glob, unsigned long class_id, unsigned l Ebml_Serialize(glob, &sizeSerialized, sizeof(sizeSerialized), 1); Ebml_Serialize(glob, &ui, sizeof(ui), size); } -//TODO: perhaps this is a poor name for this id serializer helper function +/* TODO: perhaps this is a poor name for this id serializer helper function */ void Ebml_SerializeBinary(EbmlGlobal *glob, unsigned long class_id, unsigned long bin) { int size; @@ -168,4 +172,4 @@ void 
Ebml_WriteVoid(EbmlGlobal *glob, unsigned long vSize) } } -//TODO Serialize Date +/* TODO Serialize Date */ diff --git a/libmkv/EbmlWriter.h b/libmkv/EbmlWriter.h index 324c9bc..b94f757 100644 --- a/libmkv/EbmlWriter.h +++ b/libmkv/EbmlWriter.h @@ -1,26 +1,30 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ #ifndef EBMLWRITER_HPP #define EBMLWRITER_HPP - -// Copyright (c) 2010 The WebM project authors. All Rights Reserved. -// -// Use of this source code is governed by a BSD-style license -// that can be found in the LICENSE file in the root of the source -// tree. An additional intellectual property rights grant can be found -// in the file PATENTS. All contributing project authors may -// be found in the AUTHORS file in the root of the source tree. 
- -//note: you must define write and serialize functions as well as your own EBML_GLOBAL -//These functions MUST be implemented #include #include "vpx/vpx_integer.h" +/* note: you must define write and serialize functions as well as your own + * EBML_GLOBAL + * + * These functions MUST be implemented + */ + typedef struct EbmlGlobal EbmlGlobal; void Ebml_Serialize(EbmlGlobal *glob, const void *, int, unsigned long); void Ebml_Write(EbmlGlobal *glob, const void *, unsigned long); -///// +/*****/ -void Ebml_WriteLen(EbmlGlobal *glob, long long val); +void Ebml_WriteLen(EbmlGlobal *glob, int64_t val); void Ebml_WriteString(EbmlGlobal *glob, const char *str); void Ebml_WriteUTF8(EbmlGlobal *glob, const wchar_t *wstr); void Ebml_WriteID(EbmlGlobal *glob, unsigned long class_id); @@ -28,11 +32,11 @@ void Ebml_SerializeUnsigned64(EbmlGlobal *glob, unsigned long class_id, uint64_t void Ebml_SerializeUnsigned(EbmlGlobal *glob, unsigned long class_id, unsigned long ui); void Ebml_SerializeBinary(EbmlGlobal *glob, unsigned long class_id, unsigned long ui); void Ebml_SerializeFloat(EbmlGlobal *glob, unsigned long class_id, double d); -//TODO make this more generic to signed +/* TODO make this more generic to signed */ void Ebml_WriteSigned16(EbmlGlobal *glob, short val); void Ebml_SerializeString(EbmlGlobal *glob, unsigned long class_id, const char *s); void Ebml_SerializeUTF8(EbmlGlobal *glob, unsigned long class_id, wchar_t *s); void Ebml_SerializeData(EbmlGlobal *glob, unsigned long class_id, unsigned char *data, unsigned long data_length); void Ebml_WriteVoid(EbmlGlobal *glob, unsigned long vSize); -//TODO need date function +/* TODO need date function */ #endif diff --git a/libs.mk b/libs.mk index e2ba737..4115dd8 100644 --- a/libs.mk +++ b/libs.mk @@ -20,8 +20,16 @@ endif CODEC_SRCS-yes += CHANGELOG CODEC_SRCS-yes += libs.mk +# If this is a universal (fat) binary, then all the subarchitectures have +# already been built and our job is to stitch them together. 
The +# BUILD_LIBVPX variable indicates whether we should be building +# (compiling, linking) the library. The LIPO_LIBVPX variable indicates +# that we're stitching. +$(eval $(if $(filter universal%,$(TOOLCHAIN)),LIPO_LIBVPX,BUILD_LIBVPX):=yes) + include $(SRC_PATH_BARE)/vpx/vpx_codec.mk CODEC_SRCS-yes += $(addprefix vpx/,$(call enabled,API_SRCS)) +CODEC_DOC_SRCS += $(addprefix vpx/,$(call enabled,API_DOC_SRCS)) include $(SRC_PATH_BARE)/vpx_mem/vpx_mem.mk CODEC_SRCS-yes += $(addprefix vpx_mem/,$(call enabled,MEM_SRCS)) @@ -29,17 +37,17 @@ CODEC_SRCS-yes += $(addprefix vpx_mem/,$(call enabled,MEM_SRCS)) include $(SRC_PATH_BARE)/vpx_scale/vpx_scale.mk CODEC_SRCS-yes += $(addprefix vpx_scale/,$(call enabled,SCALE_SRCS)) +include $(SRC_PATH_BARE)/vpx_ports/vpx_ports.mk +CODEC_SRCS-yes += $(addprefix vpx_ports/,$(call enabled,PORTS_SRCS)) + ifeq ($(CONFIG_VP8_ENCODER),yes) VP8_PREFIX=vp8/ include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8cx.mk CODEC_SRCS-yes += $(addprefix $(VP8_PREFIX),$(call enabled,VP8_CX_SRCS)) CODEC_EXPORTS-yes += $(addprefix $(VP8_PREFIX),$(VP8_CX_EXPORTS)) - CODEC_SRCS-yes += $(VP8_PREFIX)vp8cx.mk vpx/vp8.h vpx/vp8cx.h - CODEC_SRCS-$(ARCH_ARM) += $(VP8_PREFIX)vp8cx_arm.mk INSTALL-LIBS-yes += include/vpx/vp8.h include/vpx/vp8cx.h INSTALL_MAPS += include/vpx/% $(SRC_PATH_BARE)/$(VP8_PREFIX)/% - CODEC_DOC_SRCS += vpx/vp8.h vpx/vp8cx.h CODEC_DOC_SECTIONS += vp8 vp8_encoder endif @@ -48,10 +56,8 @@ ifeq ($(CONFIG_VP8_DECODER),yes) include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8dx.mk CODEC_SRCS-yes += $(addprefix $(VP8_PREFIX),$(call enabled,VP8_DX_SRCS)) CODEC_EXPORTS-yes += $(addprefix $(VP8_PREFIX),$(VP8_DX_EXPORTS)) - CODEC_SRCS-yes += $(VP8_PREFIX)vp8dx.mk vpx/vp8.h vpx/vp8dx.h INSTALL-LIBS-yes += include/vpx/vp8.h include/vpx/vp8dx.h INSTALL_MAPS += include/vpx/% $(SRC_PATH_BARE)/$(VP8_PREFIX)/% - CODEC_DOC_SRCS += vpx/vp8.h vpx/vp8dx.h CODEC_DOC_SECTIONS += vp8 vp8_decoder endif @@ -66,6 +72,7 @@ endif ifeq ($(CONFIG_MSVS),yes) CODEC_LIB=$(if 
$(CONFIG_STATIC_MSVCRT),vpxmt,vpxmd) +GTEST_LIB=$(if $(CONFIG_STATIC_MSVCRT),gtestmt,gtestmd) # This variable uses deferred expansion intentionally, since the results of # $(wildcard) may change during the course of the Make. VS_PLATFORMS = $(foreach d,$(wildcard */Release/$(CODEC_LIB).lib),$(word 1,$(subst /, ,$(d)))) @@ -82,29 +89,10 @@ INSTALL_MAPS += $(foreach p,$(VS_PLATFORMS),$(LIBSUBDIR)/$(p)/% $(p)/Release/%) INSTALL_MAPS += $(foreach p,$(VS_PLATFORMS),$(LIBSUBDIR)/$(p)/% $(p)/Debug/%) endif -# If this is a universal (fat) binary, then all the subarchitectures have -# already been built and our job is to stitch them together. The -# BUILD_LIBVPX variable indicates whether we should be building -# (compiling, linking) the library. The LIPO_LIBVPX variable indicates -# that we're stitching. -$(eval $(if $(filter universal%,$(TOOLCHAIN)),LIPO_LIBVPX,BUILD_LIBVPX):=yes) - CODEC_SRCS-$(BUILD_LIBVPX) += build/make/version.sh CODEC_SRCS-$(BUILD_LIBVPX) += build/make/rtcd.sh -CODEC_SRCS-$(BUILD_LIBVPX) += vpx/vpx_integer.h -CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/asm_offsets.h -CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/vpx_timer.h -CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/mem.h CODEC_SRCS-$(BUILD_LIBVPX) += $(BUILD_PFX)vpx_config.c INSTALL-SRCS-no += $(BUILD_PFX)vpx_config.c -ifeq ($(ARCH_X86)$(ARCH_X86_64),yes) -CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/emms.asm -CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/x86.h -CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/x86_abi_support.asm -CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/x86_cpuid.c -endif -CODEC_SRCS-$(ARCH_ARM) += vpx_ports/arm_cpudetect.c -CODEC_SRCS-$(ARCH_ARM) += vpx_ports/arm.h CODEC_EXPORTS-$(BUILD_LIBVPX) += vpx/exports_com CODEC_EXPORTS-$(CONFIG_ENCODERS) += vpx/exports_enc CODEC_EXPORTS-$(CONFIG_DECODERS) += vpx/exports_dec @@ -146,7 +134,7 @@ ifeq ($(CONFIG_MSVS),yes) obj_int_extract.vcproj: $(SRC_PATH_BARE)/build/make/obj_int_extract.c @cp $(SRC_PATH_BARE)/build/x86-msvs/obj_int_extract.bat . 
@echo " [CREATE] $@" - $(SRC_PATH_BARE)/build/make/gen_msvs_proj.sh \ + $(qexec)$(SRC_PATH_BARE)/build/make/gen_msvs_proj.sh \ --exe \ --target=$(TOOLCHAIN) \ --name=obj_int_extract \ @@ -162,14 +150,14 @@ PROJECTS-$(BUILD_LIBVPX) += obj_int_extract.bat vpx.def: $(call enabled,CODEC_EXPORTS) @echo " [CREATE] $@" - $(SRC_PATH_BARE)/build/make/gen_msvs_def.sh\ + $(qexec)$(SRC_PATH_BARE)/build/make/gen_msvs_def.sh\ --name=vpx\ --out=$@ $^ CLEAN-OBJS += vpx.def vpx.vcproj: $(CODEC_SRCS) vpx.def @echo " [CREATE] $@" - $(SRC_PATH_BARE)/build/make/gen_msvs_proj.sh \ + $(qexec)$(SRC_PATH_BARE)/build/make/gen_msvs_proj.sh \ --lib \ --target=$(TOOLCHAIN) \ $(if $(CONFIG_STATIC_MSVCRT),--static-crt) \ @@ -242,6 +230,7 @@ vpx.pc: config.mk libs.mk $(qexec)echo 'Requires:' >> $@ $(qexec)echo 'Conflicts:' >> $@ $(qexec)echo 'Libs: -L$${libdir} -lvpx' >> $@ + $(qexec)echo 'Libs.private: -lm -lpthread' >> $@ $(qexec)echo 'Cflags: -I$${includedir}' >> $@ INSTALL-LIBS-yes += $(LIBSUBDIR)/pkgconfig/vpx.pc INSTALL_MAPS += $(LIBSUBDIR)/pkgconfig/%.pc %.pc @@ -284,38 +273,44 @@ OFFSET_PATTERN:='^[a-zA-Z0-9_]* EQU' ifeq ($(filter icc gcc,$(TGT_CC)), $(TGT_CC)) $(BUILD_PFX)asm_com_offsets.asm: $(BUILD_PFX)$(VP8_PREFIX)common/asm_com_offsets.c.S - LC_ALL=C grep $(OFFSET_PATTERN) $< | tr -d '$$\#' $(ADS2GAS) > $@ + @echo " [CREATE] $@" + $(qexec)LC_ALL=C grep $(OFFSET_PATTERN) $< | tr -d '$$\#' $(ADS2GAS) > $@ $(BUILD_PFX)$(VP8_PREFIX)common/asm_com_offsets.c.S: $(VP8_PREFIX)common/asm_com_offsets.c CLEAN-OBJS += $(BUILD_PFX)asm_com_offsets.asm $(BUILD_PFX)$(VP8_PREFIX)common/asm_com_offsets.c.S $(BUILD_PFX)asm_enc_offsets.asm: $(BUILD_PFX)$(VP8_PREFIX)encoder/asm_enc_offsets.c.S - LC_ALL=C grep $(OFFSET_PATTERN) $< | tr -d '$$\#' $(ADS2GAS) > $@ + @echo " [CREATE] $@" + $(qexec)LC_ALL=C grep $(OFFSET_PATTERN) $< | tr -d '$$\#' $(ADS2GAS) > $@ $(BUILD_PFX)$(VP8_PREFIX)encoder/asm_enc_offsets.c.S: $(VP8_PREFIX)encoder/asm_enc_offsets.c CLEAN-OBJS += $(BUILD_PFX)asm_enc_offsets.asm 
$(BUILD_PFX)$(VP8_PREFIX)encoder/asm_enc_offsets.c.S $(BUILD_PFX)asm_dec_offsets.asm: $(BUILD_PFX)$(VP8_PREFIX)decoder/asm_dec_offsets.c.S - LC_ALL=C grep $(OFFSET_PATTERN) $< | tr -d '$$\#' $(ADS2GAS) > $@ + @echo " [CREATE] $@" + $(qexec)LC_ALL=C grep $(OFFSET_PATTERN) $< | tr -d '$$\#' $(ADS2GAS) > $@ $(BUILD_PFX)$(VP8_PREFIX)decoder/asm_dec_offsets.c.S: $(VP8_PREFIX)decoder/asm_dec_offsets.c CLEAN-OBJS += $(BUILD_PFX)asm_dec_offsets.asm $(BUILD_PFX)$(VP8_PREFIX)decoder/asm_dec_offsets.c.S else ifeq ($(filter rvct,$(TGT_CC)), $(TGT_CC)) asm_com_offsets.asm: obj_int_extract asm_com_offsets.asm: $(VP8_PREFIX)common/asm_com_offsets.c.o - ./obj_int_extract rvds $< $(ADS2GAS) > $@ + @echo " [CREATE] $@" + $(qexec)./obj_int_extract rvds $< $(ADS2GAS) > $@ OBJS-yes += $(VP8_PREFIX)common/asm_com_offsets.c.o CLEAN-OBJS += asm_com_offsets.asm $(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)asm_com_offsets.asm asm_enc_offsets.asm: obj_int_extract asm_enc_offsets.asm: $(VP8_PREFIX)encoder/asm_enc_offsets.c.o - ./obj_int_extract rvds $< $(ADS2GAS) > $@ + @echo " [CREATE] $@" + $(qexec)./obj_int_extract rvds $< $(ADS2GAS) > $@ OBJS-yes += $(VP8_PREFIX)encoder/asm_enc_offsets.c.o CLEAN-OBJS += asm_enc_offsets.asm $(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)asm_enc_offsets.asm asm_dec_offsets.asm: obj_int_extract asm_dec_offsets.asm: $(VP8_PREFIX)decoder/asm_dec_offsets.c.o - ./obj_int_extract rvds $< $(ADS2GAS) > $@ + @echo " [CREATE] $@" + $(qexec)./obj_int_extract rvds $< $(ADS2GAS) > $@ OBJS-yes += $(VP8_PREFIX)decoder/asm_dec_offsets.c.o CLEAN-OBJS += asm_dec_offsets.asm $(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)asm_dec_offsets.asm @@ -328,7 +323,6 @@ CLEAN-OBJS += $(BUILD_PFX)vpx_version.h # # Rule to generate runtime cpu detection files # -$(OBJS-yes:.o=.d): $(BUILD_PFX)vpx_rtcd.h $(BUILD_PFX)vpx_rtcd.h: $(SRC_PATH_BARE)/$(sort $(filter %rtcd_defs.sh,$(CODEC_SRCS))) @echo " [CREATE] $@" $(qexec)$(SRC_PATH_BARE)/build/make/rtcd.sh --arch=$(TGT_ISA) \ @@ -337,25 +331,43 
@@ $(BUILD_PFX)vpx_rtcd.h: $(SRC_PATH_BARE)/$(sort $(filter %rtcd_defs.sh,$(CODEC_S $(RTCD_OPTIONS) $^ > $@ CLEAN-OBJS += $(BUILD_PFX)vpx_rtcd.h -CODEC_DOC_SRCS += vpx/vpx_codec.h \ - vpx/vpx_decoder.h \ - vpx/vpx_encoder.h \ - vpx/vpx_image.h - ## ## libvpx test directives ## - ifeq ($(CONFIG_UNIT_TESTS),yes) +LIBVPX_TEST_DATA_PATH ?= . + +include $(SRC_PATH_BARE)/test/test.mk +LIBVPX_TEST_SRCS=$(addprefix test/,$(call enabled,LIBVPX_TEST_SRCS)) +LIBVPX_TEST_BINS=./test_libvpx +LIBVPX_TEST_DATA=$(addprefix $(LIBVPX_TEST_DATA_PATH)/,\ + $(call enabled,LIBVPX_TEST_DATA)) +libvpx_test_data_url=http://downloads.webmproject.org/test_data/libvpx/$(1) + +$(LIBVPX_TEST_DATA): + @echo " [DOWNLOAD] $@" + $(qexec)trap 'rm -f $@' INT TERM &&\ + curl -L -o $@ $(call libvpx_test_data_url,$(@F)) + +testdata:: $(LIBVPX_TEST_DATA) + $(qexec)if [ -x "$$(which sha1sum)" ]; then\ + echo "Checking test data:";\ + if [ -n "$(LIBVPX_TEST_DATA)" ]; then\ + for f in $(call enabled,LIBVPX_TEST_DATA); do\ + grep $$f $(SRC_PATH_BARE)/test/test-data.sha1 |\ + (cd $(LIBVPX_TEST_DATA_PATH); sha1sum -c);\ + done; \ + fi; \ + else\ + echo "Skipping test data integrity check, sha1sum not found.";\ + fi + ifeq ($(CONFIG_EXTERNAL_BUILD),yes) ifeq ($(CONFIG_MSVS),yes) -LIBVPX_TEST_SRCS=$(filter %_test.cc,$(call enabled,CODEC_SRCS)) -LIBVPX_TEST_BINS=$(sort $(LIBVPX_TEST_SRCS:.cc.o=)) - gtest.vcproj: $(SRC_PATH_BARE)/third_party/googletest/src/src/gtest-all.cc @echo " [CREATE] $@" - $(SRC_PATH_BARE)/build/make/gen_msvs_proj.sh \ + $(qexec)$(SRC_PATH_BARE)/build/make/gen_msvs_proj.sh \ --lib \ --target=$(TOOLCHAIN) \ $(if $(CONFIG_STATIC_MSVCRT),--static-crt) \ @@ -368,27 +380,22 @@ gtest.vcproj: $(SRC_PATH_BARE)/third_party/googletest/src/src/gtest-all.cc PROJECTS-$(CONFIG_MSVS) += gtest.vcproj -define unit_test_vcproj_template -$(notdir $(1:.cc=.vcproj)): $(SRC_PATH_BARE)/$(1) - @echo " [vcproj] $$@" - $$(SRC_PATH_BARE)/build/make/gen_msvs_proj.sh\ - --exe\ - --target=$$(TOOLCHAIN)\ - --name=$(notdir 
$(1:.cc=))\ - --ver=$$(CONFIG_VS_VERSION)\ - $$(if $$(CONFIG_STATIC_MSVCRT),--static-crt) \ - --out=$$@ $$(INTERNAL_CFLAGS) $$(CFLAGS) \ +test_libvpx.vcproj: $(LIBVPX_TEST_SRCS) + @echo " [CREATE] $@" + $(qexec)$(SRC_PATH_BARE)/build/make/gen_msvs_proj.sh \ + --exe \ + --target=$(TOOLCHAIN) \ + --name=test_libvpx \ + --proj-guid=CD837F5F-52D8-4314-A370-895D614166A7 \ + --ver=$(CONFIG_VS_VERSION) \ + $(if $(CONFIG_STATIC_MSVCRT),--static-crt) \ + --out=$@ $(INTERNAL_CFLAGS) $(CFLAGS) \ -I. -I"$(SRC_PATH_BARE)/third_party/googletest/src/include" \ - -L. -lvpxmt -lwinmm -lgtestmt $$^ -endef + -L. -l$(CODEC_LIB) -lwinmm -l$(GTEST_LIB) $^ -$(foreach proj,$(LIBVPX_TEST_BINS),\ - $(eval $(call unit_test_vcproj_template,$(proj)))) +PROJECTS-$(CONFIG_MSVS) += test_libvpx.vcproj -PROJECTS-$(CONFIG_MSVS) += $(foreach proj,$(LIBVPX_TEST_BINS),\ - $(notdir $(proj:.cc=.vcproj))) - -test:: +test:: testdata @set -e; for t in $(addprefix Win32/Release/,$(notdir $(LIBVPX_TEST_BINS:.cc=.exe))); do $$t; done endif else @@ -396,28 +403,35 @@ else include $(SRC_PATH_BARE)/third_party/googletest/gtest.mk GTEST_SRCS := $(addprefix third_party/googletest/src/,$(call enabled,GTEST_SRCS)) GTEST_OBJS=$(call objs,$(GTEST_SRCS)) -$(GTEST_OBJS) $(GTEST_OBJS:.o=.d): CFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src -$(GTEST_OBJS) $(GTEST_OBJS:.o=.d): CFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src/include +$(GTEST_OBJS) $(GTEST_OBJS:.o=.d): CXXFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src +$(GTEST_OBJS) $(GTEST_OBJS:.o=.d): CXXFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src/include OBJS-$(BUILD_LIBVPX) += $(GTEST_OBJS) LIBS-$(BUILD_LIBVPX) += $(BUILD_PFX)libgtest.a $(BUILD_PFX)libgtest_g.a $(BUILD_PFX)libgtest_g.a: $(GTEST_OBJS) -LIBVPX_TEST_SRCS=$(filter %_test.cc,$(call enabled,CODEC_SRCS)) -LIBVPX_TEST_OBJS=$(call objs,$(LIBVPX_TEST_SRCS)) -$(LIBVPX_TEST_OBJS) $(LIBVPX_TEST_OBJS:.o=.d): CFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src -$(LIBVPX_TEST_OBJS) 
$(LIBVPX_TEST_OBJS:.o=.d): CFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src/include -LIBVPX_TEST_BINS=$(sort $(LIBVPX_TEST_OBJS:.cc.o=)) +LIBVPX_TEST_OBJS=$(sort $(call objs,$(LIBVPX_TEST_SRCS))) +$(LIBVPX_TEST_OBJS) $(LIBVPX_TEST_OBJS:.o=.d): CXXFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src +$(LIBVPX_TEST_OBJS) $(LIBVPX_TEST_OBJS:.o=.d): CXXFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src/include OBJS-$(BUILD_LIBVPX) += $(LIBVPX_TEST_OBJS) +BINS-$(BUILD_LIBVPX) += $(LIBVPX_TEST_BINS) + +# Install test sources only if codec source is included +INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += $(patsubst $(SRC_PATH_BARE)/%,%,\ + $(shell find $(SRC_PATH_BARE)/third_party/googletest -type f)) +INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += $(LIBVPX_TEST_SRCS) +CODEC_LIB=$(if $(CONFIG_DEBUG_LIBS),vpx_g,vpx) +CODEC_LIB_SUF=$(if $(CONFIG_SHARED),.so,.a) $(foreach bin,$(LIBVPX_TEST_BINS),\ - $(if $(BUILD_LIBVPX),$(eval $(bin): libvpx.a libgtest.a ))\ + $(if $(BUILD_LIBVPX),$(eval $(bin): \ + lib$(CODEC_LIB)$(CODEC_LIB_SUF) libgtest.a ))\ $(if $(BUILD_LIBVPX),$(eval $(call linkerxx_template,$(bin),\ - $(bin).cc.o \ + $(LIBVPX_TEST_OBJS) \ -L. -lvpx -lgtest -lpthread -lm)\ )))\ $(if $(LIPO_LIBS),$(eval $(call lipo_bin_template,$(bin))))\ -test:: $(LIBVPX_TEST_BINS) +test:: $(LIBVPX_TEST_BINS) testdata @set -e; for t in $(LIBVPX_TEST_BINS); do $$t; done endif @@ -435,3 +449,6 @@ libs.doxy: $(CODEC_DOC_SRCS) @echo "PREDEFINED = VPX_CODEC_DISABLE_COMPAT" >> $@ @echo "INCLUDE_PATH += ." 
>> $@; @echo "ENABLED_SECTIONS += $(sort $(CODEC_DOC_SECTIONS))" >> $@ + +## Generate vpx_rtcd.h for all objects +$(OBJS-yes:.o=.d): $(BUILD_PFX)vpx_rtcd.h diff --git a/nestegg/src/nestegg.c b/nestegg/src/nestegg.c index 63a0e83..cc87788 100644 --- a/nestegg/src/nestegg.c +++ b/nestegg/src/nestegg.c @@ -1272,7 +1272,7 @@ ne_read_block(nestegg * ctx, uint64_t block_id, uint64_t block_size, nestegg_pac if (total > block_size) return -1; - entry = ne_find_track_entry(ctx, track - 1); + entry = ne_find_track_entry(ctx, (unsigned int)(track - 1)); if (!entry) return -1; @@ -1291,7 +1291,7 @@ ne_read_block(nestegg * ctx, uint64_t block_id, uint64_t block_size, nestegg_pac pkt = ne_alloc(sizeof(*pkt)); pkt->track = track - 1; - pkt->timecode = abs_timecode * tc_scale * track_scale; + pkt->timecode = (uint64_t)(abs_timecode * tc_scale * track_scale); ctx->log(ctx, NESTEGG_LOG_DEBUG, "%sblock t %lld pts %f f %llx frames: %llu", block_id == ID_BLOCK ? "" : "simple", pkt->track, pkt->timecode / 1e9, flags, frames); @@ -1774,35 +1774,35 @@ nestegg_track_video_params(nestegg * ctx, unsigned int track, if (ne_get_uint(entry->video.pixel_width, &value) != 0) return -1; - params->width = value; + params->width = (unsigned int)value; if (ne_get_uint(entry->video.pixel_height, &value) != 0) return -1; - params->height = value; + params->height = (unsigned int)value; value = 0; ne_get_uint(entry->video.pixel_crop_bottom, &value); - params->crop_bottom = value; + params->crop_bottom = (unsigned int)value; value = 0; ne_get_uint(entry->video.pixel_crop_top, &value); - params->crop_top = value; + params->crop_top = (unsigned int)value; value = 0; ne_get_uint(entry->video.pixel_crop_left, &value); - params->crop_left = value; + params->crop_left = (unsigned int)value; value = 0; ne_get_uint(entry->video.pixel_crop_right, &value); - params->crop_right = value; + params->crop_right = (unsigned int)value; value = params->width; ne_get_uint(entry->video.display_width, &value); - 
params->display_width = value; + params->display_width = (unsigned int)value; value = params->height; ne_get_uint(entry->video.display_height, &value); - params->display_height = value; + params->display_height = (unsigned int)value; return 0; } @@ -1828,11 +1828,11 @@ nestegg_track_audio_params(nestegg * ctx, unsigned int track, value = 1; ne_get_uint(entry->audio.channels, &value); - params->channels = value; + params->channels = (unsigned int)value; value = 16; ne_get_uint(entry->audio.bit_depth, &value); - params->depth = value; + params->depth = (unsigned int)value; return 0; } @@ -1888,7 +1888,7 @@ nestegg_free_packet(nestegg_packet * pkt) int nestegg_packet_track(nestegg_packet * pkt, unsigned int * track) { - *track = pkt->track; + *track = (unsigned int)pkt->track; return 0; } diff --git a/solution.mk b/solution.mk index 2de1d8d..948305f 100644 --- a/solution.mk +++ b/solution.mk @@ -8,18 +8,19 @@ ## be found in the AUTHORS file in the root of the source tree. ## +# libvpx reverse dependencies (targets that depend on libvpx) +VPX_NONDEPS=$(addsuffix .vcproj,vpx gtest obj_int_extract) +VPX_RDEPS=$(foreach vcp,\ + $(filter-out $(VPX_NONDEPS),$^), --dep=$(vcp:.vcproj=):vpx) vpx.sln: $(wildcard *.vcproj) @echo " [CREATE] $@" $(SRC_PATH_BARE)/build/make/gen_msvs_sln.sh \ - $(if $(filter %vpx.vcproj,$^),\ - $(foreach vcp,$(filter-out %vpx.vcproj %gtest.vcproj %obj_int_extract.vcproj,$^),\ - --dep=$(vcp:.vcproj=):vpx) \ - $(foreach vcp,$(filter %_test.vcproj,$^),\ - --dep=$(vcp:.vcproj=):gtest)) \ - --dep=vpx:obj_int_extract \ - --ver=$(CONFIG_VS_VERSION)\ - --out=$@ $^ + $(if $(filter vpx.vcproj,$^),$(VPX_RDEPS)) \ + --dep=vpx:obj_int_extract \ + --dep=test_libvpx:gtest \ + --ver=$(CONFIG_VS_VERSION)\ + --out=$@ $^ vpx.sln.mk: vpx.sln @true diff --git a/test/acm_random.h b/test/acm_random.h new file mode 100644 index 0000000..514894e --- /dev/null +++ b/test/acm_random.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef LIBVPX_TEST_ACM_RANDOM_H_ +#define LIBVPX_TEST_ACM_RANDOM_H_ + +#include + +#include "vpx/vpx_integer.h" + +namespace libvpx_test { + +class ACMRandom { + public: + ACMRandom() { + Reset(DeterministicSeed()); + } + + explicit ACMRandom(int seed) { + Reset(seed); + } + + void Reset(int seed) { + srand(seed); + } + + uint8_t Rand8(void) { + return (rand() >> 8) & 0xff; + } + + int PseudoUniform(int range) { + return (rand() >> 8) % range; + } + + int operator()(int n) { + return PseudoUniform(n); + } + + static int DeterministicSeed(void) { + return 0xbaba; + } +}; + +} // namespace libvpx_test + +#endif // LIBVPX_TEST_ACM_RANDOM_H_ diff --git a/test/altref_test.cc b/test/altref_test.cc new file mode 100644 index 0000000..ca05577 --- /dev/null +++ b/test/altref_test.cc @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include "third_party/googletest/src/include/gtest/gtest.h" +#include "test/encode_test_driver.h" +#include "test/i420_video_source.h" + +namespace { + +// lookahead range: [kLookAheadMin, kLookAheadMax). 
+const int kLookAheadMin = 5; +const int kLookAheadMax = 26; + +class AltRefTest : public libvpx_test::EncoderTest, + public ::testing::TestWithParam { + protected: + AltRefTest() : altref_count_(0) {} + virtual ~AltRefTest() {} + + virtual void SetUp() { + InitializeConfig(); + SetMode(libvpx_test::kTwoPassGood); + } + + virtual void BeginPassHook(unsigned int pass) { + altref_count_ = 0; + } + + virtual bool Continue() const { + return !HasFatalFailure() && !abort_; + } + + virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video, + libvpx_test::Encoder *encoder) { + if (video->frame() == 1) { + encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1); + encoder->Control(VP8E_SET_CPUUSED, 3); + } + } + + virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) { + if (pkt->data.frame.flags & VPX_FRAME_IS_INVISIBLE) ++altref_count_; + } + + int altref_count() const { return altref_count_; } + + private: + int altref_count_; +}; + +TEST_P(AltRefTest, MonotonicTimestamps) { + const vpx_rational timebase = { 33333333, 1000000000 }; + cfg_.g_timebase = timebase; + cfg_.rc_target_bitrate = 1000; + cfg_.g_lag_in_frames = GetParam(); + + libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + timebase.den, timebase.num, 0, 30); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + EXPECT_GE(altref_count(), 1); +} + +INSTANTIATE_TEST_CASE_P(NonZeroLag, AltRefTest, + ::testing::Range(kLookAheadMin, kLookAheadMax)); +} // namespace diff --git a/test/boolcoder_test.cc b/test/boolcoder_test.cc new file mode 100644 index 0000000..4e21be8 --- /dev/null +++ b/test/boolcoder_test.cc @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +extern "C" { +#include "vp8/encoder/boolhuff.h" +#include "vp8/decoder/dboolhuff.h" +} + +#include +#include +#include +#include +#include +#include + +#include "test/acm_random.h" +#include "third_party/googletest/src/include/gtest/gtest.h" +#include "vpx/vpx_integer.h" + +namespace { +const int num_tests = 10; +} // namespace + +using libvpx_test::ACMRandom; + +TEST(VP8, TestBitIO) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + for (int n = 0; n < num_tests; ++n) { + for (int method = 0; method <= 7; ++method) { // we generate various proba + const int bits_to_test = 1000; + uint8_t probas[bits_to_test]; + + for (int i = 0; i < bits_to_test; ++i) { + const int parity = i & 1; + probas[i] = + (method == 0) ? 0 : (method == 1) ? 255 : + (method == 2) ? 128 : + (method == 3) ? rnd.Rand8() : + (method == 4) ? (parity ? 0 : 255) : + // alternate between low and high proba: + (method == 5) ? (parity ? rnd(128) : 255 - rnd(128)) : + (method == 6) ? + (parity ? rnd(64) : 255 - rnd(64)) : + (parity ? rnd(32) : 255 - rnd(32)); + } + for (int bit_method = 0; bit_method <= 3; ++bit_method) { + const int random_seed = 6432; + const int buffer_size = 10000; + ACMRandom bit_rnd(random_seed); + BOOL_CODER bw; + uint8_t bw_buffer[buffer_size]; + vp8_start_encode(&bw, bw_buffer, bw_buffer + buffer_size); + + int bit = (bit_method == 0) ? 0 : (bit_method == 1) ? 
1 : 0; + for (int i = 0; i < bits_to_test; ++i) { + if (bit_method == 2) { + bit = (i & 1); + } else if (bit_method == 3) { + bit = bit_rnd(2); + } + vp8_encode_bool(&bw, bit, static_cast(probas[i])); + } + + vp8_stop_encode(&bw); + + BOOL_DECODER br; + vp8dx_start_decode(&br, bw_buffer, buffer_size); + bit_rnd.Reset(random_seed); + for (int i = 0; i < bits_to_test; ++i) { + if (bit_method == 2) { + bit = (i & 1); + } else if (bit_method == 3) { + bit = bit_rnd(2); + } + GTEST_ASSERT_EQ(vp8dx_decode_bool(&br, probas[i]), bit) + << "pos: "<< i << " / " << bits_to_test + << " bit_method: " << bit_method + << " method: " << method; + } + } + } + } +} diff --git a/test/config_test.cc b/test/config_test.cc new file mode 100644 index 0000000..c4da46e --- /dev/null +++ b/test/config_test.cc @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ +#include "third_party/googletest/src/include/gtest/gtest.h" +#include "test/encode_test_driver.h" +#include "test/video_source.h" + +namespace { + +class ConfigTest : public ::libvpx_test::EncoderTest, + public ::testing::TestWithParam { + public: + ConfigTest() : frame_count_in_(0), frame_count_out_(0), frame_count_max_(0) {} + + protected: + virtual void SetUp() { + InitializeConfig(); + SetMode(GetParam()); + } + + virtual void BeginPassHook(unsigned int /*pass*/) { + frame_count_in_ = 0; + frame_count_out_ = 0; + } + + virtual void PreEncodeFrameHook(libvpx_test::VideoSource* /*video*/) { + ++frame_count_in_; + abort_ |= (frame_count_in_ >= frame_count_max_); + } + + virtual void FramePktHook(const vpx_codec_cx_pkt_t* /*pkt*/) { + ++frame_count_out_; + } + + virtual bool Continue() const { + return !HasFatalFailure() && !abort_; + } + + unsigned int frame_count_in_; + unsigned int frame_count_out_; + unsigned int frame_count_max_; +}; + +TEST_P(ConfigTest, LagIsDisabled) { + frame_count_max_ = 2; + cfg_.g_lag_in_frames = 15; + + libvpx_test::DummyVideoSource video; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + + EXPECT_EQ(frame_count_in_, frame_count_out_); +} + +INSTANTIATE_TEST_CASE_P(OnePassModes, ConfigTest, ONE_PASS_TEST_MODES); +} // namespace diff --git a/test/cq_test.cc b/test/cq_test.cc new file mode 100644 index 0000000..42ee2a2 --- /dev/null +++ b/test/cq_test.cc @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ +#include +#include "third_party/googletest/src/include/gtest/gtest.h" +#include "test/encode_test_driver.h" +#include "test/i420_video_source.h" + +// CQ level range: [kCQLevelMin, kCQLevelMax). +const int kCQLevelMin = 4; +const int kCQLevelMax = 63; +const int kCQLevelStep = 8; +const int kCQTargetBitrate = 2000; + +namespace { + +class CQTest : public libvpx_test::EncoderTest, + public ::testing::TestWithParam { + protected: + CQTest() : cq_level_(GetParam()) { init_flags_ = VPX_CODEC_USE_PSNR; } + virtual ~CQTest() {} + + virtual void SetUp() { + InitializeConfig(); + SetMode(libvpx_test::kTwoPassGood); + } + + virtual void BeginPassHook(unsigned int /*pass*/) { + file_size_ = 0; + psnr_ = 0.0; + n_frames_ = 0; + } + + virtual bool Continue() const { + return !HasFatalFailure() && !abort_; + } + + virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video, + libvpx_test::Encoder *encoder) { + if (video->frame() == 1) { + if (cfg_.rc_end_usage == VPX_CQ) { + encoder->Control(VP8E_SET_CQ_LEVEL, cq_level_); + } + encoder->Control(VP8E_SET_CPUUSED, 3); + } + } + + virtual void PSNRPktHook(const vpx_codec_cx_pkt_t *pkt) { + psnr_ += pow(10.0, pkt->data.psnr.psnr[0] / 10.0); + n_frames_++; + } + + virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) { + file_size_ += pkt->data.frame.sz; + } + + double GetLinearPSNROverBitrate() const { + double avg_psnr = log10(psnr_ / n_frames_) * 10.0; + return pow(10.0, avg_psnr / 10.0) / file_size_; + } + + int file_size() const { return file_size_; } + int n_frames() const { return n_frames_; } + + private: + int cq_level_; + int file_size_; + double psnr_; + int n_frames_; +}; + +int prev_actual_bitrate = kCQTargetBitrate; +TEST_P(CQTest, LinearPSNRIsHigherForCQLevel) { + const vpx_rational timebase = { 33333333, 1000000000 }; + cfg_.g_timebase = timebase; + cfg_.rc_target_bitrate = kCQTargetBitrate; + cfg_.g_lag_in_frames = 25; + + cfg_.rc_end_usage = VPX_CQ; + libvpx_test::I420VideoSource 
video("hantro_collage_w352h288.yuv", 352, 288, + timebase.den, timebase.num, 0, 30); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + const double cq_psnr_lin = GetLinearPSNROverBitrate(); + const int cq_actual_bitrate = file_size() * 8 * 30 / (n_frames() * 1000); + EXPECT_LE(cq_actual_bitrate, kCQTargetBitrate); + EXPECT_LE(cq_actual_bitrate, prev_actual_bitrate); + prev_actual_bitrate = cq_actual_bitrate; + + // try targeting the approximate same bitrate with VBR mode + cfg_.rc_end_usage = VPX_VBR; + cfg_.rc_target_bitrate = cq_actual_bitrate; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + const double vbr_psnr_lin = GetLinearPSNROverBitrate(); + EXPECT_GE(cq_psnr_lin, vbr_psnr_lin); +} + +INSTANTIATE_TEST_CASE_P(CQLevelRange, CQTest, + ::testing::Range(kCQLevelMin, kCQLevelMax, + kCQLevelStep)); +} // namespace diff --git a/test/datarate_test.cc b/test/datarate_test.cc new file mode 100644 index 0000000..6fbcb64 --- /dev/null +++ b/test/datarate_test.cc @@ -0,0 +1,178 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ +#include "test/encode_test_driver.h" +#include "test/i420_video_source.h" +#include "third_party/googletest/src/include/gtest/gtest.h" +namespace { + +class DatarateTest : public ::libvpx_test::EncoderTest, + public ::testing::TestWithParam { + protected: + virtual void SetUp() { + InitializeConfig(); + SetMode(GetParam()); + ResetModel(); + } + + virtual void ResetModel() { + last_pts_ = 0; + bits_in_buffer_model_ = cfg_.rc_target_bitrate * cfg_.rc_buf_initial_sz; + frame_number_ = 0; + first_drop_ = 0; + bits_total_ = 0; + duration_ = 0.0; + } + + virtual bool Continue() const { + return !HasFatalFailure() && !abort_; + } + + virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video, + ::libvpx_test::Encoder *encoder) { + const vpx_rational_t tb = video->timebase(); + timebase_ = static_cast(tb.num) / tb.den; + duration_ = 0; + } + + virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) { + // Time since last timestamp = duration. + vpx_codec_pts_t duration = pkt->data.frame.pts - last_pts_; + + // TODO(jimbankoski): Remove these lines when the issue: + // http://code.google.com/p/webm/issues/detail?id=496 is fixed. + // For now the codec assumes buffer starts at starting buffer rate + // plus one frame's time. + if (last_pts_ == 0) + duration = 1; + + // Add to the buffer the bits we'd expect from a constant bitrate server. + bits_in_buffer_model_ += duration * timebase_ * cfg_.rc_target_bitrate + * 1000; + + /* Test the buffer model here before subtracting the frame. Do so because + * the way the leaky bucket model works in libvpx is to allow the buffer to + * empty - and then stop showing frames until we've got enough bits to + * show one. As noted in comment below (issue 495), this does not currently + * apply to key frames. For now exclude key frames in condition below. */ + bool key_frame = (pkt->data.frame.flags & VPX_FRAME_IS_KEY) ? 
true: false; + if (!key_frame) { + ASSERT_GE(bits_in_buffer_model_, 0) << "Buffer Underrun at frame " + << pkt->data.frame.pts; + } + + const int frame_size_in_bits = pkt->data.frame.sz * 8; + + // Subtract from the buffer the bits associated with a played back frame. + bits_in_buffer_model_ -= frame_size_in_bits; + + // Update the running total of bits for end of test datarate checks. + bits_total_ += frame_size_in_bits ; + + // If first drop not set and we have a drop set it to this time. + if (!first_drop_ && duration > 1) + first_drop_ = last_pts_ + 1; + + // Update the most recent pts. + last_pts_ = pkt->data.frame.pts; + + // We update this so that we can calculate the datarate minus the last + // frame encoded in the file. + bits_in_last_frame_ = frame_size_in_bits; + + ++frame_number_; + } + + virtual void EndPassHook(void) { + if (bits_total_) { + const double file_size_in_kb = bits_total_ / 1000; /* bits per kilobit */ + + duration_ = (last_pts_ + 1) * timebase_; + + // Effective file datarate includes the time spent prebuffering. + effective_datarate_ = (bits_total_ - bits_in_last_frame_) / 1000.0 + / (cfg_.rc_buf_initial_sz / 1000.0 + duration_); + + file_datarate_ = file_size_in_kb / duration_; + } + } + + vpx_codec_pts_t last_pts_; + int bits_in_buffer_model_; + double timebase_; + int frame_number_; + vpx_codec_pts_t first_drop_; + int64_t bits_total_; + double duration_; + double file_datarate_; + double effective_datarate_; + int bits_in_last_frame_; +}; + +TEST_P(DatarateTest, BasicBufferModel) { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_dropframe_thresh = 1; + cfg_.rc_max_quantizer = 56; + cfg_.rc_end_usage = VPX_CBR; + // 2 pass cbr datarate control has a bug hidden by the small # of + // frames selected in this encode. The problem is that even if the buffer is + // negative we produce a keyframe on a cutscene. 
Ignoring datarate + // constraints + // TODO(jimbankoski): ( Fix when issue + // http://code.google.com/p/webm/issues/detail?id=495 is addressed. ) + ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + 30, 1, 0, 140); + + // There is an issue for low bitrates in real-time mode, where the + // effective_datarate slightly overshoots the target bitrate. + // This is same the issue as noted about (#495). + // TODO(jimbankoski/marpan): Update test to run for lower bitrates (< 100), + // when the issue is resolved. + for (int i = 100; i < 800; i += 200) { + cfg_.rc_target_bitrate = i; + ResetModel(); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_) + << " The datarate for the file exceeds the target!"; + + ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.3) + << " The datarate for the file missed the target!"; + } +} + +TEST_P(DatarateTest, ChangingDropFrameThresh) { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_max_quantizer = 36; + cfg_.rc_end_usage = VPX_CBR; + cfg_.rc_target_bitrate = 200; + cfg_.kf_mode = VPX_KF_DISABLED; + + const int frame_count = 40; + ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + 30, 1, 0, frame_count); + + // Here we check that the first dropped frame gets earlier and earlier + // as the drop frame threshold is increased. 
+ + const int kDropFrameThreshTestStep = 30; + vpx_codec_pts_t last_drop = frame_count; + for (int i = 1; i < 91; i += kDropFrameThreshTestStep) { + cfg_.rc_dropframe_thresh = i; + ResetModel(); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + ASSERT_LE(first_drop_, last_drop) + << " The first dropped frame for drop_thresh " << i + << " > first dropped frame for drop_thresh " + << i - kDropFrameThreshTestStep; + last_drop = first_drop_; + } +} + +INSTANTIATE_TEST_CASE_P(AllModes, DatarateTest, ALL_TEST_MODES); +} // namespace diff --git a/test/decode_test_driver.cc b/test/decode_test_driver.cc new file mode 100644 index 0000000..84afe7f --- /dev/null +++ b/test/decode_test_driver.cc @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include "test/decode_test_driver.h" +#include "third_party/googletest/src/include/gtest/gtest.h" +#include "test/register_state_check.h" +#include "test/video_source.h" + +namespace libvpx_test { +#if CONFIG_VP8_DECODER +void Decoder::DecodeFrame(const uint8_t *cxdata, int size) { + if (!decoder_.priv) { + const vpx_codec_err_t res_init = vpx_codec_dec_init(&decoder_, + &vpx_codec_vp8_dx_algo, + &cfg_, 0); + ASSERT_EQ(VPX_CODEC_OK, res_init) << DecodeError(); + } + + vpx_codec_err_t res_dec; + REGISTER_STATE_CHECK(res_dec = vpx_codec_decode(&decoder_, + cxdata, size, NULL, 0)); + ASSERT_EQ(VPX_CODEC_OK, res_dec) << DecodeError(); +} + +void DecoderTest::RunLoop(CompressedVideoSource *video) { + vpx_codec_dec_cfg_t dec_cfg = {0}; + Decoder decoder(dec_cfg, 0); + + // Decode frames. 
+ for (video->Begin(); video->cxdata(); video->Next()) { + decoder.DecodeFrame(video->cxdata(), video->frame_size()); + + DxDataIterator dec_iter = decoder.GetDxData(); + const vpx_image_t *img = NULL; + + // Get decompressed data + while ((img = dec_iter.Next())) + DecompressedFrameHook(*img, video->frame_number()); + } +} +#endif +} // namespace libvpx_test diff --git a/test/decode_test_driver.h b/test/decode_test_driver.h new file mode 100644 index 0000000..6408bee --- /dev/null +++ b/test/decode_test_driver.h @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef TEST_DECODE_TEST_DRIVER_H_ +#define TEST_DECODE_TEST_DRIVER_H_ +#include +#include "third_party/googletest/src/include/gtest/gtest.h" +#include "vpx_config.h" +#include "vpx/vpx_decoder.h" +#include "vpx/vp8dx.h" + +namespace libvpx_test { + +class CompressedVideoSource; + +// Provides an object to handle decoding output +class DxDataIterator { + public: + explicit DxDataIterator(vpx_codec_ctx_t *decoder) + : decoder_(decoder), iter_(NULL) {} + + const vpx_image_t *Next() { + return vpx_codec_get_frame(decoder_, &iter_); + } + + private: + vpx_codec_ctx_t *decoder_; + vpx_codec_iter_t iter_; +}; + +// Provides a simplified interface to manage one video decoding. +// +// TODO: similar to Encoder class, the exact services should be +// added as more tests are added. 
+class Decoder { + public: + Decoder(vpx_codec_dec_cfg_t cfg, unsigned long deadline) + : cfg_(cfg), deadline_(deadline) { + memset(&decoder_, 0, sizeof(decoder_)); + } + + ~Decoder() { + vpx_codec_destroy(&decoder_); + } + + void DecodeFrame(const uint8_t *cxdata, int size); + + DxDataIterator GetDxData() { + return DxDataIterator(&decoder_); + } + + void set_deadline(unsigned long deadline) { + deadline_ = deadline; + } + + void Control(int ctrl_id, int arg) { + const vpx_codec_err_t res = vpx_codec_control_(&decoder_, ctrl_id, arg); + ASSERT_EQ(VPX_CODEC_OK, res) << DecodeError(); + } + + protected: + const char *DecodeError() { + const char *detail = vpx_codec_error_detail(&decoder_); + return detail ? detail : vpx_codec_error(&decoder_); + } + + vpx_codec_ctx_t decoder_; + vpx_codec_dec_cfg_t cfg_; + unsigned int deadline_; +}; + +// Common test functionality for all Decoder tests. +class DecoderTest { + public: + // Main loop. + virtual void RunLoop(CompressedVideoSource *video); + + // Hook to be called on every decompressed frame. + virtual void DecompressedFrameHook(const vpx_image_t& img, + const unsigned int frame_number) {} + + protected: + DecoderTest() {} + + virtual ~DecoderTest() {} +}; + +} // namespace libvpx_test + +#endif // TEST_DECODE_TEST_DRIVER_H_ diff --git a/test/encode_test_driver.cc b/test/encode_test_driver.cc new file mode 100644 index 0000000..56339ca --- /dev/null +++ b/test/encode_test_driver.cc @@ -0,0 +1,206 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ +#include "vpx_config.h" +#include "test/encode_test_driver.h" +#if CONFIG_VP8_DECODER +#include "test/decode_test_driver.h" +#endif +#include "test/register_state_check.h" +#include "test/video_source.h" +#include "third_party/googletest/src/include/gtest/gtest.h" + +namespace libvpx_test { +void Encoder::EncodeFrame(VideoSource *video, const unsigned long frame_flags) { + if (video->img()) + EncodeFrameInternal(*video, frame_flags); + else + Flush(); + + // Handle twopass stats + CxDataIterator iter = GetCxData(); + + while (const vpx_codec_cx_pkt_t *pkt = iter.Next()) { + if (pkt->kind != VPX_CODEC_STATS_PKT) + continue; + + stats_->Append(*pkt); + } +} + +void Encoder::EncodeFrameInternal(const VideoSource &video, + const unsigned long frame_flags) { + vpx_codec_err_t res; + const vpx_image_t *img = video.img(); + + // Handle first frame initialization + if (!encoder_.priv) { + cfg_.g_w = img->d_w; + cfg_.g_h = img->d_h; + cfg_.g_timebase = video.timebase(); + cfg_.rc_twopass_stats_in = stats_->buf(); + res = vpx_codec_enc_init(&encoder_, &vpx_codec_vp8_cx_algo, &cfg_, + init_flags_); + ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); + } + + // Handle frame resizing + if (cfg_.g_w != img->d_w || cfg_.g_h != img->d_h) { + cfg_.g_w = img->d_w; + cfg_.g_h = img->d_h; + res = vpx_codec_enc_config_set(&encoder_, &cfg_); + ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); + } + + // Encode the frame + REGISTER_STATE_CHECK( + res = vpx_codec_encode(&encoder_, + video.img(), video.pts(), video.duration(), + frame_flags, deadline_)); + ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); +} + +void Encoder::Flush() { + const vpx_codec_err_t res = vpx_codec_encode(&encoder_, NULL, 0, 0, 0, + deadline_); + ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); +} + +void EncoderTest::SetMode(TestMode mode) { + switch (mode) { + case kRealTime: + deadline_ = VPX_DL_REALTIME; + break; + + case kOnePassGood: + case kTwoPassGood: + deadline_ = VPX_DL_GOOD_QUALITY; + break; + + case 
kOnePassBest: + case kTwoPassBest: + deadline_ = VPX_DL_BEST_QUALITY; + break; + + default: + ASSERT_TRUE(false) << "Unexpected mode " << mode; + } + + if (mode == kTwoPassGood || mode == kTwoPassBest) + passes_ = 2; + else + passes_ = 1; +} +// The function should return "true" most of the time, therefore no early +// break-out is implemented within the match checking process. +static bool compare_img(const vpx_image_t *img1, + const vpx_image_t *img2) { + bool match = (img1->fmt == img2->fmt) && + (img1->d_w == img2->d_w) && + (img1->d_h == img2->d_h); + + const unsigned int width_y = img1->d_w; + const unsigned int height_y = img1->d_h; + unsigned int i; + for (i = 0; i < height_y; ++i) + match = ( memcmp(img1->planes[VPX_PLANE_Y] + i * img1->stride[VPX_PLANE_Y], + img2->planes[VPX_PLANE_Y] + i * img2->stride[VPX_PLANE_Y], + width_y) == 0) && match; + const unsigned int width_uv = (img1->d_w + 1) >> 1; + const unsigned int height_uv = (img1->d_h + 1) >> 1; + for (i = 0; i < height_uv; ++i) + match = ( memcmp(img1->planes[VPX_PLANE_U] + i * img1->stride[VPX_PLANE_U], + img2->planes[VPX_PLANE_U] + i * img2->stride[VPX_PLANE_U], + width_uv) == 0) && match; + for (i = 0; i < height_uv; ++i) + match = ( memcmp(img1->planes[VPX_PLANE_V] + i * img1->stride[VPX_PLANE_V], + img2->planes[VPX_PLANE_V] + i * img2->stride[VPX_PLANE_V], + width_uv) == 0) && match; + return match; +} + +void EncoderTest::RunLoop(VideoSource *video) { +#if CONFIG_VP8_DECODER + vpx_codec_dec_cfg_t dec_cfg = {0}; +#endif + + stats_.Reset(); + + for (unsigned int pass = 0; pass < passes_; pass++) { + last_pts_ = 0; + + if (passes_ == 1) + cfg_.g_pass = VPX_RC_ONE_PASS; + else if (pass == 0) + cfg_.g_pass = VPX_RC_FIRST_PASS; + else + cfg_.g_pass = VPX_RC_LAST_PASS; + + BeginPassHook(pass); + Encoder encoder(cfg_, deadline_, init_flags_, &stats_); +#if CONFIG_VP8_DECODER + Decoder decoder(dec_cfg, 0); + bool has_cxdata = false; +#endif + bool again; + for (again = true, video->Begin(); again; 
video->Next()) { + again = video->img() != NULL; + + PreEncodeFrameHook(video); + PreEncodeFrameHook(video, &encoder); + encoder.EncodeFrame(video, frame_flags_); + + CxDataIterator iter = encoder.GetCxData(); + + while (const vpx_codec_cx_pkt_t *pkt = iter.Next()) { + again = true; + + switch (pkt->kind) { + case VPX_CODEC_CX_FRAME_PKT: +#if CONFIG_VP8_DECODER + has_cxdata = true; + decoder.DecodeFrame((const uint8_t*)pkt->data.frame.buf, + pkt->data.frame.sz); +#endif + ASSERT_GE(pkt->data.frame.pts, last_pts_); + last_pts_ = pkt->data.frame.pts; + FramePktHook(pkt); + break; + + case VPX_CODEC_PSNR_PKT: + PSNRPktHook(pkt); + break; + + default: + break; + } + } + +#if CONFIG_VP8_DECODER + if (has_cxdata) { + const vpx_image_t *img_enc = encoder.GetPreviewFrame(); + DxDataIterator dec_iter = decoder.GetDxData(); + const vpx_image_t *img_dec = dec_iter.Next(); + if(img_enc && img_dec) { + const bool res = compare_img(img_enc, img_dec); + ASSERT_TRUE(res)<< "Encoder/Decoder mismatch found."; + } + } +#endif + if (!Continue()) + break; + } + + EndPassHook(); + + if (!Continue()) + break; + } +} +} // namespace libvpx_test diff --git a/test/encode_test_driver.h b/test/encode_test_driver.h new file mode 100644 index 0000000..0141fa9 --- /dev/null +++ b/test/encode_test_driver.h @@ -0,0 +1,197 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ +#ifndef TEST_ENCODE_TEST_DRIVER_H_ +#define TEST_ENCODE_TEST_DRIVER_H_ +#include +#include +#include "third_party/googletest/src/include/gtest/gtest.h" +#include "vpx/vpx_encoder.h" +#include "vpx/vp8cx.h" + +namespace libvpx_test { + +class VideoSource; + +enum TestMode { + kRealTime, + kOnePassGood, + kOnePassBest, + kTwoPassGood, + kTwoPassBest +}; +#define ALL_TEST_MODES ::testing::Values(::libvpx_test::kRealTime, \ + ::libvpx_test::kOnePassGood, \ + ::libvpx_test::kOnePassBest, \ + ::libvpx_test::kTwoPassGood, \ + ::libvpx_test::kTwoPassBest) + +#define ONE_PASS_TEST_MODES ::testing::Values(::libvpx_test::kRealTime, \ + ::libvpx_test::kOnePassGood, \ + ::libvpx_test::kOnePassBest) + + +// Provides an object to handle the libvpx get_cx_data() iteration pattern +class CxDataIterator { + public: + explicit CxDataIterator(vpx_codec_ctx_t *encoder) + : encoder_(encoder), iter_(NULL) {} + + const vpx_codec_cx_pkt_t *Next() { + return vpx_codec_get_cx_data(encoder_, &iter_); + } + + private: + vpx_codec_ctx_t *encoder_; + vpx_codec_iter_t iter_; +}; + +// Implements an in-memory store for libvpx twopass statistics +class TwopassStatsStore { + public: + void Append(const vpx_codec_cx_pkt_t &pkt) { + buffer_.append(reinterpret_cast(pkt.data.twopass_stats.buf), + pkt.data.twopass_stats.sz); + } + + vpx_fixed_buf_t buf() { + const vpx_fixed_buf_t buf = { &buffer_[0], buffer_.size() }; + return buf; + } + + void Reset() { + buffer_.clear(); + } + + protected: + std::string buffer_; +}; + + +// Provides a simplified interface to manage one video encoding pass, given +// a configuration and video source. +// +// TODO(jkoleszar): The exact services it provides and the appropriate +// level of abstraction will be fleshed out as more tests are written. 
+class Encoder { + public: + Encoder(vpx_codec_enc_cfg_t cfg, unsigned long deadline, + const unsigned long init_flags, TwopassStatsStore *stats) + : cfg_(cfg), deadline_(deadline), init_flags_(init_flags), stats_(stats) { + memset(&encoder_, 0, sizeof(encoder_)); + } + + ~Encoder() { + vpx_codec_destroy(&encoder_); + } + + CxDataIterator GetCxData() { + return CxDataIterator(&encoder_); + } + + const vpx_image_t *GetPreviewFrame() { + return vpx_codec_get_preview_frame(&encoder_); + } + // This is a thin wrapper around vpx_codec_encode(), so refer to + // vpx_encoder.h for its semantics. + void EncodeFrame(VideoSource *video, const unsigned long frame_flags); + + // Convenience wrapper for EncodeFrame() + void EncodeFrame(VideoSource *video) { + EncodeFrame(video, 0); + } + + void Control(int ctrl_id, int arg) { + const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg); + ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); + } + + void set_deadline(unsigned long deadline) { + deadline_ = deadline; + } + + protected: + const char *EncoderError() { + const char *detail = vpx_codec_error_detail(&encoder_); + return detail ? detail : vpx_codec_error(&encoder_); + } + + // Encode an image + void EncodeFrameInternal(const VideoSource &video, + const unsigned long frame_flags); + + // Flush the encoder on EOS + void Flush(); + + vpx_codec_ctx_t encoder_; + vpx_codec_enc_cfg_t cfg_; + unsigned long deadline_; + unsigned long init_flags_; + TwopassStatsStore *stats_; +}; + +// Common test functionality for all Encoder tests. +// +// This class is a mixin which provides the main loop common to all +// encoder tests. It provides hooks which can be overridden by subclasses +// to implement each test's specific behavior, while centralizing the bulk +// of the boilerplate. Note that it doesn't inherit the gtest testing +// classes directly, so that tests can be parameterized differently. 
+class EncoderTest { + protected: + EncoderTest() : abort_(false), init_flags_(0), frame_flags_(0), + last_pts_(0) {} + + virtual ~EncoderTest() {} + + // Initialize the cfg_ member with the default configuration. + void InitializeConfig() { + const vpx_codec_err_t res = vpx_codec_enc_config_default( + &vpx_codec_vp8_cx_algo, &cfg_, 0); + ASSERT_EQ(VPX_CODEC_OK, res); + } + + // Map the TestMode enum to the deadline_ and passes_ variables. + void SetMode(TestMode mode); + + // Main loop. + virtual void RunLoop(VideoSource *video); + + // Hook to be called at the beginning of a pass. + virtual void BeginPassHook(unsigned int pass) {} + + // Hook to be called at the end of a pass. + virtual void EndPassHook() {} + + // Hook to be called before encoding a frame. + virtual void PreEncodeFrameHook(VideoSource *video) {} + virtual void PreEncodeFrameHook(VideoSource *video, Encoder *encoder) {} + + // Hook to be called on every compressed data packet. + virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {} + + // Hook to be called on every PSNR packet. + virtual void PSNRPktHook(const vpx_codec_cx_pkt_t *pkt) {} + + // Hook to determine whether the encode loop should continue. + virtual bool Continue() const { return !abort_; } + + bool abort_; + vpx_codec_enc_cfg_t cfg_; + unsigned int passes_; + unsigned long deadline_; + TwopassStatsStore stats_; + unsigned long init_flags_; + unsigned long frame_flags_; + vpx_codec_pts_t last_pts_; +}; + +} // namespace libvpx_test + +#endif // TEST_ENCODE_TEST_DRIVER_H_ diff --git a/test/error_resilience_test.cc b/test/error_resilience_test.cc new file mode 100644 index 0000000..25c6731 --- /dev/null +++ b/test/error_resilience_test.cc @@ -0,0 +1,90 @@ +/* + Copyright (c) 2012 The WebM project authors. All Rights Reserved. + + Use of this source code is governed by a BSD-style license + that can be found in the LICENSE file in the root of the source + tree. 
An additional intellectual property rights grant can be found + in the file PATENTS. All contributing project authors may + be found in the AUTHORS file in the root of the source tree. +*/ +#include "third_party/googletest/src/include/gtest/gtest.h" +#include "test/encode_test_driver.h" +#include "test/i420_video_source.h" + +namespace { + +class ErrorResilienceTest : public libvpx_test::EncoderTest, + public ::testing::TestWithParam { + protected: + ErrorResilienceTest() { + psnr_ = 0.0; + nframes_ = 0; + encoding_mode_ = static_cast(GetParam()); + } + virtual ~ErrorResilienceTest() {} + + virtual void SetUp() { + InitializeConfig(); + SetMode(encoding_mode_); + } + + virtual void BeginPassHook(unsigned int /*pass*/) { + psnr_ = 0.0; + nframes_ = 0; + } + + virtual bool Continue() const { + return !HasFatalFailure() && !abort_; + } + + virtual void PSNRPktHook(const vpx_codec_cx_pkt_t *pkt) { + psnr_ += pkt->data.psnr.psnr[0]; + nframes_++; + } + + double GetAveragePsnr() const { + if (nframes_) + return psnr_ / nframes_; + return 0.0; + } + + private: + double psnr_; + unsigned int nframes_; + libvpx_test::TestMode encoding_mode_; +}; + +TEST_P(ErrorResilienceTest, OnVersusOff) { + const vpx_rational timebase = { 33333333, 1000000000 }; + cfg_.g_timebase = timebase; + cfg_.rc_target_bitrate = 2000; + cfg_.g_lag_in_frames = 25; + + init_flags_ = VPX_CODEC_USE_PSNR; + + libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + timebase.den, timebase.num, 0, 30); + + // Error resilient mode OFF. + cfg_.g_error_resilient = 0; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + const double psnr_resilience_off = GetAveragePsnr(); + EXPECT_GT(psnr_resilience_off, 25.0); + + // Error resilient mode ON. + cfg_.g_error_resilient = 1; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + const double psnr_resilience_on = GetAveragePsnr(); + EXPECT_GT(psnr_resilience_on, 25.0); + + // Test that turning on error resilient mode hurts by 10% at most. 
+ if (psnr_resilience_off > 0.0) { + const double psnr_ratio = psnr_resilience_on / psnr_resilience_off; + EXPECT_GE(psnr_ratio, 0.9); + EXPECT_LE(psnr_ratio, 1.1); + } +} + +INSTANTIATE_TEST_CASE_P(OnOffTest, ErrorResilienceTest, + ONE_PASS_TEST_MODES); +} // namespace diff --git a/test/fdct4x4_test.cc b/test/fdct4x4_test.cc new file mode 100644 index 0000000..619b23d --- /dev/null +++ b/test/fdct4x4_test.cc @@ -0,0 +1,169 @@ +/* +* Copyright (c) 2012 The WebM project authors. All Rights Reserved. +* +* Use of this source code is governed by a BSD-style license +* that can be found in the LICENSE file in the root of the source +* tree. An additional intellectual property rights grant can be found +* in the file PATENTS. All contributing project authors may +* be found in the AUTHORS file in the root of the source tree. +*/ + + +#include +#include +#include +#include +#include +#include + + +extern "C" { +#include "vpx_rtcd.h" +} + +#include "test/acm_random.h" +#include "third_party/googletest/src/include/gtest/gtest.h" +#include "vpx/vpx_integer.h" + + +namespace { + +const int cospi8sqrt2minus1 = 20091; +const int sinpi8sqrt2 = 35468; + +void reference_idct4x4(const int16_t *input, int16_t *output) { + const int16_t *ip = input; + int16_t *op = output; + + for (int i = 0; i < 4; ++i) { + const int a1 = ip[0] + ip[8]; + const int b1 = ip[0] - ip[8]; + const int temp1 = (ip[4] * sinpi8sqrt2) >> 16; + const int temp2 = ip[12] + ((ip[12] * cospi8sqrt2minus1) >> 16); + const int c1 = temp1 - temp2; + const int temp3 = ip[4] + ((ip[4] * cospi8sqrt2minus1) >> 16); + const int temp4 = (ip[12] * sinpi8sqrt2) >> 16; + const int d1 = temp3 + temp4; + op[0] = a1 + d1; + op[12] = a1 - d1; + op[4] = b1 + c1; + op[8] = b1 - c1; + ++ip; + ++op; + } + ip = output; + op = output; + for (int i = 0; i < 4; ++i) { + const int a1 = ip[0] + ip[2]; + const int b1 = ip[0] - ip[2]; + const int temp1 = (ip[1] * sinpi8sqrt2) >> 16; + const int temp2 = ip[3] + ((ip[3] * cospi8sqrt2minus1) 
>> 16); + const int c1 = temp1 - temp2; + const int temp3 = ip[1] + ((ip[1] * cospi8sqrt2minus1) >> 16); + const int temp4 = (ip[3] * sinpi8sqrt2) >> 16; + const int d1 = temp3 + temp4; + op[0] = (a1 + d1 + 4) >> 3; + op[3] = (a1 - d1 + 4) >> 3; + op[1] = (b1 + c1 + 4) >> 3; + op[2] = (b1 - c1 + 4) >> 3; + ip += 4; + op += 4; + } +} + +using libvpx_test::ACMRandom; + +TEST(Vp8FdctTest, SignBiasCheck) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + int16_t test_input_block[16]; + int16_t test_output_block[16]; + const int pitch = 8; + int count_sign_block[16][2]; + const int count_test_block = 1000000; + + memset(count_sign_block, 0, sizeof(count_sign_block)); + + for (int i = 0; i < count_test_block; ++i) { + // Initialize a test block with input range [-255, 255]. + for (int j = 0; j < 16; ++j) + test_input_block[j] = rnd.Rand8() - rnd.Rand8(); + + vp8_short_fdct4x4_c(test_input_block, test_output_block, pitch); + + for (int j = 0; j < 16; ++j) { + if (test_output_block[j] < 0) + ++count_sign_block[j][0]; + else if (test_output_block[j] > 0) + ++count_sign_block[j][1]; + } + } + + bool bias_acceptable = true; + for (int j = 0; j < 16; ++j) + bias_acceptable = bias_acceptable && + (abs(count_sign_block[j][0] - count_sign_block[j][1]) < 10000); + + EXPECT_EQ(true, bias_acceptable) + << "Error: 4x4 FDCT has a sign bias > 1% for input range [-255, 255]"; + + memset(count_sign_block, 0, sizeof(count_sign_block)); + + for (int i = 0; i < count_test_block; ++i) { + // Initialize a test block with input range [-15, 15]. 
+ for (int j = 0; j < 16; ++j) + test_input_block[j] = (rnd.Rand8() >> 4) - (rnd.Rand8() >> 4); + + vp8_short_fdct4x4_c(test_input_block, test_output_block, pitch); + + for (int j = 0; j < 16; ++j) { + if (test_output_block[j] < 0) + ++count_sign_block[j][0]; + else if (test_output_block[j] > 0) + ++count_sign_block[j][1]; + } + } + + bias_acceptable = true; + for (int j = 0; j < 16; ++j) + bias_acceptable = bias_acceptable && + (abs(count_sign_block[j][0] - count_sign_block[j][1]) < 100000); + + EXPECT_EQ(true, bias_acceptable) + << "Error: 4x4 FDCT has a sign bias > 10% for input range [-15, 15]"; +}; + +TEST(Vp8FdctTest, RoundTripErrorCheck) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + int max_error = 0; + double total_error = 0; + const int count_test_block = 1000000; + for (int i = 0; i < count_test_block; ++i) { + int16_t test_input_block[16]; + int16_t test_temp_block[16]; + int16_t test_output_block[16]; + + // Initialize a test block with input range [-255, 255]. + for (int j = 0; j < 16; ++j) + test_input_block[j] = rnd.Rand8() - rnd.Rand8(); + + const int pitch = 8; + vp8_short_fdct4x4_c(test_input_block, test_temp_block, pitch); + reference_idct4x4(test_temp_block, test_output_block); + + for (int j = 0; j < 16; ++j) { + const int diff = test_input_block[j] - test_output_block[j]; + const int error = diff * diff; + if (max_error < error) + max_error = error; + total_error += error; + } + } + + EXPECT_GE(1, max_error ) + << "Error: FDCT/IDCT has an individual roundtrip error > 1"; + + EXPECT_GE(count_test_block, total_error) + << "Error: FDCT/IDCT has average roundtrip error > 1 per block"; +}; + +} // namespace diff --git a/test/i420_video_source.h b/test/i420_video_source.h new file mode 100644 index 0000000..219bd33 --- /dev/null +++ b/test/i420_video_source.h @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#ifndef TEST_I420_VIDEO_SOURCE_H_ +#define TEST_I420_VIDEO_SOURCE_H_ +#include +#include + +#include "test/video_source.h" + +namespace libvpx_test { + +// This class extends VideoSource to allow parsing of raw yv12 +// so that we can do actual file encodes. +class I420VideoSource : public VideoSource { + public: + I420VideoSource(const std::string &file_name, + unsigned int width, unsigned int height, + int rate_numerator, int rate_denominator, + unsigned int start, int limit) + : file_name_(file_name), + input_file_(NULL), + img_(NULL), + start_(start), + limit_(limit), + frame_(0), + width_(0), + height_(0), + framerate_numerator_(rate_numerator), + framerate_denominator_(rate_denominator) { + + // This initializes raw_sz_, width_, height_ and allocates an img. + SetSize(width, height); + } + + virtual ~I420VideoSource() { + vpx_img_free(img_); + if (input_file_) + fclose(input_file_); + } + + virtual void Begin() { + if (input_file_) + fclose(input_file_); + input_file_ = OpenTestDataFile(file_name_); + ASSERT_TRUE(input_file_) << "Input file open failed. Filename: " + << file_name_; + if (start_) { + fseek(input_file_, raw_sz_ * start_, SEEK_SET); + } + + frame_ = start_; + FillFrame(); + } + + virtual void Next() { + ++frame_; + FillFrame(); + } + + virtual vpx_image_t *img() const { return (frame_ < limit_) ? img_ : NULL; } + + // Models a stream where Timebase = 1/FPS, so pts == frame. 
+ virtual vpx_codec_pts_t pts() const { return frame_; } + + virtual unsigned long duration() const { return 1; } + + virtual vpx_rational_t timebase() const { + const vpx_rational_t t = { framerate_denominator_, framerate_numerator_ }; + return t; + } + + virtual unsigned int frame() const { return frame_; } + + virtual unsigned int limit() const { return limit_; } + + void SetSize(unsigned int width, unsigned int height) { + if (width != width_ || height != height_) { + vpx_img_free(img_); + img_ = vpx_img_alloc(NULL, VPX_IMG_FMT_VPXI420, width, height, 1); + ASSERT_TRUE(img_ != NULL); + width_ = width; + height_ = height; + raw_sz_ = width * height * 3 / 2; + } + } + + virtual void FillFrame() { + // Read a frame from input_file. + if (fread(img_->img_data, raw_sz_, 1, input_file_) == 0) { + limit_ = frame_; + } + } + + protected: + std::string file_name_; + FILE *input_file_; + vpx_image_t *img_; + size_t raw_sz_; + unsigned int start_; + unsigned int limit_; + unsigned int frame_; + unsigned int width_; + unsigned int height_; + unsigned int framerate_numerator_; + unsigned int framerate_denominator_; +}; + +} // namespace libvpx_test + +#endif // TEST_I420_VIDEO_SOURCE_H_ diff --git a/test/idctllm_test.cc b/test/idctllm_test.cc new file mode 100644 index 0000000..1be5fa0 --- /dev/null +++ b/test/idctllm_test.cc @@ -0,0 +1,126 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+
+extern "C" {
+#include "vpx_config.h"
+#include "vpx_rtcd.h"
+}
+#include "test/register_state_check.h"
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+typedef void (*idct_fn_t)(short *input, unsigned char *pred_ptr,
+ int pred_stride, unsigned char *dst_ptr,
+ int dst_stride);
+namespace {
+class IDCTTest : public ::testing::TestWithParam<idct_fn_t>
+{
+ protected:
+ virtual void SetUp()
+ {
+ int i;
+
+ UUT = GetParam();
+ memset(input, 0, sizeof(input));
+ /* Set up guard blocks */
+ for(i=0; i<256; i++)
+ output[i] = ((i&0xF)<4&&(i<64))?0:-1;
+ }
+
+ idct_fn_t UUT;
+ short input[16];
+ unsigned char output[256];
+ unsigned char predict[256];
+};
+
+TEST_P(IDCTTest, TestGuardBlocks)
+{
+ int i;
+
+ for(i=0; i<256; i++)
+ if((i&0xF) < 4 && i<64)
+ EXPECT_EQ(0, output[i]) << i;
+ else
+ EXPECT_EQ(255, output[i]);
+}
+
+TEST_P(IDCTTest, TestAllZeros)
+{
+ int i;
+
+ REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16));
+
+ for(i=0; i<256; i++)
+ if((i&0xF) < 4 && i<64)
+ EXPECT_EQ(0, output[i]) << "i==" << i;
+ else
+ EXPECT_EQ(255, output[i]) << "i==" << i;
+}
+
+TEST_P(IDCTTest, TestAllOnes)
+{
+ int i;
+
+ input[0] = 4;
+ REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16));
+
+ for(i=0; i<256; i++)
+ if((i&0xF) < 4 && i<64)
+ EXPECT_EQ(1, output[i]) << "i==" << i;
+ else
+ EXPECT_EQ(255, output[i]) << "i==" << i;
+}
+
+TEST_P(IDCTTest, TestAddOne)
+{
+ int i;
+
+ for(i=0; i<256; i++)
+ predict[i] = i;
+
+ input[0] = 4;
+ REGISTER_STATE_CHECK(UUT(input, predict, 16, output, 16));
+
+ for(i=0; i<256; i++)
+ if((i&0xF) < 4 && i<64)
+ EXPECT_EQ(i+1, output[i]) << "i==" << i;
+ else
+ EXPECT_EQ(255, output[i]) << "i==" << i;
+}
+
+TEST_P(IDCTTest, TestWithData)
+{
+ int i;
+
+ for(i=0; i<16; i++)
+ input[i] = i;
+
+ REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16));
+
+ for(i=0; i<256; i++)
+ if((i&0xF) > 3 || i>63)
+ EXPECT_EQ(255, output[i]) << "i==" << i;
+ else if(i == 0)
+ EXPECT_EQ(11, output[i]) << "i==" << i;
+ else if(i == 34)
+ EXPECT_EQ(1, output[i]) << "i==" << i; + else if(i == 2 || i == 17 || i == 32) + EXPECT_EQ(3, output[i]) << "i==" << i; + else + EXPECT_EQ(0, output[i]) << "i==" << i; +} + +INSTANTIATE_TEST_CASE_P(C, IDCTTest, + ::testing::Values(vp8_short_idct4x4llm_c)); +#if HAVE_MMX +INSTANTIATE_TEST_CASE_P(MMX, IDCTTest, + ::testing::Values(vp8_short_idct4x4llm_mmx)); +#endif +} diff --git a/test/intrapred_test.cc b/test/intrapred_test.cc new file mode 100644 index 0000000..4c16c3f --- /dev/null +++ b/test/intrapred_test.cc @@ -0,0 +1,357 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#include +#include "test/acm_random.h" +#include "test/register_state_check.h" +#include "third_party/googletest/src/include/gtest/gtest.h" +extern "C" { +#include "vpx_config.h" +#include "vpx_rtcd.h" +#include "vp8/common/blockd.h" +#include "vpx_mem/vpx_mem.h" +} + +namespace { + +using libvpx_test::ACMRandom; + +class IntraPredBase { + protected: + void SetupMacroblock(uint8_t *data, int block_size, int stride, + int num_planes) { + memset(&mb_, 0, sizeof(mb_)); + memset(&mi_, 0, sizeof(mi_)); + mb_.up_available = 1; + mb_.left_available = 1; + mb_.mode_info_context = &mi_; + stride_ = stride; + block_size_ = block_size; + num_planes_ = num_planes; + for (int p = 0; p < num_planes; p++) + data_ptr_[p] = data + stride * (block_size + 1) * p + + stride + block_size; + } + + void FillRandom() { + // Fill edges with random data + ACMRandom rnd(ACMRandom::DeterministicSeed()); + for (int p = 0; p < num_planes_; p++) { + for (int x = -1 ; x <= block_size_; x++) + data_ptr_[p][x - stride_] = rnd.Rand8(); + for (int y = 0; y < 
block_size_; y++) + data_ptr_[p][y * stride_ - 1] = rnd.Rand8(); + } + } + + virtual void Predict(MB_PREDICTION_MODE mode) = 0; + + void SetLeftUnavailable() { + mb_.left_available = 0; + for (int p = 0; p < num_planes_; p++) + for (int i = -1; i < block_size_; ++i) + data_ptr_[p][stride_ * i - 1] = 129; + } + + void SetTopUnavailable() { + mb_.up_available = 0; + for (int p = 0; p < num_planes_; p++) + memset(&data_ptr_[p][-1 - stride_], 127, block_size_ + 2); + } + + void SetTopLeftUnavailable() { + SetLeftUnavailable(); + SetTopUnavailable(); + } + + int BlockSizeLog2Min1() const { + switch (block_size_) { + case 16: + return 3; + case 8: + return 2; + default: + return 0; + } + } + + // check DC prediction output against a reference + void CheckDCPrediction() const { + for (int p = 0; p < num_planes_; p++) { + // calculate expected DC + int expected; + if (mb_.up_available || mb_.left_available) { + int sum = 0, shift = BlockSizeLog2Min1() + mb_.up_available + + mb_.left_available; + if (mb_.up_available) + for (int x = 0; x < block_size_; x++) + sum += data_ptr_[p][x - stride_]; + if (mb_.left_available) + for (int y = 0; y < block_size_; y++) + sum += data_ptr_[p][y * stride_ - 1]; + expected = (sum + (1 << (shift - 1))) >> shift; + } else + expected = 0x80; + + // check that all subsequent lines are equal to the first + for (int y = 1; y < block_size_; ++y) + ASSERT_EQ(0, memcmp(data_ptr_[p], &data_ptr_[p][y * stride_], + block_size_)); + // within the first line, ensure that each pixel has the same value + for (int x = 1; x < block_size_; ++x) + ASSERT_EQ(data_ptr_[p][0], data_ptr_[p][x]); + // now ensure that that pixel has the expected (DC) value + ASSERT_EQ(expected, data_ptr_[p][0]); + } + } + + // check V prediction output against a reference + void CheckVPrediction() const { + // check that all lines equal the top border + for (int p = 0; p < num_planes_; p++) + for (int y = 0; y < block_size_; y++) + ASSERT_EQ(0, memcmp(&data_ptr_[p][-stride_], + 
&data_ptr_[p][y * stride_], block_size_)); + } + + // check H prediction output against a reference + void CheckHPrediction() const { + // for each line, ensure that each pixel is equal to the left border + for (int p = 0; p < num_planes_; p++) + for (int y = 0; y < block_size_; y++) + for (int x = 0; x < block_size_; x++) + ASSERT_EQ(data_ptr_[p][-1 + y * stride_], + data_ptr_[p][x + y * stride_]); + } + + static int ClipByte(int value) { + if (value > 255) + return 255; + else if (value < 0) + return 0; + return value; + } + + // check TM prediction output against a reference + void CheckTMPrediction() const { + for (int p = 0; p < num_planes_; p++) + for (int y = 0; y < block_size_; y++) + for (int x = 0; x < block_size_; x++) { + const int expected = ClipByte(data_ptr_[p][x - stride_] + + data_ptr_[p][stride_ * y - 1] + - data_ptr_[p][-1 - stride_]); + ASSERT_EQ(expected, data_ptr_[p][y * stride_ + x]); + } + } + + // Actual test + void RunTest() { + { + SCOPED_TRACE("DC_PRED"); + FillRandom(); + Predict(DC_PRED); + CheckDCPrediction(); + } + { + SCOPED_TRACE("DC_PRED LEFT"); + FillRandom(); + SetLeftUnavailable(); + Predict(DC_PRED); + CheckDCPrediction(); + } + { + SCOPED_TRACE("DC_PRED TOP"); + FillRandom(); + SetTopUnavailable(); + Predict(DC_PRED); + CheckDCPrediction(); + } + { + SCOPED_TRACE("DC_PRED TOP_LEFT"); + FillRandom(); + SetTopLeftUnavailable(); + Predict(DC_PRED); + CheckDCPrediction(); + } + { + SCOPED_TRACE("H_PRED"); + FillRandom(); + Predict(H_PRED); + CheckHPrediction(); + } + { + SCOPED_TRACE("V_PRED"); + FillRandom(); + Predict(V_PRED); + CheckVPrediction(); + } + { + SCOPED_TRACE("TM_PRED"); + FillRandom(); + Predict(TM_PRED); + CheckTMPrediction(); + } + } + + MACROBLOCKD mb_; + MODE_INFO mi_; + uint8_t *data_ptr_[2]; // in the case of Y, only [0] is used + int stride_; + int block_size_; + int num_planes_; +}; + +typedef void (*intra_pred_y_fn_t)(MACROBLOCKD *x, + uint8_t *yabove_row, + uint8_t *yleft, + int left_stride, + uint8_t 
*ypred_ptr, + int y_stride); + +class IntraPredYTest : public ::testing::TestWithParam, + protected IntraPredBase { + public: + static void SetUpTestCase() { + data_array_ = reinterpret_cast( + vpx_memalign(kDataAlignment, kDataBufferSize)); + } + + static void TearDownTestCase() { + vpx_free(data_array_); + data_array_ = NULL; + } + + protected: + static const int kBlockSize = 16; + static const int kDataAlignment = 16; + static const int kStride = kBlockSize * 3; + // We use 48 so that the data pointer of the first pixel in each row of + // each macroblock is 16-byte aligned, and this gives us access to the + // top-left and top-right corner pixels belonging to the top-left/right + // macroblocks. + // We use 17 lines so we have one line above us for top-prediction. + static const int kDataBufferSize = kStride * (kBlockSize + 1); + + virtual void SetUp() { + pred_fn_ = GetParam(); + SetupMacroblock(data_array_, kBlockSize, kStride, 1); + } + + virtual void Predict(MB_PREDICTION_MODE mode) { + mb_.mode_info_context->mbmi.mode = mode; + REGISTER_STATE_CHECK(pred_fn_(&mb_, + data_ptr_[0] - kStride, + data_ptr_[0] - 1, kStride, + data_ptr_[0], kStride)); + } + + intra_pred_y_fn_t pred_fn_; + static uint8_t* data_array_; +}; + +uint8_t* IntraPredYTest::data_array_ = NULL; + +TEST_P(IntraPredYTest, IntraPredTests) { + RunTest(); +} + +INSTANTIATE_TEST_CASE_P(C, IntraPredYTest, + ::testing::Values( + vp8_build_intra_predictors_mby_s_c)); +#if HAVE_SSE2 +INSTANTIATE_TEST_CASE_P(SSE2, IntraPredYTest, + ::testing::Values( + vp8_build_intra_predictors_mby_s_sse2)); +#endif +#if HAVE_SSSE3 +INSTANTIATE_TEST_CASE_P(SSSE3, IntraPredYTest, + ::testing::Values( + vp8_build_intra_predictors_mby_s_ssse3)); +#endif + +typedef void (*intra_pred_uv_fn_t)(MACROBLOCKD *x, + uint8_t *uabove_row, + uint8_t *vabove_row, + uint8_t *uleft, + uint8_t *vleft, + int left_stride, + uint8_t *upred_ptr, + uint8_t *vpred_ptr, + int pred_stride); + +class IntraPredUVTest : public 
::testing::TestWithParam, + protected IntraPredBase { + public: + static void SetUpTestCase() { + data_array_ = reinterpret_cast( + vpx_memalign(kDataAlignment, kDataBufferSize)); + } + + static void TearDownTestCase() { + vpx_free(data_array_); + data_array_ = NULL; + } + + protected: + static const int kBlockSize = 8; + static const int kDataAlignment = 8; + static const int kStride = kBlockSize * 3; + // We use 24 so that the data pointer of the first pixel in each row of + // each macroblock is 8-byte aligned, and this gives us access to the + // top-left and top-right corner pixels belonging to the top-left/right + // macroblocks. + // We use 9 lines so we have one line above us for top-prediction. + // [0] = U, [1] = V + static const int kDataBufferSize = 2 * kStride * (kBlockSize + 1); + + virtual void SetUp() { + pred_fn_ = GetParam(); + SetupMacroblock(data_array_, kBlockSize, kStride, 2); + } + + virtual void Predict(MB_PREDICTION_MODE mode) { + mb_.mode_info_context->mbmi.uv_mode = mode; + pred_fn_(&mb_, data_ptr_[0] - kStride, data_ptr_[1] - kStride, + data_ptr_[0] - 1, data_ptr_[1] - 1, kStride, + data_ptr_[0], data_ptr_[1], kStride); + } + + intra_pred_uv_fn_t pred_fn_; + // We use 24 so that the data pointer of the first pixel in each row of + // each macroblock is 8-byte aligned, and this gives us access to the + // top-left and top-right corner pixels belonging to the top-left/right + // macroblocks. + // We use 9 lines so we have one line above us for top-prediction. 
+ // [0] = U, [1] = V + static uint8_t* data_array_; +}; + +uint8_t* IntraPredUVTest::data_array_ = NULL; + +TEST_P(IntraPredUVTest, IntraPredTests) { + RunTest(); +} + +INSTANTIATE_TEST_CASE_P(C, IntraPredUVTest, + ::testing::Values( + vp8_build_intra_predictors_mbuv_s_c)); +#if HAVE_SSE2 +INSTANTIATE_TEST_CASE_P(SSE2, IntraPredUVTest, + ::testing::Values( + vp8_build_intra_predictors_mbuv_s_sse2)); +#endif +#if HAVE_SSSE3 +INSTANTIATE_TEST_CASE_P(SSSE3, IntraPredUVTest, + ::testing::Values( + vp8_build_intra_predictors_mbuv_s_ssse3)); +#endif + +} // namespace diff --git a/test/ivf_video_source.h b/test/ivf_video_source.h new file mode 100644 index 0000000..48c3a7d --- /dev/null +++ b/test/ivf_video_source.h @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#ifndef TEST_IVF_VIDEO_SOURCE_H_ +#define TEST_IVF_VIDEO_SOURCE_H_ +#include +#include +#include +#include +#include "test/video_source.h" + +namespace libvpx_test { +const unsigned int kCodeBufferSize = 256 * 1024; +const unsigned int kIvfFileHdrSize = 32; +const unsigned int kIvfFrameHdrSize = 12; + +static unsigned int MemGetLe32(const uint8_t *mem) { + return (mem[3] << 24) | (mem[2] << 16) | (mem[1] << 8) | (mem[0]); +} + +// This class extends VideoSource to allow parsing of ivf files, +// so that we can do actual file decodes. 
+class IVFVideoSource : public CompressedVideoSource { + public: + IVFVideoSource(const std::string &file_name) + : file_name_(file_name), + input_file_(NULL), + compressed_frame_buf_(NULL), + frame_sz_(0), + frame_(0), + end_of_file_(false) { + } + + virtual ~IVFVideoSource() { + delete[] compressed_frame_buf_; + + if (input_file_) + fclose(input_file_); + } + + virtual void Init() { + // Allocate a buffer for read in the compressed video frame. + compressed_frame_buf_ = new uint8_t[libvpx_test::kCodeBufferSize]; + ASSERT_TRUE(compressed_frame_buf_) << "Allocate frame buffer failed"; + } + + virtual void Begin() { + input_file_ = OpenTestDataFile(file_name_); + ASSERT_TRUE(input_file_) << "Input file open failed. Filename: " + << file_name_; + + // Read file header + uint8_t file_hdr[kIvfFileHdrSize]; + ASSERT_EQ(kIvfFileHdrSize, fread(file_hdr, 1, kIvfFileHdrSize, input_file_)) + << "File header read failed."; + // Check file header + ASSERT_TRUE(file_hdr[0] == 'D' && file_hdr[1] == 'K' && file_hdr[2] == 'I' + && file_hdr[3] == 'F') << "Input is not an IVF file."; + + FillFrame(); + } + + virtual void Next() { + ++frame_; + FillFrame(); + } + + void FillFrame() { + uint8_t frame_hdr[kIvfFrameHdrSize]; + // Check frame header and read a frame from input_file. + if (fread(frame_hdr, 1, kIvfFrameHdrSize, input_file_) + != kIvfFrameHdrSize) { + end_of_file_ = true; + } else { + end_of_file_ = false; + + frame_sz_ = MemGetLe32(frame_hdr); + ASSERT_LE(frame_sz_, kCodeBufferSize) + << "Frame is too big for allocated code buffer"; + ASSERT_EQ(frame_sz_, + fread(compressed_frame_buf_, 1, frame_sz_, input_file_)) + << "Failed to read complete frame"; + } + } + + virtual const uint8_t *cxdata() const { + return end_of_file_ ? 
NULL : compressed_frame_buf_;
+ }
+ virtual const unsigned int frame_size() const { return frame_sz_; }
+ virtual const unsigned int frame_number() const { return frame_; }
+
+ protected:
+ std::string file_name_;
+ FILE *input_file_;
+ uint8_t *compressed_frame_buf_;
+ unsigned int frame_sz_;
+ unsigned int frame_;
+ bool end_of_file_;
+};
+
+} // namespace libvpx_test
+
+#endif // TEST_IVF_VIDEO_SOURCE_H_
diff --git a/test/keyframe_test.cc b/test/keyframe_test.cc
new file mode 100644
index 0000000..d0c81df
--- /dev/null
+++ b/test/keyframe_test.cc
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include <climits>
+#include <vector>
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+namespace {
+
+class KeyframeTest : public ::libvpx_test::EncoderTest,
+ public ::testing::TestWithParam<libvpx_test::TestMode> {
+ protected:
+ virtual void SetUp() {
+ InitializeConfig();
+ SetMode(GetParam());
+ kf_count_ = 0;
+ kf_count_max_ = INT_MAX;
+ kf_do_force_kf_ = false;
+ set_cpu_used_ = 0;
+ }
+
+ virtual bool Continue() const {
+ return !HasFatalFailure() && !abort_;
+ }
+
+ virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
+ ::libvpx_test::Encoder *encoder) {
+ if (kf_do_force_kf_)
+ frame_flags_ = (video->frame() % 3) ?
0 : VPX_EFLAG_FORCE_KF;
+ if (set_cpu_used_ && video->frame() == 1)
+ encoder->Control(VP8E_SET_CPUUSED, set_cpu_used_);
+ }
+
+ virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
+ if (pkt->data.frame.flags & VPX_FRAME_IS_KEY) {
+ kf_pts_list_.push_back(pkt->data.frame.pts);
+ kf_count_++;
+ abort_ |= kf_count_ > kf_count_max_;
+ }
+ }
+
+ bool kf_do_force_kf_;
+ int kf_count_;
+ int kf_count_max_;
+ std::vector<vpx_codec_pts_t> kf_pts_list_;
+ int set_cpu_used_;
+};
+
+TEST_P(KeyframeTest, TestRandomVideoSource) {
+ // Validate that encoding the RandomVideoSource produces multiple keyframes.
+ // This validates the results of the TestDisableKeyframes test.
+ kf_count_max_ = 2; // early exit successful tests.
+
+ ::libvpx_test::RandomVideoSource video;
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+
+ // In realtime mode - auto placed keyframes are exceedingly rare, don't
+ // bother with this check if(GetParam() > 0)
+ if(GetParam() > 0)
+ EXPECT_GT(kf_count_, 1);
+}
+
+TEST_P(KeyframeTest, TestDisableKeyframes) {
+ cfg_.kf_mode = VPX_KF_DISABLED;
+ kf_count_max_ = 1; // early exit failed tests.
+
+ ::libvpx_test::RandomVideoSource video;
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+
+ EXPECT_EQ(1, kf_count_);
+}
+
+TEST_P(KeyframeTest, TestForceKeyframe) {
+ cfg_.kf_mode = VPX_KF_DISABLED;
+ kf_do_force_kf_ = true;
+
+ ::libvpx_test::DummyVideoSource video;
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+
+ // verify that every third frame is a keyframe.
+ for (std::vector<vpx_codec_pts_t>::const_iterator iter = kf_pts_list_.begin();
+ iter != kf_pts_list_.end(); ++iter) {
+ ASSERT_EQ(0, *iter % 3) << "Unexpected keyframe at frame " << *iter;
+ }
+}
+
+TEST_P(KeyframeTest, TestKeyframeMaxDistance) {
+ cfg_.kf_max_dist = 25;
+
+ ::libvpx_test::DummyVideoSource video;
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+
+ // verify that keyframe interval matches kf_max_dist
+ for (std::vector<vpx_codec_pts_t>::const_iterator iter = kf_pts_list_.begin();
+ iter != kf_pts_list_.end(); ++iter) {
+ ASSERT_EQ(0, *iter % 25) << "Unexpected keyframe at frame " << *iter;
+ }
+}
+
+TEST_P(KeyframeTest, TestAutoKeyframe) {
+ cfg_.kf_mode = VPX_KF_AUTO;
+ kf_do_force_kf_ = false;
+
+ // Force a deterministic speed step in Real Time mode, as the faster modes
+ // may not produce a keyframe like we expect. This is necessary when running
+ // on very slow environments (like Valgrind). The step -11 was determined
+ // experimentally as the fastest mode that still throws the keyframe.
+ if (deadline_ == VPX_DL_REALTIME)
+ set_cpu_used_ = -11;
+
+ // This clip has a cut scene every 30 frames -> Frame 0, 30, 60, 90, 120.
+ // I check only the first 40 frames to make sure there's a keyframe at frame
+ // 0 and 30.
+ ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+ 30, 1, 0, 40);
+
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+
+ // In realtime mode - auto placed keyframes are exceedingly rare, don't
+ // bother with this check
+ if(GetParam() > 0)
+ EXPECT_EQ(2u, kf_pts_list_.size()) << " Not the right number of keyframes ";
+
+ // Verify that keyframes match the file keyframes in the file.
+ for (std::vector<vpx_codec_pts_t>::const_iterator iter = kf_pts_list_.begin();
+ iter != kf_pts_list_.end(); ++iter) {
+
+ if (deadline_ == VPX_DL_REALTIME && *iter > 0)
+ EXPECT_EQ(0, (*iter - 1) % 30) << "Unexpected keyframe at frame "
+ << *iter;
+ else
+ EXPECT_EQ(0, *iter % 30) << "Unexpected keyframe at frame " << *iter;
+ }
+}
+
+INSTANTIATE_TEST_CASE_P(AllModes, KeyframeTest, ALL_TEST_MODES);
+} // namespace
diff --git a/test/pp_filter_test.cc b/test/pp_filter_test.cc
new file mode 100644
index 0000000..9227449
--- /dev/null
+++ b/test/pp_filter_test.cc
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include "test/register_state_check.h"
+#include "third_party/googletest/src/include/gtest/gtest.h"
+extern "C" {
+#include "vpx_config.h"
+#include "vpx_rtcd.h"
+#include "vpx/vpx_integer.h"
+#include "vpx_mem/vpx_mem.h"
+}
+
+typedef void (*post_proc_func_t)(unsigned char *src_ptr,
+ unsigned char *dst_ptr,
+ int src_pixels_per_line,
+ int dst_pixels_per_line,
+ int cols,
+ unsigned char *flimit,
+ int size);
+
+namespace {
+
+class Vp8PostProcessingFilterTest
+ : public ::testing::TestWithParam<post_proc_func_t> {};
+
+// Test routine for the VP8 post-processing function
+// vp8_post_proc_down_and_across_mb_row_c.
+
+TEST_P(Vp8PostProcessingFilterTest, FilterOutputCheck) {
+ // Size of the underlying data block that will be filtered.
+ const int block_width = 16;
+ const int block_height = 16;
+
+ // 5-tap filter needs 2 padding rows above and below the block in the input.
+ const int input_width = block_width;
+ const int input_height = block_height + 4;
+ const int input_stride = input_width;
+ const int input_size = input_width * input_height;
+
+ // Filter extends output block by 8 samples at left and right edges.
+ const int output_width = block_width + 16;
+ const int output_height = block_height;
+ const int output_stride = output_width;
+ const int output_size = output_width * output_height;
+
+ uint8_t *const src_image =
+ reinterpret_cast<uint8_t*>(vpx_calloc(input_size, 1));
+ uint8_t *const dst_image =
+ reinterpret_cast<uint8_t*>(vpx_calloc(output_size, 1));
+
+ // Pointers to top-left pixel of block in the input and output images.
+ uint8_t *const src_image_ptr = src_image + (input_stride << 1);
+ uint8_t *const dst_image_ptr = dst_image + 8;
+ uint8_t *const flimits = reinterpret_cast<uint8_t*>(vpx_memalign(16, block_width));
+ (void)vpx_memset(flimits, 255, block_width);
+
+ // Initialize pixels in the input:
+ // block pixels to value 1,
+ // border pixels to value 10.
+ (void)vpx_memset(src_image, 10, input_size);
+ uint8_t *pixel_ptr = src_image_ptr;
+ for (int i = 0; i < block_height; ++i) {
+ for (int j = 0; j < block_width; ++j) {
+ pixel_ptr[j] = 1;
+ }
+ pixel_ptr += input_stride;
+ }
+
+ // Initialize pixels in the output to 99.
+ (void)vpx_memset(dst_image, 99, output_size); + + REGISTER_STATE_CHECK(GetParam()(src_image_ptr, dst_image_ptr, input_stride, + output_stride, block_width, flimits, 16)); + + static const uint8_t expected_data[block_height] = { + 4, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 4 + }; + + pixel_ptr = dst_image_ptr; + for (int i = 0; i < block_height; ++i) { + for (int j = 0; j < block_width; ++j) { + EXPECT_EQ(expected_data[i], pixel_ptr[j]) + << "Vp8PostProcessingFilterTest failed with invalid filter output"; + } + pixel_ptr += output_stride; + } + + vpx_free(src_image); + vpx_free(dst_image); + vpx_free(flimits); +}; + +INSTANTIATE_TEST_CASE_P(C, Vp8PostProcessingFilterTest, + ::testing::Values(vp8_post_proc_down_and_across_mb_row_c)); + +#if HAVE_SSE2 +INSTANTIATE_TEST_CASE_P(SSE2, Vp8PostProcessingFilterTest, + ::testing::Values(vp8_post_proc_down_and_across_mb_row_sse2)); +#endif + +} // namespace diff --git a/test/register_state_check.h b/test/register_state_check.h new file mode 100644 index 0000000..fb3f53b --- /dev/null +++ b/test/register_state_check.h @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef LIBVPX_TEST_REGISTER_STATE_CHECK_H_ +#define LIBVPX_TEST_REGISTER_STATE_CHECK_H_ + +#ifdef _WIN64 + +#define _WIN32_LEAN_AND_MEAN +#include +#include + +#include "third_party/googletest/src/include/gtest/gtest.h" + +namespace testing { +namespace internal { + +inline bool operator==(const M128A& lhs, const M128A& rhs) { + return (lhs.Low == rhs.Low && lhs.High == rhs.High); +} + +} // namespace internal +} // namespace testing + +namespace libvpx_test { + +// Compares the state of xmm[6-15] at construction with their state at +// destruction. These registers should be preserved by the callee on +// Windows x64. +// Usage: +// { +// RegisterStateCheck reg_check; +// FunctionToVerify(); +// } +class RegisterStateCheck { + public: + RegisterStateCheck() { initialized_ = StoreRegisters(&pre_context_); } + ~RegisterStateCheck() { EXPECT_TRUE(Check()); } + + private: + static bool StoreRegisters(CONTEXT* const context) { + const HANDLE this_thread = GetCurrentThread(); + EXPECT_TRUE(this_thread != NULL); + context->ContextFlags = CONTEXT_FLOATING_POINT; + const bool context_saved = GetThreadContext(this_thread, context) == TRUE; + EXPECT_TRUE(context_saved) << "GetLastError: " << GetLastError(); + return context_saved; + } + + // Compares the register state. Returns true if the states match. 
+ bool Check() const { + if (!initialized_) return false; + CONTEXT post_context; + if (!StoreRegisters(&post_context)) return false; + + const M128A* xmm_pre = &pre_context_.Xmm6; + const M128A* xmm_post = &post_context.Xmm6; + for (int i = 6; i <= 15; ++i) { + EXPECT_EQ(*xmm_pre, *xmm_post) << "xmm" << i << " has been modified!"; + ++xmm_pre; + ++xmm_post; + } + return !testing::Test::HasNonfatalFailure(); + } + + bool initialized_; + CONTEXT pre_context_; +}; + +#define REGISTER_STATE_CHECK(statement) do { \ + libvpx_test::RegisterStateCheck reg_check; \ + statement; \ +} while (false) + +} // namespace libvpx_test + +#else // !_WIN64 + +namespace libvpx_test { + +class RegisterStateCheck {}; +#define REGISTER_STATE_CHECK(statement) statement + +} // namespace libvpx_test + +#endif // _WIN64 + +#endif // LIBVPX_TEST_REGISTER_STATE_CHECK_H_ diff --git a/test/resize_test.cc b/test/resize_test.cc new file mode 100644 index 0000000..c846157 --- /dev/null +++ b/test/resize_test.cc @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ +#include +#include +#include "test/encode_test_driver.h" +#include "test/video_source.h" +#include "third_party/googletest/src/include/gtest/gtest.h" + +namespace { + +const unsigned int kInitialWidth = 320; +const unsigned int kInitialHeight = 240; + +unsigned int ScaleForFrameNumber(unsigned int frame, unsigned int val) { + if (frame < 10) + return val; + if (frame < 20) + return val / 2; + if (frame < 30) + return val * 2 / 3; + if (frame < 40) + return val / 4; + if (frame < 50) + return val * 7 / 8; + return val; +} + +class ResizingVideoSource : public ::libvpx_test::DummyVideoSource { + public: + ResizingVideoSource() { + SetSize(kInitialWidth, kInitialHeight); + limit_ = 60; + } + + protected: + virtual void Next() { + ++frame_; + SetSize(ScaleForFrameNumber(frame_, kInitialWidth), + ScaleForFrameNumber(frame_, kInitialHeight)); + FillFrame(); + } +}; + +class ResizeTest : public ::libvpx_test::EncoderTest, + public ::testing::TestWithParam { + protected: + struct FrameInfo { + FrameInfo(vpx_codec_pts_t _pts, unsigned int _w, unsigned int _h) + : pts(_pts), w(_w), h(_h) {} + + vpx_codec_pts_t pts; + unsigned int w; + unsigned int h; + }; + + virtual void SetUp() { + InitializeConfig(); + SetMode(GetParam()); + } + + virtual bool Continue() const { + return !HasFatalFailure() && !abort_; + } + + virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) { + if (pkt->data.frame.flags & VPX_FRAME_IS_KEY) { + const unsigned char *buf = + reinterpret_cast(pkt->data.frame.buf); + const unsigned int w = (buf[6] | (buf[7] << 8)) & 0x3fff; + const unsigned int h = (buf[8] | (buf[9] << 8)) & 0x3fff; + + frame_info_list_.push_back(FrameInfo(pkt->data.frame.pts, w, h)); + } + } + + std::vector< FrameInfo > frame_info_list_; +}; + +TEST_P(ResizeTest, TestExternalResizeWorks) { + ResizingVideoSource video; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + + for (std::vector::iterator info = frame_info_list_.begin(); + info != frame_info_list_.end(); ++info) { + const 
vpx_codec_pts_t pts = info->pts; + const unsigned int expected_w = ScaleForFrameNumber(pts, kInitialWidth); + const unsigned int expected_h = ScaleForFrameNumber(pts, kInitialHeight); + + EXPECT_EQ(expected_w, info->w) + << "Frame " << pts << "had unexpected width"; + EXPECT_EQ(expected_h, info->h) + << "Frame " << pts << "had unexpected height"; + } +} + +INSTANTIATE_TEST_CASE_P(OnePass, ResizeTest, ONE_PASS_TEST_MODES); +} // namespace diff --git a/test/sad_test.cc b/test/sad_test.cc new file mode 100644 index 0000000..5a0653b --- /dev/null +++ b/test/sad_test.cc @@ -0,0 +1,253 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#include +#include +#include + +extern "C" { +#include "./vpx_config.h" +#include "./vpx_rtcd.h" +#include "vp8/common/blockd.h" +#include "vpx_mem/vpx_mem.h" +} + +#include "test/acm_random.h" +#include "test/register_state_check.h" +#include "test/util.h" +#include "third_party/googletest/src/include/gtest/gtest.h" + + +typedef unsigned int (*sad_m_by_n_fn_t)(const unsigned char *source_ptr, + int source_stride, + const unsigned char *reference_ptr, + int reference_stride, + unsigned int max_sad); + +using libvpx_test::ACMRandom; + +namespace { +class SADTest : public PARAMS(int, int, sad_m_by_n_fn_t) { + public: + static void SetUpTestCase() { + source_data_ = reinterpret_cast( + vpx_memalign(kDataAlignment, kDataBufferSize)); + reference_data_ = reinterpret_cast( + vpx_memalign(kDataAlignment, kDataBufferSize)); + } + + static void TearDownTestCase() { + vpx_free(source_data_); + source_data_ = NULL; + vpx_free(reference_data_); + reference_data_ = NULL; + } + + protected: 
+ static const int kDataAlignment = 16; + static const int kDataBufferSize = 16 * 32; + + virtual void SetUp() { + sad_fn_ = GET_PARAM(2); + height_ = GET_PARAM(1); + width_ = GET_PARAM(0); + source_stride_ = width_ * 2; + reference_stride_ = width_ * 2; + rnd_.Reset(ACMRandom::DeterministicSeed()); + } + + sad_m_by_n_fn_t sad_fn_; + virtual unsigned int SAD(unsigned int max_sad) { + unsigned int ret; + REGISTER_STATE_CHECK(ret = sad_fn_(source_data_, source_stride_, + reference_data_, reference_stride_, + max_sad)); + return ret; + } + + // Sum of Absolute Differences. Given two blocks, calculate the absolute + // difference between two pixels in the same relative location; accumulate. + unsigned int ReferenceSAD(unsigned int max_sad) { + unsigned int sad = 0; + + for (int h = 0; h < height_; ++h) { + for (int w = 0; w < width_; ++w) { + sad += abs(source_data_[h * source_stride_ + w] + - reference_data_[h * reference_stride_ + w]); + } + if (sad > max_sad) { + break; + } + } + return sad; + } + + void FillConstant(uint8_t *data, int stride, uint8_t fill_constant) { + for (int h = 0; h < height_; ++h) { + for (int w = 0; w < width_; ++w) { + data[h * stride + w] = fill_constant; + } + } + } + + void FillRandom(uint8_t *data, int stride) { + for (int h = 0; h < height_; ++h) { + for (int w = 0; w < width_; ++w) { + data[h * stride + w] = rnd_.Rand8(); + } + } + } + + void CheckSad(unsigned int max_sad) { + unsigned int reference_sad, exp_sad; + + reference_sad = ReferenceSAD(max_sad); + exp_sad = SAD(max_sad); + + if (reference_sad <= max_sad) { + ASSERT_EQ(exp_sad, reference_sad); + } else { + // Alternative implementations are not required to check max_sad + ASSERT_GE(exp_sad, reference_sad); + } + } + + // Handle blocks up to 16x16 with stride up to 32 + int height_, width_; + static uint8_t* source_data_; + int source_stride_; + static uint8_t* reference_data_; + int reference_stride_; + + ACMRandom rnd_; +}; + +uint8_t* SADTest::source_data_ = NULL; +uint8_t* 
SADTest::reference_data_ = NULL; + +TEST_P(SADTest, MaxRef) { + FillConstant(source_data_, source_stride_, 0); + FillConstant(reference_data_, reference_stride_, 255); + CheckSad(UINT_MAX); +} + +TEST_P(SADTest, MaxSrc) { + FillConstant(source_data_, source_stride_, 255); + FillConstant(reference_data_, reference_stride_, 0); + CheckSad(UINT_MAX); +} + +TEST_P(SADTest, ShortRef) { + int tmp_stride = reference_stride_; + reference_stride_ >>= 1; + FillRandom(source_data_, source_stride_); + FillRandom(reference_data_, reference_stride_); + CheckSad(UINT_MAX); + reference_stride_ = tmp_stride; +} + +TEST_P(SADTest, UnalignedRef) { + // The reference frame, but not the source frame, may be unaligned for + // certain types of searches. + int tmp_stride = reference_stride_; + reference_stride_ -= 1; + FillRandom(source_data_, source_stride_); + FillRandom(reference_data_, reference_stride_); + CheckSad(UINT_MAX); + reference_stride_ = tmp_stride; +} + +TEST_P(SADTest, ShortSrc) { + int tmp_stride = source_stride_; + source_stride_ >>= 1; + FillRandom(source_data_, source_stride_); + FillRandom(reference_data_, reference_stride_); + CheckSad(UINT_MAX); + source_stride_ = tmp_stride; +} + +TEST_P(SADTest, MaxSAD) { + // Verify that, when max_sad is set, the implementation does not return a + // value lower than the reference. 
+ FillConstant(source_data_, source_stride_, 255); + FillConstant(reference_data_, reference_stride_, 0); + CheckSad(128); +} + +using std::tr1::make_tuple; + +const sad_m_by_n_fn_t sad_16x16_c = vp8_sad16x16_c; +const sad_m_by_n_fn_t sad_8x16_c = vp8_sad8x16_c; +const sad_m_by_n_fn_t sad_16x8_c = vp8_sad16x8_c; +const sad_m_by_n_fn_t sad_8x8_c = vp8_sad8x8_c; +const sad_m_by_n_fn_t sad_4x4_c = vp8_sad4x4_c; +INSTANTIATE_TEST_CASE_P(C, SADTest, ::testing::Values( + make_tuple(16, 16, sad_16x16_c), + make_tuple(8, 16, sad_8x16_c), + make_tuple(16, 8, sad_16x8_c), + make_tuple(8, 8, sad_8x8_c), + make_tuple(4, 4, sad_4x4_c))); + +// ARM tests +#if HAVE_MEDIA +const sad_m_by_n_fn_t sad_16x16_armv6 = vp8_sad16x16_armv6; +INSTANTIATE_TEST_CASE_P(MEDIA, SADTest, ::testing::Values( + make_tuple(16, 16, sad_16x16_armv6))); + +#endif +#if HAVE_NEON +const sad_m_by_n_fn_t sad_16x16_neon = vp8_sad16x16_neon; +const sad_m_by_n_fn_t sad_8x16_neon = vp8_sad8x16_neon; +const sad_m_by_n_fn_t sad_16x8_neon = vp8_sad16x8_neon; +const sad_m_by_n_fn_t sad_8x8_neon = vp8_sad8x8_neon; +const sad_m_by_n_fn_t sad_4x4_neon = vp8_sad4x4_neon; +INSTANTIATE_TEST_CASE_P(NEON, SADTest, ::testing::Values( + make_tuple(16, 16, sad_16x16_neon), + make_tuple(8, 16, sad_8x16_neon), + make_tuple(16, 8, sad_16x8_neon), + make_tuple(8, 8, sad_8x8_neon), + make_tuple(4, 4, sad_4x4_neon))); +#endif + +// X86 tests +#if HAVE_MMX +const sad_m_by_n_fn_t sad_16x16_mmx = vp8_sad16x16_mmx; +const sad_m_by_n_fn_t sad_8x16_mmx = vp8_sad8x16_mmx; +const sad_m_by_n_fn_t sad_16x8_mmx = vp8_sad16x8_mmx; +const sad_m_by_n_fn_t sad_8x8_mmx = vp8_sad8x8_mmx; +const sad_m_by_n_fn_t sad_4x4_mmx = vp8_sad4x4_mmx; +INSTANTIATE_TEST_CASE_P(MMX, SADTest, ::testing::Values( + make_tuple(16, 16, sad_16x16_mmx), + make_tuple(8, 16, sad_8x16_mmx), + make_tuple(16, 8, sad_16x8_mmx), + make_tuple(8, 8, sad_8x8_mmx), + make_tuple(4, 4, sad_4x4_mmx))); +#endif +#if HAVE_SSE2 +const sad_m_by_n_fn_t sad_16x16_wmt = vp8_sad16x16_wmt; 
+const sad_m_by_n_fn_t sad_8x16_wmt = vp8_sad8x16_wmt; +const sad_m_by_n_fn_t sad_16x8_wmt = vp8_sad16x8_wmt; +const sad_m_by_n_fn_t sad_8x8_wmt = vp8_sad8x8_wmt; +const sad_m_by_n_fn_t sad_4x4_wmt = vp8_sad4x4_wmt; +INSTANTIATE_TEST_CASE_P(SSE2, SADTest, ::testing::Values( + make_tuple(16, 16, sad_16x16_wmt), + make_tuple(8, 16, sad_8x16_wmt), + make_tuple(16, 8, sad_16x8_wmt), + make_tuple(8, 8, sad_8x8_wmt), + make_tuple(4, 4, sad_4x4_wmt))); +#endif +#if HAVE_SSSE3 +const sad_m_by_n_fn_t sad_16x16_sse3 = vp8_sad16x16_sse3; +INSTANTIATE_TEST_CASE_P(SSE3, SADTest, ::testing::Values( + make_tuple(16, 16, sad_16x16_sse3))); +#endif + +} // namespace diff --git a/test/set_roi.cc b/test/set_roi.cc new file mode 100644 index 0000000..3b6112e --- /dev/null +++ b/test/set_roi.cc @@ -0,0 +1,182 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + + +#include +#include +#include +#include +#include +#include + +#include "third_party/googletest/src/include/gtest/gtest.h" +#include "vpx/vpx_integer.h" +#include "vpx_mem/vpx_mem.h" +extern "C" { +#include "vp8/encoder/onyx_int.h" +} + +namespace { + +TEST(Vp8RoiMapTest, ParameterCheck) { + int delta_q[MAX_MB_SEGMENTS] = { -2, -25, 0, 31 }; + int delta_lf[MAX_MB_SEGMENTS] = { -2, -25, 0, 31 }; + unsigned int threshold[MAX_MB_SEGMENTS] = { 0, 100, 200, 300 }; + + const int internalq_trans[] = { + 0, 1, 2, 3, 4, 5, 7, 8, + 9, 10, 12, 13, 15, 17, 18, 19, + 20, 21, 23, 24, 25, 26, 27, 28, + 29, 30, 31, 33, 35, 37, 39, 41, + 43, 45, 47, 49, 51, 53, 55, 57, + 59, 61, 64, 67, 70, 73, 76, 79, + 82, 85, 88, 91, 94, 97, 100, 103, + 106, 109, 112, 115, 118, 121, 124, 127, + }; + + // Initialize elements of cpi with valid defaults. + VP8_COMP cpi; + cpi.mb.e_mbd.mb_segement_abs_delta = SEGMENT_DELTADATA; + cpi.cyclic_refresh_mode_enabled = 0; + cpi.mb.e_mbd.segmentation_enabled = 0; + cpi.mb.e_mbd.update_mb_segmentation_map = 0; + cpi.mb.e_mbd.update_mb_segmentation_data = 0; + cpi.common.mb_rows = 240 >> 4; + cpi.common.mb_cols = 320 >> 4; + const int mbs = (cpi.common.mb_rows * cpi.common.mb_cols); + vpx_memset(cpi.segment_feature_data, 0, sizeof(cpi.segment_feature_data)); + + // Segment map + cpi.segmentation_map = reinterpret_cast(vpx_calloc(mbs, 1)); + + // Allocate memory for the source memory map. + unsigned char *roi_map = + reinterpret_cast(vpx_calloc(mbs, 1)); + vpx_memset(&roi_map[mbs >> 2], 1, (mbs >> 2)); + vpx_memset(&roi_map[mbs >> 1], 2, (mbs >> 2)); + vpx_memset(&roi_map[mbs -(mbs >> 2)], 3, (mbs >> 2)); + + // Do a test call with valid parameters. + int roi_retval = vp8_set_roimap(&cpi, roi_map, cpi.common.mb_rows, + cpi.common.mb_cols, delta_q, delta_lf, + threshold); + EXPECT_EQ(0, roi_retval) + << "vp8_set_roimap roi failed with default test parameters"; + + // Check that the values in the cpi structure get set as expected. 
+ if (roi_retval == 0) { + // Check that the segment map got set. + const int mapcompare = memcmp(roi_map, cpi.segmentation_map, mbs); + EXPECT_EQ(0, mapcompare) << "segment map error"; + + // Check the q deltas (note the need to translate into + // the interanl range of 0-127. + for (int i = 0; i < MAX_MB_SEGMENTS; ++i) { + const int transq = internalq_trans[abs(delta_q[i])]; + if (abs(cpi.segment_feature_data[MB_LVL_ALT_Q][i]) != transq) { + EXPECT_EQ(transq, cpi.segment_feature_data[MB_LVL_ALT_Q][i]) + << "segment delta_q error"; + break; + } + } + + // Check the loop filter deltas + for (int i = 0; i < MAX_MB_SEGMENTS; ++i) { + if (cpi.segment_feature_data[MB_LVL_ALT_LF][i] != delta_lf[i]) { + EXPECT_EQ(delta_lf[i], cpi.segment_feature_data[MB_LVL_ALT_LF][i]) + << "segment delta_lf error"; + break; + } + } + + // Check the breakout thresholds + for (int i = 0; i < MAX_MB_SEGMENTS; ++i) { + unsigned int breakout = + static_cast(cpi.segment_encode_breakout[i]); + + if (threshold[i] != breakout) { + EXPECT_EQ(threshold[i], breakout) + << "breakout threshold error"; + break; + } + } + + // Segmentation, and segmentation update flages should be set. + EXPECT_EQ(1, cpi.mb.e_mbd.segmentation_enabled) + << "segmentation_enabled error"; + EXPECT_EQ(1, cpi.mb.e_mbd.update_mb_segmentation_map) + << "update_mb_segmentation_map error"; + EXPECT_EQ(1, cpi.mb.e_mbd.update_mb_segmentation_data) + << "update_mb_segmentation_data error"; + + + // Try a range of delta q and lf parameters (some legal, some not) + for (int i = 0; i < 1000; ++i) { + int rand_deltas[4]; + int deltas_valid; + rand_deltas[0] = (rand() % 160) - 80; + rand_deltas[1] = (rand() % 160) - 80; + rand_deltas[2] = (rand() % 160) - 80; + rand_deltas[3] = (rand() % 160) - 80; + + deltas_valid = ((abs(rand_deltas[0]) <= 63) && + (abs(rand_deltas[1]) <= 63) && + (abs(rand_deltas[2]) <= 63) && + (abs(rand_deltas[3]) <= 63)) ? 0 : -1; + + // Test with random delta q values. 
+ roi_retval = vp8_set_roimap(&cpi, roi_map, cpi.common.mb_rows, + cpi.common.mb_cols, rand_deltas, + delta_lf, threshold); + EXPECT_EQ(deltas_valid, roi_retval) << "dq range check error"; + + // One delta_q error shown at a time + if (deltas_valid != roi_retval) + break; + + // Test with random loop filter values. + roi_retval = vp8_set_roimap(&cpi, roi_map, cpi.common.mb_rows, + cpi.common.mb_cols, delta_q, + rand_deltas, threshold); + EXPECT_EQ(deltas_valid, roi_retval) << "dlf range check error"; + + // One delta loop filter error shown at a time + if (deltas_valid != roi_retval) + break; + } + + // Test that we report and error if cyclic refresh is enabled. + cpi.cyclic_refresh_mode_enabled = 1; + roi_retval = vp8_set_roimap(&cpi, roi_map, cpi.common.mb_rows, + cpi.common.mb_cols, delta_q, + delta_lf, threshold); + EXPECT_EQ(-1, roi_retval) << "cyclic refresh check error"; + cpi.cyclic_refresh_mode_enabled = 0; + + // Test invalid number of rows or colums. + roi_retval = vp8_set_roimap(&cpi, roi_map, cpi.common.mb_rows + 1, + cpi.common.mb_cols, delta_q, + delta_lf, threshold); + EXPECT_EQ(-1, roi_retval) << "MB rows bounds check error"; + + roi_retval = vp8_set_roimap(&cpi, roi_map, cpi.common.mb_rows, + cpi.common.mb_cols - 1, delta_q, + delta_lf, threshold); + EXPECT_EQ(-1, roi_retval) << "MB cols bounds check error"; + } + + // Free allocated memory + if (cpi.segmentation_map) + vpx_free(cpi.segmentation_map); + if (roi_map) + vpx_free(roi_map); +}; + +} // namespace diff --git a/test/sixtap_predict_test.cc b/test/sixtap_predict_test.cc new file mode 100644 index 0000000..c9dcceb --- /dev/null +++ b/test/sixtap_predict_test.cc @@ -0,0 +1,224 @@ +/* +* Copyright (c) 2012 The WebM project authors. All Rights Reserved. +* +* Use of this source code is governed by a BSD-style license +* that can be found in the LICENSE file in the root of the source +* tree. An additional intellectual property rights grant can be found +* in the file PATENTS. 
All contributing project authors may +* be found in the AUTHORS file in the root of the source tree. +*/ + +#include +#include +#include +#include "test/acm_random.h" +#include "test/register_state_check.h" +#include "test/util.h" +#include "third_party/googletest/src/include/gtest/gtest.h" +extern "C" { +#include "./vpx_config.h" +#include "./vpx_rtcd.h" +#include "vpx/vpx_integer.h" +#include "vpx_mem/vpx_mem.h" +} + +namespace { + +typedef void (*sixtap_predict_fn_t)(uint8_t *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + uint8_t *dst_ptr, + int dst_pitch); + +class SixtapPredictTest : public PARAMS(int, int, sixtap_predict_fn_t) { + public: + static void SetUpTestCase() { + src_ = reinterpret_cast(vpx_memalign(kDataAlignment, kSrcSize)); + dst_ = reinterpret_cast(vpx_memalign(kDataAlignment, kDstSize)); + dst_c_ = reinterpret_cast(vpx_memalign(kDataAlignment, kDstSize)); + } + + static void TearDownTestCase() { + vpx_free(src_); + src_ = NULL; + vpx_free(dst_); + dst_ = NULL; + vpx_free(dst_c_); + dst_c_ = NULL; + } + + protected: + // Make test arrays big enough for 16x16 functions. Six-tap filters + // need 5 extra pixels outside of the macroblock. + static const int kSrcStride = 21; + static const int kDstStride = 16; + static const int kDataAlignment = 16; + static const int kSrcSize = kSrcStride * kSrcStride + 1; + static const int kDstSize = kDstStride * kDstStride; + + virtual void SetUp() { + width_ = GET_PARAM(0); + height_ = GET_PARAM(1); + sixtap_predict_ = GET_PARAM(2); + memset(src_, 0, sizeof(src_)); + memset(dst_, 0, sizeof(dst_)); + memset(dst_c_, 0, sizeof(dst_c_)); + } + + int width_; + int height_; + sixtap_predict_fn_t sixtap_predict_; + // The src stores the macroblock we will filter on, and makes it 1 byte larger + // in order to test unaligned access. The result is stored in dst and dst_c(c + // reference code result). 
+ static uint8_t* src_; + static uint8_t* dst_; + static uint8_t* dst_c_; +}; + +uint8_t* SixtapPredictTest::src_ = NULL; +uint8_t* SixtapPredictTest::dst_ = NULL; +uint8_t* SixtapPredictTest::dst_c_ = NULL; + +TEST_P(SixtapPredictTest, TestWithPresetData) { + // Test input + static const uint8_t test_data[kSrcSize] = { + 216, 184, 4, 191, 82, 92, 41, 0, 1, 226, 236, 172, 20, 182, 42, 226, 177, + 79, 94, 77, 179, 203, 206, 198, 22, 192, 19, 75, 17, 192, 44, 233, 120, + 48, 168, 203, 141, 210, 203, 143, 180, 184, 59, 201, 110, 102, 171, 32, + 182, 10, 109, 105, 213, 60, 47, 236, 253, 67, 55, 14, 3, 99, 247, 124, + 148, 159, 71, 34, 114, 19, 177, 38, 203, 237, 239, 58, 83, 155, 91, 10, + 166, 201, 115, 124, 5, 163, 104, 2, 231, 160, 16, 234, 4, 8, 103, 153, + 167, 174, 187, 26, 193, 109, 64, 141, 90, 48, 200, 174, 204, 36, 184, + 114, 237, 43, 238, 242, 207, 86, 245, 182, 247, 6, 161, 251, 14, 8, 148, + 182, 182, 79, 208, 120, 188, 17, 6, 23, 65, 206, 197, 13, 242, 126, 128, + 224, 170, 110, 211, 121, 197, 200, 47, 188, 207, 208, 184, 221, 216, 76, + 148, 143, 156, 100, 8, 89, 117, 14, 112, 183, 221, 54, 197, 208, 180, 69, + 176, 94, 180, 131, 215, 121, 76, 7, 54, 28, 216, 238, 249, 176, 58, 142, + 64, 215, 242, 72, 49, 104, 87, 161, 32, 52, 216, 230, 4, 141, 44, 181, + 235, 224, 57, 195, 89, 134, 203, 144, 162, 163, 126, 156, 84, 185, 42, + 148, 145, 29, 221, 194, 134, 52, 100, 166, 105, 60, 140, 110, 201, 184, + 35, 181, 153, 93, 121, 243, 227, 68, 131, 134, 232, 2, 35, 60, 187, 77, + 209, 76, 106, 174, 15, 241, 227, 115, 151, 77, 175, 36, 187, 121, 221, + 223, 47, 118, 61, 168, 105, 32, 237, 236, 167, 213, 238, 202, 17, 170, + 24, 226, 247, 131, 145, 6, 116, 117, 121, 11, 194, 41, 48, 126, 162, 13, + 93, 209, 131, 154, 122, 237, 187, 103, 217, 99, 60, 200, 45, 78, 115, 69, + 49, 106, 200, 194, 112, 60, 56, 234, 72, 251, 19, 120, 121, 182, 134, 215, + 135, 10, 114, 2, 247, 46, 105, 209, 145, 165, 153, 191, 243, 12, 5, 36, + 119, 206, 231, 231, 11, 32, 209, 83, 27, 
229, 204, 149, 155, 83, 109, 35, + 93, 223, 37, 84, 14, 142, 37, 160, 52, 191, 96, 40, 204, 101, 77, 67, 52, + 53, 43, 63, 85, 253, 147, 113, 226, 96, 6, 125, 179, 115, 161, 17, 83, + 198, 101, 98, 85, 139, 3, 137, 75, 99, 178, 23, 201, 255, 91, 253, 52, + 134, 60, 138, 131, 208, 251, 101, 48, 2, 227, 228, 118, 132, 245, 202, + 75, 91, 44, 160, 231, 47, 41, 50, 147, 220, 74, 92, 219, 165, 89, 16 + }; + + // Expected result + static const uint8_t expected_dst[kDstSize] = { + 117, 102, 74, 135, 42, 98, 175, 206, 70, 73, 222, 197, 50, 24, 39, 49, 38, + 105, 90, 47, 169, 40, 171, 215, 200, 73, 109, 141, 53, 85, 177, 164, 79, + 208, 124, 89, 212, 18, 81, 145, 151, 164, 217, 153, 91, 154, 102, 102, + 159, 75, 164, 152, 136, 51, 213, 219, 186, 116, 193, 224, 186, 36, 231, + 208, 84, 211, 155, 167, 35, 59, 42, 76, 216, 149, 73, 201, 78, 149, 184, + 100, 96, 196, 189, 198, 188, 235, 195, 117, 129, 120, 129, 49, 25, 133, + 113, 69, 221, 114, 70, 143, 99, 157, 108, 189, 140, 78, 6, 55, 65, 240, + 255, 245, 184, 72, 90, 100, 116, 131, 39, 60, 234, 167, 33, 160, 88, 185, + 200, 157, 159, 176, 127, 151, 138, 102, 168, 106, 170, 86, 82, 219, 189, + 76, 33, 115, 197, 106, 96, 198, 136, 97, 141, 237, 151, 98, 137, 191, + 185, 2, 57, 95, 142, 91, 255, 185, 97, 137, 76, 162, 94, 173, 131, 193, + 161, 81, 106, 72, 135, 222, 234, 137, 66, 137, 106, 243, 210, 147, 95, + 15, 137, 110, 85, 66, 16, 96, 167, 147, 150, 173, 203, 140, 118, 196, + 84, 147, 160, 19, 95, 101, 123, 74, 132, 202, 82, 166, 12, 131, 166, + 189, 170, 159, 85, 79, 66, 57, 152, 132, 203, 194, 0, 1, 56, 146, 180, + 224, 156, 28, 83, 181, 79, 76, 80, 46, 160, 175, 59, 106, 43, 87, 75, + 136, 85, 189, 46, 71, 200, 90 + }; + + uint8_t *src = const_cast(test_data); + + REGISTER_STATE_CHECK(sixtap_predict_(&src[kSrcStride * 2 + 2 + 1], kSrcStride, + 2, 2, dst_, kDstStride)); + + for (int i = 0; i < height_; ++i) + for (int j = 0; j < width_; ++j) + ASSERT_EQ(expected_dst[i * kDstStride + j], dst_[i * kDstStride + j]) + << 
"i==" << (i * width_ + j); +} + +using libvpx_test::ACMRandom; + +TEST_P(SixtapPredictTest, TestWithRandomData) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + for (int i = 0; i < kSrcSize; ++i) + src_[i] = rnd.Rand8(); + + // Run tests for all possible offsets. + for (int xoffset = 0; xoffset < 8; ++xoffset) { + for (int yoffset = 0; yoffset < 8; ++yoffset) { + // Call c reference function. + // Move start point to next pixel to test if the function reads + // unaligned data correctly. + vp8_sixtap_predict16x16_c(&src_[kSrcStride * 2 + 2 + 1], kSrcStride, + xoffset, yoffset, dst_c_, kDstStride); + + // Run test. + REGISTER_STATE_CHECK( + sixtap_predict_(&src_[kSrcStride * 2 + 2 + 1], kSrcStride, + xoffset, yoffset, dst_, kDstStride)); + + for (int i = 0; i < height_; ++i) + for (int j = 0; j < width_; ++j) + ASSERT_EQ(dst_c_[i * kDstStride + j], dst_[i * kDstStride + j]) + << "i==" << (i * width_ + j); + } + } +} + +using std::tr1::make_tuple; + +const sixtap_predict_fn_t sixtap_16x16_c = vp8_sixtap_predict16x16_c; +const sixtap_predict_fn_t sixtap_8x8_c = vp8_sixtap_predict8x8_c; +const sixtap_predict_fn_t sixtap_8x4_c = vp8_sixtap_predict8x4_c; +const sixtap_predict_fn_t sixtap_4x4_c = vp8_sixtap_predict4x4_c; +INSTANTIATE_TEST_CASE_P( + C, SixtapPredictTest, ::testing::Values( + make_tuple(16, 16, sixtap_16x16_c), + make_tuple(8, 8, sixtap_8x8_c), + make_tuple(8, 4, sixtap_8x4_c), + make_tuple(4, 4, sixtap_4x4_c))); +#if HAVE_MMX +const sixtap_predict_fn_t sixtap_16x16_mmx = vp8_sixtap_predict16x16_mmx; +const sixtap_predict_fn_t sixtap_8x8_mmx = vp8_sixtap_predict8x8_mmx; +const sixtap_predict_fn_t sixtap_8x4_mmx = vp8_sixtap_predict8x4_mmx; +const sixtap_predict_fn_t sixtap_4x4_mmx = vp8_sixtap_predict4x4_mmx; +INSTANTIATE_TEST_CASE_P( + MMX, SixtapPredictTest, ::testing::Values( + make_tuple(16, 16, sixtap_16x16_mmx), + make_tuple(8, 8, sixtap_8x8_mmx), + make_tuple(8, 4, sixtap_8x4_mmx), + make_tuple(4, 4, sixtap_4x4_mmx))); +#endif +#if HAVE_SSE2 
+const sixtap_predict_fn_t sixtap_16x16_sse2 = vp8_sixtap_predict16x16_sse2; +const sixtap_predict_fn_t sixtap_8x8_sse2 = vp8_sixtap_predict8x8_sse2; +const sixtap_predict_fn_t sixtap_8x4_sse2 = vp8_sixtap_predict8x4_sse2; +INSTANTIATE_TEST_CASE_P( + SSE2, SixtapPredictTest, ::testing::Values( + make_tuple(16, 16, sixtap_16x16_sse2), + make_tuple(8, 8, sixtap_8x8_sse2), + make_tuple(8, 4, sixtap_8x4_sse2))); +#endif +#if HAVE_SSSE3 +const sixtap_predict_fn_t sixtap_16x16_ssse3 = vp8_sixtap_predict16x16_ssse3; +const sixtap_predict_fn_t sixtap_8x8_ssse3 = vp8_sixtap_predict8x8_ssse3; +const sixtap_predict_fn_t sixtap_8x4_ssse3 = vp8_sixtap_predict8x4_ssse3; +const sixtap_predict_fn_t sixtap_4x4_ssse3 = vp8_sixtap_predict4x4_ssse3; +INSTANTIATE_TEST_CASE_P( + SSSE3, SixtapPredictTest, ::testing::Values( + make_tuple(16, 16, sixtap_16x16_ssse3), + make_tuple(8, 8, sixtap_8x8_ssse3), + make_tuple(8, 4, sixtap_8x4_ssse3), + make_tuple(4, 4, sixtap_4x4_ssse3))); +#endif +} // namespace diff --git a/test/subtract_test.cc b/test/subtract_test.cc new file mode 100644 index 0000000..60acf81 --- /dev/null +++ b/test/subtract_test.cc @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "third_party/googletest/src/include/gtest/gtest.h" +#include "test/acm_random.h" +#include "test/register_state_check.h" +extern "C" { +#include "vpx_config.h" +#include "vpx_rtcd.h" +#include "vp8/common/blockd.h" +#include "vp8/encoder/block.h" +#include "vpx_mem/vpx_mem.h" +} + +typedef void (*subtract_b_fn_t)(BLOCK *be, BLOCKD *bd, int pitch); + +namespace { + +class SubtractBlockTest : public ::testing::TestWithParam {}; + +using libvpx_test::ACMRandom; + +TEST_P(SubtractBlockTest, SimpleSubtract) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + BLOCK be; + BLOCKD bd; + // in libvpx, this stride is always 16 + const int kDiffPredStride = 16; + const int kSrcStride[] = {32, 16, 8, 4, 0}; + const int kBlockWidth = 4; + const int kBlockHeight = 4; + + // Allocate... align to 16 for mmx/sse tests + uint8_t *source = reinterpret_cast( + vpx_memalign(16, kBlockHeight * kSrcStride[0] * sizeof(*source))); + be.src_diff = reinterpret_cast( + vpx_memalign(16, kBlockHeight * kDiffPredStride * sizeof(*be.src_diff))); + bd.predictor = reinterpret_cast( + vpx_memalign(16, kBlockHeight * kDiffPredStride * sizeof(*bd.predictor))); + + for(int i = 0; kSrcStride[i] > 0; ++i) { + // start at block0 + be.src = 0; + be.base_src = &source; + be.src_stride = kSrcStride[i]; + + // set difference + int16_t *src_diff = be.src_diff; + for (int r = 0; r < kBlockHeight; ++r) { + for (int c = 0; c < kBlockWidth; ++c) { + src_diff[c] = 0xa5a5; + } + src_diff += kDiffPredStride; + } + + // set destination + uint8_t *base_src = *be.base_src; + for (int r = 0; r < kBlockHeight; ++r) { + for (int c = 0; c < kBlockWidth; ++c) { + base_src[c] = rnd.Rand8(); + } + base_src += be.src_stride; + } + + // set predictor + uint8_t *predictor = bd.predictor; + for (int r = 0; r < kBlockHeight; ++r) { + for (int c = 0; c < kBlockWidth; ++c) { + predictor[c] = rnd.Rand8(); + } + predictor += kDiffPredStride; + } + + REGISTER_STATE_CHECK(GetParam()(&be, &bd, kDiffPredStride)); + + 
base_src = *be.base_src; + src_diff = be.src_diff; + predictor = bd.predictor; + for (int r = 0; r < kBlockHeight; ++r) { + for (int c = 0; c < kBlockWidth; ++c) { + EXPECT_EQ(base_src[c], (src_diff[c] + predictor[c])) << "r = " << r + << ", c = " << c; + } + src_diff += kDiffPredStride; + predictor += kDiffPredStride; + base_src += be.src_stride; + } + } + vpx_free(be.src_diff); + vpx_free(source); + vpx_free(bd.predictor); +} + +INSTANTIATE_TEST_CASE_P(C, SubtractBlockTest, + ::testing::Values(vp8_subtract_b_c)); + +#if HAVE_MMX +INSTANTIATE_TEST_CASE_P(MMX, SubtractBlockTest, + ::testing::Values(vp8_subtract_b_mmx)); +#endif + +#if HAVE_SSE2 +INSTANTIATE_TEST_CASE_P(SSE2, SubtractBlockTest, + ::testing::Values(vp8_subtract_b_sse2)); +#endif + +} // namespace diff --git a/test/test-data.sha1 b/test/test-data.sha1 new file mode 100644 index 0000000..c1b6a83 --- /dev/null +++ b/test/test-data.sha1 @@ -0,0 +1,123 @@ +d5dfb0151c9051f8c85999255645d7a23916d3c0 hantro_collage_w352h288.yuv +5184c46ddca8b1fadd16742e8500115bc8f749da vp80-00-comprehensive-001.ivf +65bf1bbbced81b97bd030f376d1b7f61a224793f vp80-00-comprehensive-002.ivf +906b4c1e99eb734504c504b3f1ad8052137ce672 vp80-00-comprehensive-003.ivf +ec144b1af53af895db78355785650b96dd3f0ade vp80-00-comprehensive-004.ivf +afc7091785c62f1c121c4554a2830c30704587d9 vp80-00-comprehensive-005.ivf +42ea9d55c818145d06a9b633b8e85c6a6164fd3e vp80-00-comprehensive-006.ivf +e5b3a73ab79fe024c14309d653d6bed92902ee3b vp80-00-comprehensive-007.ivf +f3c50a58875930adfb84525c0ef59d7e4c08540c vp80-00-comprehensive-008.ivf +4b2841fdb83db51ae322096ae468bbb9dc2c8362 vp80-00-comprehensive-009.ivf +efbff736e3a91ab6a98c5bc2dce65d645944c7b1 vp80-00-comprehensive-010.ivf +6b315102cae008d22a3d2c231be92cb704a222f8 vp80-00-comprehensive-011.ivf +f3214a4fea14c2d5ec689936c1613f274c859ee8 vp80-00-comprehensive-012.ivf +e4094e96d308c8a35b74c480a43d853c5294cd34 vp80-00-comprehensive-013.ivf +5b0adfaf60a69e0aaf3ec021a39d0a68fc0e1b5a 
vp80-00-comprehensive-014.ivf +e8467688ddf26b5000664f904faf0d70506aa653 vp80-00-comprehensive-015.ivf +aab55582337dfd2a39ff54fb2576a91910d49337 vp80-00-comprehensive-016.ivf +1ba24724f80203c9bae4f1d0f99d534721980016 vp80-00-comprehensive-017.ivf +143a15512b46f436280ddb4d0e6411eb4af434f2 vp80-00-comprehensive-018.ivf +c5baeaf5714fdfb3a8bc960a8e33ac438e83b16b vp80-01-intra-1400.ivf +f383955229afe3408453e316d11553d923ca60d5 vp80-01-intra-1411.ivf +84e1f4343f174c9f3c83f834bac3196fb325bf2c vp80-01-intra-1416.ivf +fb6e712a47dd57a28a3727d2ae2c97a8b7c7ca51 vp80-01-intra-1417.ivf +71ea772d3e9d315b8cbecf41207b8a237c34853b vp80-02-inter-1402.ivf +d85dbc4271525dcd128c503f936fe69091d1f8d0 vp80-02-inter-1412.ivf +d4e5d3ad56511867d025f93724d090f92ba6ec3d vp80-02-inter-1418.ivf +91791cbcc37c60f35dbd8090bacb54e5ec6dd4fa vp80-02-inter-1424.ivf +17fbfe2fea70f6e2f3fa6ca4efaae6c0b03b5f02 vp80-03-segmentation-01.ivf +3c3600dbbcde08e20d54c66fe3b7eadd4f09bdbb vp80-03-segmentation-02.ivf +c156778d5340967d4b369c490848076e92f1f875 vp80-03-segmentation-03.ivf +d25dcff6c60e87a1af70945b8911b6b4998533b0 vp80-03-segmentation-04.ivf +362baba2ce454c9db21218f35e81c27a5ed0b730 vp80-03-segmentation-1401.ivf +d223ae7ee748ce07e74c4679bfd219e84aa9f4b0 vp80-03-segmentation-1403.ivf +033adf7f3a13836a3f1cffcb87c1972900f2b5c6 vp80-03-segmentation-1407.ivf +4d51dfbf9f3e2c590ec99d1d6f59dd731d04375f vp80-03-segmentation-1408.ivf +f37a62b197c2600d75e0ccfbb31b60efdedac251 vp80-03-segmentation-1409.ivf +eb25bd7bfba5b2f6935018a930f42d123b1e7fcd vp80-03-segmentation-1410.ivf +b9d5c436663a30c27cfff84b53a002e501258843 vp80-03-segmentation-1413.ivf +6da92b9d1a180cc3a8afe348ab12258f5a37be1a vp80-03-segmentation-1414.ivf +a4f5842602886bd669f115f93d8a35c035cb0948 vp80-03-segmentation-1415.ivf +f295dceb8ef278b77251b3f9df8aee22e161d547 vp80-03-segmentation-1425.ivf +198dbf9f36f733200e432664cc8c5752d59779de vp80-03-segmentation-1426.ivf +7704804e32f5de976803929934a7fafe101ac7b0 vp80-03-segmentation-1427.ivf 
+831ccd862ea95ca025d2f3bd8b88678752f5416d vp80-03-segmentation-1432.ivf +b3c11978529289f9109f2766fcaba3ebc40e11ef vp80-03-segmentation-1435.ivf +a835a731f5520ebfc1002c40121264d0020559ac vp80-03-segmentation-1436.ivf +1d1732942f773bb2a5775fcb9689b1579ce28eab vp80-03-segmentation-1437.ivf +db04799adfe089dfdf74dbd43cc05ede7161f99e vp80-03-segmentation-1441.ivf +7caf39b3f20cfd52b998210878062e52a5edf1e6 vp80-03-segmentation-1442.ivf +3607f6bb4ee106c38fa1ea370dc4ff8b8cde2261 vp80-04-partitions-1404.ivf +93cc323b6b6867f1b12dd48773424549c6960a6b vp80-04-partitions-1405.ivf +047eedb14b865bdac8a3538e63801054e0295e9c vp80-04-partitions-1406.ivf +0f1233bd2bc33f56ce5e495dbd455d122339f384 vp80-05-sharpness-1428.ivf +51767fc136488a9535c2a4c38067c542ee2048df vp80-05-sharpness-1429.ivf +9805aa107672de25d6fb8c35e20d06deca5efe18 vp80-05-sharpness-1430.ivf +61db6b965f9c27aebe71b85bf2d5877e58e4bbdf vp80-05-sharpness-1431.ivf +10420d266290d2923555f84af38eeb96edbd3ae8 vp80-05-sharpness-1433.ivf +3ed24f9a80cddfdf75824ba95cdb4ff9286cb443 vp80-05-sharpness-1434.ivf +c87599cbecd72d4cd4f7ace3313b7a6bc6eb8163 vp80-05-sharpness-1438.ivf +aff51d865c2621b60510459244ea83e958e4baed vp80-05-sharpness-1439.ivf +da386e72b19b5485a6af199c5eb60ef25e510dd1 vp80-05-sharpness-1440.ivf +6759a095203d96ccd267ce09b1b050b8cc4c2f1f vp80-05-sharpness-1443.ivf +db55ec7fd02c864ba996ff060b25b1e08611330b vp80-00-comprehensive-001.ivf.md5 +29db0ad011cba1e45f856d5623cd38dac3e3bf19 vp80-00-comprehensive-002.ivf.md5 +e84f258f69e173e7d68f8f8c037a0a3766902182 vp80-00-comprehensive-003.ivf.md5 +eb7912eaf69559a16fd82bc3f5fb1524cf4a4466 vp80-00-comprehensive-004.ivf.md5 +4206f71c94894bd5b5b376f6c09b3817dbc65206 vp80-00-comprehensive-005.ivf.md5 +4f89b356f6f2fecb928f330a10f804f00f5325f5 vp80-00-comprehensive-006.ivf.md5 +2813236a32964dd8007e17648bcf035a20fcda6c vp80-00-comprehensive-007.ivf.md5 +10746c72098f872803c900e17c5680e451f5f498 vp80-00-comprehensive-008.ivf.md5 +39a23d0692ce64421a7bb7cdf6ccec5928d37fff 
vp80-00-comprehensive-009.ivf.md5 +f6e3de8931a0cc659bda8fbc14050346955e72d4 vp80-00-comprehensive-010.ivf.md5 +101683ec195b6e944f7cd1e468fc8921439363e6 vp80-00-comprehensive-011.ivf.md5 +1f592751ce46d8688998fa0fa4fbdcda0fd4058c vp80-00-comprehensive-012.ivf.md5 +6066176f90ca790251e795fca1a5797d59999841 vp80-00-comprehensive-013.ivf.md5 +2656da94ba93691f23edc4d60b3a09e2be46c217 vp80-00-comprehensive-014.ivf.md5 +c6e0d5f5d61460c8ac8edfa4e701f10312c03133 vp80-00-comprehensive-015.ivf.md5 +ee60fee501d8493e34e8d6a1fe315b51ed09b24a vp80-00-comprehensive-016.ivf.md5 +9f1914ceffcad4546c0a29de3ef591d8bea304dc vp80-00-comprehensive-017.ivf.md5 +e0305178fe288a9fd8082b39e2d03181edb19054 vp80-00-comprehensive-018.ivf.md5 +612494da2fa799cc9d76dcdd835ae6c7cb2e5c05 vp80-01-intra-1400.ivf.md5 +48ea06097ac8269c5e8c2131d3d0639f431fcf0e vp80-01-intra-1411.ivf.md5 +6e2ab4e7677ad0ba868083ca6bc387ee922b400c vp80-01-intra-1416.ivf.md5 +eca0a90348959ce3854142f8d8641b13050e8349 vp80-01-intra-1417.ivf.md5 +920feea203145d5c2258a91c4e6991934a79a99e vp80-02-inter-1402.ivf.md5 +f71d97909fe2b3dd65be7e1f56c72237f0cef200 vp80-02-inter-1412.ivf.md5 +e911254569a30bbb2a237ff8b79f69ed9da0672d vp80-02-inter-1418.ivf.md5 +58c789c50c9bb9cc90580bed291164a0939d28ba vp80-02-inter-1424.ivf.md5 +ff3e2f441327b9c20a0b37c524e0f5a48a36de7b vp80-03-segmentation-01.ivf.md5 +0791f417f076a542ae66fbc3426ab4d94cbd6c75 vp80-03-segmentation-02.ivf.md5 +722e50f1a6a91c34302d68681faffc1c26d1cc57 vp80-03-segmentation-03.ivf.md5 +c701f1885bcfb27fb8e70cc65606b289172ef889 vp80-03-segmentation-04.ivf.md5 +f79bc9ec189a2b4807632a3d0c5bf04a178b5300 vp80-03-segmentation-1401.ivf.md5 +b9aa4c74c0219b639811c44760d0b24cd8bb436a vp80-03-segmentation-1403.ivf.md5 +70d5a2207ca1891bcaebd5cf6dd88ce8d57b4334 vp80-03-segmentation-1407.ivf.md5 +265f962ee781531f9a93b9309461316fd32b2a1d vp80-03-segmentation-1408.ivf.md5 +0c4ecbbd6dc042d30e626d951b65f460dd6cd563 vp80-03-segmentation-1409.ivf.md5 +cf779af36a937f06570a0fca9db64ba133451dee 
vp80-03-segmentation-1410.ivf.md5 +0e6c5036d51ab078842f133934926c598a9cff02 vp80-03-segmentation-1413.ivf.md5 +eb3930aaf229116c80d507516c34759c3f6cdf69 vp80-03-segmentation-1414.ivf.md5 +123d6c0f72ee87911c4ae7538e87b7d163b22d6c vp80-03-segmentation-1415.ivf.md5 +e70551d1a38920e097a5d8782390b79ecaeb7505 vp80-03-segmentation-1425.ivf.md5 +44e8f4117e46dbb302b2cfd81171cc1a1846e431 vp80-03-segmentation-1426.ivf.md5 +52636e54aee5f95bbace37021bd67de5db767e9a vp80-03-segmentation-1427.ivf.md5 +b1ad3eff20215c28e295b15ef3636ed926d59cba vp80-03-segmentation-1432.ivf.md5 +24c22a552fa28a90e5978f67f57181cc2d7546d7 vp80-03-segmentation-1435.ivf.md5 +96c49c390abfced18a7a8c9b9ea10af778e10edb vp80-03-segmentation-1436.ivf.md5 +f95eb6214571434f1f73ab7833b9ccdf47588020 vp80-03-segmentation-1437.ivf.md5 +1c0700ca27c9b0090a7747a4b0b4dc21d1843181 vp80-03-segmentation-1441.ivf.md5 +81d4f23ca32667ee958bae579c8f5e97ba72eb97 vp80-03-segmentation-1442.ivf.md5 +272efcef07a3a30fbca51bfd566063d8258ec0be vp80-04-partitions-1404.ivf.md5 +66ed219ab812ac801b256d35cf495d193d4cf478 vp80-04-partitions-1405.ivf.md5 +36083f37f56f502bd60ec5e07502ee9e6b8699b0 vp80-04-partitions-1406.ivf.md5 +6ca909bf168a64c09415626294665dc1be3d1973 vp80-05-sharpness-1428.ivf.md5 +1667d2ee2334e5fdea8a8a866f4ccf3cf76f033a vp80-05-sharpness-1429.ivf.md5 +71bcbe5357d36a19df5b07fbe3e27bffa8893f0a vp80-05-sharpness-1430.ivf.md5 +89a09b1dffce2d55770a89e58d9925c70ef79bf8 vp80-05-sharpness-1431.ivf.md5 +08444a18b4e6ba3450c0796dd728d48c399a2dc9 vp80-05-sharpness-1433.ivf.md5 +6d6223719a90c13e848aa2a8a6642098cdb5977a vp80-05-sharpness-1434.ivf.md5 +41d70bb5fa45bc88da1604a0af466930b8dd77b5 vp80-05-sharpness-1438.ivf.md5 +086c56378df81b6cee264d7540a7b8f2b405c7a4 vp80-05-sharpness-1439.ivf.md5 +d32dc2c4165eb266ea4c23c14a45459b363def32 vp80-05-sharpness-1440.ivf.md5 +8c69dc3d8e563f56ffab5ad1e400d9e689dd23df vp80-05-sharpness-1443.ivf.md5 \ No newline at end of file diff --git a/test/test.mk b/test/test.mk new file mode 100644 index 
0000000..982be5b --- /dev/null +++ b/test/test.mk @@ -0,0 +1,179 @@ +LIBVPX_TEST_SRCS-yes += acm_random.h +LIBVPX_TEST_SRCS-yes += register_state_check.h +LIBVPX_TEST_SRCS-yes += test.mk +LIBVPX_TEST_SRCS-yes += test_libvpx.cc +LIBVPX_TEST_SRCS-yes += util.h +LIBVPX_TEST_SRCS-yes += video_source.h + +## +## BLACK BOX TESTS +## +## Black box tests only use the public API. +## +LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += altref_test.cc +LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += config_test.cc +LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += cq_test.cc +LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += datarate_test.cc +LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += encode_test_driver.cc +LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += encode_test_driver.h +LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += error_resilience_test.cc +LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += i420_video_source.h +LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += keyframe_test.cc +LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += resize_test.cc + +LIBVPX_TEST_SRCS-$(CONFIG_VP8_DECODER) += ../md5_utils.h ../md5_utils.c +LIBVPX_TEST_SRCS-$(CONFIG_VP8_DECODER) += decode_test_driver.cc +LIBVPX_TEST_SRCS-$(CONFIG_VP8_DECODER) += decode_test_driver.h +LIBVPX_TEST_SRCS-$(CONFIG_VP8_DECODER) += ivf_video_source.h +LIBVPX_TEST_SRCS-$(CONFIG_VP8_DECODER) += test_vector_test.cc +## +## WHITE BOX TESTS +## +## Whitebox tests invoke functions not exposed via the public API. Certain +## shared library builds don't make these functions accessible. +## +ifeq ($(CONFIG_SHARED),) + +# These tests require both the encoder and decoder to be built. 
+ifeq ($(CONFIG_VP8_ENCODER)$(CONFIG_VP8_DECODER),yesyes) +LIBVPX_TEST_SRCS-yes += boolcoder_test.cc +endif + +LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += fdct4x4_test.cc +LIBVPX_TEST_SRCS-yes += idctllm_test.cc +LIBVPX_TEST_SRCS-yes += intrapred_test.cc +LIBVPX_TEST_SRCS-$(CONFIG_POSTPROC) += pp_filter_test.cc +LIBVPX_TEST_SRCS-yes += sad_test.cc +LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += set_roi.cc +LIBVPX_TEST_SRCS-yes += sixtap_predict_test.cc +LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += subtract_test.cc + +endif + + +## +## TEST DATA +## +LIBVPX_TEST_DATA-$(CONFIG_VP8_ENCODER) += hantro_collage_w352h288.yuv +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-001.ivf +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-002.ivf +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-003.ivf +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-004.ivf +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-005.ivf +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-006.ivf +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-007.ivf +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-008.ivf +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-009.ivf +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-010.ivf +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-011.ivf +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-012.ivf +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-013.ivf +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-014.ivf +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-015.ivf +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-016.ivf +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-017.ivf +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-018.ivf +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1400.ivf 
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1411.ivf +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1416.ivf +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1417.ivf +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1402.ivf +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1412.ivf +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1418.ivf +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1424.ivf +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-01.ivf +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-02.ivf +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-03.ivf +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-04.ivf +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1401.ivf +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1403.ivf +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1407.ivf +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1408.ivf +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1409.ivf +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1410.ivf +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1413.ivf +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1414.ivf +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1415.ivf +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1425.ivf +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1426.ivf +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1427.ivf +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1432.ivf +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1435.ivf +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1436.ivf +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1437.ivf +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1441.ivf 
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1442.ivf +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-04-partitions-1404.ivf +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-04-partitions-1405.ivf +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-04-partitions-1406.ivf +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1428.ivf +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1429.ivf +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1430.ivf +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1431.ivf +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1433.ivf +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1434.ivf +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1438.ivf +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1439.ivf +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1440.ivf +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1443.ivf +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-001.ivf.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-002.ivf.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-003.ivf.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-004.ivf.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-005.ivf.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-006.ivf.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-007.ivf.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-008.ivf.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-009.ivf.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-010.ivf.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-011.ivf.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-012.ivf.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-013.ivf.md5 
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-014.ivf.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-015.ivf.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-016.ivf.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-017.ivf.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-018.ivf.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1400.ivf.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1411.ivf.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1416.ivf.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1417.ivf.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1402.ivf.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1412.ivf.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1418.ivf.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1424.ivf.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1401.ivf.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1403.ivf.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1407.ivf.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1408.ivf.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1409.ivf.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1410.ivf.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1413.ivf.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1414.ivf.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1415.ivf.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1425.ivf.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1426.ivf.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1427.ivf.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1432.ivf.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += 
vp80-03-segmentation-1435.ivf.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1436.ivf.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1437.ivf.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1441.ivf.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1442.ivf.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-01.ivf.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-02.ivf.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-03.ivf.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-04.ivf.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-04-partitions-1404.ivf.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-04-partitions-1405.ivf.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-04-partitions-1406.ivf.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1428.ivf.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1429.ivf.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1430.ivf.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1431.ivf.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1433.ivf.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1434.ivf.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1438.ivf.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1439.ivf.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1440.ivf.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1443.ivf.md5 diff --git a/test/test_libvpx.cc b/test/test_libvpx.cc new file mode 100644 index 0000000..cfd5d28 --- /dev/null +++ b/test/test_libvpx.cc @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include +#include "vpx_config.h" +#if ARCH_X86 || ARCH_X86_64 +extern "C" { +#include "vpx_ports/x86.h" +} +#endif +#include "third_party/googletest/src/include/gtest/gtest.h" + +static void append_gtest_filter(const char *str) { + std::string filter = ::testing::FLAGS_gtest_filter; + filter += str; + ::testing::FLAGS_gtest_filter = filter; +} + +int main(int argc, char **argv) { + ::testing::InitGoogleTest(&argc, argv); + +#if ARCH_X86 || ARCH_X86_64 + const int simd_caps = x86_simd_caps(); + if(!(simd_caps & HAS_MMX)) + append_gtest_filter(":-MMX/*"); + if(!(simd_caps & HAS_SSE)) + append_gtest_filter(":-SSE/*"); + if(!(simd_caps & HAS_SSE2)) + append_gtest_filter(":-SSE2/*"); + if(!(simd_caps & HAS_SSE3)) + append_gtest_filter(":-SSE3/*"); + if(!(simd_caps & HAS_SSSE3)) + append_gtest_filter(":-SSSE3/*"); + if(!(simd_caps & HAS_SSE4_1)) + append_gtest_filter(":-SSE4_1/*"); +#endif + + return RUN_ALL_TESTS(); +} diff --git a/test/test_vector_test.cc b/test/test_vector_test.cc new file mode 100644 index 0000000..938457b --- /dev/null +++ b/test/test_vector_test.cc @@ -0,0 +1,144 @@ +/* + Copyright (c) 2012 The WebM project authors. All Rights Reserved. + + Use of this source code is governed by a BSD-style license + that can be found in the LICENSE file in the root of the source + tree. An additional intellectual property rights grant can be found + in the file PATENTS. All contributing project authors may + be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include +#include +#include +#include "third_party/googletest/src/include/gtest/gtest.h" +#include "test/decode_test_driver.h" +#include "test/ivf_video_source.h" +extern "C" { +#include "./md5_utils.h" +#include "vpx_mem/vpx_mem.h" +} + +#if defined(_MSC_VER) +#define snprintf sprintf_s +#endif + +namespace { +// There are 61 test vectors in total. +const char *kTestVectors[] = { + "vp80-00-comprehensive-001.ivf", + "vp80-00-comprehensive-002.ivf", "vp80-00-comprehensive-003.ivf", + "vp80-00-comprehensive-004.ivf", "vp80-00-comprehensive-005.ivf", + "vp80-00-comprehensive-006.ivf", "vp80-00-comprehensive-007.ivf", + "vp80-00-comprehensive-008.ivf", "vp80-00-comprehensive-009.ivf", + "vp80-00-comprehensive-010.ivf", "vp80-00-comprehensive-011.ivf", + "vp80-00-comprehensive-012.ivf", "vp80-00-comprehensive-013.ivf", + "vp80-00-comprehensive-014.ivf", "vp80-00-comprehensive-015.ivf", + "vp80-00-comprehensive-016.ivf", "vp80-00-comprehensive-017.ivf", + "vp80-00-comprehensive-018.ivf", "vp80-01-intra-1400.ivf", + "vp80-01-intra-1411.ivf", "vp80-01-intra-1416.ivf", + "vp80-01-intra-1417.ivf", "vp80-02-inter-1402.ivf", + "vp80-02-inter-1412.ivf", "vp80-02-inter-1418.ivf", + "vp80-02-inter-1424.ivf", "vp80-03-segmentation-01.ivf", + "vp80-03-segmentation-02.ivf", "vp80-03-segmentation-03.ivf", + "vp80-03-segmentation-04.ivf", "vp80-03-segmentation-1401.ivf", + "vp80-03-segmentation-1403.ivf", "vp80-03-segmentation-1407.ivf", + "vp80-03-segmentation-1408.ivf", "vp80-03-segmentation-1409.ivf", + "vp80-03-segmentation-1410.ivf", "vp80-03-segmentation-1413.ivf", + "vp80-03-segmentation-1414.ivf", "vp80-03-segmentation-1415.ivf", + "vp80-03-segmentation-1425.ivf", "vp80-03-segmentation-1426.ivf", + "vp80-03-segmentation-1427.ivf", "vp80-03-segmentation-1432.ivf", + "vp80-03-segmentation-1435.ivf", "vp80-03-segmentation-1436.ivf", + "vp80-03-segmentation-1437.ivf", "vp80-03-segmentation-1441.ivf", + "vp80-03-segmentation-1442.ivf", "vp80-04-partitions-1404.ivf", + 
"vp80-04-partitions-1405.ivf", "vp80-04-partitions-1406.ivf", + "vp80-05-sharpness-1428.ivf", "vp80-05-sharpness-1429.ivf", + "vp80-05-sharpness-1430.ivf", "vp80-05-sharpness-1431.ivf", + "vp80-05-sharpness-1433.ivf", "vp80-05-sharpness-1434.ivf", + "vp80-05-sharpness-1438.ivf", "vp80-05-sharpness-1439.ivf", + "vp80-05-sharpness-1440.ivf", "vp80-05-sharpness-1443.ivf" +}; + +class TestVectorTest : public libvpx_test::DecoderTest, + public ::testing::TestWithParam { + protected: + TestVectorTest() : md5_file_(NULL) {} + + virtual ~TestVectorTest() { + if (md5_file_) + fclose(md5_file_); + } + + void OpenMD5File(const std::string& md5_file_name_) { + md5_file_ = libvpx_test::OpenTestDataFile(md5_file_name_); + ASSERT_TRUE(md5_file_) << "Md5 file open failed. Filename: " + << md5_file_name_; + } + + virtual void DecompressedFrameHook(const vpx_image_t& img, + const unsigned int frame_number) { + char expected_md5[33]; + char junk[128]; + + // Read correct md5 checksums. + const int res = fscanf(md5_file_, "%s %s", expected_md5, junk); + ASSERT_NE(res, EOF) << "Read md5 data failed"; + expected_md5[32] = '\0'; + + MD5Context md5; + MD5Init(&md5); + + // Compute and update md5 for each raw in decompressed data. + for (int plane = 0; plane < 3; ++plane) { + uint8_t *buf = img.planes[plane]; + + for (unsigned int y = 0; y < (plane ? (img.d_h + 1) >> 1 : img.d_h); + ++y) { + MD5Update(&md5, buf, (plane ? (img.d_w + 1) >> 1 : img.d_w)); + buf += img.stride[plane]; + } + } + + uint8_t md5_sum[16]; + MD5Final(md5_sum, &md5); + + char actual_md5[33]; + // Convert to get the actual md5. + for (int i = 0; i < 16; i++) { + snprintf(&actual_md5[i * 2], sizeof(actual_md5) - i * 2, "%02x", + md5_sum[i]); + } + actual_md5[32] = '\0'; + + // Check md5 match. + ASSERT_STREQ(expected_md5, actual_md5) + << "Md5 checksums don't match: frame number = " << frame_number; + } + + private: + FILE *md5_file_; +}; + +// This test runs through the whole set of test vectors, and decodes them. 
+// The md5 checksums are computed for each frame in the video file. If md5 +// checksums match the correct md5 data, then the test is passed. Otherwise, +// the test failed. +TEST_P(TestVectorTest, MD5Match) { + const std::string filename = GetParam(); + // Open compressed video file. + libvpx_test::IVFVideoSource video(filename); + + video.Init(); + + // Construct md5 file name. + const std::string md5_filename = filename + ".md5"; + OpenMD5File(md5_filename); + + // Decode frame, and check the md5 matching. + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); +} + +INSTANTIATE_TEST_CASE_P(TestVectorSequence, TestVectorTest, + ::testing::ValuesIn(kTestVectors)); + +} // namespace diff --git a/test/util.h b/test/util.h new file mode 100644 index 0000000..06a70cc --- /dev/null +++ b/test/util.h @@ -0,0 +1,18 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef TEST_UTIL_H_ +#define TEST_UTIL_H_ + +// Macros +#define PARAMS(...) ::testing::TestWithParam< std::tr1::tuple< __VA_ARGS__ > > +#define GET_PARAM(k) std::tr1::get< k >(GetParam()) + +#endif // TEST_UTIL_H_ diff --git a/test/video_source.h b/test/video_source.h new file mode 100644 index 0000000..9772657 --- /dev/null +++ b/test/video_source.h @@ -0,0 +1,175 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#ifndef TEST_VIDEO_SOURCE_H_ +#define TEST_VIDEO_SOURCE_H_ + +#include +#include +#include +#include "test/acm_random.h" +#include "vpx/vpx_encoder.h" + +namespace libvpx_test { + +static FILE *OpenTestDataFile(const std::string& file_name) { + std::string path_to_source = file_name; + const char *kDataPath = getenv("LIBVPX_TEST_DATA_PATH"); + + if (kDataPath) { + path_to_source = kDataPath; + path_to_source += "/"; + path_to_source += file_name; + } + + return fopen(path_to_source.c_str(), "rb"); +} + +// Abstract base class for test video sources, which provide a stream of +// vpx_image_t images with associated timestamps and duration. +class VideoSource { + public: + virtual ~VideoSource() {} + + // Prepare the stream for reading, rewind/open as necessary. + virtual void Begin() = 0; + + // Advance the cursor to the next frame + virtual void Next() = 0; + + // Get the current video frame, or NULL on End-Of-Stream. + virtual vpx_image_t *img() const = 0; + + // Get the presentation timestamp of the current frame. + virtual vpx_codec_pts_t pts() const = 0; + + // Get the current frame's duration + virtual unsigned long duration() const = 0; + + // Get the timebase for the stream + virtual vpx_rational_t timebase() const = 0; + + // Get the current frame counter, starting at 0. + virtual unsigned int frame() const = 0; + + // Get the current file limit. + virtual unsigned int limit() const = 0; +}; + + +class DummyVideoSource : public VideoSource { + public: + DummyVideoSource() : img_(NULL), limit_(100), width_(0), height_(0) { + SetSize(80, 64); + } + + virtual ~DummyVideoSource() { vpx_img_free(img_); } + + virtual void Begin() { + frame_ = 0; + FillFrame(); + } + + virtual void Next() { + ++frame_; + FillFrame(); + } + + virtual vpx_image_t *img() const { + return (frame_ < limit_) ? 
img_ : NULL; + } + + // Models a stream where Timebase = 1/FPS, so pts == frame. + virtual vpx_codec_pts_t pts() const { return frame_; } + + virtual unsigned long duration() const { return 1; } + + virtual vpx_rational_t timebase() const { + const vpx_rational_t t = {1, 30}; + return t; + } + + virtual unsigned int frame() const { return frame_; } + + virtual unsigned int limit() const { return limit_; } + + void SetSize(unsigned int width, unsigned int height) { + if (width != width_ || height != height_) { + vpx_img_free(img_); + raw_sz_ = ((width + 31)&~31) * height * 3 / 2; + img_ = vpx_img_alloc(NULL, VPX_IMG_FMT_VPXI420, width, height, 32); + width_ = width; + height_ = height; + } + } + + protected: + virtual void FillFrame() { memset(img_->img_data, 0, raw_sz_); } + + vpx_image_t *img_; + size_t raw_sz_; + unsigned int limit_; + unsigned int frame_; + unsigned int width_; + unsigned int height_; +}; + + +class RandomVideoSource : public DummyVideoSource { + public: + RandomVideoSource(int seed = ACMRandom::DeterministicSeed()) + : rnd_(seed), + seed_(seed) { } + + protected: + // Reset the RNG to get a matching stream for the second pass + virtual void Begin() { + frame_ = 0; + rnd_.Reset(seed_); + FillFrame(); + } + + // 15 frames of noise, followed by 15 static frames. Reset to 0 rather + // than holding previous frames to encourage keyframes to be thrown. + virtual void FillFrame() { + if (frame_ % 30 < 15) + for (size_t i = 0; i < raw_sz_; ++i) + img_->img_data[i] = rnd_.Rand8(); + else + memset(img_->img_data, 0, raw_sz_); + } + + ACMRandom rnd_; + int seed_; +}; + +// Abstract base class for test video sources, which provide a stream of +// decompressed images to the decoder. +class CompressedVideoSource { + public: + virtual ~CompressedVideoSource() {} + + virtual void Init() = 0; + + // Prepare the stream for reading, rewind/open as necessary. 
+ virtual void Begin() = 0; + + // Advance the cursor to the next frame + virtual void Next() = 0; + + virtual const uint8_t *cxdata() const = 0; + + virtual const unsigned int frame_size() const = 0; + + virtual const unsigned int frame_number() const = 0; +}; + +} // namespace libvpx_test + +#endif // TEST_VIDEO_SOURCE_H_ diff --git a/third_party/libyuv/source/scale.c b/third_party/libyuv/source/scale.c index 930a7ae..c142a17 100644 --- a/third_party/libyuv/source/scale.c +++ b/third_party/libyuv/source/scale.c @@ -60,7 +60,7 @@ void SetUseReferenceImpl(int use) { #if defined(__ARM_NEON__) && !defined(YUV_DISABLE_ASM) #define HAS_SCALEROWDOWN2_NEON -void ScaleRowDown2_NEON(const uint8* src_ptr, int /* src_stride */, +void ScaleRowDown2_NEON(const uint8* src_ptr, int src_stride, uint8* dst, int dst_width) { asm volatile ( "1: \n" @@ -102,7 +102,7 @@ void ScaleRowDown2Int_NEON(const uint8* src_ptr, int src_stride, } #define HAS_SCALEROWDOWN4_NEON -static void ScaleRowDown4_NEON(const uint8* src_ptr, int /* src_stride */, +static void ScaleRowDown4_NEON(const uint8* src_ptr, int src_stride, uint8* dst_ptr, int dst_width) { asm volatile ( "1: \n" @@ -160,7 +160,7 @@ static void ScaleRowDown4Int_NEON(const uint8* src_ptr, int src_stride, // Down scale from 4 to 3 pixels. Use the neon multilane read/write // to load up the every 4th pixel into a 4 different registers. // Point samples 32 pixels to 24 pixels. 
-static void ScaleRowDown34_NEON(const uint8* src_ptr, int /* src_stride */, +static void ScaleRowDown34_NEON(const uint8* src_ptr, int src_stride, uint8* dst_ptr, int dst_width) { asm volatile ( "1: \n" @@ -284,7 +284,7 @@ const unsigned short mult38_div9[8] __attribute__ ((aligned(16))) = 65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18 }; // 32 -> 12 -static void ScaleRowDown38_NEON(const uint8* src_ptr, int, +static void ScaleRowDown38_NEON(const uint8* src_ptr, int src_stride, uint8* dst_ptr, int dst_width) { asm volatile ( "vld1.u8 {q3}, [%3] \n" diff --git a/tools/ftfy.sh b/tools/ftfy.sh index 95fd397..c5cfdea 100755 --- a/tools/ftfy.sh +++ b/tools/ftfy.sh @@ -34,7 +34,7 @@ vpx_style() { --align-pointer=name \ --indent-preprocessor --convert-tabs --indent-labels \ --suffix=none --quiet "$@" - sed -i 's/[[:space:]]\{1,\},/,/g' "$@" + sed -i "" 's/[[:space:]]\{1,\},/,/g' "$@" } diff --git a/vp8/common/alloccommon.c b/vp8/common/alloccommon.c index d58e49c..8af9e90 100644 --- a/vp8/common/alloccommon.c +++ b/vp8/common/alloccommon.c @@ -17,23 +17,6 @@ #include "entropymode.h" #include "systemdependent.h" - -extern void vp8_init_scan_order_mask(); - -static void update_mode_info_border(MODE_INFO *mi, int rows, int cols) -{ - int i; - vpx_memset(mi - cols - 2, 0, sizeof(MODE_INFO) * (cols + 1)); - - for (i = 0; i < rows; i++) - { - /* TODO(holmer): Bug? This updates the last element of each row - * rather than the border element! 
- */ - vpx_memset(&mi[i*cols-1], 0, sizeof(MODE_INFO)); - } -} - void vp8_de_alloc_frame_buffers(VP8_COMMON *oci) { int i; @@ -45,16 +28,20 @@ void vp8_de_alloc_frame_buffers(VP8_COMMON *oci) vp8_yv12_de_alloc_frame_buffer(&oci->post_proc_buffer); if (oci->post_proc_buffer_int_used) vp8_yv12_de_alloc_frame_buffer(&oci->post_proc_buffer_int); + + vpx_free(oci->pp_limits_buffer); + oci->pp_limits_buffer = NULL; #endif vpx_free(oci->above_context); vpx_free(oci->mip); +#if CONFIG_ERROR_CONCEALMENT vpx_free(oci->prev_mip); + oci->prev_mip = NULL; +#endif - oci->above_context = 0; - oci->mip = 0; - oci->prev_mip = 0; - + oci->above_context = NULL; + oci->mip = NULL; } int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height) @@ -76,10 +63,7 @@ int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height) oci->fb_idx_ref_cnt[i] = 0; oci->yv12_fb[i].flags = 0; if (vp8_yv12_alloc_frame_buffer(&oci->yv12_fb[i], width, height, VP8BORDERINPIXELS) < 0) - { - vp8_de_alloc_frame_buffers(oci); - return 1; - } + goto allocation_fail; } oci->new_fb_idx = 0; @@ -93,22 +77,7 @@ int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height) oci->fb_idx_ref_cnt[3] = 1; if (vp8_yv12_alloc_frame_buffer(&oci->temp_scale_frame, width, 16, VP8BORDERINPIXELS) < 0) - { - vp8_de_alloc_frame_buffers(oci); - return 1; - } - -#if CONFIG_POSTPROC - if (vp8_yv12_alloc_frame_buffer(&oci->post_proc_buffer, width, height, VP8BORDERINPIXELS) < 0) - { - vp8_de_alloc_frame_buffers(oci); - return 1; - } - - oci->post_proc_buffer_int_used = 0; - vpx_memset(&oci->postproc_state, 0, sizeof(oci->postproc_state)); - vpx_memset((&oci->post_proc_buffer)->buffer_alloc,128,(&oci->post_proc_buffer)->frame_size); -#endif + goto allocation_fail; oci->mb_rows = height >> 4; oci->mb_cols = width >> 4; @@ -117,44 +86,43 @@ int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height) oci->mip = vpx_calloc((oci->mb_cols + 1) * (oci->mb_rows + 1), sizeof(MODE_INFO)); if (!oci->mip) - { - 
vp8_de_alloc_frame_buffers(oci); - return 1; - } + goto allocation_fail; oci->mi = oci->mip + oci->mode_info_stride + 1; - /* allocate memory for last frame MODE_INFO array */ -#if CONFIG_ERROR_CONCEALMENT - oci->prev_mip = vpx_calloc((oci->mb_cols + 1) * (oci->mb_rows + 1), sizeof(MODE_INFO)); - - if (!oci->prev_mip) - { - vp8_de_alloc_frame_buffers(oci); - return 1; - } - - oci->prev_mi = oci->prev_mip + oci->mode_info_stride + 1; -#else - oci->prev_mip = NULL; - oci->prev_mi = NULL; -#endif + /* Allocation of previous mode info will be done in vp8_decode_frame() + * as it is a decoder only data */ oci->above_context = vpx_calloc(sizeof(ENTROPY_CONTEXT_PLANES) * oci->mb_cols, 1); if (!oci->above_context) - { - vp8_de_alloc_frame_buffers(oci); - return 1; - } + goto allocation_fail; - update_mode_info_border(oci->mi, oci->mb_rows, oci->mb_cols); -#if CONFIG_ERROR_CONCEALMENT - update_mode_info_border(oci->prev_mi, oci->mb_rows, oci->mb_cols); +#if CONFIG_POSTPROC + if (vp8_yv12_alloc_frame_buffer(&oci->post_proc_buffer, width, height, VP8BORDERINPIXELS) < 0) + goto allocation_fail; + + oci->post_proc_buffer_int_used = 0; + vpx_memset(&oci->postproc_state, 0, sizeof(oci->postproc_state)); + vpx_memset(oci->post_proc_buffer.buffer_alloc, 128, + oci->post_proc_buffer.frame_size); + + /* Allocate buffer to store post-processing filter coefficients. 
+ * + * Note: Round up mb_cols to support SIMD reads + */ + oci->pp_limits_buffer = vpx_memalign(16, 24 * ((oci->mb_cols + 1) & ~1)); + if (!oci->pp_limits_buffer) + goto allocation_fail; #endif return 0; + +allocation_fail: + vp8_de_alloc_frame_buffers(oci); + return 1; } + void vp8_setup_version(VP8_COMMON *cm) { switch (cm->version) diff --git a/vp8/common/arm/armv6/intra4x4_predict_v6.asm b/vp8/common/arm/armv6/intra4x4_predict_v6.asm index a974cd1..c5ec824 100644 --- a/vp8/common/arm/armv6/intra4x4_predict_v6.asm +++ b/vp8/common/arm/armv6/intra4x4_predict_v6.asm @@ -18,15 +18,23 @@ AREA ||.text||, CODE, READONLY, ALIGN=2 -;void vp8_intra4x4_predict(unsigned char *src, int src_stride, int b_mode, -; unsigned char *dst, int dst_stride) - +;void vp8_intra4x4_predict_armv6(unsigned char *Above, unsigned char *yleft, +; B_PREDICTION_MODE left_stride, int b_mode, +; unsigned char *dst, int dst_stride, +; unsigned char top_left) + +; r0: *Above +; r1: *yleft +; r2: left_stride +; r3: b_mode +; sp + #40: dst +; sp + #44: dst_stride +; sp + #48: top_left |vp8_intra4x4_predict_armv6| PROC push {r4-r12, lr} - - cmp r2, #10 - addlt pc, pc, r2, lsl #2 ; position independent switch + cmp r3, #10 + addlt pc, pc, r3, lsl #2 ; position independent switch pop {r4-r12, pc} ; default b b_dc_pred b b_tm_pred @@ -41,13 +49,13 @@ b_dc_pred ; load values - ldr r8, [r0, -r1] ; Above - ldrb r4, [r0, #-1]! ; Left[0] + ldr r8, [r0] ; Above + ldrb r4, [r1], r2 ; Left[0] mov r9, #0 - ldrb r5, [r0, r1] ; Left[1] - ldrb r6, [r0, r1, lsl #1]! 
; Left[2] + ldrb r5, [r1], r2 ; Left[1] + ldrb r6, [r1], r2 ; Left[2] usad8 r12, r8, r9 - ldrb r7, [r0, r1] ; Left[3] + ldrb r7, [r1] ; Left[3] ; calculate dc add r4, r4, r5 @@ -55,31 +63,30 @@ b_dc_pred add r4, r4, r7 add r4, r4, r12 add r4, r4, #4 - ldr r0, [sp, #40] ; load stride + ldr r0, [sp, #44] ; dst_stride mov r12, r4, asr #3 ; (expected_dc + 4) >> 3 add r12, r12, r12, lsl #8 - add r3, r3, r0 + ldr r3, [sp, #40] ; dst add r12, r12, r12, lsl #16 ; store values - str r12, [r3, -r0] + str r12, [r3], r0 + str r12, [r3], r0 + str r12, [r3], r0 str r12, [r3] - str r12, [r3, r0] - str r12, [r3, r0, lsl #1] pop {r4-r12, pc} b_tm_pred - sub r10, r0, #1 ; Left - ldr r8, [r0, -r1] ; Above - ldrb r9, [r10, -r1] ; top_left - ldrb r4, [r0, #-1]! ; Left[0] - ldrb r5, [r10, r1]! ; Left[1] - ldrb r6, [r0, r1, lsl #1] ; Left[2] - ldrb r7, [r10, r1, lsl #1] ; Left[3] - ldr r0, [sp, #40] ; load stride - + ldr r8, [r0] ; Above + ldrb r9, [sp, #48] ; top_left + ldrb r4, [r1], r2 ; Left[0] + ldrb r5, [r1], r2 ; Left[1] + ldrb r6, [r1], r2 ; Left[2] + ldrb r7, [r1] ; Left[3] + ldr r0, [sp, #44] ; dst_stride + ldr r3, [sp, #40] ; dst add r9, r9, r9, lsl #16 ; [tl|tl] uxtb16 r10, r8 ; a[2|0] @@ -126,25 +133,26 @@ b_tm_pred str r12, [r3], r0 add r12, r4, r5, lsl #8 ; [3|2|1|0] - str r12, [r3], r0 + str r12, [r3] pop {r4-r12, pc} b_ve_pred - ldr r8, [r0, -r1]! 
; a[3|2|1|0] + ldr r8, [r0] ; a[3|2|1|0] ldr r11, c00FF00FF - ldrb r9, [r0, #-1] ; top_left + ldrb r9, [sp, #48] ; top_left ldrb r10, [r0, #4] ; a[4] ldr r0, c00020002 uxtb16 r4, r8 ; a[2|0] uxtb16 r5, r8, ror #8 ; a[3|1] - ldr r2, [sp, #40] ; stride + ldr r2, [sp, #44] ; dst_stride pkhbt r9, r9, r5, lsl #16 ; a[1|-1] add r9, r9, r4, lsl #1 ;[a[1]+2*a[2] | tl+2*a[0] ] uxtab16 r9, r9, r5 ;[a[1]+2*a[2]+a[3] | tl+2*a[0]+a[1] ] + ldr r3, [sp, #40] ; dst uxtab16 r9, r9, r0 ;[a[1]+2*a[2]+a[3]+2| tl+2*a[0]+a[1]+2] add r0, r0, r10, lsl #16 ;[a[4]+2 | 2] @@ -154,25 +162,23 @@ b_ve_pred and r9, r11, r9, asr #2 and r4, r11, r4, asr #2 - add r3, r3, r2 ; dst + dst_stride add r9, r9, r4, lsl #8 ; store values - str r9, [r3, -r2] + str r9, [r3], r2 + str r9, [r3], r2 + str r9, [r3], r2 str r9, [r3] - str r9, [r3, r2] - str r9, [r3, r2, lsl #1] pop {r4-r12, pc} b_he_pred - sub r10, r0, #1 ; Left - ldrb r4, [r0, #-1]! ; Left[0] - ldrb r8, [r10, -r1] ; top_left - ldrb r5, [r10, r1]! ; Left[1] - ldrb r6, [r0, r1, lsl #1] ; Left[2] - ldrb r7, [r10, r1, lsl #1] ; Left[3] + ldrb r4, [r1], r2 ; Left[0] + ldrb r8, [sp, #48] ; top_left + ldrb r5, [r1], r2 ; Left[1] + ldrb r6, [r1], r2 ; Left[2] + ldrb r7, [r1] ; Left[3] add r8, r8, r4 ; tl + l[0] add r9, r4, r5 ; l[0] + l[1] @@ -197,7 +203,8 @@ b_he_pred pkhtb r10, r10, r10, asr #16 ; l[-|2|-|2] pkhtb r11, r11, r11, asr #16 ; l[-|3|-|3] - ldr r0, [sp, #40] ; stride + ldr r0, [sp, #44] ; dst_stride + ldr r3, [sp, #40] ; dst add r8, r8, r8, lsl #8 ; l[0|0|0|0] add r9, r9, r9, lsl #8 ; l[1|1|1|1] @@ -206,16 +213,16 @@ b_he_pred ; store values str r8, [r3], r0 - str r9, [r3] - str r10, [r3, r0] - str r11, [r3, r0, lsl #1] + str r9, [r3], r0 + str r10, [r3], r0 + str r11, [r3] pop {r4-r12, pc} b_ld_pred - ldr r4, [r0, -r1]! 
; Above + ldr r4, [r0] ; Above[0-3] ldr r12, c00020002 - ldr r5, [r0, #4] + ldr r5, [r0, #4] ; Above[4-7] ldr lr, c00FF00FF uxtb16 r6, r4 ; a[2|0] @@ -225,7 +232,6 @@ b_ld_pred pkhtb r10, r6, r8 ; a[2|4] pkhtb r11, r7, r9 ; a[3|5] - add r4, r6, r7, lsl #1 ; [a2+2*a3 | a0+2*a1] add r4, r4, r10, ror #16 ; [a2+2*a3+a4 | a0+2*a1+a2] uxtab16 r4, r4, r12 ; [a2+2*a3+a4+2 | a0+2*a1+a2+2] @@ -244,7 +250,8 @@ b_ld_pred add r7, r7, r9, asr #16 ; [ a5+2*a6+a7] uxtah r7, r7, r12 ; [ a5+2*a6+a7+2] - ldr r0, [sp, #40] ; stride + ldr r0, [sp, #44] ; dst_stride + ldr r3, [sp, #40] ; dst ; scale down and r4, lr, r4, asr #2 @@ -266,18 +273,17 @@ b_ld_pred mov r6, r6, lsr #16 mov r11, r10, lsr #8 add r11, r11, r6, lsl #24 ; [6|5|4|3] - str r11, [r3], r0 + str r11, [r3] pop {r4-r12, pc} b_rd_pred - sub r12, r0, r1 ; Above = src - src_stride - ldrb r7, [r0, #-1]! ; l[0] = pp[3] - ldr lr, [r12] ; Above = pp[8|7|6|5] - ldrb r8, [r12, #-1]! ; tl = pp[4] - ldrb r6, [r12, r1, lsl #1] ; l[1] = pp[2] - ldrb r5, [r0, r1, lsl #1] ; l[2] = pp[1] - ldrb r4, [r12, r1, lsl #2] ; l[3] = pp[0] + ldrb r7, [r1], r2 ; l[0] = pp[3] + ldr lr, [r0] ; Above = pp[8|7|6|5] + ldrb r8, [sp, #48] ; tl = pp[4] + ldrb r6, [r1], r2 ; l[1] = pp[2] + ldrb r5, [r1], r2 ; l[2] = pp[1] + ldrb r4, [r1], r2 ; l[3] = pp[0] uxtb16 r9, lr ; p[7|5] @@ -307,7 +313,8 @@ b_rd_pred add r7, r7, r10 ; [p6+2*p7+p8 | p4+2*p5+p6] uxtab16 r7, r7, r12 ; [p6+2*p7+p8+2 | p4+2*p5+p6+2] - ldr r0, [sp, #40] ; stride + ldr r0, [sp, #44] ; dst_stride + ldr r3, [sp, #40] ; dst ; scale down and r7, lr, r7, asr #2 @@ -328,18 +335,17 @@ b_rd_pred mov r11, r10, lsl #8 ; [3|2|1|-] uxtab r11, r11, r4 ; [3|2|1|0] - str r11, [r3], r0 + str r11, [r3] pop {r4-r12, pc} b_vr_pred - sub r12, r0, r1 ; Above = src - src_stride - ldrb r7, [r0, #-1]! ; l[0] = pp[3] - ldr lr, [r12] ; Above = pp[8|7|6|5] - ldrb r8, [r12, #-1]! 
; tl = pp[4] - ldrb r6, [r12, r1, lsl #1] ; l[1] = pp[2] - ldrb r5, [r0, r1, lsl #1] ; l[2] = pp[1] - ldrb r4, [r12, r1, lsl #2] ; l[3] = pp[0] + ldrb r7, [r1], r2 ; l[0] = pp[3] + ldr lr, [r0] ; Above = pp[8|7|6|5] + ldrb r8, [sp, #48] ; tl = pp[4] + ldrb r6, [r1], r2 ; l[1] = pp[2] + ldrb r5, [r1], r2 ; l[2] = pp[1] + ldrb r4, [r1] ; l[3] = pp[0] add r5, r5, r7, lsl #16 ; p[3|1] add r6, r6, r8, lsl #16 ; p[4|2] @@ -376,7 +382,8 @@ b_vr_pred add r8, r8, r10 ; [p6+2*p7+p8 | p4+2*p5+p6] uxtab16 r8, r8, r12 ; [p6+2*p7+p8+2 | p4+2*p5+p6+2] - ldr r0, [sp, #40] ; stride + ldr r0, [sp, #44] ; dst_stride + ldr r3, [sp, #40] ; dst ; scale down and r5, lr, r5, asr #2 ; [B|A] @@ -397,14 +404,14 @@ b_vr_pred pkhtb r10, r7, r5, asr #16 ; [-|H|-|B] str r2, [r3], r0 add r12, r12, r10, lsl #8 ; [H|D|B|A] - str r12, [r3], r0 + str r12, [r3] pop {r4-r12, pc} b_vl_pred - ldr r4, [r0, -r1]! ; [3|2|1|0] + ldr r4, [r0] ; [3|2|1|0] = Above[0-3] ldr r12, c00020002 - ldr r5, [r0, #4] ; [7|6|5|4] + ldr r5, [r0, #4] ; [7|6|5|4] = Above[4-7] ldr lr, c00FF00FF ldr r2, c00010001 @@ -441,7 +448,8 @@ b_vl_pred add r9, r9, r11 ; [p5+2*p6+p7 | p3+2*p4+p5] uxtab16 r9, r9, r12 ; [p5+2*p6+p7+2 | p3+2*p4+p5+2] - ldr r0, [sp, #40] ; stride + ldr r0, [sp, #44] ; dst_stride + ldr r3, [sp, #40] ; dst ; scale down and r5, lr, r5, asr #2 ; [D|C] @@ -449,7 +457,6 @@ b_vl_pred and r8, lr, r8, asr #2 ; [I|D] and r9, lr, r9, asr #2 ; [J|H] - add r10, r4, r6, lsl #8 ; [F|B|E|A] str r10, [r3], r0 @@ -463,18 +470,17 @@ b_vl_pred str r12, [r3], r0 add r10, r7, r10, lsl #8 ; [J|H|D|G] - str r10, [r3], r0 + str r10, [r3] pop {r4-r12, pc} b_hd_pred - sub r12, r0, r1 ; Above = src - src_stride - ldrb r7, [r0, #-1]! ; l[0] = pp[3] - ldr lr, [r12] ; Above = pp[8|7|6|5] - ldrb r8, [r12, #-1]! 
; tl = pp[4] - ldrb r6, [r0, r1] ; l[1] = pp[2] - ldrb r5, [r0, r1, lsl #1] ; l[2] = pp[1] - ldrb r4, [r12, r1, lsl #2] ; l[3] = pp[0] + ldrb r7, [r1], r2 ; l[0] = pp[3] + ldr lr, [r0] ; Above = pp[8|7|6|5] + ldrb r8, [sp, #48] ; tl = pp[4] + ldrb r6, [r1], r2 ; l[1] = pp[2] + ldrb r5, [r1], r2 ; l[2] = pp[1] + ldrb r4, [r1] ; l[3] = pp[0] uxtb16 r9, lr ; p[7|5] uxtb16 r10, lr, ror #8 ; p[8|6] @@ -492,7 +498,6 @@ b_hd_pred pkhtb r1, r9, r10 ; p[7|6] pkhbt r10, r8, r10, lsl #16 ; p[6|5] - uadd16 r11, r4, r5 ; [p1+p2 | p0+p1] uhadd16 r11, r11, r2 ; [(p1+p2+1)>>1 | (p0+p1+1)>>1] ; [B|A] @@ -518,7 +523,8 @@ b_hd_pred and r5, lr, r5, asr #2 ; [H|G] and r6, lr, r6, asr #2 ; [J|I] - ldr lr, [sp, #40] ; stride + ldr lr, [sp, #44] ; dst_stride + ldr r3, [sp, #40] ; dst pkhtb r2, r0, r6 ; [-|F|-|I] pkhtb r12, r6, r5, asr #16 ; [-|J|-|H] @@ -527,7 +533,6 @@ b_hd_pred mov r12, r12, ror #24 ; [J|I|H|F] str r12, [r3], lr - mov r7, r11, asr #16 ; [-|-|-|B] str r2, [r3], lr add r7, r7, r0, lsl #16 ; [-|E|-|B] @@ -536,21 +541,20 @@ b_hd_pred str r7, [r3], lr add r5, r11, r4, lsl #8 ; [D|B|C|A] - str r5, [r3], lr + str r5, [r3] pop {r4-r12, pc} b_hu_pred - ldrb r4, [r0, #-1]! ; Left[0] + ldrb r4, [r1], r2 ; Left[0] ldr r12, c00020002 - ldrb r5, [r0, r1]! ; Left[1] + ldrb r5, [r1], r2 ; Left[1] ldr lr, c00FF00FF - ldrb r6, [r0, r1]! 
; Left[2] + ldrb r6, [r1], r2 ; Left[2] ldr r2, c00010001 - ldrb r7, [r0, r1] ; Left[3] - + ldrb r7, [r1] ; Left[3] add r4, r4, r5, lsl #16 ; [1|0] add r5, r5, r6, lsl #16 ; [2|1] @@ -563,7 +567,8 @@ b_hu_pred add r4, r4, r5, lsl #1 ; [p1+2*p2 | p0+2*p1] add r4, r4, r9 ; [p1+2*p2+p3 | p0+2*p1+p2] uxtab16 r4, r4, r12 ; [p1+2*p2+p3+2 | p0+2*p1+p2+2] - ldr r2, [sp, #40] ; stride + ldr r2, [sp, #44] ; dst_stride + ldr r3, [sp, #40] ; dst and r4, lr, r4, asr #2 ; [D|C] add r10, r6, r7 ; [p2+p3] @@ -587,9 +592,9 @@ b_hu_pred add r10, r11, lsl #8 ; [-|-|F|E] add r10, r10, r9, lsl #16 ; [G|G|F|E] - str r10, [r3] + str r10, [r3], r2 - str r7, [r3, r2] + str r7, [r3] pop {r4-r12, pc} diff --git a/vp8/common/arm/neon/dc_only_idct_add_neon.asm b/vp8/common/arm/neon/dc_only_idct_add_neon.asm index 65a4680..79ff02c 100644 --- a/vp8/common/arm/neon/dc_only_idct_add_neon.asm +++ b/vp8/common/arm/neon/dc_only_idct_add_neon.asm @@ -46,7 +46,7 @@ vst1.32 {d2[1]}, [r3], r12 vst1.32 {d4[0]}, [r3], r12 vst1.32 {d4[1]}, [r3] - + bx lr ENDP diff --git a/vp8/common/blockd.h b/vp8/common/blockd.h index a4c1d92..f7ff577 100644 --- a/vp8/common/blockd.h +++ b/vp8/common/blockd.h @@ -161,22 +161,32 @@ typedef struct uint8_t segment_id; /* Which set of segmentation parameters should be used for this MB */ } MB_MODE_INFO; -typedef struct +typedef struct modeinfo { MB_MODE_INFO mbmi; union b_mode_info bmi[16]; } MODE_INFO; #if CONFIG_MULTI_RES_ENCODING -/* The information needed to be stored for higher-resolution encoder */ +/* The mb-level information needed to be stored for higher-resolution encoder */ typedef struct { MB_PREDICTION_MODE mode; MV_REFERENCE_FRAME ref_frame; int_mv mv; - //union b_mode_info bmi[16]; - int dissim; // dissimilarity level of the macroblock -} LOWER_RES_INFO; + int dissim; /* dissimilarity level of the macroblock */ +} LOWER_RES_MB_INFO; + +/* The frame-level information needed to be stored for higher-resolution + * encoder */ +typedef struct +{ + FRAME_TYPE 
frame_type; + int is_frame_dropped; + /* The frame number of each reference frames */ + unsigned int low_res_ref_frames[MAX_REF_FRAMES]; + LOWER_RES_MB_INFO *mb_info; +} LOWER_RES_FRAME_INFO; #endif typedef struct blockd @@ -216,12 +226,6 @@ typedef struct macroblockd MODE_INFO *mode_info_context; int mode_info_stride; -#if CONFIG_TEMPORAL_DENOISING - MB_PREDICTION_MODE best_sse_inter_mode; - int_mv best_sse_mv; - unsigned char need_to_clamp_best_mvs; -#endif - FRAME_TYPE frame_type; int up_available; diff --git a/vp8/common/entropy.c b/vp8/common/entropy.c index a95a923..8c046a4 100644 --- a/vp8/common/entropy.c +++ b/vp8/common/entropy.c @@ -101,7 +101,7 @@ const vp8_tree_index vp8_coef_tree[ 22] = /* corresponding _CONTEXT_NODEs */ /* vp8_coef_encodings generated with: vp8_tokens_from_tree(vp8_coef_encodings, vp8_coef_tree); */ -const vp8_token vp8_coef_encodings[MAX_ENTROPY_TOKENS] = +vp8_token vp8_coef_encodings[MAX_ENTROPY_TOKENS] = { {2, 2}, {6, 3}, diff --git a/vp8/common/entropymode.c b/vp8/common/entropymode.c index de7e828..091e4c7 100644 --- a/vp8/common/entropymode.c +++ b/vp8/common/entropymode.c @@ -160,9 +160,7 @@ const vp8_tree_index vp8_small_mvtree [14] = void vp8_init_mbmode_probs(VP8_COMMON *x) { vpx_memcpy(x->fc.ymode_prob, vp8_ymode_prob, sizeof(vp8_ymode_prob)); - vpx_memcpy(x->kf_ymode_prob, vp8_kf_ymode_prob, sizeof(vp8_kf_ymode_prob)); vpx_memcpy(x->fc.uv_mode_prob, vp8_uv_mode_prob, sizeof(vp8_uv_mode_prob)); - vpx_memcpy(x->kf_uv_mode_prob, vp8_kf_uv_mode_prob, sizeof(vp8_kf_uv_mode_prob)); vpx_memcpy(x->fc.sub_mv_ref_prob, sub_mv_ref_prob, sizeof(sub_mv_ref_prob)); } @@ -171,7 +169,3 @@ void vp8_default_bmode_probs(vp8_prob p [VP8_BINTRAMODES-1]) vpx_memcpy(p, vp8_bmode_prob, sizeof(vp8_bmode_prob)); } -void vp8_kf_default_bmode_probs(vp8_prob p [VP8_BINTRAMODES] [VP8_BINTRAMODES] [VP8_BINTRAMODES-1]) -{ - vpx_memcpy(p, vp8_kf_bmode_prob, sizeof(vp8_kf_bmode_prob)); -} diff --git a/vp8/common/entropymode.h b/vp8/common/entropymode.h 
index 70200cb..1df0f64 100644 --- a/vp8/common/entropymode.h +++ b/vp8/common/entropymode.h @@ -24,11 +24,11 @@ typedef enum SUBMVREF_LEFT_ABOVE_ZED } sumvfref_t; -typedef const int vp8_mbsplit[16]; +typedef int vp8_mbsplit[16]; #define VP8_NUMMBSPLITS 4 -extern vp8_mbsplit vp8_mbsplits [VP8_NUMMBSPLITS]; +extern const vp8_mbsplit vp8_mbsplits [VP8_NUMMBSPLITS]; extern const int vp8_mbsplit_count [VP8_NUMMBSPLITS]; /* # of subsets */ @@ -67,9 +67,14 @@ extern const vp8_tree_index vp8_small_mvtree[]; extern const struct vp8_token_struct vp8_small_mvencodings[8]; -void vp8_init_mbmode_probs(VP8_COMMON *x); +/* Key frame default mode probs */ +extern const vp8_prob vp8_kf_bmode_prob[VP8_BINTRAMODES][VP8_BINTRAMODES] +[VP8_BINTRAMODES-1]; +extern const vp8_prob vp8_kf_uv_mode_prob[VP8_UV_MODES-1]; +extern const vp8_prob vp8_kf_ymode_prob[VP8_YMODES-1]; -void vp8_default_bmode_probs(vp8_prob dest [VP8_BINTRAMODES-1]); +void vp8_init_mbmode_probs(VP8_COMMON *x); +void vp8_default_bmode_probs(vp8_prob dest [VP8_BINTRAMODES-1]); void vp8_kf_default_bmode_probs(vp8_prob dest [VP8_BINTRAMODES] [VP8_BINTRAMODES] [VP8_BINTRAMODES-1]); #endif diff --git a/vp8/common/extend.c b/vp8/common/extend.c index 9089e16..c9bdd21 100644 --- a/vp8/common/extend.c +++ b/vp8/common/extend.c @@ -116,7 +116,7 @@ void vp8_copy_and_extend_frame_with_rect(YV12_BUFFER_CONFIG *src, int src_uv_offset = ((srcy * src->uv_stride) >> 1) + (srcx >> 1); int dst_uv_offset = ((srcy * dst->uv_stride) >> 1) + (srcx >> 1); - // If the side is not touching the bounder then don't extend. + /* If the side is not touching the bounder then don't extend. 
*/ if (srcy) et = 0; if (srcx) @@ -157,7 +157,10 @@ void vp8_copy_and_extend_frame_with_rect(YV12_BUFFER_CONFIG *src, /* note the extension is only for the last row, for intra prediction purpose */ -void vp8_extend_mb_row(YV12_BUFFER_CONFIG *ybf, unsigned char *YPtr, unsigned char *UPtr, unsigned char *VPtr) +void vp8_extend_mb_row(YV12_BUFFER_CONFIG *ybf, + unsigned char *YPtr, + unsigned char *UPtr, + unsigned char *VPtr) { int i; diff --git a/vp8/common/filter.h b/vp8/common/filter.h index 0f225c2..b7591f2 100644 --- a/vp8/common/filter.h +++ b/vp8/common/filter.h @@ -19,4 +19,4 @@ extern const short vp8_bilinear_filters[8][2]; extern const short vp8_sub_pel_filters[8][6]; -#endif //FILTER_H +#endif diff --git a/vp8/common/generic/systemdependent.c b/vp8/common/generic/systemdependent.c index 2a30166..5a6ac7b 100644 --- a/vp8/common/generic/systemdependent.c +++ b/vp8/common/generic/systemdependent.c @@ -83,57 +83,6 @@ static int get_cpu_count() #endif -#if HAVE_PTHREAD_H -#include -static void once(void (*func)(void)) -{ - static pthread_once_t lock = PTHREAD_ONCE_INIT; - pthread_once(&lock, func); -} - - -#elif defined(_WIN32) -static void once(void (*func)(void)) -{ - /* Using a static initializer here rather than InitializeCriticalSection() - * since there's no race-free context in which to execute it. Protecting - * it with an atomic op like InterlockedCompareExchangePointer introduces - * an x86 dependency, and InitOnceExecuteOnce requires Vista. - */ - static CRITICAL_SECTION lock = {(void *)-1, -1, 0, 0, 0, 0}; - static int done; - - EnterCriticalSection(&lock); - - if (!done) - { - func(); - done = 1; - } - - LeaveCriticalSection(&lock); -} - - -#else -/* No-op version that performs no synchronization. vpx_rtcd() is idempotent, - * so as long as your platform provides atomic loads/stores of pointers - * no synchronization is strictly necessary. 
- */ - -static void once(void (*func)(void)) -{ - static int done; - - if(!done) - { - func(); - done = 1; - } -} -#endif - - void vp8_machine_specific_config(VP8_COMMON *ctx) { #if CONFIG_MULTITHREAD @@ -145,6 +94,4 @@ void vp8_machine_specific_config(VP8_COMMON *ctx) #elif ARCH_X86 || ARCH_X86_64 ctx->cpu_caps = x86_simd_caps(); #endif - - once(vpx_rtcd); } diff --git a/vp8/common/idctllm_test.cc b/vp8/common/idctllm_test.cc deleted file mode 100755 index 0f6ebe7..0000000 --- a/vp8/common/idctllm_test.cc +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - - extern "C" { - void vp8_short_idct4x4llm_c(short *input, unsigned char *pred_ptr, - int pred_stride, unsigned char *dst_ptr, - int dst_stride); -} - -#include "vpx_config.h" -#include "idctllm_test.h" -namespace -{ - -INSTANTIATE_TEST_CASE_P(C, IDCTTest, - ::testing::Values(vp8_short_idct4x4llm_c)); - -} // namespace - -int main(int argc, char **argv) { - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/vp8/common/idctllm_test.h b/vp8/common/idctllm_test.h deleted file mode 100755 index a6a694b..0000000 --- a/vp8/common/idctllm_test.h +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - - #include "third_party/googletest/src/include/gtest/gtest.h" -typedef void (*idct_fn_t)(short *input, unsigned char *pred_ptr, - int pred_stride, unsigned char *dst_ptr, - int dst_stride); -namespace { -class IDCTTest : public ::testing::TestWithParam -{ - protected: - virtual void SetUp() - { - int i; - - UUT = GetParam(); - memset(input, 0, sizeof(input)); - /* Set up guard blocks */ - for(i=0; i<256; i++) - output[i] = ((i&0xF)<4&&(i<64))?0:-1; - } - - idct_fn_t UUT; - short input[16]; - unsigned char output[256]; - unsigned char predict[256]; -}; - -TEST_P(IDCTTest, TestGuardBlocks) -{ - int i; - - for(i=0; i<256; i++) - if((i&0xF) < 4 && i<64) - EXPECT_EQ(0, output[i]) << i; - else - EXPECT_EQ(255, output[i]); -} - -TEST_P(IDCTTest, TestAllZeros) -{ - int i; - - UUT(input, output, 16, output, 16); - - for(i=0; i<256; i++) - if((i&0xF) < 4 && i<64) - EXPECT_EQ(0, output[i]) << "i==" << i; - else - EXPECT_EQ(255, output[i]) << "i==" << i; -} - -TEST_P(IDCTTest, TestAllOnes) -{ - int i; - - input[0] = 4; - UUT(input, output, 16, output, 16); - - for(i=0; i<256; i++) - if((i&0xF) < 4 && i<64) - EXPECT_EQ(1, output[i]) << "i==" << i; - else - EXPECT_EQ(255, output[i]) << "i==" << i; -} - -TEST_P(IDCTTest, TestAddOne) -{ - int i; - - for(i=0; i<256; i++) - predict[i] = i; - - input[0] = 4; - UUT(input, predict, 16, output, 16); - - for(i=0; i<256; i++) - if((i&0xF) < 4 && i<64) - EXPECT_EQ(i+1, output[i]) << "i==" << i; - else - EXPECT_EQ(255, output[i]) << "i==" << i; -} - -TEST_P(IDCTTest, TestWithData) -{ - int i; - - for(i=0; i<16; i++) - input[i] = i; - - UUT(input, output, 16, output, 16); - - for(i=0; i<256; i++) - if((i&0xF) > 3 || i>63) - EXPECT_EQ(255, output[i]) << "i==" << i; - else if(i == 0) - EXPECT_EQ(11, output[i]) << "i==" << i; - else if(i == 34) - EXPECT_EQ(1, output[i]) << "i==" << i; - else if(i == 2 || i == 17 || i == 32) - EXPECT_EQ(3, output[i]) << "i==" << i; - else - EXPECT_EQ(0, output[i]) << "i==" << i; -} -} diff --git 
a/vp8/common/loopfilter.c b/vp8/common/loopfilter.c index 3f05efe..41b4f12 100644 --- a/vp8/common/loopfilter.c +++ b/vp8/common/loopfilter.c @@ -196,18 +196,122 @@ void vp8_loop_filter_frame_init(VP8_COMMON *cm, } } -void vp8_loop_filter_frame -( - VP8_COMMON *cm, - MACROBLOCKD *mbd -) + +void vp8_loop_filter_row_normal(VP8_COMMON *cm, MODE_INFO *mode_info_context, + int mb_row, int post_ystride, int post_uvstride, + unsigned char *y_ptr, unsigned char *u_ptr, + unsigned char *v_ptr) { - YV12_BUFFER_CONFIG *post = cm->frame_to_show; + int mb_col; + int filter_level; loop_filter_info_n *lfi_n = &cm->lf_info; loop_filter_info lfi; - FRAME_TYPE frame_type = cm->frame_type; + for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) + { + int skip_lf = (mode_info_context->mbmi.mode != B_PRED && + mode_info_context->mbmi.mode != SPLITMV && + mode_info_context->mbmi.mb_skip_coeff); + + const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode]; + const int seg = mode_info_context->mbmi.segment_id; + const int ref_frame = mode_info_context->mbmi.ref_frame; + + filter_level = lfi_n->lvl[seg][ref_frame][mode_index]; + + if (filter_level) + { + const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level]; + lfi.mblim = lfi_n->mblim[filter_level]; + lfi.blim = lfi_n->blim[filter_level]; + lfi.lim = lfi_n->lim[filter_level]; + lfi.hev_thr = lfi_n->hev_thr[hev_index]; + + if (mb_col > 0) + vp8_loop_filter_mbv + (y_ptr, u_ptr, v_ptr, post_ystride, post_uvstride, &lfi); + + if (!skip_lf) + vp8_loop_filter_bv + (y_ptr, u_ptr, v_ptr, post_ystride, post_uvstride, &lfi); + + /* don't apply across umv border */ + if (mb_row > 0) + vp8_loop_filter_mbh + (y_ptr, u_ptr, v_ptr, post_ystride, post_uvstride, &lfi); + + if (!skip_lf) + vp8_loop_filter_bh + (y_ptr, u_ptr, v_ptr, post_ystride, post_uvstride, &lfi); + } + + y_ptr += 16; + u_ptr += 8; + v_ptr += 8; + + mode_info_context++; /* step to next MB */ + } + +} + +void vp8_loop_filter_row_simple(VP8_COMMON *cm, MODE_INFO 
*mode_info_context, + int mb_row, int post_ystride, int post_uvstride, + unsigned char *y_ptr, unsigned char *u_ptr, + unsigned char *v_ptr) +{ + int mb_col; + int filter_level; + loop_filter_info_n *lfi_n = &cm->lf_info; + + for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) + { + int skip_lf = (mode_info_context->mbmi.mode != B_PRED && + mode_info_context->mbmi.mode != SPLITMV && + mode_info_context->mbmi.mb_skip_coeff); + + const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode]; + const int seg = mode_info_context->mbmi.segment_id; + const int ref_frame = mode_info_context->mbmi.ref_frame; + + filter_level = lfi_n->lvl[seg][ref_frame][mode_index]; + + if (filter_level) + { + if (mb_col > 0) + vp8_loop_filter_simple_mbv + (y_ptr, post_ystride, lfi_n->mblim[filter_level]); + + if (!skip_lf) + vp8_loop_filter_simple_bv + (y_ptr, post_ystride, lfi_n->blim[filter_level]); + + /* don't apply across umv border */ + if (mb_row > 0) + vp8_loop_filter_simple_mbh + (y_ptr, post_ystride, lfi_n->mblim[filter_level]); + + if (!skip_lf) + vp8_loop_filter_simple_bh + (y_ptr, post_ystride, lfi_n->blim[filter_level]); + } + + y_ptr += 16; + u_ptr += 8; + v_ptr += 8; + + mode_info_context++; /* step to next MB */ + } + +} +void vp8_loop_filter_frame(VP8_COMMON *cm, + MACROBLOCKD *mbd, + int frame_type) +{ + YV12_BUFFER_CONFIG *post = cm->frame_to_show; + loop_filter_info_n *lfi_n = &cm->lf_info; + loop_filter_info lfi; + int mb_row; int mb_col; int mb_rows = cm->mb_rows; diff --git a/vp8/common/loopfilter.h b/vp8/common/loopfilter.h index 0fa8375..b3af2d6 100644 --- a/vp8/common/loopfilter.h +++ b/vp8/common/loopfilter.h @@ -69,6 +69,7 @@ typedef void loop_filter_uvfunction /* assorted loopfilter functions which get used elsewhere */ struct VP8Common; struct macroblockd; +struct modeinfo; void vp8_loop_filter_init(struct VP8Common *cm); @@ -76,7 +77,8 @@ void vp8_loop_filter_frame_init(struct VP8Common *cm, struct macroblockd *mbd, int default_filt_lvl); -void 
vp8_loop_filter_frame(struct VP8Common *cm, struct macroblockd *mbd); +void vp8_loop_filter_frame(struct VP8Common *cm, struct macroblockd *mbd, + int frame_type); void vp8_loop_filter_partial_frame(struct VP8Common *cm, struct macroblockd *mbd, @@ -89,4 +91,15 @@ void vp8_loop_filter_frame_yonly(struct VP8Common *cm, void vp8_loop_filter_update_sharpness(loop_filter_info_n *lfi, int sharpness_lvl); +void vp8_loop_filter_row_normal(struct VP8Common *cm, + struct modeinfo *mode_info_context, + int mb_row, int post_ystride, int post_uvstride, + unsigned char *y_ptr, unsigned char *u_ptr, + unsigned char *v_ptr); + +void vp8_loop_filter_row_simple(struct VP8Common *cm, + struct modeinfo *mode_info_context, + int mb_row, int post_ystride, int post_uvstride, + unsigned char *y_ptr, unsigned char *u_ptr, + unsigned char *v_ptr); #endif diff --git a/vp8/common/mfqe.c b/vp8/common/mfqe.c index ca67e91..3dff150 100644 --- a/vp8/common/mfqe.c +++ b/vp8/common/mfqe.c @@ -160,9 +160,9 @@ static void multiframe_quality_enhance_block vsad = (vp8_variance8x8(v, uv_stride, vd, uvd_stride, &sse)); vsad = (sse + 32)>>6; #else - sad = (vp8_sad16x16(y, y_stride, yd, yd_stride, INT_MAX)+128)>>8; - usad = (vp8_sad8x8(u, uv_stride, ud, uvd_stride, INT_MAX)+32)>>6; - vsad = (vp8_sad8x8(v, uv_stride, vd, uvd_stride, INT_MAX)+32)>>6; + sad = (vp8_sad16x16(y, y_stride, yd, yd_stride, UINT_MAX) + 128) >> 8; + usad = (vp8_sad8x8(u, uv_stride, ud, uvd_stride, UINT_MAX) + 32) >> 6; + vsad = (vp8_sad8x8(v, uv_stride, vd, uvd_stride, UINT_MAX)+ 32) >> 6; #endif } else /* if (blksize == 8) */ @@ -177,16 +177,16 @@ static void multiframe_quality_enhance_block vsad = (vp8_variance4x4(v, uv_stride, vd, uvd_stride, &sse)); vsad = (sse + 8)>>4; #else - sad = (vp8_sad8x8(y, y_stride, yd, yd_stride, INT_MAX)+32)>>6; - usad = (vp8_sad4x4(u, uv_stride, ud, uvd_stride, INT_MAX)+8)>>4; - vsad = (vp8_sad4x4(v, uv_stride, vd, uvd_stride, INT_MAX)+8)>>4; + sad = (vp8_sad8x8(y, y_stride, yd, yd_stride, UINT_MAX) 
+ 32) >> 6; + usad = (vp8_sad4x4(u, uv_stride, ud, uvd_stride, UINT_MAX) + 8) >> 4; + vsad = (vp8_sad4x4(v, uv_stride, vd, uvd_stride, UINT_MAX) + 8) >> 4; #endif } actrisk = (actd > act * 5); - /* thr = qdiff/8 + log2(act) + log4(qprev) */ - thr = (qdiff >> 3); + /* thr = qdiff/16 + log2(act) + log4(qprev) */ + thr = (qdiff >> 4); while (actd >>= 1) thr++; while (qprev >>= 2) thr++; diff --git a/vp8/common/mips/dspr2/dequantize_dspr2.c b/vp8/common/mips/dspr2/dequantize_dspr2.c new file mode 100644 index 0000000..6823325 --- /dev/null +++ b/vp8/common/mips/dspr2/dequantize_dspr2.c @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#include "vpx_config.h" +#include "vpx_rtcd.h" +#include "vpx_mem/vpx_mem.h" + +#if HAVE_DSPR2 +void vp8_dequant_idct_add_dspr2(short *input, short *dq, + unsigned char *dest, int stride) +{ + int i; + + for (i = 0; i < 16; i++) + { + input[i] = dq[i] * input[i]; + } + + vp8_short_idct4x4llm_dspr2(input, dest, stride, dest, stride); + + vpx_memset(input, 0, 32); + +} + +#endif diff --git a/vp8/common/mips/dspr2/filter_dspr2.c b/vp8/common/mips/dspr2/filter_dspr2.c new file mode 100644 index 0000000..71fdcd7 --- /dev/null +++ b/vp8/common/mips/dspr2/filter_dspr2.c @@ -0,0 +1,2823 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#include +#include "vpx_rtcd.h" +#include "vpx_ports/mem.h" + +#if HAVE_DSPR2 +#define CROP_WIDTH 256 +unsigned char ff_cropTbl[256 + 2 * CROP_WIDTH]; + +static const unsigned short sub_pel_filterss[8][3] = +{ + { 0, 0, 0}, + { 0, 0x0601, 0x7b0c}, + { 0x0201, 0x0b08, 0x6c24}, + { 0, 0x0906, 0x5d32}, + { 0x0303, 0x1010, 0x4d4d}, + { 0, 0x0609, 0x325d}, + { 0x0102, 0x080b, 0x246c}, + { 0, 0x0106, 0x0c7b}, +}; + + +static const int sub_pel_filters_int[8][3] = +{ + { 0, 0, 0}, + { 0x0000fffa, 0x007b000c, 0xffff0000}, + { 0x0002fff5, 0x006c0024, 0xfff80001}, + { 0x0000fff7, 0x005d0032, 0xfffa0000}, + { 0x0003fff0, 0x004d004d, 0xfff00003}, + { 0x0000fffa, 0x0032005d, 0xfff70000}, + { 0x0001fff8, 0x0024006c, 0xfff50002}, + { 0x0000ffff, 0x000c007b, 0xfffa0000}, +}; + + +static const int sub_pel_filters_inv[8][3] = +{ + { 0, 0, 0}, + { 0xfffa0000, 0x000c007b, 0x0000ffff}, + { 0xfff50002, 0x0024006c, 0x0001fff8}, + { 0xfff70000, 0x0032005d, 0x0000fffa}, + { 0xfff00003, 0x004d004d, 0x0003fff0}, + { 0xfffa0000, 0x005d0032, 0x0000fff7}, + { 0xfff80001, 0x006c0024, 0x0002fff5}, + { 0xffff0000, 0x007b000c, 0x0000fffa}, +}; + + +static const int sub_pel_filters_int_tap_4[8][2] = +{ + { 0, 0}, + { 0xfffa007b, 0x000cffff}, + { 0, 0}, + { 0xfff7005d, 0x0032fffa}, + { 0, 0}, + { 0xfffa0032, 0x005dfff7}, + { 0, 0}, + { 0xffff000c, 0x007bfffa}, +}; + + +static const int sub_pel_filters_inv_tap_4[8][2] = +{ + { 0, 0}, + { 0x007bfffa, 0xffff000c}, + { 0, 0}, + { 0x005dfff7, 0xfffa0032}, + { 0, 0}, + { 0x0032fffa, 0xfff7005d}, + { 0, 0}, + { 0x000cffff, 0xfffa007b}, +}; + +inline void prefetch_load(unsigned char *src) +{ + __asm__ __volatile__ ( + "pref 0, 0(%[src]) \n\t" + : + : [src] "r" (src) + ); +} + + +inline void prefetch_store(unsigned char *dst) +{ + __asm__ __volatile__ ( + "pref 1, 0(%[dst]) \n\t" + : + : [dst] "r" (dst) + ); +} + +void dsputil_static_init(void) +{ + 
int i; + + for (i = 0; i < 256; i++) ff_cropTbl[i + CROP_WIDTH] = i; + + for (i = 0; i < CROP_WIDTH; i++) + { + ff_cropTbl[i] = 0; + ff_cropTbl[i + CROP_WIDTH + 256] = 255; + } +} + +void vp8_filter_block2d_first_pass_4 +( + unsigned char *RESTRICT src_ptr, + unsigned char *RESTRICT dst_ptr, + unsigned int src_pixels_per_line, + unsigned int output_height, + int xoffset, + int pitch +) +{ + unsigned int i; + int Temp1, Temp2, Temp3, Temp4; + + unsigned int vector4a = 64; + int vector1b, vector2b, vector3b; + unsigned int tp1, tp2, tn1, tn2; + unsigned int p1, p2, p3; + unsigned int n1, n2, n3; + unsigned char *cm = ff_cropTbl + CROP_WIDTH; + + vector3b = sub_pel_filters_inv[xoffset][2]; + + /* if (xoffset == 0) we don't need any filtering */ + if (vector3b == 0) + { + for (i = 0; i < output_height; i++) + { + /* prefetch src_ptr data to cache memory */ + prefetch_load(src_ptr + src_pixels_per_line); + dst_ptr[0] = src_ptr[0]; + dst_ptr[1] = src_ptr[1]; + dst_ptr[2] = src_ptr[2]; + dst_ptr[3] = src_ptr[3]; + + /* next row... */ + src_ptr += src_pixels_per_line; + dst_ptr += 4; + } + } + else + { + if (vector3b > 65536) + { + /* 6 tap filter */ + + vector1b = sub_pel_filters_inv[xoffset][0]; + vector2b = sub_pel_filters_inv[xoffset][1]; + + /* prefetch src_ptr data to cache memory */ + prefetch_load(src_ptr + src_pixels_per_line); + + for (i = output_height; i--;) + { + /* apply filter with vectors pairs */ + __asm__ __volatile__ ( + "ulw %[tp1], -2(%[src_ptr]) \n\t" + "ulw %[tp2], 2(%[src_ptr]) \n\t" + + /* even 1. pixel */ + "mtlo %[vector4a], $ac3 \n\t" + "preceu.ph.qbr %[p1], %[tp1] \n\t" + "preceu.ph.qbl %[p2], %[tp1] \n\t" + "preceu.ph.qbr %[p3], %[tp2] \n\t" + "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t" + "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t" + "dpa.w.ph $ac3, %[p3], %[vector3b] \n\t" + + /* even 2. 
pixel */ + "mtlo %[vector4a], $ac2 \n\t" + "preceu.ph.qbl %[p1], %[tp2] \n\t" + "balign %[tp2], %[tp1], 3 \n\t" + "extp %[Temp1], $ac3, 9 \n\t" + "dpa.w.ph $ac2, %[p2], %[vector1b] \n\t" + "dpa.w.ph $ac2, %[p3], %[vector2b] \n\t" + "dpa.w.ph $ac2, %[p1], %[vector3b] \n\t" + + /* odd 1. pixel */ + "ulw %[tn2], 3(%[src_ptr]) \n\t" + "mtlo %[vector4a], $ac3 \n\t" + "preceu.ph.qbr %[n1], %[tp2] \n\t" + "preceu.ph.qbl %[n2], %[tp2] \n\t" + "preceu.ph.qbr %[n3], %[tn2] \n\t" + "extp %[Temp3], $ac2, 9 \n\t" + "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t" + "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t" + "dpa.w.ph $ac3, %[n3], %[vector3b] \n\t" + + /* even 2. pixel */ + "mtlo %[vector4a], $ac2 \n\t" + "preceu.ph.qbl %[n1], %[tn2] \n\t" + "extp %[Temp2], $ac3, 9 \n\t" + "dpa.w.ph $ac2, %[n2], %[vector1b] \n\t" + "dpa.w.ph $ac2, %[n3], %[vector2b] \n\t" + "dpa.w.ph $ac2, %[n1], %[vector3b] \n\t" + "extp %[Temp4], $ac2, 9 \n\t" + + /* clamp */ + "lbux %[tp1], %[Temp1](%[cm]) \n\t" + "lbux %[tn1], %[Temp2](%[cm]) \n\t" + "lbux %[tp2], %[Temp3](%[cm]) \n\t" + "lbux %[n2], %[Temp4](%[cm]) \n\t" + + /* store bytes */ + "sb %[tp1], 0(%[dst_ptr]) \n\t" + "sb %[tn1], 1(%[dst_ptr]) \n\t" + "sb %[tp2], 2(%[dst_ptr]) \n\t" + "sb %[n2], 3(%[dst_ptr]) \n\t" + + : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2), [tn1] "=&r" (tn1), + [tn2] "=&r" (tn2), [p1] "=&r" (p1), [p2] "=&r" (p2), + [p3] "=&r" (p3), [n1] "=&r" (n1), [n2] "=&r" (n2), + [n3] "=&r" (n3), [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), + [Temp3] "=&r" (Temp3), [Temp4] "=&r" (Temp4) + : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), + [vector4a] "r" (vector4a), [cm] "r" (cm), [dst_ptr] "r" (dst_ptr), + [vector3b] "r" (vector3b), [src_ptr] "r" (src_ptr) + ); + + /* Next row... 
*/ + src_ptr += src_pixels_per_line; + dst_ptr += pitch; + } + } + else + { + /* 4 tap filter */ + + vector1b = sub_pel_filters_inv_tap_4[xoffset][0]; + vector2b = sub_pel_filters_inv_tap_4[xoffset][1]; + + for (i = output_height; i--;) + { + /* apply filter with vectors pairs */ + __asm__ __volatile__ ( + "ulw %[tp1], -1(%[src_ptr]) \n\t" + "ulw %[tp2], 3(%[src_ptr]) \n\t" + + /* even 1. pixel */ + "mtlo %[vector4a], $ac3 \n\t" + "preceu.ph.qbr %[p1], %[tp1] \n\t" + "preceu.ph.qbl %[p2], %[tp1] \n\t" + "preceu.ph.qbr %[p3], %[tp2] \n\t" + "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t" + "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t" + + /* even 2. pixel */ + "mtlo %[vector4a], $ac2 \n\t" + "dpa.w.ph $ac2, %[p2], %[vector1b] \n\t" + "dpa.w.ph $ac2, %[p3], %[vector2b] \n\t" + "extp %[Temp1], $ac3, 9 \n\t" + + /* odd 1. pixel */ + "srl %[tn1], %[tp2], 8 \n\t" + "balign %[tp2], %[tp1], 3 \n\t" + "mtlo %[vector4a], $ac3 \n\t" + "preceu.ph.qbr %[n1], %[tp2] \n\t" + "preceu.ph.qbl %[n2], %[tp2] \n\t" + "preceu.ph.qbr %[n3], %[tn1] \n\t" + "extp %[Temp3], $ac2, 9 \n\t" + "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t" + "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t" + + /* odd 2. 
pixel */ + "mtlo %[vector4a], $ac2 \n\t" + "extp %[Temp2], $ac3, 9 \n\t" + "dpa.w.ph $ac2, %[n2], %[vector1b] \n\t" + "dpa.w.ph $ac2, %[n3], %[vector2b] \n\t" + "extp %[Temp4], $ac2, 9 \n\t" + + /* clamp and store results */ + "lbux %[tp1], %[Temp1](%[cm]) \n\t" + "lbux %[tn1], %[Temp2](%[cm]) \n\t" + "lbux %[tp2], %[Temp3](%[cm]) \n\t" + "sb %[tp1], 0(%[dst_ptr]) \n\t" + "sb %[tn1], 1(%[dst_ptr]) \n\t" + "lbux %[n2], %[Temp4](%[cm]) \n\t" + "sb %[tp2], 2(%[dst_ptr]) \n\t" + "sb %[n2], 3(%[dst_ptr]) \n\t" + + : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2), [tn1] "=&r" (tn1), + [p1] "=&r" (p1), [p2] "=&r" (p2), [p3] "=&r" (p3), + [n1] "=&r" (n1), [n2] "=&r" (n2), [n3] "=&r" (n3), + [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), + [Temp3] "=&r" (Temp3), [Temp4] "=&r" (Temp4) + : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), + [vector4a] "r" (vector4a), [cm] "r" (cm), [dst_ptr] "r" (dst_ptr), + [src_ptr] "r" (src_ptr) + ); + /* Next row... */ + src_ptr += src_pixels_per_line; + dst_ptr += pitch; + } + } + } +} + +void vp8_filter_block2d_first_pass_8_all +( + unsigned char *RESTRICT src_ptr, + unsigned char *RESTRICT dst_ptr, + unsigned int src_pixels_per_line, + unsigned int output_height, + int xoffset, + int pitch +) +{ + unsigned int i; + int Temp1, Temp2, Temp3, Temp4; + + unsigned int vector4a = 64; + unsigned int vector1b, vector2b, vector3b; + unsigned int tp1, tp2, tn1, tn2; + unsigned int p1, p2, p3, p4; + unsigned int n1, n2, n3, n4; + + unsigned char *cm = ff_cropTbl + CROP_WIDTH; + + /* if (xoffset == 0) we don't need any filtering */ + if (xoffset == 0) + { + for (i = 0; i < output_height; i++) + { + /* prefetch src_ptr data to cache memory */ + prefetch_load(src_ptr + src_pixels_per_line); + + dst_ptr[0] = src_ptr[0]; + dst_ptr[1] = src_ptr[1]; + dst_ptr[2] = src_ptr[2]; + dst_ptr[3] = src_ptr[3]; + dst_ptr[4] = src_ptr[4]; + dst_ptr[5] = src_ptr[5]; + dst_ptr[6] = src_ptr[6]; + dst_ptr[7] = src_ptr[7]; + + /* next row... 
*/ + src_ptr += src_pixels_per_line; + dst_ptr += 8; + } + } + else + { + vector3b = sub_pel_filters_inv[xoffset][2]; + + if (vector3b > 65536) + { + /* 6 tap filter */ + + vector1b = sub_pel_filters_inv[xoffset][0]; + vector2b = sub_pel_filters_inv[xoffset][1]; + + for (i = output_height; i--;) + { + /* prefetch src_ptr data to cache memory */ + prefetch_load(src_ptr + src_pixels_per_line); + + /* apply filter with vectors pairs */ + __asm__ __volatile__ ( + "ulw %[tp1], -2(%[src_ptr]) \n\t" + "ulw %[tp2], 2(%[src_ptr]) \n\t" + + /* even 1. pixel */ + "mtlo %[vector4a], $ac3 \n\t" + "preceu.ph.qbr %[p1], %[tp1] \n\t" + "preceu.ph.qbl %[p2], %[tp1] \n\t" + "preceu.ph.qbr %[p3], %[tp2] \n\t" + "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t" + "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t" + "dpa.w.ph $ac3, %[p3], %[vector3b] \n\t" + + /* even 2. pixel */ + "mtlo %[vector4a], $ac2 \n\t" + "preceu.ph.qbl %[p1], %[tp2] \n\t" + "dpa.w.ph $ac2, %[p2], %[vector1b] \n\t" + "dpa.w.ph $ac2, %[p3], %[vector2b] \n\t" + "dpa.w.ph $ac2, %[p1], %[vector3b] \n\t" + + "balign %[tp2], %[tp1], 3 \n\t" + "extp %[Temp1], $ac3, 9 \n\t" + "ulw %[tn2], 3(%[src_ptr]) \n\t" + + /* odd 1. pixel */ + "mtlo %[vector4a], $ac3 \n\t" + "preceu.ph.qbr %[n1], %[tp2] \n\t" + "preceu.ph.qbl %[n2], %[tp2] \n\t" + "preceu.ph.qbr %[n3], %[tn2] \n\t" + "extp %[Temp3], $ac2, 9 \n\t" + "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t" + "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t" + "dpa.w.ph $ac3, %[n3], %[vector3b] \n\t" + + /* odd 2. 
pixel */ + "mtlo %[vector4a], $ac2 \n\t" + "preceu.ph.qbl %[n1], %[tn2] \n\t" + "dpa.w.ph $ac2, %[n2], %[vector1b] \n\t" + "dpa.w.ph $ac2, %[n3], %[vector2b] \n\t" + "dpa.w.ph $ac2, %[n1], %[vector3b] \n\t" + "ulw %[tp1], 6(%[src_ptr]) \n\t" + "extp %[Temp2], $ac3, 9 \n\t" + "mtlo %[vector4a], $ac3 \n\t" + "preceu.ph.qbr %[p2], %[tp1] \n\t" + "extp %[Temp4], $ac2, 9 \n\t" + + : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2), [tn2] "=&r" (tn2), + [p1] "=&r" (p1), [p2] "=&r" (p2), [p3] "=&r" (p3), + [n1] "=&r" (n1), [n2] "=&r" (n2), [n3] "=&r" (n3), + [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), + [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4) + : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), + [vector4a] "r" (vector4a), [vector3b] "r" (vector3b), + [src_ptr] "r" (src_ptr) + ); + + /* clamp and store results */ + dst_ptr[0] = cm[Temp1]; + dst_ptr[1] = cm[Temp2]; + dst_ptr[2] = cm[Temp3]; + dst_ptr[3] = cm[Temp4]; + + /* next 4 pixels */ + __asm__ __volatile__ ( + /* even 3. pixel */ + "dpa.w.ph $ac3, %[p3], %[vector1b] \n\t" + "dpa.w.ph $ac3, %[p1], %[vector2b] \n\t" + "dpa.w.ph $ac3, %[p2], %[vector3b] \n\t" + + /* even 4. pixel */ + "mtlo %[vector4a], $ac2 \n\t" + "preceu.ph.qbl %[p4], %[tp1] \n\t" + "dpa.w.ph $ac2, %[p1], %[vector1b] \n\t" + "dpa.w.ph $ac2, %[p2], %[vector2b] \n\t" + "dpa.w.ph $ac2, %[p4], %[vector3b] \n\t" + + "ulw %[tn1], 7(%[src_ptr]) \n\t" + "extp %[Temp1], $ac3, 9 \n\t" + + /* odd 3. pixel */ + "mtlo %[vector4a], $ac3 \n\t" + "preceu.ph.qbr %[n2], %[tn1] \n\t" + "dpa.w.ph $ac3, %[n3], %[vector1b] \n\t" + "dpa.w.ph $ac3, %[n1], %[vector2b] \n\t" + "dpa.w.ph $ac3, %[n2], %[vector3b] \n\t" + "extp %[Temp3], $ac2, 9 \n\t" + + /* odd 4. 
pixel */ + "mtlo %[vector4a], $ac2 \n\t" + "preceu.ph.qbl %[n4], %[tn1] \n\t" + "dpa.w.ph $ac2, %[n1], %[vector1b] \n\t" + "dpa.w.ph $ac2, %[n2], %[vector2b] \n\t" + "dpa.w.ph $ac2, %[n4], %[vector3b] \n\t" + "extp %[Temp2], $ac3, 9 \n\t" + "extp %[Temp4], $ac2, 9 \n\t" + + : [tn1] "=&r" (tn1), [n2] "=&r" (n2), + [p4] "=&r" (p4), [n4] "=&r" (n4), + [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), + [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4) + : [tp1] "r" (tp1), [vector1b] "r" (vector1b), [p2] "r" (p2), + [vector2b] "r" (vector2b), [n1] "r" (n1), [p1] "r" (p1), + [vector4a] "r" (vector4a), [vector3b] "r" (vector3b), + [p3] "r" (p3), [n3] "r" (n3), [src_ptr] "r" (src_ptr) + ); + + /* clamp and store results */ + dst_ptr[4] = cm[Temp1]; + dst_ptr[5] = cm[Temp2]; + dst_ptr[6] = cm[Temp3]; + dst_ptr[7] = cm[Temp4]; + + src_ptr += src_pixels_per_line; + dst_ptr += pitch; + } + } + else + { + /* 4 tap filter */ + + vector1b = sub_pel_filters_inv_tap_4[xoffset][0]; + vector2b = sub_pel_filters_inv_tap_4[xoffset][1]; + + for (i = output_height; i--;) + { + /* prefetch src_ptr data to cache memory */ + prefetch_load(src_ptr + src_pixels_per_line); + + /* apply filter with vectors pairs */ + __asm__ __volatile__ ( + "ulw %[tp1], -1(%[src_ptr]) \n\t" + + /* even 1. pixel */ + "mtlo %[vector4a], $ac3 \n\t" + "preceu.ph.qbr %[p1], %[tp1] \n\t" + "preceu.ph.qbl %[p2], %[tp1] \n\t" + "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t" + "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t" + + "ulw %[tp2], 3(%[src_ptr]) \n\t" + + /* even 2. pixel */ + "mtlo %[vector4a], $ac2 \n\t" + "preceu.ph.qbr %[p3], %[tp2] \n\t" + "preceu.ph.qbl %[p4], %[tp2] \n\t" + "dpa.w.ph $ac2, %[p2], %[vector1b] \n\t" + "dpa.w.ph $ac2, %[p3], %[vector2b] \n\t" + "extp %[Temp1], $ac3, 9 \n\t" + + "balign %[tp2], %[tp1], 3 \n\t" + + /* odd 1. 
pixel */ + "mtlo %[vector4a], $ac3 \n\t" + "preceu.ph.qbr %[n1], %[tp2] \n\t" + "preceu.ph.qbl %[n2], %[tp2] \n\t" + "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t" + "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t" + "extp %[Temp3], $ac2, 9 \n\t" + + "ulw %[tn2], 4(%[src_ptr]) \n\t" + + /* odd 2. pixel */ + "mtlo %[vector4a], $ac2 \n\t" + "preceu.ph.qbr %[n3], %[tn2] \n\t" + "preceu.ph.qbl %[n4], %[tn2] \n\t" + "dpa.w.ph $ac2, %[n2], %[vector1b] \n\t" + "dpa.w.ph $ac2, %[n3], %[vector2b] \n\t" + "ulw %[tp1], 7(%[src_ptr]) \n\t" + "extp %[Temp2], $ac3, 9 \n\t" + "mtlo %[vector4a], $ac3 \n\t" + "extp %[Temp4], $ac2, 9 \n\t" + + : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2), + [tn2] "=&r" (tn2), [p1] "=&r" (p1), [p2] "=&r" (p2), + [p3] "=&r" (p3), [p4] "=&r" (p4), [n1] "=&r" (n1), + [n2] "=&r" (n2), [n3] "=&r" (n3), [n4] "=&r" (n4), + [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), + [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4) + : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), + [vector4a] "r" (vector4a), [src_ptr] "r" (src_ptr) + ); + + /* clamp and store results */ + dst_ptr[0] = cm[Temp1]; + dst_ptr[1] = cm[Temp2]; + dst_ptr[2] = cm[Temp3]; + dst_ptr[3] = cm[Temp4]; + + /* next 4 pixels */ + __asm__ __volatile__ ( + /* even 3. pixel */ + "dpa.w.ph $ac3, %[p3], %[vector1b] \n\t" + "dpa.w.ph $ac3, %[p4], %[vector2b] \n\t" + + /* even 4. pixel */ + "mtlo %[vector4a], $ac2 \n\t" + "preceu.ph.qbr %[p2], %[tp1] \n\t" + "dpa.w.ph $ac2, %[p4], %[vector1b] \n\t" + "dpa.w.ph $ac2, %[p2], %[vector2b] \n\t" + "extp %[Temp1], $ac3, 9 \n\t" + + /* odd 3. pixel */ + "mtlo %[vector4a], $ac3 \n\t" + "dpa.w.ph $ac3, %[n3], %[vector1b] \n\t" + "dpa.w.ph $ac3, %[n4], %[vector2b] \n\t" + "ulw %[tn1], 8(%[src_ptr]) \n\t" + "extp %[Temp3], $ac2, 9 \n\t" + + /* odd 4. 
pixel */ + "mtlo %[vector4a], $ac2 \n\t" + "preceu.ph.qbr %[n2], %[tn1] \n\t" + "dpa.w.ph $ac2, %[n4], %[vector1b] \n\t" + "dpa.w.ph $ac2, %[n2], %[vector2b] \n\t" + "extp %[Temp2], $ac3, 9 \n\t" + "extp %[Temp4], $ac2, 9 \n\t" + + : [tn1] "=&r" (tn1), [p2] "=&r" (p2), [n2] "=&r" (n2), + [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), + [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4) + : [tp1] "r" (tp1), [p3] "r" (p3), [p4] "r" (p4), + [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), + [vector4a] "r" (vector4a), [src_ptr] "r" (src_ptr), + [n3] "r" (n3), [n4] "r" (n4) + ); + + /* clamp and store results */ + dst_ptr[4] = cm[Temp1]; + dst_ptr[5] = cm[Temp2]; + dst_ptr[6] = cm[Temp3]; + dst_ptr[7] = cm[Temp4]; + + /* next row... */ + src_ptr += src_pixels_per_line; + dst_ptr += pitch; + } + } + } +} + + +void vp8_filter_block2d_first_pass16_6tap +( + unsigned char *RESTRICT src_ptr, + unsigned char *RESTRICT dst_ptr, + unsigned int src_pixels_per_line, + unsigned int output_height, + int xoffset, + int pitch +) +{ + unsigned int i; + int Temp1, Temp2, Temp3, Temp4; + + unsigned int vector4a; + unsigned int vector1b, vector2b, vector3b; + unsigned int tp1, tp2, tn1, tn2; + unsigned int p1, p2, p3, p4; + unsigned int n1, n2, n3, n4; + unsigned char *cm = ff_cropTbl + CROP_WIDTH; + + vector1b = sub_pel_filters_inv[xoffset][0]; + vector2b = sub_pel_filters_inv[xoffset][1]; + vector3b = sub_pel_filters_inv[xoffset][2]; + vector4a = 64; + + for (i = output_height; i--;) + { + /* prefetch src_ptr data to cache memory */ + prefetch_load(src_ptr + src_pixels_per_line); + + /* apply filter with vectors pairs */ + __asm__ __volatile__ ( + "ulw %[tp1], -2(%[src_ptr]) \n\t" + "ulw %[tp2], 2(%[src_ptr]) \n\t" + + /* even 1. 
pixel */ + "mtlo %[vector4a], $ac3 \n\t" + "preceu.ph.qbr %[p1], %[tp1] \n\t" + "preceu.ph.qbl %[p2], %[tp1] \n\t" + "preceu.ph.qbr %[p3], %[tp2] \n\t" + "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t" + "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t" + "dpa.w.ph $ac3, %[p3], %[vector3b] \n\t" + + /* even 2. pixel */ + "mtlo %[vector4a], $ac2 \n\t" + "preceu.ph.qbl %[p1], %[tp2] \n\t" + "dpa.w.ph $ac2, %[p2], %[vector1b] \n\t" + "dpa.w.ph $ac2, %[p3], %[vector2b] \n\t" + "dpa.w.ph $ac2, %[p1], %[vector3b] \n\t" + + "balign %[tp2], %[tp1], 3 \n\t" + "ulw %[tn2], 3(%[src_ptr]) \n\t" + "extp %[Temp1], $ac3, 9 \n\t" + + /* odd 1. pixel */ + "mtlo %[vector4a], $ac3 \n\t" + "preceu.ph.qbr %[n1], %[tp2] \n\t" + "preceu.ph.qbl %[n2], %[tp2] \n\t" + "preceu.ph.qbr %[n3], %[tn2] \n\t" + "extp %[Temp3], $ac2, 9 \n\t" + "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t" + "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t" + "dpa.w.ph $ac3, %[n3], %[vector3b] \n\t" + + /* odd 2. pixel */ + "mtlo %[vector4a], $ac2 \n\t" + "preceu.ph.qbl %[n1], %[tn2] \n\t" + "dpa.w.ph $ac2, %[n2], %[vector1b] \n\t" + "dpa.w.ph $ac2, %[n3], %[vector2b] \n\t" + "dpa.w.ph $ac2, %[n1], %[vector3b] \n\t" + "ulw %[tp1], 6(%[src_ptr]) \n\t" + "extp %[Temp2], $ac3, 9 \n\t" + "mtlo %[vector4a], $ac3 \n\t" + "preceu.ph.qbr %[p2], %[tp1] \n\t" + "extp %[Temp4], $ac2, 9 \n\t" + + : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2), [tn2] "=&r" (tn2), + [p1] "=&r" (p1), [p2] "=&r" (p2), [p3] "=&r" (p3), + [n1] "=&r" (n1), [n2] "=&r" (n2), [n3] "=&r" (n3), + [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), + [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4) + : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), + [vector4a] "r" (vector4a), [vector3b] "r" (vector3b), + [src_ptr] "r" (src_ptr) + ); + + /* clamp and store results */ + dst_ptr[0] = cm[Temp1]; + dst_ptr[1] = cm[Temp2]; + dst_ptr[2] = cm[Temp3]; + dst_ptr[3] = cm[Temp4]; + + /* next 4 pixels */ + __asm__ __volatile__ ( + /* even 3. 
pixel */ + "dpa.w.ph $ac3, %[p3], %[vector1b] \n\t" + "dpa.w.ph $ac3, %[p1], %[vector2b] \n\t" + "dpa.w.ph $ac3, %[p2], %[vector3b] \n\t" + + /* even 4. pixel */ + "mtlo %[vector4a], $ac2 \n\t" + "preceu.ph.qbl %[p4], %[tp1] \n\t" + "dpa.w.ph $ac2, %[p1], %[vector1b] \n\t" + "dpa.w.ph $ac2, %[p2], %[vector2b] \n\t" + "dpa.w.ph $ac2, %[p4], %[vector3b] \n\t" + "ulw %[tn1], 7(%[src_ptr]) \n\t" + "extp %[Temp1], $ac3, 9 \n\t" + + /* odd 3. pixel */ + "mtlo %[vector4a], $ac3 \n\t" + "preceu.ph.qbr %[n2], %[tn1] \n\t" + "dpa.w.ph $ac3, %[n3], %[vector1b] \n\t" + "dpa.w.ph $ac3, %[n1], %[vector2b] \n\t" + "dpa.w.ph $ac3, %[n2], %[vector3b] \n\t" + "extp %[Temp3], $ac2, 9 \n\t" + + /* odd 4. pixel */ + "mtlo %[vector4a], $ac2 \n\t" + "preceu.ph.qbl %[n4], %[tn1] \n\t" + "dpa.w.ph $ac2, %[n1], %[vector1b] \n\t" + "dpa.w.ph $ac2, %[n2], %[vector2b] \n\t" + "dpa.w.ph $ac2, %[n4], %[vector3b] \n\t" + "ulw %[tp2], 10(%[src_ptr]) \n\t" + "extp %[Temp2], $ac3, 9 \n\t" + "mtlo %[vector4a], $ac3 \n\t" + "preceu.ph.qbr %[p1], %[tp2] \n\t" + "extp %[Temp4], $ac2, 9 \n\t" + + : [tn1] "=&r" (tn1), [tp2] "=&r" (tp2), [n2] "=&r" (n2), + [p4] "=&r" (p4), [n4] "=&r" (n4), + [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), + [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4) + : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), + [tp1] "r" (tp1), [n1] "r" (n1), [p1] "r" (p1), + [vector4a] "r" (vector4a), [p2] "r" (p2), [vector3b] "r" (vector3b), + [p3] "r" (p3), [n3] "r" (n3), [src_ptr] "r" (src_ptr) + ); + + /* clamp and store results */ + dst_ptr[4] = cm[Temp1]; + dst_ptr[5] = cm[Temp2]; + dst_ptr[6] = cm[Temp3]; + dst_ptr[7] = cm[Temp4]; + + /* next 4 pixels */ + __asm__ __volatile__ ( + /* even 5. pixel */ + "dpa.w.ph $ac3, %[p2], %[vector1b] \n\t" + "dpa.w.ph $ac3, %[p4], %[vector2b] \n\t" + "dpa.w.ph $ac3, %[p1], %[vector3b] \n\t" + + /* even 6. 
pixel */ + "mtlo %[vector4a], $ac2 \n\t" + "preceu.ph.qbl %[p3], %[tp2] \n\t" + "dpa.w.ph $ac2, %[p4], %[vector1b] \n\t" + "dpa.w.ph $ac2, %[p1], %[vector2b] \n\t" + "dpa.w.ph $ac2, %[p3], %[vector3b] \n\t" + + "ulw %[tn1], 11(%[src_ptr]) \n\t" + "extp %[Temp1], $ac3, 9 \n\t" + + /* odd 5. pixel */ + "mtlo %[vector4a], $ac3 \n\t" + "preceu.ph.qbr %[n1], %[tn1] \n\t" + "dpa.w.ph $ac3, %[n2], %[vector1b] \n\t" + "dpa.w.ph $ac3, %[n4], %[vector2b] \n\t" + "dpa.w.ph $ac3, %[n1], %[vector3b] \n\t" + "extp %[Temp3], $ac2, 9 \n\t" + + /* odd 6. pixel */ + "mtlo %[vector4a], $ac2 \n\t" + "preceu.ph.qbl %[n3], %[tn1] \n\t" + "dpa.w.ph $ac2, %[n4], %[vector1b] \n\t" + "dpa.w.ph $ac2, %[n1], %[vector2b] \n\t" + "dpa.w.ph $ac2, %[n3], %[vector3b] \n\t" + "ulw %[tp1], 14(%[src_ptr]) \n\t" + "extp %[Temp2], $ac3, 9 \n\t" + "mtlo %[vector4a], $ac3 \n\t" + "preceu.ph.qbr %[p4], %[tp1] \n\t" + "extp %[Temp4], $ac2, 9 \n\t" + + : [tn1] "=&r" (tn1), [tp1] "=&r" (tp1), + [n1] "=&r" (n1), [p3] "=&r" (p3), [n3] "=&r" (n3), + [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), + [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4) + : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), + [tp2] "r" (tp2), [p2] "r" (p2), [n2] "r" (n2), + [p4] "r" (p4), [n4] "r" (n4), [p1] "r" (p1), [src_ptr] "r" (src_ptr), + [vector4a] "r" (vector4a), [vector3b] "r" (vector3b) + ); + + /* clamp and store results */ + dst_ptr[8] = cm[Temp1]; + dst_ptr[9] = cm[Temp2]; + dst_ptr[10] = cm[Temp3]; + dst_ptr[11] = cm[Temp4]; + + /* next 4 pixels */ + __asm__ __volatile__ ( + /* even 7. pixel */ + "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t" + "dpa.w.ph $ac3, %[p3], %[vector2b] \n\t" + "dpa.w.ph $ac3, %[p4], %[vector3b] \n\t" + + /* even 8. pixel */ + "mtlo %[vector4a], $ac2 \n\t" + "preceu.ph.qbl %[p2], %[tp1] \n\t" + "dpa.w.ph $ac2, %[p3], %[vector1b] \n\t" + "dpa.w.ph $ac2, %[p4], %[vector2b] \n\t" + "dpa.w.ph $ac2, %[p2], %[vector3b] \n\t" + "ulw %[tn1], 15(%[src_ptr]) \n\t" + "extp %[Temp1], $ac3, 9 \n\t" + + /* odd 7. 
pixel */ + "mtlo %[vector4a], $ac3 \n\t" + "preceu.ph.qbr %[n4], %[tn1] \n\t" + "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t" + "dpa.w.ph $ac3, %[n3], %[vector2b] \n\t" + "dpa.w.ph $ac3, %[n4], %[vector3b] \n\t" + "extp %[Temp3], $ac2, 9 \n\t" + + /* odd 8. pixel */ + "mtlo %[vector4a], $ac2 \n\t" + "preceu.ph.qbl %[n2], %[tn1] \n\t" + "dpa.w.ph $ac2, %[n3], %[vector1b] \n\t" + "dpa.w.ph $ac2, %[n4], %[vector2b] \n\t" + "dpa.w.ph $ac2, %[n2], %[vector3b] \n\t" + "extp %[Temp2], $ac3, 9 \n\t" + "extp %[Temp4], $ac2, 9 \n\t" + + /* clamp and store results */ + "lbux %[tp1], %[Temp1](%[cm]) \n\t" + "lbux %[tn1], %[Temp2](%[cm]) \n\t" + "lbux %[p2], %[Temp3](%[cm]) \n\t" + "sb %[tp1], 12(%[dst_ptr]) \n\t" + "sb %[tn1], 13(%[dst_ptr]) \n\t" + "lbux %[n2], %[Temp4](%[cm]) \n\t" + "sb %[p2], 14(%[dst_ptr]) \n\t" + "sb %[n2], 15(%[dst_ptr]) \n\t" + + : [tn1] "=&r" (tn1), [p2] "=&r" (p2), [n2] "=&r" (n2), [n4] "=&r" (n4), + [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), + [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4) + : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), + [tp1] "r" (tp1), [p4] "r" (p4), [n1] "r" (n1), [p1] "r" (p1), + [vector4a] "r" (vector4a), [vector3b] "r" (vector3b), [p3] "r" (p3), + [n3] "r" (n3), [src_ptr] "r" (src_ptr), + [cm] "r" (cm), [dst_ptr] "r" (dst_ptr) + ); + + src_ptr += src_pixels_per_line; + dst_ptr += pitch; + } +} + + +void vp8_filter_block2d_first_pass16_0 +( + unsigned char *RESTRICT src_ptr, + unsigned char *RESTRICT output_ptr, + unsigned int src_pixels_per_line +) +{ + int Temp1, Temp2, Temp3, Temp4; + int i; + + /* prefetch src_ptr data to cache memory */ + prefetch_store(output_ptr + 32); + + /* copy memory from src buffer to dst buffer */ + for (i = 0; i < 7; i++) + { + __asm__ __volatile__ ( + "ulw %[Temp1], 0(%[src_ptr]) \n\t" + "ulw %[Temp2], 4(%[src_ptr]) \n\t" + "ulw %[Temp3], 8(%[src_ptr]) \n\t" + "ulw %[Temp4], 12(%[src_ptr]) \n\t" + "sw %[Temp1], 0(%[output_ptr]) \n\t" + "sw %[Temp2], 4(%[output_ptr]) \n\t" + "sw %[Temp3], 
8(%[output_ptr]) \n\t" + "sw %[Temp4], 12(%[output_ptr]) \n\t" + "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" + + : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), [Temp3] "=&r" (Temp3), + [Temp4] "=&r" (Temp4), [src_ptr] "+r" (src_ptr) + : [src_pixels_per_line] "r" (src_pixels_per_line), + [output_ptr] "r" (output_ptr) + ); + + __asm__ __volatile__ ( + "ulw %[Temp1], 0(%[src_ptr]) \n\t" + "ulw %[Temp2], 4(%[src_ptr]) \n\t" + "ulw %[Temp3], 8(%[src_ptr]) \n\t" + "ulw %[Temp4], 12(%[src_ptr]) \n\t" + "sw %[Temp1], 16(%[output_ptr]) \n\t" + "sw %[Temp2], 20(%[output_ptr]) \n\t" + "sw %[Temp3], 24(%[output_ptr]) \n\t" + "sw %[Temp4], 28(%[output_ptr]) \n\t" + "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" + + : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), [Temp3] "=&r" (Temp3), + [Temp4] "=&r" (Temp4), [src_ptr] "+r" (src_ptr) + : [src_pixels_per_line] "r" (src_pixels_per_line), + [output_ptr] "r" (output_ptr) + ); + + __asm__ __volatile__ ( + "ulw %[Temp1], 0(%[src_ptr]) \n\t" + "ulw %[Temp2], 4(%[src_ptr]) \n\t" + "ulw %[Temp3], 8(%[src_ptr]) \n\t" + "ulw %[Temp4], 12(%[src_ptr]) \n\t" + "sw %[Temp1], 32(%[output_ptr]) \n\t" + "sw %[Temp2], 36(%[output_ptr]) \n\t" + "sw %[Temp3], 40(%[output_ptr]) \n\t" + "sw %[Temp4], 44(%[output_ptr]) \n\t" + "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" + + : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), [Temp3] "=&r" (Temp3), + [Temp4] "=&r" (Temp4), [src_ptr] "+r" (src_ptr) + : [src_pixels_per_line] "r" (src_pixels_per_line), + [output_ptr] "r" (output_ptr) + ); + + output_ptr += 48; + } +} + + +void vp8_filter_block2d_first_pass16_4tap +( + unsigned char *RESTRICT src_ptr, + unsigned char *RESTRICT output_ptr, + unsigned int src_pixels_per_line, + unsigned int output_width, + unsigned int output_height, + int xoffset, + int yoffset, + unsigned char *RESTRICT dst_ptr, + int pitch +) +{ + unsigned int i, j; + int Temp1, Temp2, Temp3, Temp4; + + unsigned int vector4a; + int vector1b, 
vector2b; + unsigned int tp1, tp2, tp3, tn1; + unsigned int p1, p2, p3; + unsigned int n1, n2, n3; + unsigned char *cm = ff_cropTbl + CROP_WIDTH; + + vector4a = 64; + + vector1b = sub_pel_filters_inv_tap_4[xoffset][0]; + vector2b = sub_pel_filters_inv_tap_4[xoffset][1]; + + /* if (yoffset == 0) don't need temp buffer, data will be stored in dst_ptr */ + if (yoffset == 0) + { + output_height -= 5; + src_ptr += (src_pixels_per_line + src_pixels_per_line); + + for (i = output_height; i--;) + { + __asm__ __volatile__ ( + "ulw %[tp3], -1(%[src_ptr]) \n\t" + : [tp3] "=&r" (tp3) + : [src_ptr] "r" (src_ptr) + ); + + /* processing 4 adjacent pixels */ + for (j = 0; j < 16; j += 4) + { + /* apply filter with vectors pairs */ + __asm__ __volatile__ ( + "ulw %[tp2], 3(%[src_ptr]) \n\t" + "move %[tp1], %[tp3] \n\t" + + /* even 1. pixel */ + "mtlo %[vector4a], $ac3 \n\t" + "mthi $0, $ac3 \n\t" + "move %[tp3], %[tp2] \n\t" + "preceu.ph.qbr %[p1], %[tp1] \n\t" + "preceu.ph.qbl %[p2], %[tp1] \n\t" + "preceu.ph.qbr %[p3], %[tp2] \n\t" + "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t" + "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t" + + /* even 2. pixel */ + "mtlo %[vector4a], $ac2 \n\t" + "mthi $0, $ac2 \n\t" + "dpa.w.ph $ac2, %[p2], %[vector1b] \n\t" + "dpa.w.ph $ac2, %[p3], %[vector2b] \n\t" + "extr.w %[Temp1], $ac3, 7 \n\t" + + /* odd 1. pixel */ + "ulw %[tn1], 4(%[src_ptr]) \n\t" + "balign %[tp2], %[tp1], 3 \n\t" + "mtlo %[vector4a], $ac3 \n\t" + "mthi $0, $ac3 \n\t" + "preceu.ph.qbr %[n1], %[tp2] \n\t" + "preceu.ph.qbl %[n2], %[tp2] \n\t" + "preceu.ph.qbr %[n3], %[tn1] \n\t" + "extr.w %[Temp3], $ac2, 7 \n\t" + "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t" + "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t" + + /* odd 2. 
pixel */ + "mtlo %[vector4a], $ac2 \n\t" + "mthi $0, $ac2 \n\t" + "extr.w %[Temp2], $ac3, 7 \n\t" + "dpa.w.ph $ac2, %[n2], %[vector1b] \n\t" + "dpa.w.ph $ac2, %[n3], %[vector2b] \n\t" + "extr.w %[Temp4], $ac2, 7 \n\t" + + /* clamp and store results */ + "lbux %[tp1], %[Temp1](%[cm]) \n\t" + "lbux %[tn1], %[Temp2](%[cm]) \n\t" + "lbux %[tp2], %[Temp3](%[cm]) \n\t" + "sb %[tp1], 0(%[dst_ptr]) \n\t" + "sb %[tn1], 1(%[dst_ptr]) \n\t" + "lbux %[n2], %[Temp4](%[cm]) \n\t" + "sb %[tp2], 2(%[dst_ptr]) \n\t" + "sb %[n2], 3(%[dst_ptr]) \n\t" + + : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2), [tp3] "=&r" (tp3), + [tn1] "=&r" (tn1), [p1] "=&r" (p1), [p2] "=&r" (p2), + [n1] "=&r" (n1), [n2] "=&r" (n2), [n3] "=&r" (n3), + [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), [p3] "=&r" (p3), + [Temp3] "=&r" (Temp3), [Temp4] "=&r" (Temp4) + : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), + [vector4a] "r" (vector4a), [cm] "r" (cm), [dst_ptr] "r" (dst_ptr), + [src_ptr] "r" (src_ptr) + ); + + src_ptr += 4; + } + + /* Next row... */ + src_ptr += src_pixels_per_line - 16; + dst_ptr += pitch; + } + } + else + { + for (i = output_height; i--;) + { + /* processing 4 adjacent pixels */ + for (j = 0; j < 16; j += 4) + { + /* apply filter with vectors pairs */ + __asm__ __volatile__ ( + "ulw %[tp1], -1(%[src_ptr]) \n\t" + "ulw %[tp2], 3(%[src_ptr]) \n\t" + + /* even 1. pixel */ + "mtlo %[vector4a], $ac3 \n\t" + "mthi $0, $ac3 \n\t" + "preceu.ph.qbr %[p1], %[tp1] \n\t" + "preceu.ph.qbl %[p2], %[tp1] \n\t" + "preceu.ph.qbr %[p3], %[tp2] \n\t" + "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t" + "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t" + + /* even 2. pixel */ + "mtlo %[vector4a], $ac2 \n\t" + "mthi $0, $ac2 \n\t" + "dpa.w.ph $ac2, %[p2], %[vector1b] \n\t" + "dpa.w.ph $ac2, %[p3], %[vector2b] \n\t" + "extr.w %[Temp1], $ac3, 7 \n\t" + + /* odd 1. 
pixel */ + "ulw %[tn1], 4(%[src_ptr]) \n\t" + "balign %[tp2], %[tp1], 3 \n\t" + "mtlo %[vector4a], $ac3 \n\t" + "mthi $0, $ac3 \n\t" + "preceu.ph.qbr %[n1], %[tp2] \n\t" + "preceu.ph.qbl %[n2], %[tp2] \n\t" + "preceu.ph.qbr %[n3], %[tn1] \n\t" + "extr.w %[Temp3], $ac2, 7 \n\t" + "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t" + "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t" + + /* odd 2. pixel */ + "mtlo %[vector4a], $ac2 \n\t" + "mthi $0, $ac2 \n\t" + "extr.w %[Temp2], $ac3, 7 \n\t" + "dpa.w.ph $ac2, %[n2], %[vector1b] \n\t" + "dpa.w.ph $ac2, %[n3], %[vector2b] \n\t" + "extr.w %[Temp4], $ac2, 7 \n\t" + + /* clamp and store results */ + "lbux %[tp1], %[Temp1](%[cm]) \n\t" + "lbux %[tn1], %[Temp2](%[cm]) \n\t" + "lbux %[tp2], %[Temp3](%[cm]) \n\t" + "sb %[tp1], 0(%[output_ptr]) \n\t" + "sb %[tn1], 1(%[output_ptr]) \n\t" + "lbux %[n2], %[Temp4](%[cm]) \n\t" + "sb %[tp2], 2(%[output_ptr]) \n\t" + "sb %[n2], 3(%[output_ptr]) \n\t" + + : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2), [tn1] "=&r" (tn1), + [p1] "=&r" (p1), [p2] "=&r" (p2), [p3] "=&r" (p3), + [n1] "=&r" (n1), [n2] "=&r" (n2), [n3] "=&r" (n3), + [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), + [Temp3] "=&r" (Temp3), [Temp4] "=&r" (Temp4) + : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), + [vector4a] "r" (vector4a), [cm] "r" (cm), + [output_ptr] "r" (output_ptr), [src_ptr] "r" (src_ptr) + ); + + src_ptr += 4; + } + + /* next row... 
*/ + src_ptr += src_pixels_per_line; + output_ptr += output_width; + } + } +} + + +void vp8_filter_block2d_second_pass4 +( + unsigned char *RESTRICT src_ptr, + unsigned char *RESTRICT output_ptr, + int output_pitch, + int yoffset +) +{ + unsigned int i; + + int Temp1, Temp2, Temp3, Temp4; + unsigned int vector1b, vector2b, vector3b, vector4a; + + unsigned char src_ptr_l2; + unsigned char src_ptr_l1; + unsigned char src_ptr_0; + unsigned char src_ptr_r1; + unsigned char src_ptr_r2; + unsigned char src_ptr_r3; + + unsigned char *cm = ff_cropTbl + CROP_WIDTH; + + vector4a = 64; + + /* load filter coefficients */ + vector1b = sub_pel_filterss[yoffset][0]; + vector2b = sub_pel_filterss[yoffset][2]; + vector3b = sub_pel_filterss[yoffset][1]; + + if (vector1b) + { + /* 6 tap filter */ + + for (i = 2; i--;) + { + /* prefetch src_ptr data to cache memory */ + prefetch_load(src_ptr); + + /* do not allow compiler to reorder instructions */ + __asm__ __volatile__ ( + ".set noreorder \n\t" + : + : + ); + + /* apply filter with vectors pairs */ + __asm__ __volatile__ ( + "lbu %[src_ptr_l2], -8(%[src_ptr]) \n\t" + "lbu %[src_ptr_l1], -4(%[src_ptr]) \n\t" + "lbu %[src_ptr_0], 0(%[src_ptr]) \n\t" + "lbu %[src_ptr_r1], 4(%[src_ptr]) \n\t" + "lbu %[src_ptr_r2], 8(%[src_ptr]) \n\t" + "lbu %[src_ptr_r3], 12(%[src_ptr]) \n\t" + "mtlo %[vector4a], $ac2 \n\t" + + "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" + "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" + "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" + "dpau.h.qbr $ac2, %[src_ptr_l2], %[vector1b] \n\t" + "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" + "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" + + "lbu %[src_ptr_l2], -7(%[src_ptr]) \n\t" + "lbu %[src_ptr_l1], -3(%[src_ptr]) \n\t" + "lbu %[src_ptr_0], 1(%[src_ptr]) \n\t" + "lbu %[src_ptr_r1], 5(%[src_ptr]) \n\t" + "lbu %[src_ptr_r2], 9(%[src_ptr]) \n\t" + "lbu %[src_ptr_r3], 13(%[src_ptr]) \n\t" + "mtlo %[vector4a], $ac3 \n\t" + "extp %[Temp1], $ac2, 9 \n\t" + + "append 
%[src_ptr_l2], %[src_ptr_r3], 8 \n\t" + "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" + "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" + "dpau.h.qbr $ac3, %[src_ptr_l2], %[vector1b] \n\t" + "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" + "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" + + "lbu %[src_ptr_l2], -6(%[src_ptr]) \n\t" + "lbu %[src_ptr_l1], -2(%[src_ptr]) \n\t" + "lbu %[src_ptr_0], 2(%[src_ptr]) \n\t" + "lbu %[src_ptr_r1], 6(%[src_ptr]) \n\t" + "lbu %[src_ptr_r2], 10(%[src_ptr]) \n\t" + "lbu %[src_ptr_r3], 14(%[src_ptr]) \n\t" + "mtlo %[vector4a], $ac0 \n\t" + "extp %[Temp2], $ac3, 9 \n\t" + + "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" + "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" + "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" + "dpau.h.qbr $ac0, %[src_ptr_l2], %[vector1b] \n\t" + "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" + "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" + + "lbu %[src_ptr_l2], -5(%[src_ptr]) \n\t" + "lbu %[src_ptr_l1], -1(%[src_ptr]) \n\t" + "lbu %[src_ptr_0], 3(%[src_ptr]) \n\t" + "lbu %[src_ptr_r1], 7(%[src_ptr]) \n\t" + "lbu %[src_ptr_r2], 11(%[src_ptr]) \n\t" + "lbu %[src_ptr_r3], 15(%[src_ptr]) \n\t" + "mtlo %[vector4a], $ac1 \n\t" + "extp %[Temp3], $ac0, 9 \n\t" + + "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" + "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" + "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" + "dpau.h.qbr $ac1, %[src_ptr_l2], %[vector1b] \n\t" + "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" + "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" + "extp %[Temp4], $ac1, 9 \n\t" + + : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), + [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4), + [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0), + [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2), + [src_ptr_l2] "=&r" (src_ptr_l2), [src_ptr_r3] "=&r" (src_ptr_r3) + : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), + [vector3b] "r" (vector3b), [vector4a] "r" (vector4a), + [src_ptr] 
"r" (src_ptr) + ); + + /* clamp and store results */ + output_ptr[0] = cm[Temp1]; + output_ptr[1] = cm[Temp2]; + output_ptr[2] = cm[Temp3]; + output_ptr[3] = cm[Temp4]; + + output_ptr += output_pitch; + + /* apply filter with vectors pairs */ + __asm__ __volatile__ ( + "lbu %[src_ptr_l2], -4(%[src_ptr]) \n\t" + "lbu %[src_ptr_l1], 0(%[src_ptr]) \n\t" + "lbu %[src_ptr_0], 4(%[src_ptr]) \n\t" + "lbu %[src_ptr_r1], 8(%[src_ptr]) \n\t" + "lbu %[src_ptr_r2], 12(%[src_ptr]) \n\t" + "lbu %[src_ptr_r3], 16(%[src_ptr]) \n\t" + "mtlo %[vector4a], $ac2 \n\t" + "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" + "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" + "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" + "dpau.h.qbr $ac2, %[src_ptr_l2], %[vector1b] \n\t" + "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" + "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" + + "lbu %[src_ptr_l2], -3(%[src_ptr]) \n\t" + "lbu %[src_ptr_l1], 1(%[src_ptr]) \n\t" + "lbu %[src_ptr_0], 5(%[src_ptr]) \n\t" + "lbu %[src_ptr_r1], 9(%[src_ptr]) \n\t" + "lbu %[src_ptr_r2], 13(%[src_ptr]) \n\t" + "lbu %[src_ptr_r3], 17(%[src_ptr]) \n\t" + "mtlo %[vector4a], $ac3 \n\t" + "extp %[Temp1], $ac2, 9 \n\t" + + "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" + "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" + "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" + "dpau.h.qbr $ac3, %[src_ptr_l2], %[vector1b] \n\t" + "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" + "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" + + "lbu %[src_ptr_l2], -2(%[src_ptr]) \n\t" + "lbu %[src_ptr_l1], 2(%[src_ptr]) \n\t" + "lbu %[src_ptr_0], 6(%[src_ptr]) \n\t" + "lbu %[src_ptr_r1], 10(%[src_ptr]) \n\t" + "lbu %[src_ptr_r2], 14(%[src_ptr]) \n\t" + "lbu %[src_ptr_r3], 18(%[src_ptr]) \n\t" + "mtlo %[vector4a], $ac0 \n\t" + "extp %[Temp2], $ac3, 9 \n\t" + + "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" + "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" + "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" + "dpau.h.qbr $ac0, %[src_ptr_l2], %[vector1b] \n\t" + 
"dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" + "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" + + "lbu %[src_ptr_l2], -1(%[src_ptr]) \n\t" + "lbu %[src_ptr_l1], 3(%[src_ptr]) \n\t" + "lbu %[src_ptr_0], 7(%[src_ptr]) \n\t" + "lbu %[src_ptr_r1], 11(%[src_ptr]) \n\t" + "lbu %[src_ptr_r2], 15(%[src_ptr]) \n\t" + "lbu %[src_ptr_r3], 19(%[src_ptr]) \n\t" + "mtlo %[vector4a], $ac1 \n\t" + "extp %[Temp3], $ac0, 9 \n\t" + + "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" + "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" + "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" + "dpau.h.qbr $ac1, %[src_ptr_l2], %[vector1b] \n\t" + "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" + "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" + "extp %[Temp4], $ac1, 9 \n\t" + + : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), + [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4), + [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0), + [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2), + [src_ptr_l2] "=&r" (src_ptr_l2), [src_ptr_r3] "=&r" (src_ptr_r3) + : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), + [vector3b] "r" (vector3b), [vector4a] "r" (vector4a), + [src_ptr] "r" (src_ptr) + ); + + /* clamp and store results */ + output_ptr[0] = cm[Temp1]; + output_ptr[1] = cm[Temp2]; + output_ptr[2] = cm[Temp3]; + output_ptr[3] = cm[Temp4]; + + src_ptr += 8; + output_ptr += output_pitch; + } + } + else + { + /* 4 tap filter */ + + /* prefetch src_ptr data to cache memory */ + prefetch_load(src_ptr); + + for (i = 2; i--;) + { + /* do not allow compiler to reorder instructions */ + __asm__ __volatile__ ( + ".set noreorder \n\t" + : + : + ); + + /* apply filter with vectors pairs */ + __asm__ __volatile__ ( + "lbu %[src_ptr_l1], -4(%[src_ptr]) \n\t" + "lbu %[src_ptr_0], 0(%[src_ptr]) \n\t" + "lbu %[src_ptr_r1], 4(%[src_ptr]) \n\t" + "lbu %[src_ptr_r2], 8(%[src_ptr]) \n\t" + "mtlo %[vector4a], $ac2 \n\t" + "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" + "append %[src_ptr_l1], 
%[src_ptr_r2], 8 \n\t" + "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" + "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" + + "lbu %[src_ptr_l1], -3(%[src_ptr]) \n\t" + "lbu %[src_ptr_0], 1(%[src_ptr]) \n\t" + "lbu %[src_ptr_r1], 5(%[src_ptr]) \n\t" + "lbu %[src_ptr_r2], 9(%[src_ptr]) \n\t" + "mtlo %[vector4a], $ac3 \n\t" + "extp %[Temp1], $ac2, 9 \n\t" + + "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" + "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" + "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" + "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" + + "lbu %[src_ptr_l1], -2(%[src_ptr]) \n\t" + "lbu %[src_ptr_0], 2(%[src_ptr]) \n\t" + "lbu %[src_ptr_r1], 6(%[src_ptr]) \n\t" + "lbu %[src_ptr_r2], 10(%[src_ptr]) \n\t" + "mtlo %[vector4a], $ac0 \n\t" + "extp %[Temp2], $ac3, 9 \n\t" + + "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" + "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" + "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" + "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" + + "lbu %[src_ptr_l1], -1(%[src_ptr]) \n\t" + "lbu %[src_ptr_0], 3(%[src_ptr]) \n\t" + "lbu %[src_ptr_r1], 7(%[src_ptr]) \n\t" + "lbu %[src_ptr_r2], 11(%[src_ptr]) \n\t" + "mtlo %[vector4a], $ac1 \n\t" + "extp %[Temp3], $ac0, 9 \n\t" + "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" + "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" + "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" + "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" + "extp %[Temp4], $ac1, 9 \n\t" + + : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), + [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4), + [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0), + [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2) + : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b), + [vector4a] "r" (vector4a), [src_ptr] "r" (src_ptr) + ); + + /* clamp and store results */ + output_ptr[0] = cm[Temp1]; + output_ptr[1] = cm[Temp2]; + output_ptr[2] = cm[Temp3]; + output_ptr[3] = cm[Temp4]; + + output_ptr += output_pitch; + 
+ /* apply filter with vectors pairs */ + __asm__ __volatile__ ( + "lbu %[src_ptr_l1], 0(%[src_ptr]) \n\t" + "lbu %[src_ptr_0], 4(%[src_ptr]) \n\t" + "lbu %[src_ptr_r1], 8(%[src_ptr]) \n\t" + "lbu %[src_ptr_r2], 12(%[src_ptr]) \n\t" + "mtlo %[vector4a], $ac2 \n\t" + "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" + "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" + "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" + "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" + + "lbu %[src_ptr_l1], 1(%[src_ptr]) \n\t" + "lbu %[src_ptr_0], 5(%[src_ptr]) \n\t" + "lbu %[src_ptr_r1], 9(%[src_ptr]) \n\t" + "lbu %[src_ptr_r2], 13(%[src_ptr]) \n\t" + "mtlo %[vector4a], $ac3 \n\t" + "extp %[Temp1], $ac2, 9 \n\t" + + "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" + "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" + "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" + "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" + + "lbu %[src_ptr_l1], 2(%[src_ptr]) \n\t" + "lbu %[src_ptr_0], 6(%[src_ptr]) \n\t" + "lbu %[src_ptr_r1], 10(%[src_ptr]) \n\t" + "lbu %[src_ptr_r2], 14(%[src_ptr]) \n\t" + "mtlo %[vector4a], $ac0 \n\t" + "extp %[Temp2], $ac3, 9 \n\t" + + "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" + "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" + "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" + "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" + + "lbu %[src_ptr_l1], 3(%[src_ptr]) \n\t" + "lbu %[src_ptr_0], 7(%[src_ptr]) \n\t" + "lbu %[src_ptr_r1], 11(%[src_ptr]) \n\t" + "lbu %[src_ptr_r2], 15(%[src_ptr]) \n\t" + "mtlo %[vector4a], $ac1 \n\t" + "extp %[Temp3], $ac0, 9 \n\t" + "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" + "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" + "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" + "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" + "extp %[Temp4], $ac1, 9 \n\t" + + : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), + [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4), + [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0), + [src_ptr_r1] "=&r" 
(src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2) + : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b), + [vector4a] "r" (vector4a), [src_ptr] "r" (src_ptr) + ); + + /* clamp and store results */ + output_ptr[0] = cm[Temp1]; + output_ptr[1] = cm[Temp2]; + output_ptr[2] = cm[Temp3]; + output_ptr[3] = cm[Temp4]; + + src_ptr += 8; + output_ptr += output_pitch; + } + } +} + + +void vp8_filter_block2d_second_pass_8 +( + unsigned char *RESTRICT src_ptr, + unsigned char *RESTRICT output_ptr, + int output_pitch, + unsigned int output_height, + unsigned int output_width, + unsigned int yoffset +) +{ + unsigned int i; + + int Temp1, Temp2, Temp3, Temp4, Temp5, Temp6, Temp7, Temp8; + unsigned int vector1b, vector2b, vector3b, vector4a; + + unsigned char src_ptr_l2; + unsigned char src_ptr_l1; + unsigned char src_ptr_0; + unsigned char src_ptr_r1; + unsigned char src_ptr_r2; + unsigned char src_ptr_r3; + unsigned char *cm = ff_cropTbl + CROP_WIDTH; + + vector4a = 64; + + vector1b = sub_pel_filterss[yoffset][0]; + vector2b = sub_pel_filterss[yoffset][2]; + vector3b = sub_pel_filterss[yoffset][1]; + + if (vector1b) + { + /* 6 tap filter */ + + /* prefetch src_ptr data to cache memory */ + prefetch_load(src_ptr); + + for (i = output_height; i--;) + { + /* apply filter with vectors pairs */ + __asm__ __volatile__ ( + "lbu %[src_ptr_l2], -16(%[src_ptr]) \n\t" + "lbu %[src_ptr_l1], -8(%[src_ptr]) \n\t" + "lbu %[src_ptr_0], 0(%[src_ptr]) \n\t" + "lbu %[src_ptr_r1], 8(%[src_ptr]) \n\t" + "lbu %[src_ptr_r2], 16(%[src_ptr]) \n\t" + "lbu %[src_ptr_r3], 24(%[src_ptr]) \n\t" + "mtlo %[vector4a], $ac2 \n\t" + + "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" + "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" + "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" + "dpau.h.qbr $ac2, %[src_ptr_l2], %[vector1b] \n\t" + "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" + "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" + + "lbu %[src_ptr_l2], -15(%[src_ptr]) \n\t" + "lbu %[src_ptr_l1], -7(%[src_ptr]) 
\n\t" + "lbu %[src_ptr_0], 1(%[src_ptr]) \n\t" + "lbu %[src_ptr_r1], 9(%[src_ptr]) \n\t" + "lbu %[src_ptr_r2], 17(%[src_ptr]) \n\t" + "lbu %[src_ptr_r3], 25(%[src_ptr]) \n\t" + "mtlo %[vector4a], $ac3 \n\t" + "extp %[Temp1], $ac2, 9 \n\t" + + "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" + "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" + "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" + "dpau.h.qbr $ac3, %[src_ptr_l2], %[vector1b] \n\t" + "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" + "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" + + "lbu %[src_ptr_l2], -14(%[src_ptr]) \n\t" + "lbu %[src_ptr_l1], -6(%[src_ptr]) \n\t" + "lbu %[src_ptr_0], 2(%[src_ptr]) \n\t" + "lbu %[src_ptr_r1], 10(%[src_ptr]) \n\t" + "lbu %[src_ptr_r2], 18(%[src_ptr]) \n\t" + "lbu %[src_ptr_r3], 26(%[src_ptr]) \n\t" + "mtlo %[vector4a], $ac0 \n\t" + "extp %[Temp2], $ac3, 9 \n\t" + + "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" + "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" + "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" + "dpau.h.qbr $ac0, %[src_ptr_l2], %[vector1b] \n\t" + "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" + "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" + + "lbu %[src_ptr_l2], -13(%[src_ptr]) \n\t" + "lbu %[src_ptr_l1], -5(%[src_ptr]) \n\t" + "lbu %[src_ptr_0], 3(%[src_ptr]) \n\t" + "lbu %[src_ptr_r1], 11(%[src_ptr]) \n\t" + "lbu %[src_ptr_r2], 19(%[src_ptr]) \n\t" + "lbu %[src_ptr_r3], 27(%[src_ptr]) \n\t" + "mtlo %[vector4a], $ac1 \n\t" + "extp %[Temp3], $ac0, 9 \n\t" + + "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" + "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" + "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" + "dpau.h.qbr $ac1, %[src_ptr_l2], %[vector1b] \n\t" + "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" + "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" + + : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), [Temp3] "=&r" (Temp3), + [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0), + [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" 
(src_ptr_r2), + [src_ptr_l2] "=&r" (src_ptr_l2), [src_ptr_r3] "=&r" (src_ptr_r3) + : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), + [vector3b] "r" (vector3b), [vector4a] "r" (vector4a), + [src_ptr] "r" (src_ptr) + ); + + /* apply filter with vectors pairs */ + __asm__ __volatile__ ( + "lbu %[src_ptr_l2], -12(%[src_ptr]) \n\t" + "lbu %[src_ptr_l1], -4(%[src_ptr]) \n\t" + "lbu %[src_ptr_0], 4(%[src_ptr]) \n\t" + "lbu %[src_ptr_r1], 12(%[src_ptr]) \n\t" + "lbu %[src_ptr_r2], 20(%[src_ptr]) \n\t" + "lbu %[src_ptr_r3], 28(%[src_ptr]) \n\t" + "mtlo %[vector4a], $ac2 \n\t" + + "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" + "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" + "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" + "dpau.h.qbr $ac2, %[src_ptr_l2], %[vector1b] \n\t" + "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" + "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" + "extp %[Temp4], $ac1, 9 \n\t" + + "lbu %[src_ptr_l2], -11(%[src_ptr]) \n\t" + "lbu %[src_ptr_l1], -3(%[src_ptr]) \n\t" + "lbu %[src_ptr_0], 5(%[src_ptr]) \n\t" + "lbu %[src_ptr_r1], 13(%[src_ptr]) \n\t" + "lbu %[src_ptr_r2], 21(%[src_ptr]) \n\t" + "lbu %[src_ptr_r3], 29(%[src_ptr]) \n\t" + "mtlo %[vector4a], $ac3 \n\t" + "extp %[Temp5], $ac2, 9 \n\t" + + "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" + "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" + "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" + "dpau.h.qbr $ac3, %[src_ptr_l2], %[vector1b] \n\t" + "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" + "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" + + "lbu %[src_ptr_l2], -10(%[src_ptr]) \n\t" + "lbu %[src_ptr_l1], -2(%[src_ptr]) \n\t" + "lbu %[src_ptr_0], 6(%[src_ptr]) \n\t" + "lbu %[src_ptr_r1], 14(%[src_ptr]) \n\t" + "lbu %[src_ptr_r2], 22(%[src_ptr]) \n\t" + "lbu %[src_ptr_r3], 30(%[src_ptr]) \n\t" + "mtlo %[vector4a], $ac0 \n\t" + "extp %[Temp6], $ac3, 9 \n\t" + + "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" + "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" + "append %[src_ptr_l1], 
%[src_ptr_r2], 8 \n\t" + "dpau.h.qbr $ac0, %[src_ptr_l2], %[vector1b] \n\t" + "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" + "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" + + "lbu %[src_ptr_l2], -9(%[src_ptr]) \n\t" + "lbu %[src_ptr_l1], -1(%[src_ptr]) \n\t" + "lbu %[src_ptr_0], 7(%[src_ptr]) \n\t" + "lbu %[src_ptr_r1], 15(%[src_ptr]) \n\t" + "lbu %[src_ptr_r2], 23(%[src_ptr]) \n\t" + "lbu %[src_ptr_r3], 31(%[src_ptr]) \n\t" + "mtlo %[vector4a], $ac1 \n\t" + "extp %[Temp7], $ac0, 9 \n\t" + + "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" + "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" + "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" + "dpau.h.qbr $ac1, %[src_ptr_l2], %[vector1b] \n\t" + "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" + "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" + "extp %[Temp8], $ac1, 9 \n\t" + + : [Temp4] "=&r" (Temp4), [Temp5] "=&r" (Temp5), + [Temp6] "=&r" (Temp6), [Temp7] "=&r" (Temp7), [Temp8] "=r" (Temp8), + [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0), + [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2), + [src_ptr_l2] "=&r" (src_ptr_l2),[src_ptr_r3] "=&r" (src_ptr_r3) + : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), + [vector3b] "r" (vector3b), [vector4a] "r" (vector4a), + [src_ptr] "r" (src_ptr) + ); + + /* clamp and store results */ + output_ptr[0] = cm[Temp1]; + output_ptr[1] = cm[Temp2]; + output_ptr[2] = cm[Temp3]; + output_ptr[3] = cm[Temp4]; + output_ptr[4] = cm[Temp5]; + output_ptr[5] = cm[Temp6]; + output_ptr[6] = cm[Temp7]; + output_ptr[7] = cm[Temp8]; + + src_ptr += 8; + output_ptr += output_pitch; + } + } + else + { + /* 4 tap filter */ + + /* prefetch src_ptr data to cache memory */ + prefetch_load(src_ptr); + + for (i = output_height; i--;) + { + __asm__ __volatile__ ( + "lbu %[src_ptr_l1], -8(%[src_ptr]) \n\t" + "lbu %[src_ptr_0], 0(%[src_ptr]) \n\t" + "lbu %[src_ptr_r1], 8(%[src_ptr]) \n\t" + "lbu %[src_ptr_r2], 16(%[src_ptr]) \n\t" + "mtlo %[vector4a], $ac2 \n\t" + 
"append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" + "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" + "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" + "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" + + : [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0), + [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2) + : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b), + [vector4a] "r" (vector4a), [src_ptr] "r" (src_ptr) + ); + + __asm__ __volatile__ ( + "lbu %[src_ptr_l1], -7(%[src_ptr]) \n\t" + "lbu %[src_ptr_0], 1(%[src_ptr]) \n\t" + "lbu %[src_ptr_r1], 9(%[src_ptr]) \n\t" + "lbu %[src_ptr_r2], 17(%[src_ptr]) \n\t" + "mtlo %[vector4a], $ac3 \n\t" + "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" + "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" + "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" + "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" + "extp %[Temp1], $ac2, 9 \n\t" + + : [Temp1] "=r" (Temp1), + [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0), + [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2) + : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b), + [vector4a] "r" (vector4a), [src_ptr] "r" (src_ptr) + ); + + src_ptr_l1 = src_ptr[-6]; + src_ptr_0 = src_ptr[2]; + src_ptr_r1 = src_ptr[10]; + src_ptr_r2 = src_ptr[18]; + + __asm__ __volatile__ ( + "mtlo %[vector4a], $ac0 \n\t" + "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" + "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" + "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" + "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" + "extp %[Temp2], $ac3, 9 \n\t" + + : [Temp2] "=r" (Temp2) + : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b), + [src_ptr_l1] "r" (src_ptr_l1), [src_ptr_0] "r" (src_ptr_0), + [src_ptr_r1] "r" (src_ptr_r1), [src_ptr_r2] "r" (src_ptr_r2), + [vector4a] "r" (vector4a) + ); + + src_ptr_l1 = src_ptr[-5]; + src_ptr_0 = src_ptr[3]; + src_ptr_r1 = src_ptr[11]; + src_ptr_r2 = src_ptr[19]; + + __asm__ __volatile__ ( + "mtlo %[vector4a], $ac1 
\n\t" + "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" + "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" + "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" + "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" + "extp %[Temp3], $ac0, 9 \n\t" + + : [Temp3] "=r" (Temp3) + : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b), + [src_ptr_l1] "r" (src_ptr_l1), [src_ptr_0] "r" (src_ptr_0), + [src_ptr_r1] "r" (src_ptr_r1), [src_ptr_r2] "r" (src_ptr_r2), + [vector4a] "r" (vector4a) + ); + + src_ptr_l1 = src_ptr[-4]; + src_ptr_0 = src_ptr[4]; + src_ptr_r1 = src_ptr[12]; + src_ptr_r2 = src_ptr[20]; + + __asm__ __volatile__ ( + "mtlo %[vector4a], $ac2 \n\t" + "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" + "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" + "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" + "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" + "extp %[Temp4], $ac1, 9 \n\t" + + : [Temp4] "=r" (Temp4) + : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b), + [src_ptr_l1] "r" (src_ptr_l1), [src_ptr_0] "r" (src_ptr_0), + [src_ptr_r1] "r" (src_ptr_r1), [src_ptr_r2] "r" (src_ptr_r2), + [vector4a] "r" (vector4a) + ); + + src_ptr_l1 = src_ptr[-3]; + src_ptr_0 = src_ptr[5]; + src_ptr_r1 = src_ptr[13]; + src_ptr_r2 = src_ptr[21]; + + __asm__ __volatile__ ( + "mtlo %[vector4a], $ac3 \n\t" + "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" + "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" + "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" + "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" + "extp %[Temp5], $ac2, 9 \n\t" + + : [Temp5] "=&r" (Temp5) + : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b), + [src_ptr_l1] "r" (src_ptr_l1), [src_ptr_0] "r" (src_ptr_0), + [src_ptr_r1] "r" (src_ptr_r1), [src_ptr_r2] "r" (src_ptr_r2), + [vector4a] "r" (vector4a) + ); + + src_ptr_l1 = src_ptr[-2]; + src_ptr_0 = src_ptr[6]; + src_ptr_r1 = src_ptr[14]; + src_ptr_r2 = src_ptr[22]; + + __asm__ __volatile__ ( + "mtlo %[vector4a], $ac0 \n\t" + "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" + 
"append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" + "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" + "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" + "extp %[Temp6], $ac3, 9 \n\t" + + : [Temp6] "=r" (Temp6) + : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b), + [src_ptr_l1] "r" (src_ptr_l1), [src_ptr_0] "r" (src_ptr_0), + [src_ptr_r1] "r" (src_ptr_r1), [src_ptr_r2] "r" (src_ptr_r2), + [vector4a] "r" (vector4a) + ); + + src_ptr_l1 = src_ptr[-1]; + src_ptr_0 = src_ptr[7]; + src_ptr_r1 = src_ptr[15]; + src_ptr_r2 = src_ptr[23]; + + __asm__ __volatile__ ( + "mtlo %[vector4a], $ac1 \n\t" + "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" + "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" + "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" + "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" + "extp %[Temp7], $ac0, 9 \n\t" + "extp %[Temp8], $ac1, 9 \n\t" + + : [Temp7] "=&r" (Temp7), [Temp8] "=r" (Temp8) + : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b), + [src_ptr_l1] "r" (src_ptr_l1), [src_ptr_0] "r" (src_ptr_0), + [src_ptr_r1] "r" (src_ptr_r1), [src_ptr_r2] "r" (src_ptr_r2), + [vector4a] "r" (vector4a) + ); + + /* clamp and store results */ + output_ptr[0] = cm[Temp1]; + output_ptr[1] = cm[Temp2]; + output_ptr[2] = cm[Temp3]; + output_ptr[3] = cm[Temp4]; + output_ptr[4] = cm[Temp5]; + output_ptr[5] = cm[Temp6]; + output_ptr[6] = cm[Temp7]; + output_ptr[7] = cm[Temp8]; + + src_ptr += 8; + output_ptr += output_pitch; + } + } +} + + +void vp8_filter_block2d_second_pass161 +( + unsigned char *RESTRICT src_ptr, + unsigned char *RESTRICT output_ptr, + int output_pitch, + const unsigned short *vp8_filter +) +{ + unsigned int i, j; + + int Temp1, Temp2, Temp3, Temp4, Temp5, Temp6, Temp7, Temp8; + unsigned int vector4a; + unsigned int vector1b, vector2b, vector3b; + + unsigned char src_ptr_l2; + unsigned char src_ptr_l1; + unsigned char src_ptr_0; + unsigned char src_ptr_r1; + unsigned char src_ptr_r2; + unsigned char src_ptr_r3; + unsigned char *cm = ff_cropTbl + 
CROP_WIDTH; + + vector4a = 64; + + vector1b = vp8_filter[0]; + vector2b = vp8_filter[2]; + vector3b = vp8_filter[1]; + + if (vector1b == 0) + { + /* 4 tap filter */ + + /* prefetch src_ptr data to cache memory */ + prefetch_load(src_ptr + 16); + + for (i = 16; i--;) + { + /* unrolling for loop */ + for (j = 0; j < 16; j += 8) + { + /* apply filter with vectors pairs */ + __asm__ __volatile__ ( + "lbu %[src_ptr_l1], -16(%[src_ptr]) \n\t" + "lbu %[src_ptr_0], 0(%[src_ptr]) \n\t" + "lbu %[src_ptr_r1], 16(%[src_ptr]) \n\t" + "lbu %[src_ptr_r2], 32(%[src_ptr]) \n\t" + "mtlo %[vector4a], $ac2 \n\t" + "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" + "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" + "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" + "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" + + "lbu %[src_ptr_l1], -15(%[src_ptr]) \n\t" + "lbu %[src_ptr_0], 1(%[src_ptr]) \n\t" + "lbu %[src_ptr_r1], 17(%[src_ptr]) \n\t" + "lbu %[src_ptr_r2], 33(%[src_ptr]) \n\t" + "mtlo %[vector4a], $ac3 \n\t" + "extp %[Temp1], $ac2, 9 \n\t" + + "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" + "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" + "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" + "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" + + "lbu %[src_ptr_l1], -14(%[src_ptr]) \n\t" + "lbu %[src_ptr_0], 2(%[src_ptr]) \n\t" + "lbu %[src_ptr_r1], 18(%[src_ptr]) \n\t" + "lbu %[src_ptr_r2], 34(%[src_ptr]) \n\t" + "mtlo %[vector4a], $ac1 \n\t" + "extp %[Temp2], $ac3, 9 \n\t" + + "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" + "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" + "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" + "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" + + "lbu %[src_ptr_l1], -13(%[src_ptr]) \n\t" + "lbu %[src_ptr_0], 3(%[src_ptr]) \n\t" + "lbu %[src_ptr_r1], 19(%[src_ptr]) \n\t" + "lbu %[src_ptr_r2], 35(%[src_ptr]) \n\t" + "mtlo %[vector4a], $ac3 \n\t" + "extp %[Temp3], $ac1, 9 \n\t" + + "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" + "append %[src_ptr_l1], 
%[src_ptr_r2], 8 \n\t" + "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" + "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" + + "lbu %[src_ptr_l1], -12(%[src_ptr]) \n\t" + "lbu %[src_ptr_0], 4(%[src_ptr]) \n\t" + "lbu %[src_ptr_r1], 20(%[src_ptr]) \n\t" + "lbu %[src_ptr_r2], 36(%[src_ptr]) \n\t" + "mtlo %[vector4a], $ac2 \n\t" + "extp %[Temp4], $ac3, 9 \n\t" + + "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" + "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" + "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" + "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" + + "lbu %[src_ptr_l1], -11(%[src_ptr]) \n\t" + "lbu %[src_ptr_0], 5(%[src_ptr]) \n\t" + "lbu %[src_ptr_r1], 21(%[src_ptr]) \n\t" + "lbu %[src_ptr_r2], 37(%[src_ptr]) \n\t" + "mtlo %[vector4a], $ac3 \n\t" + "extp %[Temp5], $ac2, 9 \n\t" + + "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" + "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" + "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" + "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" + + "lbu %[src_ptr_l1], -10(%[src_ptr]) \n\t" + "lbu %[src_ptr_0], 6(%[src_ptr]) \n\t" + "lbu %[src_ptr_r1], 22(%[src_ptr]) \n\t" + "lbu %[src_ptr_r2], 38(%[src_ptr]) \n\t" + "mtlo %[vector4a], $ac1 \n\t" + "extp %[Temp6], $ac3, 9 \n\t" + + "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" + "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" + "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" + "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" + + "lbu %[src_ptr_l1], -9(%[src_ptr]) \n\t" + "lbu %[src_ptr_0], 7(%[src_ptr]) \n\t" + "lbu %[src_ptr_r1], 23(%[src_ptr]) \n\t" + "lbu %[src_ptr_r2], 39(%[src_ptr]) \n\t" + "mtlo %[vector4a], $ac3 \n\t" + "extp %[Temp7], $ac1, 9 \n\t" + + "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" + "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" + "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" + "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" + "extp %[Temp8], $ac3, 9 \n\t" + + : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), + [Temp3] "=&r" (Temp3), [Temp4] 
"=&r" (Temp4), + [Temp5] "=&r" (Temp5), [Temp6] "=&r" (Temp6), + [Temp7] "=&r" (Temp7), [Temp8] "=r" (Temp8), + [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0), + [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2) + : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b), + [vector4a] "r" (vector4a), [src_ptr] "r" (src_ptr) + ); + + /* clamp and store results */ + output_ptr[j] = cm[Temp1]; + output_ptr[j + 1] = cm[Temp2]; + output_ptr[j + 2] = cm[Temp3]; + output_ptr[j + 3] = cm[Temp4]; + output_ptr[j + 4] = cm[Temp5]; + output_ptr[j + 5] = cm[Temp6]; + output_ptr[j + 6] = cm[Temp7]; + output_ptr[j + 7] = cm[Temp8]; + + src_ptr += 8; + } + + output_ptr += output_pitch; + } + } + else + { + /* 4 tap filter */ + + /* prefetch src_ptr data to cache memory */ + prefetch_load(src_ptr + 16); + + /* unroll for loop */ + for (i = 16; i--;) + { + /* apply filter with vectors pairs */ + __asm__ __volatile__ ( + "lbu %[src_ptr_l2], -32(%[src_ptr]) \n\t" + "lbu %[src_ptr_l1], -16(%[src_ptr]) \n\t" + "lbu %[src_ptr_0], 0(%[src_ptr]) \n\t" + "lbu %[src_ptr_r1], 16(%[src_ptr]) \n\t" + "lbu %[src_ptr_r2], 32(%[src_ptr]) \n\t" + "lbu %[src_ptr_r3], 48(%[src_ptr]) \n\t" + "mtlo %[vector4a], $ac2 \n\t" + + "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" + "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" + "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" + "dpau.h.qbr $ac2, %[src_ptr_l2], %[vector1b] \n\t" + "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" + "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" + + "lbu %[src_ptr_l2], -31(%[src_ptr]) \n\t" + "lbu %[src_ptr_l1], -15(%[src_ptr]) \n\t" + "lbu %[src_ptr_0], 1(%[src_ptr]) \n\t" + "lbu %[src_ptr_r1], 17(%[src_ptr]) \n\t" + "lbu %[src_ptr_r2], 33(%[src_ptr]) \n\t" + "lbu %[src_ptr_r3], 49(%[src_ptr]) \n\t" + "mtlo %[vector4a], $ac0 \n\t" + "extp %[Temp1], $ac2, 9 \n\t" + + "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" + "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" + "append %[src_ptr_l1], %[src_ptr_r2], 8 
\n\t" + "dpau.h.qbr $ac0, %[src_ptr_l2], %[vector1b] \n\t" + "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" + "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" + + "lbu %[src_ptr_l2], -30(%[src_ptr]) \n\t" + "lbu %[src_ptr_l1], -14(%[src_ptr]) \n\t" + "lbu %[src_ptr_0], 2(%[src_ptr]) \n\t" + "lbu %[src_ptr_r1], 18(%[src_ptr]) \n\t" + "lbu %[src_ptr_r2], 34(%[src_ptr]) \n\t" + "lbu %[src_ptr_r3], 50(%[src_ptr]) \n\t" + "mtlo %[vector4a], $ac1 \n\t" + "extp %[Temp2], $ac0, 9 \n\t" + + "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" + "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" + "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" + "dpau.h.qbr $ac1, %[src_ptr_l2], %[vector1b] \n\t" + "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" + "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" + + "lbu %[src_ptr_l2], -29(%[src_ptr]) \n\t" + "lbu %[src_ptr_l1], -13(%[src_ptr]) \n\t" + "lbu %[src_ptr_0], 3(%[src_ptr]) \n\t" + "lbu %[src_ptr_r1], 19(%[src_ptr]) \n\t" + "lbu %[src_ptr_r2], 35(%[src_ptr]) \n\t" + "lbu %[src_ptr_r3], 51(%[src_ptr]) \n\t" + "mtlo %[vector4a], $ac3 \n\t" + "extp %[Temp3], $ac1, 9 \n\t" + + "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" + "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" + "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" + "dpau.h.qbr $ac3, %[src_ptr_l2], %[vector1b] \n\t" + "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" + "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" + + "lbu %[src_ptr_l2], -28(%[src_ptr]) \n\t" + "lbu %[src_ptr_l1], -12(%[src_ptr]) \n\t" + "lbu %[src_ptr_0], 4(%[src_ptr]) \n\t" + "lbu %[src_ptr_r1], 20(%[src_ptr]) \n\t" + "lbu %[src_ptr_r2], 36(%[src_ptr]) \n\t" + "lbu %[src_ptr_r3], 52(%[src_ptr]) \n\t" + "mtlo %[vector4a], $ac2 \n\t" + "extp %[Temp4], $ac3, 9 \n\t" + + "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" + "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" + "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" + "dpau.h.qbr $ac2, %[src_ptr_l2], %[vector1b] \n\t" + "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" + 
"dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" + + "lbu %[src_ptr_l2], -27(%[src_ptr]) \n\t" + "lbu %[src_ptr_l1], -11(%[src_ptr]) \n\t" + "lbu %[src_ptr_0], 5(%[src_ptr]) \n\t" + "lbu %[src_ptr_r1], 21(%[src_ptr]) \n\t" + "lbu %[src_ptr_r2], 37(%[src_ptr]) \n\t" + "lbu %[src_ptr_r3], 53(%[src_ptr]) \n\t" + "mtlo %[vector4a], $ac0 \n\t" + "extp %[Temp5], $ac2, 9 \n\t" + + "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" + "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" + "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" + "dpau.h.qbr $ac0, %[src_ptr_l2], %[vector1b] \n\t" + "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" + "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" + + "lbu %[src_ptr_l2], -26(%[src_ptr]) \n\t" + "lbu %[src_ptr_l1], -10(%[src_ptr]) \n\t" + "lbu %[src_ptr_0], 6(%[src_ptr]) \n\t" + "lbu %[src_ptr_r1], 22(%[src_ptr]) \n\t" + "lbu %[src_ptr_r2], 38(%[src_ptr]) \n\t" + "lbu %[src_ptr_r3], 54(%[src_ptr]) \n\t" + "mtlo %[vector4a], $ac1 \n\t" + "extp %[Temp6], $ac0, 9 \n\t" + + "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" + "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" + "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" + "dpau.h.qbr $ac1, %[src_ptr_l2], %[vector1b] \n\t" + "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" + "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" + + "lbu %[src_ptr_l2], -25(%[src_ptr]) \n\t" + "lbu %[src_ptr_l1], -9(%[src_ptr]) \n\t" + "lbu %[src_ptr_0], 7(%[src_ptr]) \n\t" + "lbu %[src_ptr_r1], 23(%[src_ptr]) \n\t" + "lbu %[src_ptr_r2], 39(%[src_ptr]) \n\t" + "lbu %[src_ptr_r3], 55(%[src_ptr]) \n\t" + "mtlo %[vector4a], $ac3 \n\t" + "extp %[Temp7], $ac1, 9 \n\t" + + "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" + "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" + "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" + "dpau.h.qbr $ac3, %[src_ptr_l2], %[vector1b] \n\t" + "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" + "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" + "extp %[Temp8], $ac3, 9 \n\t" + + : [Temp1] "=&r" (Temp1), [Temp2] 
"=&r" (Temp2), + [Temp3] "=&r" (Temp3), [Temp4] "=&r" (Temp4), + [Temp5] "=&r" (Temp5), [Temp6] "=&r" (Temp6), + [Temp7] "=&r" (Temp7), [Temp8] "=r" (Temp8), + [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0), + [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2), + [src_ptr_l2] "=&r" (src_ptr_l2),[src_ptr_r3] "=&r" (src_ptr_r3) + : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), + [vector3b] "r" (vector3b), [vector4a] "r" (vector4a), + [src_ptr] "r" (src_ptr) + ); + + /* clamp and store results */ + output_ptr[0] = cm[Temp1]; + output_ptr[1] = cm[Temp2]; + output_ptr[2] = cm[Temp3]; + output_ptr[3] = cm[Temp4]; + output_ptr[4] = cm[Temp5]; + output_ptr[5] = cm[Temp6]; + output_ptr[6] = cm[Temp7]; + output_ptr[7] = cm[Temp8]; + + /* apply filter with vectors pairs */ + __asm__ __volatile__ ( + "lbu %[src_ptr_l2], -24(%[src_ptr]) \n\t" + "lbu %[src_ptr_l1], -8(%[src_ptr]) \n\t" + "lbu %[src_ptr_0], 8(%[src_ptr]) \n\t" + "lbu %[src_ptr_r1], 24(%[src_ptr]) \n\t" + "lbu %[src_ptr_r2], 40(%[src_ptr]) \n\t" + "lbu %[src_ptr_r3], 56(%[src_ptr]) \n\t" + "mtlo %[vector4a], $ac2 \n\t" + + "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" + "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" + "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" + "dpau.h.qbr $ac2, %[src_ptr_l2], %[vector1b] \n\t" + "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" + "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" + + "lbu %[src_ptr_l2], -23(%[src_ptr]) \n\t" + "lbu %[src_ptr_l1], -7(%[src_ptr]) \n\t" + "lbu %[src_ptr_0], 9(%[src_ptr]) \n\t" + "lbu %[src_ptr_r1], 25(%[src_ptr]) \n\t" + "lbu %[src_ptr_r2], 41(%[src_ptr]) \n\t" + "lbu %[src_ptr_r3], 57(%[src_ptr]) \n\t" + "mtlo %[vector4a], $ac0 \n\t" + "extp %[Temp1], $ac2, 9 \n\t" + + "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" + "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" + "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" + "dpau.h.qbr $ac0, %[src_ptr_l2], %[vector1b] \n\t" + "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] 
\n\t" + "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" + + "lbu %[src_ptr_l2], -22(%[src_ptr]) \n\t" + "lbu %[src_ptr_l1], -6(%[src_ptr]) \n\t" + "lbu %[src_ptr_0], 10(%[src_ptr]) \n\t" + "lbu %[src_ptr_r1], 26(%[src_ptr]) \n\t" + "lbu %[src_ptr_r2], 42(%[src_ptr]) \n\t" + "lbu %[src_ptr_r3], 58(%[src_ptr]) \n\t" + "mtlo %[vector4a], $ac1 \n\t" + "extp %[Temp2], $ac0, 9 \n\t" + + "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" + "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" + "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" + "dpau.h.qbr $ac1, %[src_ptr_l2], %[vector1b] \n\t" + "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" + "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" + + "lbu %[src_ptr_l2], -21(%[src_ptr]) \n\t" + "lbu %[src_ptr_l1], -5(%[src_ptr]) \n\t" + "lbu %[src_ptr_0], 11(%[src_ptr]) \n\t" + "lbu %[src_ptr_r1], 27(%[src_ptr]) \n\t" + "lbu %[src_ptr_r2], 43(%[src_ptr]) \n\t" + "lbu %[src_ptr_r3], 59(%[src_ptr]) \n\t" + "mtlo %[vector4a], $ac3 \n\t" + "extp %[Temp3], $ac1, 9 \n\t" + + "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" + "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" + "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" + "dpau.h.qbr $ac3, %[src_ptr_l2], %[vector1b] \n\t" + "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" + "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" + + "lbu %[src_ptr_l2], -20(%[src_ptr]) \n\t" + "lbu %[src_ptr_l1], -4(%[src_ptr]) \n\t" + "lbu %[src_ptr_0], 12(%[src_ptr]) \n\t" + "lbu %[src_ptr_r1], 28(%[src_ptr]) \n\t" + "lbu %[src_ptr_r2], 44(%[src_ptr]) \n\t" + "lbu %[src_ptr_r3], 60(%[src_ptr]) \n\t" + "mtlo %[vector4a], $ac2 \n\t" + "extp %[Temp4], $ac3, 9 \n\t" + + "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" + "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" + "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" + "dpau.h.qbr $ac2, %[src_ptr_l2], %[vector1b] \n\t" + "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" + "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" + + "lbu %[src_ptr_l2], -19(%[src_ptr]) \n\t" + "lbu 
%[src_ptr_l1], -3(%[src_ptr]) \n\t" + "lbu %[src_ptr_0], 13(%[src_ptr]) \n\t" + "lbu %[src_ptr_r1], 29(%[src_ptr]) \n\t" + "lbu %[src_ptr_r2], 45(%[src_ptr]) \n\t" + "lbu %[src_ptr_r3], 61(%[src_ptr]) \n\t" + "mtlo %[vector4a], $ac0 \n\t" + "extp %[Temp5], $ac2, 9 \n\t" + + "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" + "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" + "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" + "dpau.h.qbr $ac0, %[src_ptr_l2], %[vector1b] \n\t" + "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" + "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" + + "lbu %[src_ptr_l2], -18(%[src_ptr]) \n\t" + "lbu %[src_ptr_l1], -2(%[src_ptr]) \n\t" + "lbu %[src_ptr_0], 14(%[src_ptr]) \n\t" + "lbu %[src_ptr_r1], 30(%[src_ptr]) \n\t" + "lbu %[src_ptr_r2], 46(%[src_ptr]) \n\t" + "lbu %[src_ptr_r3], 62(%[src_ptr]) \n\t" + "mtlo %[vector4a], $ac1 \n\t" + "extp %[Temp6], $ac0, 9 \n\t" + + "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" + "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" + "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" + "dpau.h.qbr $ac1, %[src_ptr_l2], %[vector1b] \n\t" + "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" + "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" + + "lbu %[src_ptr_l2], -17(%[src_ptr]) \n\t" + "lbu %[src_ptr_l1], -1(%[src_ptr]) \n\t" + "lbu %[src_ptr_0], 15(%[src_ptr]) \n\t" + "lbu %[src_ptr_r1], 31(%[src_ptr]) \n\t" + "lbu %[src_ptr_r2], 47(%[src_ptr]) \n\t" + "lbu %[src_ptr_r3], 63(%[src_ptr]) \n\t" + "mtlo %[vector4a], $ac3 \n\t" + "extp %[Temp7], $ac1, 9 \n\t" + + "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" + "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" + "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" + "dpau.h.qbr $ac3, %[src_ptr_l2], %[vector1b] \n\t" + "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" + "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" + "extp %[Temp8], $ac3, 9 \n\t" + + : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), + [Temp3] "=&r" (Temp3), [Temp4] "=&r" (Temp4), + [Temp5] "=&r" (Temp5), [Temp6] "=&r" 
(Temp6), + [Temp7] "=&r" (Temp7), [Temp8] "=r" (Temp8), + [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0), + [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2), + [src_ptr_l2] "=&r" (src_ptr_l2), [src_ptr_r3] "=&r" (src_ptr_r3) + : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), + [vector3b] "r" (vector3b), [vector4a] "r" (vector4a), + [src_ptr] "r" (src_ptr) + ); + + src_ptr += 16; + output_ptr[8] = cm[Temp1]; + output_ptr[9] = cm[Temp2]; + output_ptr[10] = cm[Temp3]; + output_ptr[11] = cm[Temp4]; + output_ptr[12] = cm[Temp5]; + output_ptr[13] = cm[Temp6]; + output_ptr[14] = cm[Temp7]; + output_ptr[15] = cm[Temp8]; + + output_ptr += output_pitch; + } + } +} + + +void vp8_sixtap_predict4x4_dspr2 +( + unsigned char *RESTRICT src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + unsigned char *RESTRICT dst_ptr, + int dst_pitch +) +{ + unsigned char FData[9 * 4]; /* Temp data bufffer used in filtering */ + unsigned int pos = 16; + + /* bit positon for extract from acc */ + __asm__ __volatile__ ( + "wrdsp %[pos], 1 \n\t" + : + : [pos] "r" (pos) + ); + + if (yoffset) + { + /* First filter 1-D horizontally... */ + vp8_filter_block2d_first_pass_4(src_ptr - (2 * src_pixels_per_line), FData, + src_pixels_per_line, 9, xoffset, 4); + /* then filter verticaly... 
*/ + vp8_filter_block2d_second_pass4(FData + 8, dst_ptr, dst_pitch, yoffset); + } + else + /* if (yoffsset == 0) vp8_filter_block2d_first_pass save data to dst_ptr */ + vp8_filter_block2d_first_pass_4(src_ptr, dst_ptr, src_pixels_per_line, + 4, xoffset, dst_pitch); +} + + +void vp8_sixtap_predict8x8_dspr2 +( + unsigned char *RESTRICT src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + unsigned char *RESTRICT dst_ptr, + int dst_pitch +) +{ + + unsigned char FData[13 * 8]; /* Temp data bufffer used in filtering */ + unsigned int pos, Temp1, Temp2; + + pos = 16; + + /* bit positon for extract from acc */ + __asm__ __volatile__ ( + "wrdsp %[pos], 1 \n\t" + : + : [pos] "r" (pos) + ); + + if (yoffset) + { + + src_ptr = src_ptr - (2 * src_pixels_per_line); + + if (xoffset) + /* filter 1-D horizontally... */ + vp8_filter_block2d_first_pass_8_all(src_ptr, FData, src_pixels_per_line, + 13, xoffset, 8); + + else + { + /* prefetch src_ptr data to cache memory */ + prefetch_load(src_ptr + 2 * src_pixels_per_line); + + __asm__ __volatile__ ( + "ulw %[Temp1], 0(%[src_ptr]) \n\t" + "ulw %[Temp2], 4(%[src_ptr]) \n\t" + "sw %[Temp1], 0(%[FData]) \n\t" + "sw %[Temp2], 4(%[FData]) \n\t" + "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" + + "ulw %[Temp1], 0(%[src_ptr]) \n\t" + "ulw %[Temp2], 4(%[src_ptr]) \n\t" + "sw %[Temp1], 8(%[FData]) \n\t" + "sw %[Temp2], 12(%[FData]) \n\t" + "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" + + "ulw %[Temp1], 0(%[src_ptr]) \n\t" + "ulw %[Temp2], 4(%[src_ptr]) \n\t" + "sw %[Temp1], 16(%[FData]) \n\t" + "sw %[Temp2], 20(%[FData]) \n\t" + "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" + + "ulw %[Temp1], 0(%[src_ptr]) \n\t" + "ulw %[Temp2], 4(%[src_ptr]) \n\t" + "sw %[Temp1], 24(%[FData]) \n\t" + "sw %[Temp2], 28(%[FData]) \n\t" + "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" + + "ulw %[Temp1], 0(%[src_ptr]) \n\t" + "ulw %[Temp2], 4(%[src_ptr]) \n\t" + "sw %[Temp1], 32(%[FData]) \n\t" + "sw 
%[Temp2], 36(%[FData]) \n\t" + "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" + + "ulw %[Temp1], 0(%[src_ptr]) \n\t" + "ulw %[Temp2], 4(%[src_ptr]) \n\t" + "sw %[Temp1], 40(%[FData]) \n\t" + "sw %[Temp2], 44(%[FData]) \n\t" + "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" + + "ulw %[Temp1], 0(%[src_ptr]) \n\t" + "ulw %[Temp2], 4(%[src_ptr]) \n\t" + "sw %[Temp1], 48(%[FData]) \n\t" + "sw %[Temp2], 52(%[FData]) \n\t" + "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" + + "ulw %[Temp1], 0(%[src_ptr]) \n\t" + "ulw %[Temp2], 4(%[src_ptr]) \n\t" + "sw %[Temp1], 56(%[FData]) \n\t" + "sw %[Temp2], 60(%[FData]) \n\t" + "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" + + "ulw %[Temp1], 0(%[src_ptr]) \n\t" + "ulw %[Temp2], 4(%[src_ptr]) \n\t" + "sw %[Temp1], 64(%[FData]) \n\t" + "sw %[Temp2], 68(%[FData]) \n\t" + "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" + + "ulw %[Temp1], 0(%[src_ptr]) \n\t" + "ulw %[Temp2], 4(%[src_ptr]) \n\t" + "sw %[Temp1], 72(%[FData]) \n\t" + "sw %[Temp2], 76(%[FData]) \n\t" + "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" + + "ulw %[Temp1], 0(%[src_ptr]) \n\t" + "ulw %[Temp2], 4(%[src_ptr]) \n\t" + "sw %[Temp1], 80(%[FData]) \n\t" + "sw %[Temp2], 84(%[FData]) \n\t" + "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" + + "ulw %[Temp1], 0(%[src_ptr]) \n\t" + "ulw %[Temp2], 4(%[src_ptr]) \n\t" + "sw %[Temp1], 88(%[FData]) \n\t" + "sw %[Temp2], 92(%[FData]) \n\t" + "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" + + "ulw %[Temp1], 0(%[src_ptr]) \n\t" + "ulw %[Temp2], 4(%[src_ptr]) \n\t" + "sw %[Temp1], 96(%[FData]) \n\t" + "sw %[Temp2], 100(%[FData]) \n\t" + + : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2) + : [FData] "r" (FData), [src_ptr] "r" (src_ptr), + [src_pixels_per_line] "r" (src_pixels_per_line) + ); + } + + /* filter verticaly... 
*/ + vp8_filter_block2d_second_pass_8(FData + 16, dst_ptr, dst_pitch, 8, 8, yoffset); + } + + /* if (yoffsset == 0) vp8_filter_block2d_first_pass save data to dst_ptr */ + else + { + if (xoffset) + vp8_filter_block2d_first_pass_8_all(src_ptr, dst_ptr, src_pixels_per_line, + 8, xoffset, dst_pitch); + + else + { + /* copy from src buffer to dst buffer */ + __asm__ __volatile__ ( + "ulw %[Temp1], 0(%[src_ptr]) \n\t" + "ulw %[Temp2], 4(%[src_ptr]) \n\t" + "sw %[Temp1], 0(%[dst_ptr]) \n\t" + "sw %[Temp2], 4(%[dst_ptr]) \n\t" + "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" + + "ulw %[Temp1], 0(%[src_ptr]) \n\t" + "ulw %[Temp2], 4(%[src_ptr]) \n\t" + "sw %[Temp1], 8(%[dst_ptr]) \n\t" + "sw %[Temp2], 12(%[dst_ptr]) \n\t" + "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" + + "ulw %[Temp1], 0(%[src_ptr]) \n\t" + "ulw %[Temp2], 4(%[src_ptr]) \n\t" + "sw %[Temp1], 16(%[dst_ptr]) \n\t" + "sw %[Temp2], 20(%[dst_ptr]) \n\t" + "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" + + "ulw %[Temp1], 0(%[src_ptr]) \n\t" + "ulw %[Temp2], 4(%[src_ptr]) \n\t" + "sw %[Temp1], 24(%[dst_ptr]) \n\t" + "sw %[Temp2], 28(%[dst_ptr]) \n\t" + "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" + + "ulw %[Temp1], 0(%[src_ptr]) \n\t" + "ulw %[Temp2], 4(%[src_ptr]) \n\t" + "sw %[Temp1], 32(%[dst_ptr]) \n\t" + "sw %[Temp2], 36(%[dst_ptr]) \n\t" + "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" + + "ulw %[Temp1], 0(%[src_ptr]) \n\t" + "ulw %[Temp2], 4(%[src_ptr]) \n\t" + "sw %[Temp1], 40(%[dst_ptr]) \n\t" + "sw %[Temp2], 44(%[dst_ptr]) \n\t" + "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" + + "ulw %[Temp1], 0(%[src_ptr]) \n\t" + "ulw %[Temp2], 4(%[src_ptr]) \n\t" + "sw %[Temp1], 48(%[dst_ptr]) \n\t" + "sw %[Temp2], 52(%[dst_ptr]) \n\t" + "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" + + "ulw %[Temp1], 0(%[src_ptr]) \n\t" + "ulw %[Temp2], 4(%[src_ptr]) \n\t" + "sw %[Temp1], 56(%[dst_ptr]) \n\t" + "sw %[Temp2], 60(%[dst_ptr]) 
\n\t" + + : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2) + : [dst_ptr] "r" (dst_ptr), [src_ptr] "r" (src_ptr), + [src_pixels_per_line] "r" (src_pixels_per_line) + ); + } + } +} + + +void vp8_sixtap_predict8x4_dspr2 +( + unsigned char *RESTRICT src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + unsigned char *RESTRICT dst_ptr, + int dst_pitch +) +{ + unsigned char FData[9 * 8]; /* Temp data bufffer used in filtering */ + unsigned int pos, Temp1, Temp2; + + pos = 16; + + /* bit positon for extract from acc */ + __asm__ __volatile__ ( + "wrdsp %[pos], 1 \n\t" + : + : [pos] "r" (pos) + ); + + if (yoffset) + { + + src_ptr = src_ptr - (2 * src_pixels_per_line); + + if (xoffset) + /* filter 1-D horizontally... */ + vp8_filter_block2d_first_pass_8_all(src_ptr, FData, src_pixels_per_line, + 9, xoffset, 8); + + else + { + /* prefetch src_ptr data to cache memory */ + prefetch_load(src_ptr + 2 * src_pixels_per_line); + + __asm__ __volatile__ ( + "ulw %[Temp1], 0(%[src_ptr]) \n\t" + "ulw %[Temp2], 4(%[src_ptr]) \n\t" + "sw %[Temp1], 0(%[FData]) \n\t" + "sw %[Temp2], 4(%[FData]) \n\t" + "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" + + "ulw %[Temp1], 0(%[src_ptr]) \n\t" + "ulw %[Temp2], 4(%[src_ptr]) \n\t" + "sw %[Temp1], 8(%[FData]) \n\t" + "sw %[Temp2], 12(%[FData]) \n\t" + "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" + + "ulw %[Temp1], 0(%[src_ptr]) \n\t" + "ulw %[Temp2], 4(%[src_ptr]) \n\t" + "sw %[Temp1], 16(%[FData]) \n\t" + "sw %[Temp2], 20(%[FData]) \n\t" + "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" + + "ulw %[Temp1], 0(%[src_ptr]) \n\t" + "ulw %[Temp2], 4(%[src_ptr]) \n\t" + "sw %[Temp1], 24(%[FData]) \n\t" + "sw %[Temp2], 28(%[FData]) \n\t" + "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" + + "ulw %[Temp1], 0(%[src_ptr]) \n\t" + "ulw %[Temp2], 4(%[src_ptr]) \n\t" + "sw %[Temp1], 32(%[FData]) \n\t" + "sw %[Temp2], 36(%[FData]) \n\t" + "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" + + 
"ulw %[Temp1], 0(%[src_ptr]) \n\t" + "ulw %[Temp2], 4(%[src_ptr]) \n\t" + "sw %[Temp1], 40(%[FData]) \n\t" + "sw %[Temp2], 44(%[FData]) \n\t" + "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" + + "ulw %[Temp1], 0(%[src_ptr]) \n\t" + "ulw %[Temp2], 4(%[src_ptr]) \n\t" + "sw %[Temp1], 48(%[FData]) \n\t" + "sw %[Temp2], 52(%[FData]) \n\t" + "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" + + "ulw %[Temp1], 0(%[src_ptr]) \n\t" + "ulw %[Temp2], 4(%[src_ptr]) \n\t" + "sw %[Temp1], 56(%[FData]) \n\t" + "sw %[Temp2], 60(%[FData]) \n\t" + "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" + + "ulw %[Temp1], 0(%[src_ptr]) \n\t" + "ulw %[Temp2], 4(%[src_ptr]) \n\t" + "sw %[Temp1], 64(%[FData]) \n\t" + "sw %[Temp2], 68(%[FData]) \n\t" + + : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2) + : [FData] "r" (FData), [src_ptr] "r" (src_ptr), + [src_pixels_per_line] "r" (src_pixels_per_line) + ); + } + + /* filter verticaly... */ + vp8_filter_block2d_second_pass_8(FData + 16, dst_ptr, dst_pitch, 4, 8, yoffset); + } + + /* if (yoffsset == 0) vp8_filter_block2d_first_pass save data to dst_ptr */ + else + { + if (xoffset) + vp8_filter_block2d_first_pass_8_all(src_ptr, dst_ptr, src_pixels_per_line, + 4, xoffset, dst_pitch); + + else + { + /* copy from src buffer to dst buffer */ + __asm__ __volatile__ ( + "ulw %[Temp1], 0(%[src_ptr]) \n\t" + "ulw %[Temp2], 4(%[src_ptr]) \n\t" + "sw %[Temp1], 0(%[dst_ptr]) \n\t" + "sw %[Temp2], 4(%[dst_ptr]) \n\t" + "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" + + "ulw %[Temp1], 0(%[src_ptr]) \n\t" + "ulw %[Temp2], 4(%[src_ptr]) \n\t" + "sw %[Temp1], 8(%[dst_ptr]) \n\t" + "sw %[Temp2], 12(%[dst_ptr]) \n\t" + "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" + + "ulw %[Temp1], 0(%[src_ptr]) \n\t" + "ulw %[Temp2], 4(%[src_ptr]) \n\t" + "sw %[Temp1], 16(%[dst_ptr]) \n\t" + "sw %[Temp2], 20(%[dst_ptr]) \n\t" + "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" + + "ulw %[Temp1], 0(%[src_ptr]) \n\t" + 
"ulw %[Temp2], 4(%[src_ptr]) \n\t" + "sw %[Temp1], 24(%[dst_ptr]) \n\t" + "sw %[Temp2], 28(%[dst_ptr]) \n\t" + + : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2) + : [dst_ptr] "r" (dst_ptr), [src_ptr] "r" (src_ptr), + [src_pixels_per_line] "r" (src_pixels_per_line) + ); + } + } +} + + +void vp8_sixtap_predict16x16_dspr2 +( + unsigned char *RESTRICT src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + unsigned char *RESTRICT dst_ptr, + int dst_pitch +) +{ + const unsigned short *VFilter; + unsigned char FData[21 * 16]; /* Temp data bufffer used in filtering */ + unsigned int pos; + + VFilter = sub_pel_filterss[yoffset]; + + pos = 16; + + /* bit positon for extract from acc */ + __asm__ __volatile__ ( + "wrdsp %[pos], 1 \n\t" + : + : [pos] "r" (pos) + ); + + if (yoffset) + { + + src_ptr = src_ptr - (2 * src_pixels_per_line); + + switch (xoffset) + { + /* filter 1-D horizontally... */ + case 2: + case 4: + case 6: + /* 6 tap filter */ + vp8_filter_block2d_first_pass16_6tap(src_ptr, FData, src_pixels_per_line, + 21, xoffset, 16); + break; + + case 0: + /* only copy buffer */ + vp8_filter_block2d_first_pass16_0(src_ptr, FData, src_pixels_per_line); + break; + + case 1: + case 3: + case 5: + case 7: + /* 4 tap filter */ + vp8_filter_block2d_first_pass16_4tap(src_ptr, FData, src_pixels_per_line, 16, + 21, xoffset, yoffset, dst_ptr, dst_pitch); + break; + } + + /* filter verticaly... 
*/ + vp8_filter_block2d_second_pass161(FData + 32, dst_ptr, dst_pitch, VFilter); + } + else + { + /* if (yoffsset == 0) vp8_filter_block2d_first_pass save data to dst_ptr */ + switch (xoffset) + { + case 2: + case 4: + case 6: + /* 6 tap filter */ + vp8_filter_block2d_first_pass16_6tap(src_ptr, dst_ptr, src_pixels_per_line, + 16, xoffset, dst_pitch); + break; + + case 1: + case 3: + case 5: + case 7: + /* 4 tap filter */ + vp8_filter_block2d_first_pass16_4tap(src_ptr, dst_ptr, src_pixels_per_line, 16, + 21, xoffset, yoffset, dst_ptr, dst_pitch); + break; + } + } +} + +#endif diff --git a/vp8/common/mips/dspr2/idct_blk_dspr2.c b/vp8/common/mips/dspr2/idct_blk_dspr2.c new file mode 100644 index 0000000..1e0ebd1 --- /dev/null +++ b/vp8/common/mips/dspr2/idct_blk_dspr2.c @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "vpx_config.h"
+#include "vpx_rtcd.h"
+
+#if HAVE_DSPR2
+
+/* Dequantize and inverse-transform the 16 4x4 luma blocks of a macroblock,
+ * adding the result to the 16x16 predictor already in dst.
+ *   q      - quantized coefficients, 16 shorts per 4x4 block
+ *   dq     - dequantization factors
+ *   dst    - 16x16 destination/predictor, row stride = stride
+ *   eobs   - per-block end-of-block counts; > 1 means AC coefficients are
+ *            present, otherwise only the DC term needs to be added
+ */
+void vp8_dequant_idct_add_y_block_dspr2
+(short *q, short *dq,
+ unsigned char *dst, int stride, char *eobs)
+{
+    int i, j;
+
+    for (i = 0; i < 4; i++)
+    {
+        for (j = 0; j < 4; j++)
+        {
+            if (*eobs++ > 1)
+                vp8_dequant_idct_add_dspr2(q, dq, dst, stride);
+            else
+            {
+                /* DC-only block: add (q[0]*dq[0] + 4) >> 3 to the predictor */
+                vp8_dc_only_idct_add_dspr2(q[0]*dq[0], dst, stride, dst, stride);
+                /* clear the first two coefficients with one 32-bit store
+                 * NOTE(review): short* accessed through int* — relies on the
+                 * compiler not exploiting strict aliasing here; confirm build
+                 * flags (-fno-strict-aliasing) or use two short stores.
+                 */
+                ((int *)q)[0] = 0;
+            }
+
+            q += 16;    /* next 4x4 coefficient block */
+            dst += 4;   /* next 4 columns */
+        }
+
+        dst += 4 * stride - 16;  /* down 4 rows, back to column 0 */
+    }
+}
+
+/* Same as the Y variant, but for the two 8x8 chroma planes (U then V):
+ * 2x2 grid of 4x4 blocks per plane, both planes share the same stride.
+ */
+void vp8_dequant_idct_add_uv_block_dspr2
+(short *q, short *dq,
+ unsigned char *dstu, unsigned char *dstv, int stride, char *eobs)
+{
+    int i, j;
+
+    for (i = 0; i < 2; i++)
+    {
+        for (j = 0; j < 2; j++)
+        {
+            if (*eobs++ > 1)
+                vp8_dequant_idct_add_dspr2(q, dq, dstu, stride);
+            else
+            {
+                vp8_dc_only_idct_add_dspr2(q[0]*dq[0], dstu, stride, dstu, stride);
+                /* see aliasing note in the Y-block function above */
+                ((int *)q)[0] = 0;
+            }
+
+            q += 16;
+            dstu += 4;
+        }
+
+        dstu += 4 * stride - 8;  /* down 4 rows in the 8-wide plane */
+    }
+
+    for (i = 0; i < 2; i++)
+    {
+        for (j = 0; j < 2; j++)
+        {
+            if (*eobs++ > 1)
+                vp8_dequant_idct_add_dspr2(q, dq, dstv, stride);
+            else
+            {
+                vp8_dc_only_idct_add_dspr2(q[0]*dq[0], dstv, stride, dstv, stride);
+                ((int *)q)[0] = 0;
+            }
+
+            q += 16;
+            dstv += 4;
+        }
+
+        dstv += 4 * stride - 8;
+    }
+}
+
+#endif
+
diff --git a/vp8/common/mips/dspr2/idctllm_dspr2.c b/vp8/common/mips/dspr2/idctllm_dspr2.c
new file mode 100644
index 0000000..25b7936
--- /dev/null
+++ b/vp8/common/mips/dspr2/idctllm_dspr2.c
@@ -0,0 +1,369 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vpx_rtcd.h"
+
+#if HAVE_DSPR2
+#define CROP_WIDTH 256
+
+/******************************************************************************
+ * Notes:
+ *
+ * This implementation makes use of 16 bit fixed point version of two multiply
+ * constants:
+ * 1. sqrt(2) * cos (pi/8)
+ * 2. sqrt(2) * sin (pi/8)
+ * Since the first constant is bigger than 1, to maintain the same 16 bit
+ * fixed point precision as the second one, we use a trick of
+ * x * a = x + x*(a-1)
+ * so
+ * x * sqrt(2) * cos (pi/8) = x + x * (sqrt(2) *cos(pi/8)-1).
+ ****************************************************************************/
+extern unsigned char ff_cropTbl[256 + 2 * CROP_WIDTH];
+static const int cospi8sqrt2minus1 = 20091;  /* (sqrt(2)*cos(pi/8) - 1) in Q16 */
+static const int sinpi8sqrt2 = 35468;        /* sqrt(2)*sin(pi/8) in Q16 */
+
+/* Issue a load prefetch for the cache line holding *src.
+ * NOTE(review): plain `inline` without `static` depends on GNU89 inline
+ * semantics; confirm the build uses -fgnu89-inline or consider static inline.
+ */
+inline void prefetch_load_short(short *src)
+{
+    __asm__ __volatile__ (
+        "pref   0,  0(%[src])   \n\t"
+        :
+        : [src] "r" (src)
+    );
+}
+
+/* 4x4 inverse transform (two fully unrolled 1-D passes over a 4x4 block of
+ * shorts); the second pass applies the (x + 4) >> 3 rounding, then the result
+ * is added to the predictor and clamped to [0,255] through the crop table.
+ * The first pass writes op[] with a transposed index pattern so the second
+ * pass can run over consecutive rows of `output`.
+ */
+void vp8_short_idct4x4llm_dspr2(short *input, unsigned char *pred_ptr,
+                                int pred_stride, unsigned char *dst_ptr,
+                                int dst_stride)
+{
+    int r, c;
+    int a1, b1, c1, d1;
+    short output[16];
+    short *ip = input;
+    short *op = output;
+    int temp1, temp2;
+    int shortpitch = 4;
+
+    int c2, d2;
+    int temp3, temp4;
+    unsigned char *cm = ff_cropTbl + CROP_WIDTH;  /* centered clamp table */
+
+    /* prepare data for load */
+    prefetch_load_short(ip + 8);
+
+    /* first loop is unrolled: columns 0/1 share butterflies, then 2/3 */
+    a1 = ip[0] + ip[8];
+    b1 = ip[0] - ip[8];
+
+    temp1 = (ip[4] * sinpi8sqrt2) >> 16;
+    temp2 = ip[12] + ((ip[12] * cospi8sqrt2minus1) >> 16);
+    c1 = temp1 - temp2;
+
+    temp1 = ip[4] + ((ip[4] * cospi8sqrt2minus1) >> 16);
+    temp2 = (ip[12] * sinpi8sqrt2) >> 16;
+    d1 = temp1 + temp2;
+
+    temp3 = (ip[5] * sinpi8sqrt2) >> 16;
+    temp4 = ip[13] + ((ip[13] * cospi8sqrt2minus1) >> 16);
+    c2 = temp3 - temp4;
+
+    temp3 = ip[5] + ((ip[5] * cospi8sqrt2minus1) >> 16);
+    temp4 = (ip[13] * sinpi8sqrt2) >> 16;
+    d2 = temp3 + temp4;
+
+    /* transposed store: column results land in rows of output */
+    op[0] = a1 + d1;
+    op[12] = a1 - d1;
+    op[4] = b1 + c1;
+    op[8] = b1 - c1;
+
+    a1 = ip[1] + ip[9];
+    b1 = ip[1] - ip[9];
+
+    op[1] = a1 + d2;
+    op[13] = a1 - d2;
+    op[5] = b1 + c2;
+    op[9] = b1 - c2;
+
+    a1 = ip[2] + ip[10];
+    b1 = ip[2] - ip[10];
+
+    temp1 = (ip[6] * sinpi8sqrt2) >> 16;
+    temp2 = ip[14] + ((ip[14] * cospi8sqrt2minus1) >> 16);
+    c1 = temp1 - temp2;
+
+    temp1 = ip[6] + ((ip[6] * cospi8sqrt2minus1) >> 16);
+    temp2 = (ip[14] * sinpi8sqrt2) >> 16;
+    d1 = temp1 + temp2;
+
+    temp3 = (ip[7] * sinpi8sqrt2) >> 16;
+    temp4 = ip[15] + ((ip[15] * cospi8sqrt2minus1) >> 16);
+    c2 = temp3 - temp4;
+
+    temp3 = ip[7] + ((ip[7] * cospi8sqrt2minus1) >> 16);
+    temp4 = (ip[15] * sinpi8sqrt2) >> 16;
+    d2 = temp3 + temp4;
+
+    op[2] = a1 + d1;
+    op[14] = a1 - d1;
+    op[6] = b1 + c1;
+    op[10] = b1 - c1;
+
+    a1 = ip[3] + ip[11];
+    b1 = ip[3] - ip[11];
+
+    op[3] = a1 + d2;
+    op[15] = a1 - d2;
+    op[7] = b1 + c2;
+    op[11] = b1 - c2;
+
+    ip = output;
+
+    /* prepare data for load */
+    prefetch_load_short(ip + shortpitch);
+
+    /* second loop is unrolled: same butterflies over rows, with rounding */
+    a1 = ip[0] + ip[2];
+    b1 = ip[0] - ip[2];
+
+    temp1 = (ip[1] * sinpi8sqrt2) >> 16;
+    temp2 = ip[3] + ((ip[3] * cospi8sqrt2minus1) >> 16);
+    c1 = temp1 - temp2;
+
+    temp1 = ip[1] + ((ip[1] * cospi8sqrt2minus1) >> 16);
+    temp2 = (ip[3] * sinpi8sqrt2) >> 16;
+    d1 = temp1 + temp2;
+
+    temp3 = (ip[5] * sinpi8sqrt2) >> 16;
+    temp4 = ip[7] + ((ip[7] * cospi8sqrt2minus1) >> 16);
+    c2 = temp3 - temp4;
+
+    temp3 = ip[5] + ((ip[5] * cospi8sqrt2minus1) >> 16);
+    temp4 = (ip[7] * sinpi8sqrt2) >> 16;
+    d2 = temp3 + temp4;
+
+    op[0] = (a1 + d1 + 4) >> 3;
+    op[3] = (a1 - d1 + 4) >> 3;
+    op[1] = (b1 + c1 + 4) >> 3;
+    op[2] = (b1 - c1 + 4) >> 3;
+
+    a1 = ip[4] + ip[6];
+    b1 = ip[4] - ip[6];
+
+    op[4] = (a1 + d2 + 4) >> 3;
+    op[7] = (a1 - d2 + 4) >> 3;
+    op[5] = (b1 + c2 + 4) >> 3;
+    op[6] = (b1 - c2 + 4) >> 3;
+
+    a1 = ip[8] + ip[10];
+    b1 = ip[8] - ip[10];
+
+    temp1 = (ip[9] * sinpi8sqrt2) >> 16;
+    temp2 = ip[11] + ((ip[11] * cospi8sqrt2minus1) >> 16);
+    c1 = temp1 - temp2;
+
+    temp1 = ip[9] + ((ip[9] * cospi8sqrt2minus1) >> 16);
+    temp2 = (ip[11] * sinpi8sqrt2) >> 16;
+    d1 = temp1 + temp2;
+
+    temp3 = (ip[13] * sinpi8sqrt2) >> 16;
+    temp4 = ip[15] + ((ip[15] * cospi8sqrt2minus1) >> 16);
+    c2 = temp3 - temp4;
+
+    temp3 = ip[13] + ((ip[13] * cospi8sqrt2minus1) >> 16);
+    temp4 = (ip[15] * sinpi8sqrt2) >> 16;
+    d2 = temp3 + temp4;
+
+    op[8] = (a1 + d1 + 4) >> 3;
+    op[11] = (a1 - d1 + 4) >> 3;
+    op[9] = (b1 + c1 + 4) >> 3;
+    op[10] = (b1 - c1 + 4) >> 3;
+
+    a1 = ip[12] + ip[14];
+    b1 = ip[12] - ip[14];
+
+    op[12] = (a1 + d2 + 4) >> 3;
+    op[15] = (a1 - d2 + 4) >> 3;
+    op[13] = (b1 + c2 + 4) >> 3;
+    op[14] = (b1 - c2 + 4) >> 3;
+
+    ip = output;
+
+    /* reconstruction: residual + predictor, clamped via crop table */
+    for (r = 0; r < 4; r++)
+    {
+        for (c = 0; c < 4; c++)
+        {
+            short a = ip[c] + pred_ptr[c] ;
+            dst_ptr[c] = cm[a] ;
+        }
+
+        ip += 4;
+        dst_ptr += dst_stride;
+        pred_ptr += pred_stride;
+    }
+}
+
+/* DC-only inverse transform: adds a1 = (input_dc + 4) >> 3 to every pixel of
+ * a 4x4 predictor block using saturating quad-byte SIMD. A negative a1 is
+ * handled by subtracting |a1| instead; rows are assumed 4-byte aligned (lw/sw).
+ */
+void vp8_dc_only_idct_add_dspr2(short input_dc, unsigned char *pred_ptr, int pred_stride, unsigned char *dst_ptr, int dst_stride)
+{
+    int a1;
+    int i, absa1;
+    int t2, vector_a1, vector_a;
+
+    /* a1 = ((input_dc + 4) >> 3); */
+    __asm__ __volatile__ (
+        "addi  %[a1], %[input_dc], 4   \n\t"
+        "sra   %[a1], %[a1], 3         \n\t"
+        : [a1] "=r" (a1)
+        : [input_dc] "r" (input_dc)
+    );
+
+    if (a1 < 0)
+    {
+        /* use quad-byte
+         * input and output memory are four byte aligned
+         */
+        __asm__ __volatile__ (
+            "abs        %[absa1],     %[a1]      \n\t"
+            "replv.qb   %[vector_a1], %[absa1]   \n\t"
+            : [absa1] "=r" (absa1), [vector_a1] "=r" (vector_a1)
+            : [a1] "r" (a1)
+        );
+
+        /* use (a1 - predptr[c]) instead a1 + predptr[c] */
+        for (i = 4; i--;)
+        {
+            __asm__ __volatile__ (
+                "lw          %[t2],       0(%[pred_ptr])                \n\t"
+                "add         %[pred_ptr], %[pred_ptr], %[pred_stride]   \n\t"
+                "subu_s.qb   %[vector_a], %[t2],       %[vector_a1]     \n\t"
+                "sw          %[vector_a], 0(%[dst_ptr])                 \n\t"
+                "add         %[dst_ptr],  %[dst_ptr],  %[dst_stride]    \n\t"
+                : [t2] "=&r" (t2), [vector_a] "=&r" (vector_a),
+                  [dst_ptr] "+&r" (dst_ptr), [pred_ptr] "+&r" (pred_ptr)
+                : [dst_stride] "r" (dst_stride), [pred_stride] "r" (pred_stride), [vector_a1] "r" (vector_a1)
+            );
+        }
+    }
+    else
+    {
+        /* use quad-byte
+         * input and output memory are four byte aligned
+         */
+        __asm__ __volatile__ (
+            "replv.qb   %[vector_a1], %[a1]   \n\t"
+            : [vector_a1] "=r" (vector_a1)
+            : [a1] "r" (a1)
+        );
+
+        for (i = 4; i--;)
+        {
+            __asm__ __volatile__ (
+                "lw          %[t2],       0(%[pred_ptr])                \n\t"
+                "add         %[pred_ptr], %[pred_ptr], %[pred_stride]   \n\t"
+                "addu_s.qb   %[vector_a], %[vector_a1], %[t2]           \n\t"
+                "sw          %[vector_a], 0(%[dst_ptr])                 \n\t"
+                "add         %[dst_ptr],  %[dst_ptr],  %[dst_stride]    \n\t"
+                : [t2] "=&r" (t2), [vector_a] "=&r" (vector_a),
+                  [dst_ptr] "+&r" (dst_ptr), [pred_ptr] "+&r" (pred_ptr)
+                : [dst_stride] "r" (dst_stride), [pred_stride] "r" (pred_stride), [vector_a1] "r" (vector_a1)
+            );
+        }
+    }
+
+}
+
+/* Inverse Walsh-Hadamard transform of the 16 second-order (DC) coefficients;
+ * each result is scattered to the DC slot of one of the macroblock's 16
+ * coefficient blocks (mb_dqcoeff[i * 16]).
+ */
+void vp8_short_inv_walsh4x4_dspr2(short *input, short *mb_dqcoeff)
+{
+    short output[16];
+    int i;
+    int a1, b1, c1, d1;
+    int a2, b2, c2, d2;
+    short *ip = input;
+    short *op = output;
+
+    prefetch_load_short(ip);
+
+    /* vertical pass, one column per iteration */
+    for (i = 4; i--;)
+    {
+        a1 = ip[0] + ip[12];
+        b1 = ip[4] + ip[8];
+        c1 = ip[4] - ip[8];
+        d1 = ip[0] - ip[12];
+
+        op[0] = a1 + b1;
+        op[4] = c1 + d1;
+        op[8] = a1 - b1;
+        op[12] = d1 - c1;
+
+        ip++;
+        op++;
+    }
+
+    ip = output;
+    op = output;
+
+    prefetch_load_short(ip);
+
+    /* horizontal pass with rounding: the two +3 terms add up with the >> 3
+     * to implement round-to-nearest for the a2/b2/c2/d2 combinations */
+    for (i = 4; i--;)
+    {
+        a1 = ip[0] + ip[3] + 3;
+        b1 = ip[1] + ip[2];
+        c1 = ip[1] - ip[2];
+        d1 = ip[0] - ip[3] + 3;
+
+        a2 = a1 + b1;
+        b2 = d1 + c1;
+        c2 = a1 - b1;
+        d2 = d1 - c1;
+
+        op[0] = a2 >> 3;
+        op[1] = b2 >> 3;
+        op[2] = c2 >> 3;
+        op[3] = d2 >> 3;
+
+        ip += 4;
+        op += 4;
+    }
+
+    /* scatter: one short per block, blocks are 16 shorts apart */
+    for (i = 0; i < 16; i++)
+    {
+        mb_dqcoeff[i * 16] = output[i];
+    }
+}
+
+/* DC-only inverse WHT: replicate (input[0] + 3) >> 3 into the DC position of
+ * all 16 blocks. Offsets are byte offsets: 32 bytes = 16 shorts per block.
+ */
+void vp8_short_inv_walsh4x4_1_dspr2(short *input, short *mb_dqcoeff)
+{
+    int a1;
+
+    a1 = ((input[0] + 3) >> 3);
+
+    __asm__ __volatile__ (
+        "sh   %[a1],   0(%[mb_dqcoeff])   \n\t"
+        "sh   %[a1],  32(%[mb_dqcoeff])   \n\t"
+        "sh   %[a1],  64(%[mb_dqcoeff])   \n\t"
+        "sh   %[a1],  96(%[mb_dqcoeff])   \n\t"
+        "sh   %[a1], 128(%[mb_dqcoeff])   \n\t"
+        "sh   %[a1], 160(%[mb_dqcoeff])   \n\t"
+        "sh   %[a1], 192(%[mb_dqcoeff])   \n\t"
+        "sh   %[a1], 224(%[mb_dqcoeff])   \n\t"
+        "sh   %[a1], 256(%[mb_dqcoeff])   \n\t"
+        "sh   %[a1], 288(%[mb_dqcoeff])   \n\t"
+        "sh   %[a1], 320(%[mb_dqcoeff])   \n\t"
+        "sh   %[a1], 352(%[mb_dqcoeff])   \n\t"
+        "sh   %[a1], 384(%[mb_dqcoeff])   \n\t"
+        "sh   %[a1], 416(%[mb_dqcoeff])   \n\t"
+        "sh   %[a1], 448(%[mb_dqcoeff])   \n\t"
+        "sh   %[a1], 480(%[mb_dqcoeff])   \n\t"
+
+        :
+        : [a1] "r" (a1), [mb_dqcoeff] "r" (mb_dqcoeff)
+    );
+}
+
+#endif
diff --git a/vp8/common/mips/dspr2/loopfilter_filters_dspr2.c b/vp8/common/mips/dspr2/loopfilter_filters_dspr2.c
new file mode 100644
index 0000000..b8e5e4d
--- /dev/null
+++ b/vp8/common/mips/dspr2/loopfilter_filters_dspr2.c
@@ -0,0 +1,2622 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */ + + +#include +#include "vpx_rtcd.h" +#include "vp8/common/onyxc_int.h" + +#if HAVE_DSPR2 +typedef unsigned char uc; + +/* prefetch data for load */ +inline void prefetch_load_lf(unsigned char *src) +{ + __asm__ __volatile__ ( + "pref 0, 0(%[src]) \n\t" + : + : [src] "r" (src) + ); +} + + +/* prefetch data for store */ +inline void prefetch_store_lf(unsigned char *dst) +{ + __asm__ __volatile__ ( + "pref 1, 0(%[dst]) \n\t" + : + : [dst] "r" (dst) + ); +} + +/* processing 4 pixels at the same time + * compute hev and mask in the same function + */ +static __inline void vp8_filter_mask_vec_mips +( + uint32_t limit, + uint32_t flimit, + uint32_t p1, + uint32_t p0, + uint32_t p3, + uint32_t p2, + uint32_t q0, + uint32_t q1, + uint32_t q2, + uint32_t q3, + uint32_t thresh, + uint32_t *hev, + uint32_t *mask +) +{ + uint32_t c, r, r3, r_k; + uint32_t s1, s2, s3; + uint32_t ones = 0xFFFFFFFF; + uint32_t hev1; + + __asm__ __volatile__ ( + /* mask |= (abs(p3 - p2) > limit) */ + "subu_s.qb %[c], %[p3], %[p2] \n\t" + "subu_s.qb %[r_k], %[p2], %[p3] \n\t" + "or %[r_k], %[r_k], %[c] \n\t" + "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" + "or %[r], $0, %[c] \n\t" + + /* mask |= (abs(p2 - p1) > limit) */ + "subu_s.qb %[c], %[p2], %[p1] \n\t" + "subu_s.qb %[r_k], %[p1], %[p2] \n\t" + "or %[r_k], %[r_k], %[c] \n\t" + "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" + "or %[r], %[r], %[c] \n\t" + + /* mask |= (abs(p1 - p0) > limit) + * hev |= (abs(p1 - p0) > thresh) + */ + "subu_s.qb %[c], %[p1], %[p0] \n\t" + "subu_s.qb %[r_k], %[p0], %[p1] \n\t" + "or %[r_k], %[r_k], %[c] \n\t" + "cmpgu.lt.qb %[c], %[thresh], %[r_k] \n\t" + "or %[r3], $0, %[c] \n\t" + "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" + "or %[r], %[r], %[c] \n\t" + + /* mask |= (abs(q1 - q0) > limit) + * hev |= (abs(q1 - q0) > thresh) + */ + "subu_s.qb %[c], %[q1], %[q0] \n\t" + "subu_s.qb %[r_k], %[q0], %[q1] \n\t" + "or %[r_k], %[r_k], %[c] \n\t" + "cmpgu.lt.qb %[c], %[thresh], %[r_k] \n\t" + "or %[r3], %[r3], %[c] \n\t" + 
"cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" + "or %[r], %[r], %[c] \n\t" + + /* mask |= (abs(q2 - q1) > limit) */ + "subu_s.qb %[c], %[q2], %[q1] \n\t" + "subu_s.qb %[r_k], %[q1], %[q2] \n\t" + "or %[r_k], %[r_k], %[c] \n\t" + "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" + "or %[r], %[r], %[c] \n\t" + "sll %[r3], %[r3], 24 \n\t" + + /* mask |= (abs(q3 - q2) > limit) */ + "subu_s.qb %[c], %[q3], %[q2] \n\t" + "subu_s.qb %[r_k], %[q2], %[q3] \n\t" + "or %[r_k], %[r_k], %[c] \n\t" + "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" + "or %[r], %[r], %[c] \n\t" + + : [c] "=&r" (c), [r_k] "=&r" (r_k), + [r] "=&r" (r), [r3] "=&r" (r3) + : [limit] "r" (limit), [p3] "r" (p3), [p2] "r" (p2), + [p1] "r" (p1), [p0] "r" (p0), [q1] "r" (q1), [q0] "r" (q0), + [q2] "r" (q2), [q3] "r" (q3), [thresh] "r" (thresh) + ); + + __asm__ __volatile__ ( + /* abs(p0 - q0) */ + "subu_s.qb %[c], %[p0], %[q0] \n\t" + "subu_s.qb %[r_k], %[q0], %[p0] \n\t" + "wrdsp %[r3] \n\t" + "or %[s1], %[r_k], %[c] \n\t" + + /* abs(p1 - q1) */ + "subu_s.qb %[c], %[p1], %[q1] \n\t" + "addu_s.qb %[s3], %[s1], %[s1] \n\t" + "pick.qb %[hev1], %[ones], $0 \n\t" + "subu_s.qb %[r_k], %[q1], %[p1] \n\t" + "or %[s2], %[r_k], %[c] \n\t" + + /* abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > flimit * 2 + limit */ + "shrl.qb %[s2], %[s2], 1 \n\t" + "addu_s.qb %[s1], %[s2], %[s3] \n\t" + "cmpgu.lt.qb %[c], %[flimit], %[s1] \n\t" + "or %[r], %[r], %[c] \n\t" + "sll %[r], %[r], 24 \n\t" + + "wrdsp %[r] \n\t" + "pick.qb %[s2], $0, %[ones] \n\t" + + : [c] "=&r" (c), [r_k] "=&r" (r_k), [s1] "=&r" (s1), [hev1] "=&r" (hev1), + [s2] "=&r" (s2), [r] "+r" (r), [s3] "=&r" (s3) + : [p0] "r" (p0), [q0] "r" (q0), [p1] "r" (p1), [r3] "r" (r3), + [q1] "r" (q1), [ones] "r" (ones), [flimit] "r" (flimit) + ); + + *hev = hev1; + *mask = s2; +} + + +/* inputs & outputs are quad-byte vectors */ +static __inline void vp8_filter_mips +( + uint32_t mask, + uint32_t hev, + uint32_t *ps1, + uint32_t *ps0, + uint32_t *qs0, + uint32_t *qs1 +) +{ + int32_t vp8_filter_l, 
vp8_filter_r; + int32_t Filter1_l, Filter1_r, Filter2_l, Filter2_r; + int32_t subr_r, subr_l; + uint32_t t1, t2, HWM, t3; + uint32_t hev_l, hev_r, mask_l, mask_r, invhev_l, invhev_r; + + int32_t vps1, vps0, vqs0, vqs1; + int32_t vps1_l, vps1_r, vps0_l, vps0_r, vqs0_l, vqs0_r, vqs1_l, vqs1_r; + uint32_t N128; + + N128 = 0x80808080; + t1 = 0x03000300; + t2 = 0x04000400; + t3 = 0x01000100; + HWM = 0xFF00FF00; + + vps0 = (*ps0) ^ N128; + vps1 = (*ps1) ^ N128; + vqs0 = (*qs0) ^ N128; + vqs1 = (*qs1) ^ N128; + + /* use halfword pairs instead quad-bytes because of accuracy */ + vps0_l = vps0 & HWM; + vps0_r = vps0 << 8; + vps0_r = vps0_r & HWM; + + vps1_l = vps1 & HWM; + vps1_r = vps1 << 8; + vps1_r = vps1_r & HWM; + + vqs0_l = vqs0 & HWM; + vqs0_r = vqs0 << 8; + vqs0_r = vqs0_r & HWM; + + vqs1_l = vqs1 & HWM; + vqs1_r = vqs1 << 8; + vqs1_r = vqs1_r & HWM; + + mask_l = mask & HWM; + mask_r = mask << 8; + mask_r = mask_r & HWM; + + hev_l = hev & HWM; + hev_r = hev << 8; + hev_r = hev_r & HWM; + + __asm__ __volatile__ ( + /* vp8_filter = vp8_signed_char_clamp(ps1 - qs1); */ + "subq_s.ph %[vp8_filter_l], %[vps1_l], %[vqs1_l] \n\t" + "subq_s.ph %[vp8_filter_r], %[vps1_r], %[vqs1_r] \n\t" + + /* qs0 - ps0 */ + "subq_s.ph %[subr_l], %[vqs0_l], %[vps0_l] \n\t" + "subq_s.ph %[subr_r], %[vqs0_r], %[vps0_r] \n\t" + + /* vp8_filter &= hev; */ + "and %[vp8_filter_l], %[vp8_filter_l], %[hev_l] \n\t" + "and %[vp8_filter_r], %[vp8_filter_r], %[hev_r] \n\t" + + /* vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * (qs0 - ps0)); */ + "addq_s.ph %[vp8_filter_l], %[vp8_filter_l], %[subr_l] \n\t" + "addq_s.ph %[vp8_filter_r], %[vp8_filter_r], %[subr_r] \n\t" + "xor %[invhev_l], %[hev_l], %[HWM] \n\t" + "addq_s.ph %[vp8_filter_l], %[vp8_filter_l], %[subr_l] \n\t" + "addq_s.ph %[vp8_filter_r], %[vp8_filter_r], %[subr_r] \n\t" + "xor %[invhev_r], %[hev_r], %[HWM] \n\t" + "addq_s.ph %[vp8_filter_l], %[vp8_filter_l], %[subr_l] \n\t" + "addq_s.ph %[vp8_filter_r], %[vp8_filter_r], %[subr_r] \n\t" 
+ + /* vp8_filter &= mask; */ + "and %[vp8_filter_l], %[vp8_filter_l], %[mask_l] \n\t" + "and %[vp8_filter_r], %[vp8_filter_r], %[mask_r] \n\t" + + : [vp8_filter_l] "=&r" (vp8_filter_l), [vp8_filter_r] "=&r" (vp8_filter_r), + [subr_l] "=&r" (subr_l), [subr_r] "=&r" (subr_r), + [invhev_l] "=&r" (invhev_l), [invhev_r] "=&r" (invhev_r) + + : [vps0_l] "r" (vps0_l), [vps0_r] "r" (vps0_r), [vps1_l] "r" (vps1_l), + [vps1_r] "r" (vps1_r), [vqs0_l] "r" (vqs0_l), [vqs0_r] "r" (vqs0_r), + [vqs1_l] "r" (vqs1_l), [vqs1_r] "r" (vqs1_r), + [mask_l] "r" (mask_l), [mask_r] "r" (mask_r), + [hev_l] "r" (hev_l), [hev_r] "r" (hev_r), + [HWM] "r" (HWM) + ); + + /* save bottom 3 bits so that we round one side +4 and the other +3 */ + __asm__ __volatile__ ( + /* Filter2 = vp8_signed_char_clamp(vp8_filter + 3) >>= 3; */ + "addq_s.ph %[Filter1_l], %[vp8_filter_l], %[t2] \n\t" + "addq_s.ph %[Filter1_r], %[vp8_filter_r], %[t2] \n\t" + + /* Filter1 = vp8_signed_char_clamp(vp8_filter + 4) >>= 3; */ + "addq_s.ph %[Filter2_l], %[vp8_filter_l], %[t1] \n\t" + "addq_s.ph %[Filter2_r], %[vp8_filter_r], %[t1] \n\t" + "shra.ph %[Filter1_r], %[Filter1_r], 3 \n\t" + "shra.ph %[Filter1_l], %[Filter1_l], 3 \n\t" + + "shra.ph %[Filter2_l], %[Filter2_l], 3 \n\t" + "shra.ph %[Filter2_r], %[Filter2_r], 3 \n\t" + + "and %[Filter1_l], %[Filter1_l], %[HWM] \n\t" + "and %[Filter1_r], %[Filter1_r], %[HWM] \n\t" + + /* vps0 = vp8_signed_char_clamp(ps0 + Filter2); */ + "addq_s.ph %[vps0_l], %[vps0_l], %[Filter2_l] \n\t" + "addq_s.ph %[vps0_r], %[vps0_r], %[Filter2_r] \n\t" + + /* vqs0 = vp8_signed_char_clamp(qs0 - Filter1); */ + "subq_s.ph %[vqs0_l], %[vqs0_l], %[Filter1_l] \n\t" + "subq_s.ph %[vqs0_r], %[vqs0_r], %[Filter1_r] \n\t" + + : [Filter1_l] "=&r" (Filter1_l), [Filter1_r] "=&r" (Filter1_r), + [Filter2_l] "=&r" (Filter2_l), [Filter2_r] "=&r" (Filter2_r), + [vps0_l] "+r" (vps0_l), [vps0_r] "+r" (vps0_r), + [vqs0_l] "+r" (vqs0_l), [vqs0_r] "+r" (vqs0_r) + + : [t1] "r" (t1), [t2] "r" (t2), + [vp8_filter_l] "r" 
(vp8_filter_l), [vp8_filter_r] "r" (vp8_filter_r), + [HWM] "r" (HWM) + ); + + __asm__ __volatile__ ( + /* (vp8_filter += 1) >>= 1 */ + "addqh.ph %[Filter1_l], %[Filter1_l], %[t3] \n\t" + "addqh.ph %[Filter1_r], %[Filter1_r], %[t3] \n\t" + + /* vp8_filter &= ~hev; */ + "and %[Filter1_l], %[Filter1_l], %[invhev_l] \n\t" + "and %[Filter1_r], %[Filter1_r], %[invhev_r] \n\t" + + /* vps1 = vp8_signed_char_clamp(ps1 + vp8_filter); */ + "addq_s.ph %[vps1_l], %[vps1_l], %[Filter1_l] \n\t" + "addq_s.ph %[vps1_r], %[vps1_r], %[Filter1_r] \n\t" + + /* vqs1 = vp8_signed_char_clamp(qs1 - vp8_filter); */ + "subq_s.ph %[vqs1_l], %[vqs1_l], %[Filter1_l] \n\t" + "subq_s.ph %[vqs1_r], %[vqs1_r], %[Filter1_r] \n\t" + + : [Filter1_l] "+r" (Filter1_l), [Filter1_r] "+r" (Filter1_r), + [vps1_l] "+r" (vps1_l), [vps1_r] "+r" (vps1_r), + [vqs1_l] "+r" (vqs1_l), [vqs1_r] "+r" (vqs1_r) + + : [t3] "r" (t3), [invhev_l] "r" (invhev_l), [invhev_r] "r" (invhev_r) + ); + + /* Create quad-bytes from halfword pairs */ + vqs0_l = vqs0_l & HWM; + vqs1_l = vqs1_l & HWM; + vps0_l = vps0_l & HWM; + vps1_l = vps1_l & HWM; + + __asm__ __volatile__ ( + "shrl.ph %[vqs0_r], %[vqs0_r], 8 \n\t" + "shrl.ph %[vps0_r], %[vps0_r], 8 \n\t" + "shrl.ph %[vqs1_r], %[vqs1_r], 8 \n\t" + "shrl.ph %[vps1_r], %[vps1_r], 8 \n\t" + + : [vps1_r] "+r" (vps1_r), [vqs1_r] "+r" (vqs1_r), + [vps0_r] "+r" (vps0_r), [vqs0_r] "+r" (vqs0_r) + : + ); + + vqs0 = vqs0_l | vqs0_r; + vqs1 = vqs1_l | vqs1_r; + vps0 = vps0_l | vps0_r; + vps1 = vps1_l | vps1_r; + + *ps0 = vps0 ^ N128; + *ps1 = vps1 ^ N128; + *qs0 = vqs0 ^ N128; + *qs1 = vqs1 ^ N128; +} + +void vp8_loop_filter_horizontal_edge_mips +( + unsigned char *s, + int p, + unsigned int flimit, + unsigned int limit, + unsigned int thresh, + int count +) +{ + uint32_t mask; + uint32_t hev; + uint32_t pm1, p0, p1, p2, p3, p4, p5, p6; + unsigned char *sm1, *s0, *s1, *s2, *s3, *s4, *s5, *s6; + + mask = 0; + hev = 0; + p1 = 0; + p2 = 0; + p3 = 0; + p4 = 0; + + /* prefetch data for store */ + 
prefetch_store_lf(s); + + /* loop filter designed to work using chars so that we can make maximum use + * of 8 bit simd instructions. + */ + + sm1 = s - (p << 2); + s0 = s - p - p - p; + s1 = s - p - p ; + s2 = s - p; + s3 = s; + s4 = s + p; + s5 = s + p + p; + s6 = s + p + p + p; + + /* load quad-byte vectors + * memory is 4 byte aligned + */ + p1 = *((uint32_t *)(s1)); + p2 = *((uint32_t *)(s2)); + p3 = *((uint32_t *)(s3)); + p4 = *((uint32_t *)(s4)); + + /* if (p1 - p4 == 0) and (p2 - p3 == 0) + * mask will be zero and filtering is not needed + */ + if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) + { + + pm1 = *((uint32_t *)(sm1)); + p0 = *((uint32_t *)(s0)); + p5 = *((uint32_t *)(s5)); + p6 = *((uint32_t *)(s6)); + + vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, + thresh, &hev, &mask); + + /* if mask == 0 do filtering is not needed */ + if (mask) + { + /* filtering */ + vp8_filter_mips(mask, hev, &p1, &p2, &p3, &p4); + + /* unpack processed 4x4 neighborhood */ + *((uint32_t *)s1) = p1; + *((uint32_t *)s2) = p2; + *((uint32_t *)s3) = p3; + *((uint32_t *)s4) = p4; + } + } + + sm1 += 4; + s0 += 4; + s1 += 4; + s2 += 4; + s3 += 4; + s4 += 4; + s5 += 4; + s6 += 4; + + /* load quad-byte vectors + * memory is 4 byte aligned + */ + p1 = *((uint32_t *)(s1)); + p2 = *((uint32_t *)(s2)); + p3 = *((uint32_t *)(s3)); + p4 = *((uint32_t *)(s4)); + + /* if (p1 - p4 == 0) and (p2 - p3 == 0) + * mask will be zero and filtering is not needed + */ + if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) + { + + pm1 = *((uint32_t *)(sm1)); + p0 = *((uint32_t *)(s0)); + p5 = *((uint32_t *)(s5)); + p6 = *((uint32_t *)(s6)); + + vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, + thresh, &hev, &mask); + + /* if mask == 0 do filtering is not needed */ + if (mask) + { + /* filtering */ + vp8_filter_mips(mask, hev, &p1, &p2, &p3, &p4); + + /* unpack processed 4x4 neighborhood */ + *((uint32_t *)s1) = p1; + *((uint32_t *)s2) = p2; + *((uint32_t *)s3) 
= p3; + *((uint32_t *)s4) = p4; + } + } + + sm1 += 4; + s0 += 4; + s1 += 4; + s2 += 4; + s3 += 4; + s4 += 4; + s5 += 4; + s6 += 4; + + /* load quad-byte vectors + * memory is 4 byte aligned + */ + p1 = *((uint32_t *)(s1)); + p2 = *((uint32_t *)(s2)); + p3 = *((uint32_t *)(s3)); + p4 = *((uint32_t *)(s4)); + + /* if (p1 - p4 == 0) and (p2 - p3 == 0) + * mask will be zero and filtering is not needed + */ + if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) + { + + pm1 = *((uint32_t *)(sm1)); + p0 = *((uint32_t *)(s0)); + p5 = *((uint32_t *)(s5)); + p6 = *((uint32_t *)(s6)); + + vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, + thresh, &hev, &mask); + + /* if mask == 0 do filtering is not needed */ + if (mask) + { + /* filtering */ + vp8_filter_mips(mask, hev, &p1, &p2, &p3, &p4); + + /* unpack processed 4x4 neighborhood */ + *((uint32_t *)s1) = p1; + *((uint32_t *)s2) = p2; + *((uint32_t *)s3) = p3; + *((uint32_t *)s4) = p4; + } + } + + sm1 += 4; + s0 += 4; + s1 += 4; + s2 += 4; + s3 += 4; + s4 += 4; + s5 += 4; + s6 += 4; + + /* load quad-byte vectors + * memory is 4 byte aligned + */ + p1 = *((uint32_t *)(s1)); + p2 = *((uint32_t *)(s2)); + p3 = *((uint32_t *)(s3)); + p4 = *((uint32_t *)(s4)); + + /* if (p1 - p4 == 0) and (p2 - p3 == 0) + * mask will be zero and filtering is not needed + */ + if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) + { + + pm1 = *((uint32_t *)(sm1)); + p0 = *((uint32_t *)(s0)); + p5 = *((uint32_t *)(s5)); + p6 = *((uint32_t *)(s6)); + + vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, + thresh, &hev, &mask); + + /* if mask == 0 do filtering is not needed */ + if (mask) + { + /* filtering */ + vp8_filter_mips(mask, hev, &p1, &p2, &p3, &p4); + + /* unpack processed 4x4 neighborhood */ + *((uint32_t *)s1) = p1; + *((uint32_t *)s2) = p2; + *((uint32_t *)s3) = p3; + *((uint32_t *)s4) = p4; + } + } +} + +void vp8_loop_filter_uvhorizontal_edge_mips +( + unsigned char *s, + int p, + unsigned int flimit, + 
unsigned int limit, + unsigned int thresh, + int count +) +{ + uint32_t mask; + uint32_t hev; + uint32_t pm1, p0, p1, p2, p3, p4, p5, p6; + unsigned char *sm1, *s0, *s1, *s2, *s3, *s4, *s5, *s6; + + mask = 0; + hev = 0; + p1 = 0; + p2 = 0; + p3 = 0; + p4 = 0; + + /* loop filter designed to work using chars so that we can make maximum use + * of 8 bit simd instructions. + */ + + sm1 = s - (p << 2); + s0 = s - p - p - p; + s1 = s - p - p ; + s2 = s - p; + s3 = s; + s4 = s + p; + s5 = s + p + p; + s6 = s + p + p + p; + + /* load quad-byte vectors + * memory is 4 byte aligned + */ + p1 = *((uint32_t *)(s1)); + p2 = *((uint32_t *)(s2)); + p3 = *((uint32_t *)(s3)); + p4 = *((uint32_t *)(s4)); + + /* if (p1 - p4 == 0) and (p2 - p3 == 0) + * mask will be zero and filtering is not needed + */ + if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) + { + + pm1 = *((uint32_t *)(sm1)); + p0 = *((uint32_t *)(s0)); + p5 = *((uint32_t *)(s5)); + p6 = *((uint32_t *)(s6)); + + vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, + thresh, &hev, &mask); + + /* if mask == 0 do filtering is not needed */ + if (mask) + { + /* filtering */ + vp8_filter_mips(mask, hev, &p1, &p2, &p3, &p4); + + /* unpack processed 4x4 neighborhood */ + *((uint32_t *)s1) = p1; + *((uint32_t *)s2) = p2; + *((uint32_t *)s3) = p3; + *((uint32_t *)s4) = p4; + } + } + + sm1 += 4; + s0 += 4; + s1 += 4; + s2 += 4; + s3 += 4; + s4 += 4; + s5 += 4; + s6 += 4; + + /* load quad-byte vectors + * memory is 4 byte aligned + */ + p1 = *((uint32_t *)(s1)); + p2 = *((uint32_t *)(s2)); + p3 = *((uint32_t *)(s3)); + p4 = *((uint32_t *)(s4)); + + /* if (p1 - p4 == 0) and (p2 - p3 == 0) + * mask will be zero and filtering is not needed + */ + if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) + { + + pm1 = *((uint32_t *)(sm1)); + p0 = *((uint32_t *)(s0)); + p5 = *((uint32_t *)(s5)); + p6 = *((uint32_t *)(s6)); + + vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, + thresh, &hev, &mask); + + /* if 
mask == 0 do filtering is not needed */ + if (mask) + { + /* filtering */ + vp8_filter_mips(mask, hev, &p1, &p2, &p3, &p4); + + /* unpack processed 4x4 neighborhood */ + *((uint32_t *)s1) = p1; + *((uint32_t *)s2) = p2; + *((uint32_t *)s3) = p3; + *((uint32_t *)s4) = p4; + } + } +} + +void vp8_loop_filter_vertical_edge_mips +( + unsigned char *s, + int p, + const unsigned int flimit, + const unsigned int limit, + const unsigned int thresh, + int count +) +{ + int i; + uint32_t mask, hev; + uint32_t pm1, p0, p1, p2, p3, p4, p5, p6; + unsigned char *s1, *s2, *s3, *s4; + uint32_t prim1, prim2, sec3, sec4, prim3, prim4; + + hev = 0; + mask = 0; + i = 0; + pm1 = 0; + p0 = 0; + p1 = 0; + p2 = 0; + p3 = 0; + p4 = 0; + p5 = 0; + p6 = 0; + + /* loop filter designed to work using chars so that we can make maximum use + * of 8 bit simd instructions. + */ + + /* apply filter on 4 pixesl at the same time */ + do + { + + /* prefetch data for store */ + prefetch_store_lf(s + p); + + s1 = s; + s2 = s + p; + s3 = s2 + p; + s4 = s3 + p; + s = s4 + p; + + /* load quad-byte vectors + * memory is 4 byte aligned + */ + p2 = *((uint32_t *)(s1 - 4)); + p6 = *((uint32_t *)(s1)); + p1 = *((uint32_t *)(s2 - 4)); + p5 = *((uint32_t *)(s2)); + p0 = *((uint32_t *)(s3 - 4)); + p4 = *((uint32_t *)(s3)); + pm1 = *((uint32_t *)(s4 - 4)); + p3 = *((uint32_t *)(s4)); + + /* transpose pm1, p0, p1, p2 */ + __asm__ __volatile__ ( + "precrq.qb.ph %[prim1], %[p2], %[p1] \n\t" + "precr.qb.ph %[prim2], %[p2], %[p1] \n\t" + "precrq.qb.ph %[prim3], %[p0], %[pm1] \n\t" + "precr.qb.ph %[prim4], %[p0], %[pm1] \n\t" + + "precrq.qb.ph %[p1], %[prim1], %[prim2] \n\t" + "precr.qb.ph %[pm1], %[prim1], %[prim2] \n\t" + "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" + "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" + + "precrq.ph.w %[p2], %[p1], %[sec3] \n\t" + "precrq.ph.w %[p0], %[pm1], %[sec4] \n\t" + "append %[p1], %[sec3], 16 \n\t" + "append %[pm1], %[sec4], 16 \n\t" + + : [prim1] "=&r" (prim1), [prim2] "=&r" 
(prim2), + [prim3] "=&r" (prim3), [prim4] "=&r" (prim4), + [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0), [pm1] "+r" (pm1), + [sec3] "=&r" (sec3), [sec4] "=&r" (sec4) + : + ); + + /* transpose p3, p4, p5, p6 */ + __asm__ __volatile__ ( + "precrq.qb.ph %[prim1], %[p6], %[p5] \n\t" + "precr.qb.ph %[prim2], %[p6], %[p5] \n\t" + "precrq.qb.ph %[prim3], %[p4], %[p3] \n\t" + "precr.qb.ph %[prim4], %[p4], %[p3] \n\t" + + "precrq.qb.ph %[p5], %[prim1], %[prim2] \n\t" + "precr.qb.ph %[p3], %[prim1], %[prim2] \n\t" + "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" + "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" + + "precrq.ph.w %[p6], %[p5], %[sec3] \n\t" + "precrq.ph.w %[p4], %[p3], %[sec4] \n\t" + "append %[p5], %[sec3], 16 \n\t" + "append %[p3], %[sec4], 16 \n\t" + + : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2), + [prim3] "=&r" (prim3), [prim4] "=&r" (prim4), + [p6] "+r" (p6), [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3), + [sec3] "=&r" (sec3), [sec4] "=&r" (sec4) + : + ); + + /* if (p1 - p4 == 0) and (p2 - p3 == 0) + * mask will be zero and filtering is not needed + */ + if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) + { + + vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, + thresh, &hev, &mask); + + /* if mask == 0 do filtering is not needed */ + if (mask) + { + /* filtering */ + vp8_filter_mips(mask, hev, &p1, &p2, &p3, &p4); + + /* unpack processed 4x4 neighborhood + * don't use transpose on output data + * because memory isn't aligned + */ + __asm__ __volatile__ ( + "sb %[p4], 1(%[s4]) \n\t" + "sb %[p3], 0(%[s4]) \n\t" + "sb %[p2], -1(%[s4]) \n\t" + "sb %[p1], -2(%[s4]) \n\t" + : + : [p4] "r" (p4), [p3] "r" (p3), [s4] "r" (s4), + [p2] "r" (p2), [p1] "r" (p1) + ); + + __asm__ __volatile__ ( + "srl %[p4], %[p4], 8 \n\t" + "srl %[p3], %[p3], 8 \n\t" + "srl %[p2], %[p2], 8 \n\t" + "srl %[p1], %[p1], 8 \n\t" + : [p4] "+r" (p4), [p3] "+r" (p3), [p2] "+r" (p2), [p1] "+r" (p1) + : + ); + + __asm__ __volatile__ ( + "sb %[p4], 1(%[s3]) \n\t" + "sb 
%[p3], 0(%[s3]) \n\t" + "sb %[p2], -1(%[s3]) \n\t" + "sb %[p1], -2(%[s3]) \n\t" + : [p1] "+r" (p1) + : [p4] "r" (p4), [p3] "r" (p3), [s3] "r" (s3), [p2] "r" (p2) + ); + + __asm__ __volatile__ ( + "srl %[p4], %[p4], 8 \n\t" + "srl %[p3], %[p3], 8 \n\t" + "srl %[p2], %[p2], 8 \n\t" + "srl %[p1], %[p1], 8 \n\t" + : [p4] "+r" (p4), [p3] "+r" (p3), [p2] "+r" (p2), [p1] "+r" (p1) + : + ); + + __asm__ __volatile__ ( + "sb %[p4], 1(%[s2]) \n\t" + "sb %[p3], 0(%[s2]) \n\t" + "sb %[p2], -1(%[s2]) \n\t" + "sb %[p1], -2(%[s2]) \n\t" + : + : [p4] "r" (p4), [p3] "r" (p3), [s2] "r" (s2), + [p2] "r" (p2), [p1] "r" (p1) + ); + + __asm__ __volatile__ ( + "srl %[p4], %[p4], 8 \n\t" + "srl %[p3], %[p3], 8 \n\t" + "srl %[p2], %[p2], 8 \n\t" + "srl %[p1], %[p1], 8 \n\t" + : [p4] "+r" (p4), [p3] "+r" (p3), [p2] "+r" (p2), [p1] "+r" (p1) + : + ); + + __asm__ __volatile__ ( + "sb %[p4], 1(%[s1]) \n\t" + "sb %[p3], 0(%[s1]) \n\t" + "sb %[p2], -1(%[s1]) \n\t" + "sb %[p1], -2(%[s1]) \n\t" + : + : [p4] "r" (p4), [p3] "r" (p3), [s1] "r" (s1), + [p2] "r" (p2), [p1] "r" (p1) + ); + } + } + + s1 = s; + s2 = s + p; + s3 = s2 + p; + s4 = s3 + p; + s = s4 + p; + + /* load quad-byte vectors + * memory is 4 byte aligned + */ + p2 = *((uint32_t *)(s1 - 4)); + p6 = *((uint32_t *)(s1)); + p1 = *((uint32_t *)(s2 - 4)); + p5 = *((uint32_t *)(s2)); + p0 = *((uint32_t *)(s3 - 4)); + p4 = *((uint32_t *)(s3)); + pm1 = *((uint32_t *)(s4 - 4)); + p3 = *((uint32_t *)(s4)); + + /* transpose pm1, p0, p1, p2 */ + __asm__ __volatile__ ( + "precrq.qb.ph %[prim1], %[p2], %[p1] \n\t" + "precr.qb.ph %[prim2], %[p2], %[p1] \n\t" + "precrq.qb.ph %[prim3], %[p0], %[pm1] \n\t" + "precr.qb.ph %[prim4], %[p0], %[pm1] \n\t" + + "precrq.qb.ph %[p1], %[prim1], %[prim2] \n\t" + "precr.qb.ph %[pm1], %[prim1], %[prim2] \n\t" + "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" + "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" + + "precrq.ph.w %[p2], %[p1], %[sec3] \n\t" + "precrq.ph.w %[p0], %[pm1], %[sec4] \n\t" + "append %[p1], 
%[sec3], 16 \n\t" + "append %[pm1], %[sec4], 16 \n\t" + + : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2), + [prim3] "=&r" (prim3), [prim4] "=&r" (prim4), + [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0), [pm1] "+r" (pm1), + [sec3] "=&r" (sec3), [sec4] "=&r" (sec4) + : + ); + + /* transpose p3, p4, p5, p6 */ + __asm__ __volatile__ ( + "precrq.qb.ph %[prim1], %[p6], %[p5] \n\t" + "precr.qb.ph %[prim2], %[p6], %[p5] \n\t" + "precrq.qb.ph %[prim3], %[p4], %[p3] \n\t" + "precr.qb.ph %[prim4], %[p4], %[p3] \n\t" + + "precrq.qb.ph %[p5], %[prim1], %[prim2] \n\t" + "precr.qb.ph %[p3], %[prim1], %[prim2] \n\t" + "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" + "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" + + "precrq.ph.w %[p6], %[p5], %[sec3] \n\t" + "precrq.ph.w %[p4], %[p3], %[sec4] \n\t" + "append %[p5], %[sec3], 16 \n\t" + "append %[p3], %[sec4], 16 \n\t" + + : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2), + [prim3] "=&r" (prim3), [prim4] "=&r" (prim4), + [p6] "+r" (p6), [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3), + [sec3] "=&r" (sec3), [sec4] "=&r" (sec4) + : + ); + + /* if (p1 - p4 == 0) and (p2 - p3 == 0) + * mask will be zero and filtering is not needed + */ + if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) + { + + vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, + thresh, &hev, &mask); + + /* if mask == 0 do filtering is not needed */ + if (mask) + { + /* filtering */ + vp8_filter_mips(mask, hev, &p1, &p2, &p3, &p4); + + /* unpack processed 4x4 neighborhood + * don't use transpose on output data + * because memory isn't aligned + */ + __asm__ __volatile__ ( + "sb %[p4], 1(%[s4]) \n\t" + "sb %[p3], 0(%[s4]) \n\t" + "sb %[p2], -1(%[s4]) \n\t" + "sb %[p1], -2(%[s4]) \n\t" + : + : [p4] "r" (p4), [p3] "r" (p3), [s4] "r" (s4), + [p2] "r" (p2), [p1] "r" (p1) + ); + + __asm__ __volatile__ ( + "srl %[p4], %[p4], 8 \n\t" + "srl %[p3], %[p3], 8 \n\t" + "srl %[p2], %[p2], 8 \n\t" + "srl %[p1], %[p1], 8 \n\t" + : [p4] "+r" (p4), [p3] "+r" (p3), [p2] 
"+r" (p2), [p1] "+r" (p1) + : + ); + + __asm__ __volatile__ ( + "sb %[p4], 1(%[s3]) \n\t" + "sb %[p3], 0(%[s3]) \n\t" + "sb %[p2], -1(%[s3]) \n\t" + "sb %[p1], -2(%[s3]) \n\t" + : [p1] "+r" (p1) + : [p4] "r" (p4), [p3] "r" (p3), [s3] "r" (s3), [p2] "r" (p2) + ); + + __asm__ __volatile__ ( + "srl %[p4], %[p4], 8 \n\t" + "srl %[p3], %[p3], 8 \n\t" + "srl %[p2], %[p2], 8 \n\t" + "srl %[p1], %[p1], 8 \n\t" + : [p4] "+r" (p4), [p3] "+r" (p3), [p2] "+r" (p2), [p1] "+r" (p1) + : + ); + + __asm__ __volatile__ ( + "sb %[p4], 1(%[s2]) \n\t" + "sb %[p3], 0(%[s2]) \n\t" + "sb %[p2], -1(%[s2]) \n\t" + "sb %[p1], -2(%[s2]) \n\t" + : + : [p4] "r" (p4), [p3] "r" (p3), [s2] "r" (s2), + [p2] "r" (p2), [p1] "r" (p1) + ); + + __asm__ __volatile__ ( + "srl %[p4], %[p4], 8 \n\t" + "srl %[p3], %[p3], 8 \n\t" + "srl %[p2], %[p2], 8 \n\t" + "srl %[p1], %[p1], 8 \n\t" + : [p4] "+r" (p4), [p3] "+r" (p3), [p2] "+r" (p2), [p1] "+r" (p1) + : + ); + + __asm__ __volatile__ ( + "sb %[p4], 1(%[s1]) \n\t" + "sb %[p3], 0(%[s1]) \n\t" + "sb %[p2], -1(%[s1]) \n\t" + "sb %[p1], -2(%[s1]) \n\t" + : + : [p4] "r" (p4), [p3] "r" (p3), [s1] "r" (s1), + [p2] "r" (p2), [p1] "r" (p1) + ); + } + } + + i += 8; + } + + while (i < count); +} + +void vp8_loop_filter_uvvertical_edge_mips +( + unsigned char *s, + int p, + unsigned int flimit, + unsigned int limit, + unsigned int thresh, + int count +) +{ + uint32_t mask, hev; + uint32_t pm1, p0, p1, p2, p3, p4, p5, p6; + unsigned char *s1, *s2, *s3, *s4; + uint32_t prim1, prim2, sec3, sec4, prim3, prim4; + + /* loop filter designed to work using chars so that we can make maximum use + * of 8 bit simd instructions. 
+ */ + + /* apply filter on 4 pixesl at the same time */ + + s1 = s; + s2 = s + p; + s3 = s2 + p; + s4 = s3 + p; + + /* load quad-byte vectors + * memory is 4 byte aligned + */ + p2 = *((uint32_t *)(s1 - 4)); + p6 = *((uint32_t *)(s1)); + p1 = *((uint32_t *)(s2 - 4)); + p5 = *((uint32_t *)(s2)); + p0 = *((uint32_t *)(s3 - 4)); + p4 = *((uint32_t *)(s3)); + pm1 = *((uint32_t *)(s4 - 4)); + p3 = *((uint32_t *)(s4)); + + /* transpose pm1, p0, p1, p2 */ + __asm__ __volatile__ ( + "precrq.qb.ph %[prim1], %[p2], %[p1] \n\t" + "precr.qb.ph %[prim2], %[p2], %[p1] \n\t" + "precrq.qb.ph %[prim3], %[p0], %[pm1] \n\t" + "precr.qb.ph %[prim4], %[p0], %[pm1] \n\t" + + "precrq.qb.ph %[p1], %[prim1], %[prim2] \n\t" + "precr.qb.ph %[pm1], %[prim1], %[prim2] \n\t" + "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" + "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" + + "precrq.ph.w %[p2], %[p1], %[sec3] \n\t" + "precrq.ph.w %[p0], %[pm1], %[sec4] \n\t" + "append %[p1], %[sec3], 16 \n\t" + "append %[pm1], %[sec4], 16 \n\t" + + : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2), + [prim3] "=&r" (prim3), [prim4] "=&r" (prim4), + [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0), [pm1] "+r" (pm1), + [sec3] "=&r" (sec3), [sec4] "=&r" (sec4) + : + ); + + /* transpose p3, p4, p5, p6 */ + __asm__ __volatile__ ( + "precrq.qb.ph %[prim1], %[p6], %[p5] \n\t" + "precr.qb.ph %[prim2], %[p6], %[p5] \n\t" + "precrq.qb.ph %[prim3], %[p4], %[p3] \n\t" + "precr.qb.ph %[prim4], %[p4], %[p3] \n\t" + + "precrq.qb.ph %[p5], %[prim1], %[prim2] \n\t" + "precr.qb.ph %[p3], %[prim1], %[prim2] \n\t" + "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" + "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" + + "precrq.ph.w %[p6], %[p5], %[sec3] \n\t" + "precrq.ph.w %[p4], %[p3], %[sec4] \n\t" + "append %[p5], %[sec3], 16 \n\t" + "append %[p3], %[sec4], 16 \n\t" + + : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2), + [prim3] "=&r" (prim3), [prim4] "=&r" (prim4), + [p6] "+r" (p6), [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3), + 
[sec3] "=&r" (sec3), [sec4] "=&r" (sec4) + : + ); + + /* if (p1 - p4 == 0) and (p2 - p3 == 0) + * mask will be zero and filtering is not needed + */ + if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) + { + + vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, + thresh, &hev, &mask); + + /* if mask == 0 do filtering is not needed */ + if (mask) + { + /* filtering */ + vp8_filter_mips(mask, hev, &p1, &p2, &p3, &p4); + + /* unpack processed 4x4 neighborhood + * don't use transpose on output data + * because memory isn't aligned + */ + __asm__ __volatile__ ( + "sb %[p4], 1(%[s4]) \n\t" + "sb %[p3], 0(%[s4]) \n\t" + "sb %[p2], -1(%[s4]) \n\t" + "sb %[p1], -2(%[s4]) \n\t" + : + : [p4] "r" (p4), [p3] "r" (p3), [s4] "r" (s4), + [p2] "r" (p2), [p1] "r" (p1) + ); + + __asm__ __volatile__ ( + "srl %[p4], %[p4], 8 \n\t" + "srl %[p3], %[p3], 8 \n\t" + "srl %[p2], %[p2], 8 \n\t" + "srl %[p1], %[p1], 8 \n\t" + : [p4] "+r" (p4), [p3] "+r" (p3), [p2] "+r" (p2), [p1] "+r" (p1) + : + ); + + __asm__ __volatile__ ( + "sb %[p4], 1(%[s3]) \n\t" + "sb %[p3], 0(%[s3]) \n\t" + "sb %[p2], -1(%[s3]) \n\t" + "sb %[p1], -2(%[s3]) \n\t" + : [p1] "+r" (p1) + : [p4] "r" (p4), [p3] "r" (p3), [s3] "r" (s3), [p2] "r" (p2) + ); + + __asm__ __volatile__ ( + "srl %[p4], %[p4], 8 \n\t" + "srl %[p3], %[p3], 8 \n\t" + "srl %[p2], %[p2], 8 \n\t" + "srl %[p1], %[p1], 8 \n\t" + : [p4] "+r" (p4), [p3] "+r" (p3), [p2] "+r" (p2), [p1] "+r" (p1) + : + ); + + __asm__ __volatile__ ( + "sb %[p4], 1(%[s2]) \n\t" + "sb %[p3], 0(%[s2]) \n\t" + "sb %[p2], -1(%[s2]) \n\t" + "sb %[p1], -2(%[s2]) \n\t" + : + : [p4] "r" (p4), [p3] "r" (p3), [s2] "r" (s2), + [p2] "r" (p2), [p1] "r" (p1) + ); + + __asm__ __volatile__ ( + "srl %[p4], %[p4], 8 \n\t" + "srl %[p3], %[p3], 8 \n\t" + "srl %[p2], %[p2], 8 \n\t" + "srl %[p1], %[p1], 8 \n\t" + : [p4] "+r" (p4), [p3] "+r" (p3), [p2] "+r" (p2), [p1] "+r" (p1) + : + ); + + __asm__ __volatile__ ( + "sb %[p4], 1(%[s1]) \n\t" + "sb %[p3], 0(%[s1]) \n\t" + "sb %[p2], 
-1(%[s1]) \n\t" + "sb %[p1], -2(%[s1]) \n\t" + : + : [p4] "r" (p4), [p3] "r" (p3), [s1] "r" (s1), [p2] "r" (p2), [p1] "r" (p1) + ); + } + } + + s1 = s4 + p; + s2 = s1 + p; + s3 = s2 + p; + s4 = s3 + p; + + /* load quad-byte vectors + * memory is 4 byte aligned + */ + p2 = *((uint32_t *)(s1 - 4)); + p6 = *((uint32_t *)(s1)); + p1 = *((uint32_t *)(s2 - 4)); + p5 = *((uint32_t *)(s2)); + p0 = *((uint32_t *)(s3 - 4)); + p4 = *((uint32_t *)(s3)); + pm1 = *((uint32_t *)(s4 - 4)); + p3 = *((uint32_t *)(s4)); + + /* transpose pm1, p0, p1, p2 */ + __asm__ __volatile__ ( + "precrq.qb.ph %[prim1], %[p2], %[p1] \n\t" + "precr.qb.ph %[prim2], %[p2], %[p1] \n\t" + "precrq.qb.ph %[prim3], %[p0], %[pm1] \n\t" + "precr.qb.ph %[prim4], %[p0], %[pm1] \n\t" + + "precrq.qb.ph %[p1], %[prim1], %[prim2] \n\t" + "precr.qb.ph %[pm1], %[prim1], %[prim2] \n\t" + "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" + "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" + + "precrq.ph.w %[p2], %[p1], %[sec3] \n\t" + "precrq.ph.w %[p0], %[pm1], %[sec4] \n\t" + "append %[p1], %[sec3], 16 \n\t" + "append %[pm1], %[sec4], 16 \n\t" + + : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2), + [prim3] "=&r" (prim3), [prim4] "=&r" (prim4), + [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0), [pm1] "+r" (pm1), + [sec3] "=&r" (sec3), [sec4] "=&r" (sec4) + : + ); + + /* transpose p3, p4, p5, p6 */ + __asm__ __volatile__ ( + "precrq.qb.ph %[prim1], %[p6], %[p5] \n\t" + "precr.qb.ph %[prim2], %[p6], %[p5] \n\t" + "precrq.qb.ph %[prim3], %[p4], %[p3] \n\t" + "precr.qb.ph %[prim4], %[p4], %[p3] \n\t" + + "precrq.qb.ph %[p5], %[prim1], %[prim2] \n\t" + "precr.qb.ph %[p3], %[prim1], %[prim2] \n\t" + "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" + "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" + + "precrq.ph.w %[p6], %[p5], %[sec3] \n\t" + "precrq.ph.w %[p4], %[p3], %[sec4] \n\t" + "append %[p5], %[sec3], 16 \n\t" + "append %[p3], %[sec4], 16 \n\t" + + : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2), + [prim3] "=&r" (prim3), 
[prim4] "=&r" (prim4), + [p6] "+r" (p6), [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3), + [sec3] "=&r" (sec3), [sec4] "=&r" (sec4) + : + ); + + /* if (p1 - p4 == 0) and (p2 - p3 == 0) + * mask will be zero and filtering is not needed + */ + if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) + { + + vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, + thresh, &hev, &mask); + + /* if mask == 0 do filtering is not needed */ + if (mask) + { + /* filtering */ + vp8_filter_mips(mask, hev, &p1, &p2, &p3, &p4); + + /* unpack processed 4x4 neighborhood + * don't use transpose on output data + * because memory isn't aligned + */ + __asm__ __volatile__ ( + "sb %[p4], 1(%[s4]) \n\t" + "sb %[p3], 0(%[s4]) \n\t" + "sb %[p2], -1(%[s4]) \n\t" + "sb %[p1], -2(%[s4]) \n\t" + : + : [p4] "r" (p4), [p3] "r" (p3), [s4] "r" (s4), + [p2] "r" (p2), [p1] "r" (p1) + ); + + __asm__ __volatile__ ( + "srl %[p4], %[p4], 8 \n\t" + "srl %[p3], %[p3], 8 \n\t" + "srl %[p2], %[p2], 8 \n\t" + "srl %[p1], %[p1], 8 \n\t" + : [p4] "+r" (p4), [p3] "+r" (p3), [p2] "+r" (p2), [p1] "+r" (p1) + : + ); + + __asm__ __volatile__ ( + "sb %[p4], 1(%[s3]) \n\t" + "sb %[p3], 0(%[s3]) \n\t" + "sb %[p2], -1(%[s3]) \n\t" + "sb %[p1], -2(%[s3]) \n\t" + : [p1] "+r" (p1) + : [p4] "r" (p4), [p3] "r" (p3), [s3] "r" (s3), [p2] "r" (p2) + ); + + __asm__ __volatile__ ( + "srl %[p4], %[p4], 8 \n\t" + "srl %[p3], %[p3], 8 \n\t" + "srl %[p2], %[p2], 8 \n\t" + "srl %[p1], %[p1], 8 \n\t" + : [p4] "+r" (p4), [p3] "+r" (p3), [p2] "+r" (p2), [p1] "+r" (p1) + : + ); + + __asm__ __volatile__ ( + "sb %[p4], 1(%[s2]) \n\t" + "sb %[p3], 0(%[s2]) \n\t" + "sb %[p2], -1(%[s2]) \n\t" + "sb %[p1], -2(%[s2]) \n\t" + : + : [p4] "r" (p4), [p3] "r" (p3), [s2] "r" (s2), + [p2] "r" (p2), [p1] "r" (p1) + ); + + __asm__ __volatile__ ( + "srl %[p4], %[p4], 8 \n\t" + "srl %[p3], %[p3], 8 \n\t" + "srl %[p2], %[p2], 8 \n\t" + "srl %[p1], %[p1], 8 \n\t" + : [p4] "+r" (p4), [p3] "+r" (p3), [p2] "+r" (p2), [p1] "+r" (p1) + : + ); + + __asm__ 
__volatile__ ( + "sb %[p4], 1(%[s1]) \n\t" + "sb %[p3], 0(%[s1]) \n\t" + "sb %[p2], -1(%[s1]) \n\t" + "sb %[p1], -2(%[s1]) \n\t" + : + : [p4] "r" (p4), [p3] "r" (p3), [s1] "r" (s1), + [p2] "r" (p2), [p1] "r" (p1) + ); + } + } +} + +/* inputs & outputs are quad-byte vectors */ +static __inline void vp8_mbfilter_mips +( + uint32_t mask, + uint32_t hev, + uint32_t *ps2, + uint32_t *ps1, + uint32_t *ps0, + uint32_t *qs0, + uint32_t *qs1, + uint32_t *qs2 +) +{ + int32_t vps2, vps1, vps0, vqs0, vqs1, vqs2; + int32_t vps2_l, vps1_l, vps0_l, vqs0_l, vqs1_l, vqs2_l; + int32_t vps2_r, vps1_r, vps0_r, vqs0_r, vqs1_r, vqs2_r; + uint32_t HWM, vp8_filter_l, vp8_filter_r, mask_l, mask_r, hev_l, hev_r, subr_r, subr_l; + uint32_t Filter2_l, Filter2_r, t1, t2, Filter1_l, Filter1_r, invhev_l, invhev_r; + uint32_t N128, R63; + uint32_t u1_l, u1_r, u2_l, u2_r, u3_l, u3_r; + + R63 = 0x003F003F; + HWM = 0xFF00FF00; + N128 = 0x80808080; + t1 = 0x03000300; + t2 = 0x04000400; + + vps0 = (*ps0) ^ N128; + vps1 = (*ps1) ^ N128; + vps2 = (*ps2) ^ N128; + vqs0 = (*qs0) ^ N128; + vqs1 = (*qs1) ^ N128; + vqs2 = (*qs2) ^ N128; + + /* use halfword pairs instead quad-bytes because of accuracy */ + vps0_l = vps0 & HWM; + vps0_r = vps0 << 8; + vps0_r = vps0_r & HWM; + + vqs0_l = vqs0 & HWM; + vqs0_r = vqs0 << 8; + vqs0_r = vqs0_r & HWM; + + vps1_l = vps1 & HWM; + vps1_r = vps1 << 8; + vps1_r = vps1_r & HWM; + + vqs1_l = vqs1 & HWM; + vqs1_r = vqs1 << 8; + vqs1_r = vqs1_r & HWM; + + vqs2_l = vqs2 & HWM; + vqs2_r = vqs2 << 8; + vqs2_r = vqs2_r & HWM; + + __asm__ __volatile__ ( + /* qs0 - ps0 */ + "subq_s.ph %[subr_l], %[vqs0_l], %[vps0_l] \n\t" + "subq_s.ph %[subr_r], %[vqs0_r], %[vps0_r] \n\t" + + /* vp8_filter = vp8_signed_char_clamp(ps1 - qs1); */ + "subq_s.ph %[vp8_filter_l], %[vps1_l], %[vqs1_l] \n\t" + "subq_s.ph %[vp8_filter_r], %[vps1_r], %[vqs1_r] \n\t" + + : [vp8_filter_l] "=&r" (vp8_filter_l), [vp8_filter_r] "=r" (vp8_filter_r), + [subr_l] "=&r" (subr_l), [subr_r] "=&r" (subr_r) + : [vps0_l] 
"r" (vps0_l), [vps0_r] "r" (vps0_r), [vps1_l] "r" (vps1_l), + [vps1_r] "r" (vps1_r), [vqs0_l] "r" (vqs0_l), [vqs0_r] "r" (vqs0_r), + [vqs1_l] "r" (vqs1_l), [vqs1_r] "r" (vqs1_r) + ); + + vps2_l = vps2 & HWM; + vps2_r = vps2 << 8; + vps2_r = vps2_r & HWM; + + /* add outer taps if we have high edge variance */ + __asm__ __volatile__ ( + /* vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * (qs0 - ps0)); */ + "addq_s.ph %[vp8_filter_l], %[vp8_filter_l], %[subr_l] \n\t" + "addq_s.ph %[vp8_filter_r], %[vp8_filter_r], %[subr_r] \n\t" + "and %[mask_l], %[HWM], %[mask] \n\t" + "sll %[mask_r], %[mask], 8 \n\t" + "and %[mask_r], %[HWM], %[mask_r] \n\t" + "addq_s.ph %[vp8_filter_l], %[vp8_filter_l], %[subr_l] \n\t" + "addq_s.ph %[vp8_filter_r], %[vp8_filter_r], %[subr_r] \n\t" + "and %[hev_l], %[HWM], %[hev] \n\t" + "sll %[hev_r], %[hev], 8 \n\t" + "and %[hev_r], %[HWM], %[hev_r] \n\t" + "addq_s.ph %[vp8_filter_l], %[vp8_filter_l], %[subr_l] \n\t" + "addq_s.ph %[vp8_filter_r], %[vp8_filter_r], %[subr_r] \n\t" + + /* vp8_filter &= mask; */ + "and %[vp8_filter_l], %[vp8_filter_l], %[mask_l] \n\t" + "and %[vp8_filter_r], %[vp8_filter_r], %[mask_r] \n\t" + + /* Filter2 = vp8_filter & hev; */ + "and %[Filter2_l], %[vp8_filter_l], %[hev_l] \n\t" + "and %[Filter2_r], %[vp8_filter_r], %[hev_r] \n\t" + + : [vp8_filter_l] "+r" (vp8_filter_l), [vp8_filter_r] "+r" (vp8_filter_r), + [hev_l] "=&r" (hev_l), [hev_r] "=&r" (hev_r), + [mask_l] "=&r" (mask_l), [mask_r] "=&r" (mask_r), + [Filter2_l] "=&r" (Filter2_l), [Filter2_r] "=&r" (Filter2_r) + : [subr_l] "r" (subr_l), [subr_r] "r" (subr_r), + [HWM] "r" (HWM), [hev] "r" (hev), [mask] "r" (mask) + ); + + /* save bottom 3 bits so that we round one side +4 and the other +3 */ + __asm__ __volatile__ ( + /* Filter1 = vp8_signed_char_clamp(Filter2 + 4) >>= 3; */ + "addq_s.ph %[Filter1_l], %[Filter2_l], %[t2] \n\t" + "xor %[invhev_l], %[hev_l], %[HWM] \n\t" + "addq_s.ph %[Filter1_r], %[Filter2_r], %[t2] \n\t" + + /* Filter2 = 
vp8_signed_char_clamp(Filter2 + 3) >>= 3; */ + "addq_s.ph %[Filter2_l], %[Filter2_l], %[t1] \n\t" + "addq_s.ph %[Filter2_r], %[Filter2_r], %[t1] \n\t" + + "shra.ph %[Filter1_l], %[Filter1_l], 3 \n\t" + "shra.ph %[Filter1_r], %[Filter1_r], 3 \n\t" + + "shra.ph %[Filter2_l], %[Filter2_l], 3 \n\t" + "shra.ph %[Filter2_r], %[Filter2_r], 3 \n\t" + "and %[Filter1_l], %[Filter1_l], %[HWM] \n\t" + "and %[Filter1_r], %[Filter1_r], %[HWM] \n\t" + "xor %[invhev_r], %[hev_r], %[HWM] \n\t" + + /* qs0 = vp8_signed_char_clamp(qs0 - Filter1); */ + "subq_s.ph %[vqs0_l], %[vqs0_l], %[Filter1_l] \n\t" + "subq_s.ph %[vqs0_r], %[vqs0_r], %[Filter1_r] \n\t" + + /* ps0 = vp8_signed_char_clamp(ps0 + Filter2); */ + "addq_s.ph %[vps0_l], %[vps0_l], %[Filter2_l] \n\t" + "addq_s.ph %[vps0_r], %[vps0_r], %[Filter2_r] \n\t" + + : [invhev_l] "=&r" (invhev_l), [invhev_r] "=&r" (invhev_r), + [Filter1_l] "=&r" (Filter1_l), [Filter1_r] "=&r" (Filter1_r), + [Filter2_l] "+r" (Filter2_l), [Filter2_r] "+r" (Filter2_r), + [vps0_l] "+r" (vps0_l), [vps0_r] "+r" (vps0_r), + [vqs0_l] "+r" (vqs0_l), [vqs0_r] "+r" (vqs0_r) + : [t1] "r" (t1), [t2] "r" (t2), [HWM] "r" (HWM), + [hev_l] "r" (hev_l), [hev_r] "r" (hev_r) + ); + + /* only apply wider filter if not high edge variance */ + __asm__ __volatile__ ( + /* vp8_filter &= ~hev; */ + "and %[Filter2_l], %[vp8_filter_l], %[invhev_l] \n\t" + "and %[Filter2_r], %[vp8_filter_r], %[invhev_r] \n\t" + + "shra.ph %[Filter2_l], %[Filter2_l], 8 \n\t" + "shra.ph %[Filter2_r], %[Filter2_r], 8 \n\t" + + : [Filter2_l] "=&r" (Filter2_l), [Filter2_r] "=&r" (Filter2_r) + : [vp8_filter_l] "r" (vp8_filter_l), [vp8_filter_r] "r" (vp8_filter_r), + [invhev_l] "r" (invhev_l), [invhev_r] "r" (invhev_r) + ); + + /* roughly 3/7th difference across boundary */ + __asm__ __volatile__ ( + "shll.ph %[u3_l], %[Filter2_l], 3 \n\t" + "shll.ph %[u3_r], %[Filter2_r], 3 \n\t" + + "addq.ph %[u3_l], %[u3_l], %[Filter2_l] \n\t" + "addq.ph %[u3_r], %[u3_r], %[Filter2_r] \n\t" + + "shll.ph %[u2_l], 
%[u3_l], 1 \n\t" + "shll.ph %[u2_r], %[u3_r], 1 \n\t" + + "addq.ph %[u1_l], %[u3_l], %[u2_l] \n\t" + "addq.ph %[u1_r], %[u3_r], %[u2_r] \n\t" + + "addq.ph %[u2_l], %[u2_l], %[R63] \n\t" + "addq.ph %[u2_r], %[u2_r], %[R63] \n\t" + + "addq.ph %[u3_l], %[u3_l], %[R63] \n\t" + "addq.ph %[u3_r], %[u3_r], %[R63] \n\t" + + /* vp8_signed_char_clamp((63 + Filter2 * 27) >> 7) + * vp8_signed_char_clamp((63 + Filter2 * 18) >> 7) + */ + "addq.ph %[u1_l], %[u1_l], %[R63] \n\t" + "addq.ph %[u1_r], %[u1_r], %[R63] \n\t" + "shra.ph %[u1_l], %[u1_l], 7 \n\t" + "shra.ph %[u1_r], %[u1_r], 7 \n\t" + "shra.ph %[u2_l], %[u2_l], 7 \n\t" + "shra.ph %[u2_r], %[u2_r], 7 \n\t" + "shll.ph %[u1_l], %[u1_l], 8 \n\t" + "shll.ph %[u1_r], %[u1_r], 8 \n\t" + "shll.ph %[u2_l], %[u2_l], 8 \n\t" + "shll.ph %[u2_r], %[u2_r], 8 \n\t" + + /* vqs0 = vp8_signed_char_clamp(qs0 - u); */ + "subq_s.ph %[vqs0_l], %[vqs0_l], %[u1_l] \n\t" + "subq_s.ph %[vqs0_r], %[vqs0_r], %[u1_r] \n\t" + + /* vps0 = vp8_signed_char_clamp(ps0 + u); */ + "addq_s.ph %[vps0_l], %[vps0_l], %[u1_l] \n\t" + "addq_s.ph %[vps0_r], %[vps0_r], %[u1_r] \n\t" + + : [u1_l] "=&r" (u1_l), [u1_r] "=&r" (u1_r), [u2_l] "=&r" (u2_l), + [u2_r] "=&r" (u2_r), [u3_l] "=&r" (u3_l), [u3_r] "=&r" (u3_r), + [vps0_l] "+r" (vps0_l), [vps0_r] "+r" (vps0_r), + [vqs0_l] "+r" (vqs0_l), [vqs0_r] "+r" (vqs0_r) + : [R63] "r" (R63), + [Filter2_l] "r" (Filter2_l), [Filter2_r] "r" (Filter2_r) + ); + + __asm__ __volatile__ ( + /* vqs1 = vp8_signed_char_clamp(qs1 - u); */ + "subq_s.ph %[vqs1_l], %[vqs1_l], %[u2_l] \n\t" + "addq_s.ph %[vps1_l], %[vps1_l], %[u2_l] \n\t" + + /* vps1 = vp8_signed_char_clamp(ps1 + u); */ + "addq_s.ph %[vps1_r], %[vps1_r], %[u2_r] \n\t" + "subq_s.ph %[vqs1_r], %[vqs1_r], %[u2_r] \n\t" + + : [vps1_l] "+r" (vps1_l), [vps1_r] "+r" (vps1_r), + [vqs1_l] "+r" (vqs1_l), [vqs1_r] "+r" (vqs1_r) + : [u2_l] "r" (u2_l), [u2_r] "r" (u2_r) + ); + + /* roughly 1/7th difference across boundary */ + __asm__ __volatile__ ( + /* u = vp8_signed_char_clamp((63 + 
Filter2 * 9) >> 7); */ + "shra.ph %[u3_l], %[u3_l], 7 \n\t" + "shra.ph %[u3_r], %[u3_r], 7 \n\t" + "shll.ph %[u3_l], %[u3_l], 8 \n\t" + "shll.ph %[u3_r], %[u3_r], 8 \n\t" + + /* vqs2 = vp8_signed_char_clamp(qs2 - u); */ + "subq_s.ph %[vqs2_l], %[vqs2_l], %[u3_l] \n\t" + "subq_s.ph %[vqs2_r], %[vqs2_r], %[u3_r] \n\t" + + /* vps2 = vp8_signed_char_clamp(ps2 + u); */ + "addq_s.ph %[vps2_l], %[vps2_l], %[u3_l] \n\t" + "addq_s.ph %[vps2_r], %[vps2_r], %[u3_r] \n\t" + + : [u3_l] "+r" (u3_l), [u3_r] "+r" (u3_r), [vps2_l] "+r" (vps2_l), + [vps2_r] "+r" (vps2_r), [vqs2_l] "+r" (vqs2_l), [vqs2_r] "+r" (vqs2_r) + : + ); + + /* Create quad-bytes from halfword pairs */ + __asm__ __volatile__ ( + "and %[vqs0_l], %[vqs0_l], %[HWM] \n\t" + "shrl.ph %[vqs0_r], %[vqs0_r], 8 \n\t" + + "and %[vps0_l], %[vps0_l], %[HWM] \n\t" + "shrl.ph %[vps0_r], %[vps0_r], 8 \n\t" + + "and %[vqs1_l], %[vqs1_l], %[HWM] \n\t" + "shrl.ph %[vqs1_r], %[vqs1_r], 8 \n\t" + + "and %[vps1_l], %[vps1_l], %[HWM] \n\t" + "shrl.ph %[vps1_r], %[vps1_r], 8 \n\t" + + "and %[vqs2_l], %[vqs2_l], %[HWM] \n\t" + "shrl.ph %[vqs2_r], %[vqs2_r], 8 \n\t" + + "and %[vps2_l], %[vps2_l], %[HWM] \n\t" + "shrl.ph %[vps2_r], %[vps2_r], 8 \n\t" + + "or %[vqs0_r], %[vqs0_l], %[vqs0_r] \n\t" + "or %[vps0_r], %[vps0_l], %[vps0_r] \n\t" + "or %[vqs1_r], %[vqs1_l], %[vqs1_r] \n\t" + "or %[vps1_r], %[vps1_l], %[vps1_r] \n\t" + "or %[vqs2_r], %[vqs2_l], %[vqs2_r] \n\t" + "or %[vps2_r], %[vps2_l], %[vps2_r] \n\t" + + : [vps1_l] "+r" (vps1_l), [vps1_r] "+r" (vps1_r), [vqs1_l] "+r" (vqs1_l), + [vqs1_r] "+r" (vqs1_r), [vps0_l] "+r" (vps0_l), [vps0_r] "+r" (vps0_r), + [vqs0_l] "+r" (vqs0_l), [vqs0_r] "+r" (vqs0_r), [vqs2_l] "+r" (vqs2_l), + [vqs2_r] "+r" (vqs2_r), [vps2_r] "+r" (vps2_r), [vps2_l] "+r" (vps2_l) + : [HWM] "r" (HWM) + ); + + *ps0 = vps0_r ^ N128; + *ps1 = vps1_r ^ N128; + *ps2 = vps2_r ^ N128; + *qs0 = vqs0_r ^ N128; + *qs1 = vqs1_r ^ N128; + *qs2 = vqs2_r ^ N128; +} + +void vp8_mbloop_filter_horizontal_edge_mips +( + unsigned 
char *s, + int p, + unsigned int flimit, + unsigned int limit, + unsigned int thresh, + int count +) +{ + int i; + uint32_t mask, hev; + uint32_t pm1, p0, p1, p2, p3, p4, p5, p6; + unsigned char *sm1, *s0, *s1, *s2, *s3, *s4, *s5, *s6; + + mask = 0; + hev = 0; + i = 0; + p1 = 0; + p2 = 0; + p3 = 0; + p4 = 0; + + /* loop filter designed to work using chars so that we can make maximum use + * of 8 bit simd instructions. + */ + + sm1 = s - (p << 2); + s0 = s - p - p - p; + s1 = s - p - p; + s2 = s - p; + s3 = s; + s4 = s + p; + s5 = s + p + p; + s6 = s + p + p + p; + + /* prefetch data for load */ + prefetch_load_lf(s + p); + + /* apply filter on 4 pixesl at the same time */ + do + { + /* load quad-byte vectors + * memory is 4 byte aligned + */ + p1 = *((uint32_t *)(s1)); + p2 = *((uint32_t *)(s2)); + p3 = *((uint32_t *)(s3)); + p4 = *((uint32_t *)(s4)); + + /* if (p1 - p4 == 0) and (p2 - p3 == 0) + * mask will be zero and filtering is not needed + */ + if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) + { + + pm1 = *((uint32_t *)(sm1)); + p0 = *((uint32_t *)(s0)); + p5 = *((uint32_t *)(s5)); + p6 = *((uint32_t *)(s6)); + + vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, + thresh, &hev, &mask); + + /* if mask == 0 do filtering is not needed */ + if (mask) + { + /* filtering */ + vp8_mbfilter_mips(mask, hev, &p0, &p1, &p2, &p3, &p4, &p5); + + /* unpack processed 4x4 neighborhood + * memory is 4 byte aligned + */ + *((uint32_t *)s0) = p0; + *((uint32_t *)s1) = p1; + *((uint32_t *)s2) = p2; + *((uint32_t *)s3) = p3; + *((uint32_t *)s4) = p4; + *((uint32_t *)s5) = p5; + } + } + + sm1 += 4; + s0 += 4; + s1 += 4; + s2 += 4; + s3 += 4; + s4 += 4; + s5 += 4; + s6 += 4; + + /* load quad-byte vectors + * memory is 4 byte aligned + */ + p1 = *((uint32_t *)(s1)); + p2 = *((uint32_t *)(s2)); + p3 = *((uint32_t *)(s3)); + p4 = *((uint32_t *)(s4)); + + /* if (p1 - p4 == 0) and (p2 - p3 == 0) + * mask will be zero and filtering is not needed + */ + if (!(((p1 - 
p4) == 0) && ((p2 - p3) == 0))) + { + + pm1 = *((uint32_t *)(sm1)); + p0 = *((uint32_t *)(s0)); + p5 = *((uint32_t *)(s5)); + p6 = *((uint32_t *)(s6)); + + vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, + thresh, &hev, &mask); + + /* if mask == 0 do filtering is not needed */ + if (mask) + { + /* filtering */ + vp8_mbfilter_mips(mask, hev, &p0, &p1, &p2, &p3, &p4, &p5); + + /* unpack processed 4x4 neighborhood + * memory is 4 byte aligned + */ + *((uint32_t *)s0) = p0; + *((uint32_t *)s1) = p1; + *((uint32_t *)s2) = p2; + *((uint32_t *)s3) = p3; + *((uint32_t *)s4) = p4; + *((uint32_t *)s5) = p5; + } + } + + sm1 += 4; + s0 += 4; + s1 += 4; + s2 += 4; + s3 += 4; + s4 += 4; + s5 += 4; + s6 += 4; + + i += 8; + } + + while (i < count); +} + +void vp8_mbloop_filter_uvhorizontal_edge_mips +( + unsigned char *s, + int p, + unsigned int flimit, + unsigned int limit, + unsigned int thresh, + int count +) +{ + uint32_t mask, hev; + uint32_t pm1, p0, p1, p2, p3, p4, p5, p6; + unsigned char *sm1, *s0, *s1, *s2, *s3, *s4, *s5, *s6; + + mask = 0; + hev = 0; + p1 = 0; + p2 = 0; + p3 = 0; + p4 = 0; + + /* loop filter designed to work using chars so that we can make maximum use + * of 8 bit simd instructions. 
+ */ + + sm1 = s - (p << 2); + s0 = s - p - p - p; + s1 = s - p - p; + s2 = s - p; + s3 = s; + s4 = s + p; + s5 = s + p + p; + s6 = s + p + p + p; + + /* load quad-byte vectors + * memory is 4 byte aligned + */ + p1 = *((uint32_t *)(s1)); + p2 = *((uint32_t *)(s2)); + p3 = *((uint32_t *)(s3)); + p4 = *((uint32_t *)(s4)); + + /* if (p1 - p4 == 0) and (p2 - p3 == 0) + * mask will be zero and filtering is not needed + */ + if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) + { + + pm1 = *((uint32_t *)(sm1)); + p0 = *((uint32_t *)(s0)); + p5 = *((uint32_t *)(s5)); + p6 = *((uint32_t *)(s6)); + + /* if mask == 0 do filtering is not needed */ + vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, + thresh, &hev, &mask); + + if (mask) + { + /* filtering */ + vp8_mbfilter_mips(mask, hev, &p0, &p1, &p2, &p3, &p4, &p5); + + /* unpack processed 4x4 neighborhood + * memory is 4 byte aligned + */ + *((uint32_t *)s0) = p0; + *((uint32_t *)s1) = p1; + *((uint32_t *)s2) = p2; + *((uint32_t *)s3) = p3; + *((uint32_t *)s4) = p4; + *((uint32_t *)s5) = p5; + } + } + + sm1 += 4; + s0 += 4; + s1 += 4; + s2 += 4; + s3 += 4; + s4 += 4; + s5 += 4; + s6 += 4; + + /* load quad-byte vectors + * memory is 4 byte aligned + */ + p1 = *((uint32_t *)(s1)); + p2 = *((uint32_t *)(s2)); + p3 = *((uint32_t *)(s3)); + p4 = *((uint32_t *)(s4)); + + /* if (p1 - p4 == 0) and (p2 - p3 == 0) + * mask will be zero and filtering is not needed + */ + if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) + { + + pm1 = *((uint32_t *)(sm1)); + p0 = *((uint32_t *)(s0)); + p5 = *((uint32_t *)(s5)); + p6 = *((uint32_t *)(s6)); + + vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, + thresh, &hev, &mask); + + /* if mask == 0 do filtering is not needed */ + if (mask) + { + /* filtering */ + vp8_mbfilter_mips(mask, hev, &p0, &p1, &p2, &p3, &p4, &p5); + + /* unpack processed 4x4 neighborhood + * memory is 4 byte aligned + */ + *((uint32_t *)s0) = p0; + *((uint32_t *)s1) = p1; + *((uint32_t 
*)s2) = p2; + *((uint32_t *)s3) = p3; + *((uint32_t *)s4) = p4; + *((uint32_t *)s5) = p5; + } + } +} + + +void vp8_mbloop_filter_vertical_edge_mips +( + unsigned char *s, + int p, + unsigned int flimit, + unsigned int limit, + unsigned int thresh, + int count +) +{ + + int i; + uint32_t mask, hev; + uint32_t pm1, p0, p1, p2, p3, p4, p5, p6; + unsigned char *s1, *s2, *s3, *s4; + uint32_t prim1, prim2, sec3, sec4, prim3, prim4; + + mask = 0; + hev = 0; + i = 0; + pm1 = 0; + p0 = 0; + p1 = 0; + p2 = 0; + p3 = 0; + p4 = 0; + p5 = 0; + p6 = 0; + + /* loop filter designed to work using chars so that we can make maximum use + * of 8 bit simd instructions. + */ + + /* apply filter on 4 pixesl at the same time */ + do + { + s1 = s; + s2 = s + p; + s3 = s2 + p; + s4 = s3 + p; + s = s4 + p; + + /* load quad-byte vectors + * memory is 4 byte aligned + */ + p2 = *((uint32_t *)(s1 - 4)); + p6 = *((uint32_t *)(s1)); + p1 = *((uint32_t *)(s2 - 4)); + p5 = *((uint32_t *)(s2)); + p0 = *((uint32_t *)(s3 - 4)); + p4 = *((uint32_t *)(s3)); + pm1 = *((uint32_t *)(s4 - 4)); + p3 = *((uint32_t *)(s4)); + + /* transpose pm1, p0, p1, p2 */ + __asm__ __volatile__ ( + "precrq.qb.ph %[prim1], %[p2], %[p1] \n\t" + "precr.qb.ph %[prim2], %[p2], %[p1] \n\t" + "precrq.qb.ph %[prim3], %[p0], %[pm1] \n\t" + "precr.qb.ph %[prim4], %[p0], %[pm1] \n\t" + + "precrq.qb.ph %[p1], %[prim1], %[prim2] \n\t" + "precr.qb.ph %[pm1], %[prim1], %[prim2] \n\t" + "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" + "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" + + "precrq.ph.w %[p2], %[p1], %[sec3] \n\t" + "precrq.ph.w %[p0], %[pm1], %[sec4] \n\t" + "append %[p1], %[sec3], 16 \n\t" + "append %[pm1], %[sec4], 16 \n\t" + + : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2), + [prim3] "=&r" (prim3), [prim4] "=&r" (prim4), + [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0), [pm1] "+r" (pm1), + [sec3] "=&r" (sec3), [sec4] "=&r" (sec4) + : + ); + + /* transpose p3, p4, p5, p6 */ + __asm__ __volatile__ ( + "precrq.qb.ph %[prim1], 
%[p6], %[p5] \n\t" + "precr.qb.ph %[prim2], %[p6], %[p5] \n\t" + "precrq.qb.ph %[prim3], %[p4], %[p3] \n\t" + "precr.qb.ph %[prim4], %[p4], %[p3] \n\t" + + "precrq.qb.ph %[p5], %[prim1], %[prim2] \n\t" + "precr.qb.ph %[p3], %[prim1], %[prim2] \n\t" + "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" + "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" + + "precrq.ph.w %[p6], %[p5], %[sec3] \n\t" + "precrq.ph.w %[p4], %[p3], %[sec4] \n\t" + "append %[p5], %[sec3], 16 \n\t" + "append %[p3], %[sec4], 16 \n\t" + + : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2), + [prim3] "=&r" (prim3), [prim4] "=&r" (prim4), + [p6] "+r" (p6), [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3), + [sec3] "=&r" (sec3), [sec4] "=&r" (sec4) + : + ); + + /* if (p1 - p4 == 0) and (p2 - p3 == 0) + * mask will be zero and filtering is not needed + */ + if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) + { + + vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, + thresh, &hev, &mask); + + /* if mask == 0 do filtering is not needed */ + if (mask) + { + /* filtering */ + vp8_mbfilter_mips(mask, hev, &p0, &p1, &p2, &p3, &p4, &p5); + + /* don't use transpose on output data + * because memory isn't aligned + */ + __asm__ __volatile__ ( + "sb %[p5], 2(%[s4]) \n\t" + "sb %[p4], 1(%[s4]) \n\t" + "sb %[p3], 0(%[s4]) \n\t" + "sb %[p2], -1(%[s4]) \n\t" + "sb %[p1], -2(%[s4]) \n\t" + "sb %[p0], -3(%[s4]) \n\t" + : + : [p5] "r" (p5), [p4] "r" (p4), [p3] "r" (p3), [s4] "r" (s4), + [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0) + ); + + __asm__ __volatile__ ( + "srl %[p5], %[p5], 8 \n\t" + "srl %[p4], %[p4], 8 \n\t" + "srl %[p3], %[p3], 8 \n\t" + "srl %[p2], %[p2], 8 \n\t" + "srl %[p1], %[p1], 8 \n\t" + "srl %[p0], %[p0], 8 \n\t" + : [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3), + [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0) + : + ); + + __asm__ __volatile__ ( + "sb %[p5], 2(%[s3]) \n\t" + "sb %[p4], 1(%[s3]) \n\t" + "sb %[p3], 0(%[s3]) \n\t" + "sb %[p2], -1(%[s3]) \n\t" + "sb %[p1], -2(%[s3]) \n\t" + 
"sb %[p0], -3(%[s3]) \n\t" + : + : [p5] "r" (p5), [p4] "r" (p4), [p3] "r" (p3), [s3] "r" (s3), + [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0) + ); + + __asm__ __volatile__ ( + "srl %[p5], %[p5], 8 \n\t" + "srl %[p4], %[p4], 8 \n\t" + "srl %[p3], %[p3], 8 \n\t" + "srl %[p2], %[p2], 8 \n\t" + "srl %[p1], %[p1], 8 \n\t" + "srl %[p0], %[p0], 8 \n\t" + : [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3), + [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0) + : + ); + + __asm__ __volatile__ ( + "sb %[p5], 2(%[s2]) \n\t" + "sb %[p4], 1(%[s2]) \n\t" + "sb %[p3], 0(%[s2]) \n\t" + "sb %[p2], -1(%[s2]) \n\t" + "sb %[p1], -2(%[s2]) \n\t" + "sb %[p0], -3(%[s2]) \n\t" + : + : [p5] "r" (p5), [p4] "r" (p4), [p3] "r" (p3), [s2] "r" (s2), + [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0) + ); + + __asm__ __volatile__ ( + "srl %[p5], %[p5], 8 \n\t" + "srl %[p4], %[p4], 8 \n\t" + "srl %[p3], %[p3], 8 \n\t" + "srl %[p2], %[p2], 8 \n\t" + "srl %[p1], %[p1], 8 \n\t" + "srl %[p0], %[p0], 8 \n\t" + : [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3), + [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0) + : + ); + + __asm__ __volatile__ ( + "sb %[p5], 2(%[s1]) \n\t" + "sb %[p4], 1(%[s1]) \n\t" + "sb %[p3], 0(%[s1]) \n\t" + "sb %[p2], -1(%[s1]) \n\t" + "sb %[p1], -2(%[s1]) \n\t" + "sb %[p0], -3(%[s1]) \n\t" + : + : [p5] "r" (p5), [p4] "r" (p4), [p3] "r" (p3), [s1] "r" (s1), + [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0) + ); + } + } + + i += 4; + } + + while (i < count); +} + +void vp8_mbloop_filter_uvvertical_edge_mips +( + unsigned char *s, + int p, + unsigned int flimit, + unsigned int limit, + unsigned int thresh, + int count +) +{ + uint32_t mask, hev; + uint32_t pm1, p0, p1, p2, p3, p4, p5, p6; + unsigned char *s1, *s2, *s3, *s4; + uint32_t prim1, prim2, sec3, sec4, prim3, prim4; + + mask = 0; + hev = 0; + pm1 = 0; + p0 = 0; + p1 = 0; + p2 = 0; + p3 = 0; + p4 = 0; + p5 = 0; + p6 = 0; + + /* loop filter designed to work using chars so that we can make maximum use + * of 8 bit simd instructions. 
+ */ + + /* apply filter on 4 pixesl at the same time */ + + s1 = s; + s2 = s + p; + s3 = s2 + p; + s4 = s3 + p; + + /* prefetch data for load */ + prefetch_load_lf(s + 2 * p); + + /* load quad-byte vectors + * memory is 4 byte aligned + */ + p2 = *((uint32_t *)(s1 - 4)); + p6 = *((uint32_t *)(s1)); + p1 = *((uint32_t *)(s2 - 4)); + p5 = *((uint32_t *)(s2)); + p0 = *((uint32_t *)(s3 - 4)); + p4 = *((uint32_t *)(s3)); + pm1 = *((uint32_t *)(s4 - 4)); + p3 = *((uint32_t *)(s4)); + + /* transpose pm1, p0, p1, p2 */ + __asm__ __volatile__ ( + "precrq.qb.ph %[prim1], %[p2], %[p1] \n\t" + "precr.qb.ph %[prim2], %[p2], %[p1] \n\t" + "precrq.qb.ph %[prim3], %[p0], %[pm1] \n\t" + "precr.qb.ph %[prim4], %[p0], %[pm1] \n\t" + + "precrq.qb.ph %[p1], %[prim1], %[prim2] \n\t" + "precr.qb.ph %[pm1], %[prim1], %[prim2] \n\t" + "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" + "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" + + "precrq.ph.w %[p2], %[p1], %[sec3] \n\t" + "precrq.ph.w %[p0], %[pm1], %[sec4] \n\t" + "append %[p1], %[sec3], 16 \n\t" + "append %[pm1], %[sec4], 16 \n\t" + + : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2), + [prim3] "=&r" (prim3), [prim4] "=&r" (prim4), + [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0), [pm1] "+r" (pm1), + [sec3] "=&r" (sec3), [sec4] "=&r" (sec4) + : + ); + + /* transpose p3, p4, p5, p6 */ + __asm__ __volatile__ ( + "precrq.qb.ph %[prim1], %[p6], %[p5] \n\t" + "precr.qb.ph %[prim2], %[p6], %[p5] \n\t" + "precrq.qb.ph %[prim3], %[p4], %[p3] \n\t" + "precr.qb.ph %[prim4], %[p4], %[p3] \n\t" + + "precrq.qb.ph %[p5], %[prim1], %[prim2] \n\t" + "precr.qb.ph %[p3], %[prim1], %[prim2] \n\t" + "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" + "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" + + "precrq.ph.w %[p6], %[p5], %[sec3] \n\t" + "precrq.ph.w %[p4], %[p3], %[sec4] \n\t" + "append %[p5], %[sec3], 16 \n\t" + "append %[p3], %[sec4], 16 \n\t" + + : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2), + [prim3] "=&r" (prim3), [prim4] "=&r" (prim4), + [p6] 
"+r" (p6), [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3), + [sec3] "=&r" (sec3), [sec4] "=&r" (sec4) + : + ); + + /* if (p1 - p4 == 0) and (p2 - p3 == 0) + * mask will be zero and filtering is not needed + */ + if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) + { + + vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, + thresh, &hev, &mask); + + /* if mask == 0 do filtering is not needed */ + if (mask) + { + /* filtering */ + vp8_mbfilter_mips(mask, hev, &p0, &p1, &p2, &p3, &p4, &p5); + + /* don't use transpose on output data + * because memory isn't aligned + */ + __asm__ __volatile__ ( + "sb %[p5], 2(%[s4]) \n\t" + "sb %[p4], 1(%[s4]) \n\t" + "sb %[p3], 0(%[s4]) \n\t" + "sb %[p2], -1(%[s4]) \n\t" + "sb %[p1], -2(%[s4]) \n\t" + "sb %[p0], -3(%[s4]) \n\t" + : + : [p5] "r" (p5), [p4] "r" (p4), [p3] "r" (p3), [s4] "r" (s4), + [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0) + ); + + __asm__ __volatile__ ( + "srl %[p5], %[p5], 8 \n\t" + "srl %[p4], %[p4], 8 \n\t" + "srl %[p3], %[p3], 8 \n\t" + "srl %[p2], %[p2], 8 \n\t" + "srl %[p1], %[p1], 8 \n\t" + "srl %[p0], %[p0], 8 \n\t" + : [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3), + [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0) + : + ); + + __asm__ __volatile__ ( + "sb %[p5], 2(%[s3]) \n\t" + "sb %[p4], 1(%[s3]) \n\t" + "sb %[p3], 0(%[s3]) \n\t" + "sb %[p2], -1(%[s3]) \n\t" + "sb %[p1], -2(%[s3]) \n\t" + "sb %[p0], -3(%[s3]) \n\t" + : + : [p5] "r" (p5), [p4] "r" (p4), [p3] "r" (p3), [s3] "r" (s3), + [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0) + ); + + __asm__ __volatile__ ( + "srl %[p5], %[p5], 8 \n\t" + "srl %[p4], %[p4], 8 \n\t" + "srl %[p3], %[p3], 8 \n\t" + "srl %[p2], %[p2], 8 \n\t" + "srl %[p1], %[p1], 8 \n\t" + "srl %[p0], %[p0], 8 \n\t" + : [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3), + [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0) + : + ); + + __asm__ __volatile__ ( + "sb %[p5], 2(%[s2]) \n\t" + "sb %[p4], 1(%[s2]) \n\t" + "sb %[p3], 0(%[s2]) \n\t" + "sb %[p2], -1(%[s2]) \n\t" + "sb %[p1], 
-2(%[s2]) \n\t" + "sb %[p0], -3(%[s2]) \n\t" + : + : [p5] "r" (p5), [p4] "r" (p4), [p3] "r" (p3), [s2] "r" (s2), + [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0) + ); + + __asm__ __volatile__ ( + "srl %[p5], %[p5], 8 \n\t" + "srl %[p4], %[p4], 8 \n\t" + "srl %[p3], %[p3], 8 \n\t" + "srl %[p2], %[p2], 8 \n\t" + "srl %[p1], %[p1], 8 \n\t" + "srl %[p0], %[p0], 8 \n\t" + : [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3), + [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0) + : + ); + + __asm__ __volatile__ ( + "sb %[p5], 2(%[s1]) \n\t" + "sb %[p4], 1(%[s1]) \n\t" + "sb %[p3], 0(%[s1]) \n\t" + "sb %[p2], -1(%[s1]) \n\t" + "sb %[p1], -2(%[s1]) \n\t" + "sb %[p0], -3(%[s1]) \n\t" + : + : [p5] "r" (p5), [p4] "r" (p4), [p3] "r" (p3), [s1] "r" (s1), + [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0) + ); + } + } + + s1 = s4 + p; + s2 = s1 + p; + s3 = s2 + p; + s4 = s3 + p; + + /* load quad-byte vectors + * memory is 4 byte aligned + */ + p2 = *((uint32_t *)(s1 - 4)); + p6 = *((uint32_t *)(s1)); + p1 = *((uint32_t *)(s2 - 4)); + p5 = *((uint32_t *)(s2)); + p0 = *((uint32_t *)(s3 - 4)); + p4 = *((uint32_t *)(s3)); + pm1 = *((uint32_t *)(s4 - 4)); + p3 = *((uint32_t *)(s4)); + + /* transpose pm1, p0, p1, p2 */ + __asm__ __volatile__ ( + "precrq.qb.ph %[prim1], %[p2], %[p1] \n\t" + "precr.qb.ph %[prim2], %[p2], %[p1] \n\t" + "precrq.qb.ph %[prim3], %[p0], %[pm1] \n\t" + "precr.qb.ph %[prim4], %[p0], %[pm1] \n\t" + + "precrq.qb.ph %[p1], %[prim1], %[prim2] \n\t" + "precr.qb.ph %[pm1], %[prim1], %[prim2] \n\t" + "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" + "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" + + "precrq.ph.w %[p2], %[p1], %[sec3] \n\t" + "precrq.ph.w %[p0], %[pm1], %[sec4] \n\t" + "append %[p1], %[sec3], 16 \n\t" + "append %[pm1], %[sec4], 16 \n\t" + + : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2), + [prim3] "=&r" (prim3), [prim4] "=&r" (prim4), + [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0), [pm1] "+r" (pm1), + [sec3] "=&r" (sec3), [sec4] "=&r" (sec4) + : + ); + + /* 
transpose p3, p4, p5, p6 */ + __asm__ __volatile__ ( + "precrq.qb.ph %[prim1], %[p6], %[p5] \n\t" + "precr.qb.ph %[prim2], %[p6], %[p5] \n\t" + "precrq.qb.ph %[prim3], %[p4], %[p3] \n\t" + "precr.qb.ph %[prim4], %[p4], %[p3] \n\t" + + "precrq.qb.ph %[p5], %[prim1], %[prim2] \n\t" + "precr.qb.ph %[p3], %[prim1], %[prim2] \n\t" + "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" + "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" + + "precrq.ph.w %[p6], %[p5], %[sec3] \n\t" + "precrq.ph.w %[p4], %[p3], %[sec4] \n\t" + "append %[p5], %[sec3], 16 \n\t" + "append %[p3], %[sec4], 16 \n\t" + + : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2), + [prim3] "=&r" (prim3), [prim4] "=&r" (prim4), + [p6] "+r" (p6), [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3), + [sec3] "=&r" (sec3), [sec4] "=&r" (sec4) + : + ); + + /* if (p1 - p4 == 0) and (p2 - p3 == 0) + * mask will be zero and filtering is not needed + */ + if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) + { + + vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, thresh, &hev, &mask); + + /* if mask == 0 do filtering is not needed */ + if (mask) + { + /* filtering */ + vp8_mbfilter_mips(mask, hev, &p0, &p1, &p2, &p3, &p4, &p5); + + /* don't use transpose on output data + * because memory isn't aligned + */ + __asm__ __volatile__ ( + "sb %[p5], 2(%[s4]) \n\t" + "sb %[p4], 1(%[s4]) \n\t" + "sb %[p3], 0(%[s4]) \n\t" + "sb %[p2], -1(%[s4]) \n\t" + "sb %[p1], -2(%[s4]) \n\t" + "sb %[p0], -3(%[s4]) \n\t" + : + : [p5] "r" (p5), [p4] "r" (p4), [p3] "r" (p3), [s4] "r" (s4), + [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0) + ); + + __asm__ __volatile__ ( + "srl %[p5], %[p5], 8 \n\t" + "srl %[p4], %[p4], 8 \n\t" + "srl %[p3], %[p3], 8 \n\t" + "srl %[p2], %[p2], 8 \n\t" + "srl %[p1], %[p1], 8 \n\t" + "srl %[p0], %[p0], 8 \n\t" + : [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3), + [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0) + : + ); + + __asm__ __volatile__ ( + "sb %[p5], 2(%[s3]) \n\t" + "sb %[p4], 1(%[s3]) \n\t" + "sb %[p3], 
0(%[s3]) \n\t" + "sb %[p2], -1(%[s3]) \n\t" + "sb %[p1], -2(%[s3]) \n\t" + "sb %[p0], -3(%[s3]) \n\t" + : + : [p5] "r" (p5), [p4] "r" (p4), [p3] "r" (p3), [s3] "r" (s3), + [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0) + ); + + __asm__ __volatile__ ( + "srl %[p5], %[p5], 8 \n\t" + "srl %[p4], %[p4], 8 \n\t" + "srl %[p3], %[p3], 8 \n\t" + "srl %[p2], %[p2], 8 \n\t" + "srl %[p1], %[p1], 8 \n\t" + "srl %[p0], %[p0], 8 \n\t" + : [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3), + [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0) + : + ); + + __asm__ __volatile__ ( + "sb %[p5], 2(%[s2]) \n\t" + "sb %[p4], 1(%[s2]) \n\t" + "sb %[p3], 0(%[s2]) \n\t" + "sb %[p2], -1(%[s2]) \n\t" + "sb %[p1], -2(%[s2]) \n\t" + "sb %[p0], -3(%[s2]) \n\t" + : + : [p5] "r" (p5), [p4] "r" (p4), [p3] "r" (p3), [s2] "r" (s2), + [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0) + ); + + __asm__ __volatile__ ( + "srl %[p5], %[p5], 8 \n\t" + "srl %[p4], %[p4], 8 \n\t" + "srl %[p3], %[p3], 8 \n\t" + "srl %[p2], %[p2], 8 \n\t" + "srl %[p1], %[p1], 8 \n\t" + "srl %[p0], %[p0], 8 \n\t" + : [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3), + [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0) + : + ); + + __asm__ __volatile__ ( + "sb %[p5], 2(%[s1]) \n\t" + "sb %[p4], 1(%[s1]) \n\t" + "sb %[p3], 0(%[s1]) \n\t" + "sb %[p2], -1(%[s1]) \n\t" + "sb %[p1], -2(%[s1]) \n\t" + "sb %[p0], -3(%[s1]) \n\t" + : + : [p5] "r" (p5), [p4] "r" (p4), [p3] "r" (p3), [s1] "r" (s1), + [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0) + ); + } + } +} + +/* Horizontal MB filtering */ +void vp8_loop_filter_mbh_dspr2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, + int y_stride, int uv_stride, loop_filter_info *lfi) +{ + unsigned int thresh_vec, flimit_vec, limit_vec; + unsigned char thresh, flimit, limit, flimit_temp; + + /* use direct value instead pointers */ + limit = *(lfi->lim); + flimit_temp = *(lfi->mblim); + thresh = *(lfi->hev_thr); + flimit = flimit_temp; + + /* create quad-byte */ + __asm__ __volatile__ ( + "replv.qb 
%[thresh_vec], %[thresh] \n\t" + "replv.qb %[flimit_vec], %[flimit] \n\t" + "replv.qb %[limit_vec], %[limit] \n\t" + : [thresh_vec] "=&r" (thresh_vec), [flimit_vec] "=&r" (flimit_vec), [limit_vec] "=r" (limit_vec) + : [thresh] "r" (thresh), [flimit] "r" (flimit), [limit] "r" (limit) + ); + + vp8_mbloop_filter_horizontal_edge_mips(y_ptr, y_stride, flimit_vec, limit_vec, thresh_vec, 16); + + if (u_ptr) + { + vp8_mbloop_filter_uvhorizontal_edge_mips(u_ptr, uv_stride, flimit_vec, limit_vec, thresh_vec, 0); + } + + if (v_ptr) + { + vp8_mbloop_filter_uvhorizontal_edge_mips(v_ptr, uv_stride, flimit_vec, limit_vec, thresh_vec, 0); + } +} + + +/* Vertical MB Filtering */ +void vp8_loop_filter_mbv_dspr2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, + int y_stride, int uv_stride, loop_filter_info *lfi) +{ + unsigned int thresh_vec, flimit_vec, limit_vec; + unsigned char thresh, flimit, limit, flimit_temp; + + /* use direct value instead pointers */ + limit = *(lfi->lim); + flimit_temp = *(lfi->mblim); + thresh = *(lfi->hev_thr); + flimit = flimit_temp; + + /* create quad-byte */ + __asm__ __volatile__ ( + "replv.qb %[thresh_vec], %[thresh] \n\t" + "replv.qb %[flimit_vec], %[flimit] \n\t" + "replv.qb %[limit_vec], %[limit] \n\t" + : [thresh_vec] "=&r" (thresh_vec), [flimit_vec] "=&r" (flimit_vec), [limit_vec] "=r" (limit_vec) + : [thresh] "r" (thresh), [flimit] "r" (flimit), [limit] "r" (limit) + ); + + vp8_mbloop_filter_vertical_edge_mips(y_ptr, y_stride, flimit_vec, limit_vec, thresh_vec, 16); + + if (u_ptr) + vp8_mbloop_filter_uvvertical_edge_mips(u_ptr, uv_stride, flimit_vec, limit_vec, thresh_vec, 0); + + if (v_ptr) + vp8_mbloop_filter_uvvertical_edge_mips(v_ptr, uv_stride, flimit_vec, limit_vec, thresh_vec, 0); +} + + +/* Horizontal B Filtering */ +void vp8_loop_filter_bh_dspr2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, + int y_stride, int uv_stride, loop_filter_info *lfi) +{ + unsigned int thresh_vec, flimit_vec, limit_vec; + 
unsigned char thresh, flimit, limit, flimit_temp; + + /* use direct value instead pointers */ + limit = *(lfi->lim); + flimit_temp = *(lfi->blim); + thresh = *(lfi->hev_thr); + flimit = flimit_temp; + + /* create quad-byte */ + __asm__ __volatile__ ( + "replv.qb %[thresh_vec], %[thresh] \n\t" + "replv.qb %[flimit_vec], %[flimit] \n\t" + "replv.qb %[limit_vec], %[limit] \n\t" + : [thresh_vec] "=&r" (thresh_vec), [flimit_vec] "=&r" (flimit_vec), [limit_vec] "=r" (limit_vec) + : [thresh] "r" (thresh), [flimit] "r" (flimit), [limit] "r" (limit) + ); + + vp8_loop_filter_horizontal_edge_mips(y_ptr + 4 * y_stride, y_stride, flimit_vec, limit_vec, thresh_vec, 16); + vp8_loop_filter_horizontal_edge_mips(y_ptr + 8 * y_stride, y_stride, flimit_vec, limit_vec, thresh_vec, 16); + vp8_loop_filter_horizontal_edge_mips(y_ptr + 12 * y_stride, y_stride, flimit_vec, limit_vec, thresh_vec, 16); + + if (u_ptr) + vp8_loop_filter_uvhorizontal_edge_mips(u_ptr + 4 * uv_stride, uv_stride, flimit_vec, limit_vec, thresh_vec, 0); + + if (v_ptr) + vp8_loop_filter_uvhorizontal_edge_mips(v_ptr + 4 * uv_stride, uv_stride, flimit_vec, limit_vec, thresh_vec, 0); +} + + +/* Vertical B Filtering */ +void vp8_loop_filter_bv_dspr2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, + int y_stride, int uv_stride, loop_filter_info *lfi) +{ + unsigned int thresh_vec, flimit_vec, limit_vec; + unsigned char thresh, flimit, limit, flimit_temp; + + /* use direct value instead pointers */ + limit = *(lfi->lim); + flimit_temp = *(lfi->blim); + thresh = *(lfi->hev_thr); + flimit = flimit_temp; + + /* create quad-byte */ + __asm__ __volatile__ ( + "replv.qb %[thresh_vec], %[thresh] \n\t" + "replv.qb %[flimit_vec], %[flimit] \n\t" + "replv.qb %[limit_vec], %[limit] \n\t" + : [thresh_vec] "=&r" (thresh_vec), [flimit_vec] "=&r" (flimit_vec), [limit_vec] "=r" (limit_vec) + : [thresh] "r" (thresh), [flimit] "r" (flimit), [limit] "r" (limit) + ); + + vp8_loop_filter_vertical_edge_mips(y_ptr + 4, y_stride, 
flimit_vec, limit_vec, thresh_vec, 16); + vp8_loop_filter_vertical_edge_mips(y_ptr + 8, y_stride, flimit_vec, limit_vec, thresh_vec, 16); + vp8_loop_filter_vertical_edge_mips(y_ptr + 12, y_stride, flimit_vec, limit_vec, thresh_vec, 16); + + if (u_ptr) + vp8_loop_filter_uvvertical_edge_mips(u_ptr + 4, uv_stride, flimit_vec, limit_vec, thresh_vec, 0); + + if (v_ptr) + vp8_loop_filter_uvvertical_edge_mips(v_ptr + 4, uv_stride, flimit_vec, limit_vec, thresh_vec, 0); +} + +#endif diff --git a/vp8/common/mips/dspr2/reconinter_dspr2.c b/vp8/common/mips/dspr2/reconinter_dspr2.c new file mode 100644 index 0000000..a5239a3 --- /dev/null +++ b/vp8/common/mips/dspr2/reconinter_dspr2.c @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + + +#include "vpx_config.h" +#include "vpx_rtcd.h" +#include "vpx/vpx_integer.h" + +#if HAVE_DSPR2 +inline void prefetch_load_int(unsigned char *src) +{ + __asm__ __volatile__ ( + "pref 0, 0(%[src]) \n\t" + : + : [src] "r" (src) + ); +} + + +__inline void vp8_copy_mem16x16_dspr2( + unsigned char *RESTRICT src, + int src_stride, + unsigned char *RESTRICT dst, + int dst_stride) +{ + int r; + unsigned int a0, a1, a2, a3; + + for (r = 16; r--;) + { + /* load src data in cache memory */ + prefetch_load_int(src + src_stride); + + /* use unaligned memory load and store */ + __asm__ __volatile__ ( + "ulw %[a0], 0(%[src]) \n\t" + "ulw %[a1], 4(%[src]) \n\t" + "ulw %[a2], 8(%[src]) \n\t" + "ulw %[a3], 12(%[src]) \n\t" + "sw %[a0], 0(%[dst]) \n\t" + "sw %[a1], 4(%[dst]) \n\t" + "sw %[a2], 8(%[dst]) \n\t" + "sw %[a3], 12(%[dst]) \n\t" + : [a0] "=&r" (a0), [a1] "=&r" (a1), + [a2] "=&r" (a2), [a3] "=&r" (a3) + : [src] "r" (src), [dst] "r" (dst) + ); + + src += src_stride; + dst += dst_stride; + } +} + + +__inline void vp8_copy_mem8x8_dspr2( + unsigned char *RESTRICT src, + int src_stride, + unsigned char *RESTRICT dst, + int dst_stride) +{ + int r; + unsigned int a0, a1; + + /* load src data in cache memory */ + prefetch_load_int(src + src_stride); + + for (r = 8; r--;) + { + /* use unaligned memory load and store */ + __asm__ __volatile__ ( + "ulw %[a0], 0(%[src]) \n\t" + "ulw %[a1], 4(%[src]) \n\t" + "sw %[a0], 0(%[dst]) \n\t" + "sw %[a1], 4(%[dst]) \n\t" + : [a0] "=&r" (a0), [a1] "=&r" (a1) + : [src] "r" (src), [dst] "r" (dst) + ); + + src += src_stride; + dst += dst_stride; + } +} + + +__inline void vp8_copy_mem8x4_dspr2( + unsigned char *RESTRICT src, + int src_stride, + unsigned char *RESTRICT dst, + int dst_stride) +{ + int r; + unsigned int a0, a1; + + /* load src data in cache memory */ + prefetch_load_int(src + src_stride); + + for (r = 4; r--;) + { + /* use unaligned memory load and store */ + __asm__ __volatile__ ( + "ulw %[a0], 0(%[src]) \n\t" + "ulw %[a1], 
4(%[src]) \n\t" + "sw %[a0], 0(%[dst]) \n\t" + "sw %[a1], 4(%[dst]) \n\t" + : [a0] "=&r" (a0), [a1] "=&r" (a1) + : [src] "r" (src), [dst] "r" (dst) + ); + + src += src_stride; + dst += dst_stride; + } +} + +#endif diff --git a/vp8/common/onyx.h b/vp8/common/onyx.h index 2e282f6..766b4ea 100644 --- a/vp8/common/onyx.h +++ b/vp8/common/onyx.h @@ -39,14 +39,6 @@ extern "C" typedef enum { - VP8_LAST_FLAG = 1, - VP8_GOLD_FLAG = 2, - VP8_ALT_FLAG = 4 - } VP8_REFFRAME; - - - typedef enum - { USAGE_STREAM_FROM_SERVER = 0x0, USAGE_LOCAL_FILE_PLAYBACK = 0x1, USAGE_CONSTRAINED_QUALITY = 0x2 @@ -102,83 +94,101 @@ extern "C" typedef struct { - int Version; // 4 versions of bitstream defined 0 best quality/slowest decode, 3 lowest quality/fastest decode - int Width; // width of data passed to the compressor - int Height; // height of data passed to the compressor + /* 4 versions of bitstream defined: + * 0 best quality/slowest decode, 3 lowest quality/fastest decode + */ + int Version; + int Width; + int Height; struct vpx_rational timebase; - int target_bandwidth; // bandwidth to be used in kilobits per second + unsigned int target_bandwidth; /* kilobits per second */ + + /* parameter used for applying pre processing blur: recommendation 0 */ + int noise_sensitivity; - int noise_sensitivity; // parameter used for applying pre processing blur: recommendation 0 - int Sharpness; // parameter used for sharpening output: recommendation 0: + /* parameter used for sharpening output: recommendation 0: */ + int Sharpness; int cpu_used; unsigned int rc_max_intra_bitrate_pct; - // mode -> - //(0)=Realtime/Live Encoding. This mode is optimized for realtim encoding (for example, capturing - // a television signal or feed from a live camera). ( speed setting controls how fast ) - //(1)=Good Quality Fast Encoding. The encoder balances quality with the amount of time it takes to - // encode the output. ( speed setting controls how fast ) - //(2)=One Pass - Best Quality. 
The encoder places priority on the quality of the output over encoding - // speed. The output is compressed at the highest possible quality. This option takes the longest - // amount of time to encode. ( speed setting ignored ) - //(3)=Two Pass - First Pass. The encoder generates a file of statistics for use in the second encoding - // pass. ( speed setting controls how fast ) - //(4)=Two Pass - Second Pass. The encoder uses the statistics that were generated in the first encoding - // pass to create the compressed output. ( speed setting controls how fast ) - //(5)=Two Pass - Second Pass Best. The encoder uses the statistics that were generated in the first - // encoding pass to create the compressed output using the highest possible quality, and taking a - // longer amount of time to encode.. ( speed setting ignored ) - int Mode; // - - // Key Framing Operations - int auto_key; // automatically detect cut scenes and set the keyframes - int key_freq; // maximum distance to key frame. - - int allow_lag; // allow lagged compression (if 0 lagin frames is ignored) - int lag_in_frames; // how many frames lag before we start encoding - - //---------------------------------------------------------------- - // DATARATE CONTROL OPTIONS - - int end_usage; // vbr or cbr - - // buffer targeting aggressiveness + /* mode -> + *(0)=Realtime/Live Encoding. This mode is optimized for realtim + * encoding (for example, capturing a television signal or feed + * from a live camera). ( speed setting controls how fast ) + *(1)=Good Quality Fast Encoding. The encoder balances quality with + * the amount of time it takes to encode the output. ( speed + * setting controls how fast ) + *(2)=One Pass - Best Quality. The encoder places priority on the + * quality of the output over encoding speed. The output is + * compressed at the highest possible quality. This option takes + * the longest amount of time to encode. ( speed setting ignored + * ) + *(3)=Two Pass - First Pass. 
The encoder generates a file of + * statistics for use in the second encoding pass. ( speed + * setting controls how fast ) + *(4)=Two Pass - Second Pass. The encoder uses the statistics that + * were generated in the first encoding pass to create the + * compressed output. ( speed setting controls how fast ) + *(5)=Two Pass - Second Pass Best. The encoder uses the statistics + * that were generated in the first encoding pass to create the + * compressed output using the highest possible quality, and + * taking a longer amount of time to encode.. ( speed setting + * ignored ) + */ + int Mode; + + /* Key Framing Operations */ + int auto_key; /* automatically detect cut scenes */ + int key_freq; /* maximum distance to key frame. */ + + /* lagged compression (if allow_lag == 0 lag_in_frames is ignored) */ + int allow_lag; + int lag_in_frames; /* how many frames lag before we start encoding */ + + /* + * DATARATE CONTROL OPTIONS + */ + + int end_usage; /* vbr or cbr */ + + /* buffer targeting aggressiveness */ int under_shoot_pct; int over_shoot_pct; - // buffering parameters - int64_t starting_buffer_level; // in bytes + /* buffering parameters */ + int64_t starting_buffer_level; int64_t optimal_buffer_level; int64_t maximum_buffer_size; - int64_t starting_buffer_level_in_ms; // in milli-seconds + int64_t starting_buffer_level_in_ms; int64_t optimal_buffer_level_in_ms; int64_t maximum_buffer_size_in_ms; - // controlling quality + /* controlling quality */ int fixed_q; int worst_allowed_q; int best_allowed_q; int cq_level; - // allow internal resizing ( currently disabled in the build !!!!!) 
+ /* allow internal resizing */ int allow_spatial_resampling; int resample_down_water_mark; int resample_up_water_mark; - // allow internal frame rate alterations + /* allow internal frame rate alterations */ int allow_df; int drop_frames_water_mark; - // two pass datarate control - int two_pass_vbrbias; // two pass datarate control tweaks + /* two pass datarate control */ + int two_pass_vbrbias; int two_pass_vbrmin_section; int two_pass_vbrmax_section; - // END DATARATE CONTROL OPTIONS - //---------------------------------------------------------------- + /* + * END DATARATE CONTROL OPTIONS + */ - // these parameters aren't to be used in final build don't use!!! + /* these parameters aren't to be used in final build don't use!!! */ int play_alternate; int alt_freq; int alt_q; @@ -186,26 +196,28 @@ extern "C" int gold_q; - int multi_threaded; // how many threads to run the encoder on - int token_partitions; // how many token partitions to create for multi core decoding - int encode_breakout; // early breakout encode threshold : for video conf recommend 800 + int multi_threaded; /* how many threads to run the encoder on */ + int token_partitions; /* how many token partitions to create */ + + /* early breakout threshold: for video conf recommend 800 */ + int encode_breakout; - unsigned int error_resilient_mode; // Bitfield defining the error - // resiliency features to enable. Can provide - // decodable frames after losses in previous - // frames and decodable partitions after - // losses in the same frame. + /* Bitfield defining the error resiliency features to enable. + * Can provide decodable frames after losses in previous + * frames and decodable partitions after losses in the same frame. 
+ */ + unsigned int error_resilient_mode; int arnr_max_frames; - int arnr_strength ; - int arnr_type ; + int arnr_strength; + int arnr_type; - struct vpx_fixed_buf two_pass_stats_in; + struct vpx_fixed_buf two_pass_stats_in; struct vpx_codec_pkt_list *output_pkt_list; vp8e_tuning tuning; - // Temporal scaling parameters + /* Temporal scaling parameters */ unsigned int number_of_layers; unsigned int target_bitrate[VPX_TS_MAX_PERIODICITY]; unsigned int rate_decimator[VPX_TS_MAX_PERIODICITY]; @@ -236,16 +248,14 @@ extern "C" void vp8_init_config(struct VP8_COMP* onyx, VP8_CONFIG *oxcf); void vp8_change_config(struct VP8_COMP* onyx, VP8_CONFIG *oxcf); -// receive a frames worth of data caller can assume that a copy of this frame is made -// and not just a copy of the pointer.. int vp8_receive_raw_frame(struct VP8_COMP* comp, unsigned int frame_flags, YV12_BUFFER_CONFIG *sd, int64_t time_stamp, int64_t end_time_stamp); int vp8_get_compressed_data(struct VP8_COMP* comp, unsigned int *frame_flags, unsigned long *size, unsigned char *dest, unsigned char *dest_end, int64_t *time_stamp, int64_t *time_end, int flush); int vp8_get_preview_raw_frame(struct VP8_COMP* comp, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t *flags); int vp8_use_as_reference(struct VP8_COMP* comp, int ref_frame_flags); int vp8_update_reference(struct VP8_COMP* comp, int ref_frame_flags); - int vp8_get_reference(struct VP8_COMP* comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd); - int vp8_set_reference(struct VP8_COMP* comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd); + int vp8_get_reference(struct VP8_COMP* comp, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd); + int vp8_set_reference(struct VP8_COMP* comp, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd); int vp8_update_entropy(struct VP8_COMP* comp, int update); int vp8_set_roimap(struct VP8_COMP* comp, unsigned char *map, unsigned int rows, unsigned int cols, int delta_q[4], int delta_lf[4], unsigned int 
threshold[4]); int vp8_set_active_map(struct VP8_COMP* comp, unsigned char *map, unsigned int rows, unsigned int cols); diff --git a/vp8/common/onyxc_int.h b/vp8/common/onyxc_int.h index c3215c0..5325bac 100644 --- a/vp8/common/onyxc_int.h +++ b/vp8/common/onyxc_int.h @@ -42,7 +42,6 @@ typedef struct frame_contexts vp8_prob sub_mv_ref_prob [VP8_SUBMVREFS-1]; vp8_prob coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; MV_CONTEXT mvc[2]; - MV_CONTEXT pre_mvc[2]; /* not to caculate the mvcost for the frame if mvc doesn't change. */ } FRAME_CONTEXT; typedef enum @@ -59,12 +58,6 @@ typedef enum RECON_CLAMP_NOTREQUIRED = 1 } CLAMP_TYPE; -typedef enum -{ - SIXTAP = 0, - BILINEAR = 1 -} INTERPOLATIONFILTERTYPE; - typedef struct VP8Common { @@ -94,6 +87,7 @@ typedef struct VP8Common YV12_BUFFER_CONFIG post_proc_buffer; YV12_BUFFER_CONFIG post_proc_buffer_int; int post_proc_buffer_int_used; + unsigned char *pp_limits_buffer; /* post-processing filter coefficients */ #endif FRAME_TYPE last_frame_type; /* Save last frame's frame type for motion search. 
*/ @@ -114,7 +108,6 @@ typedef struct VP8Common int full_pixel; int base_qindex; - int last_kf_gf_q; /* Q used on the last GF or KF */ int y1dc_delta_q; int y2dc_delta_q; @@ -130,11 +123,11 @@ typedef struct VP8Common MODE_INFO *mip; /* Base of allocated array */ MODE_INFO *mi; /* Corresponds to upper left visible macroblock */ +#if CONFIG_ERROR_CONCEALMENT MODE_INFO *prev_mip; /* MODE_INFO array 'mip' from last decoded frame */ MODE_INFO *prev_mi; /* 'mi' from last frame (points into prev_mip) */ +#endif - - INTERPOLATIONFILTERTYPE mcomp_filter_type; LOOPFILTERTYPE filter_type; loop_filter_info_n lf_info; @@ -158,14 +151,6 @@ typedef struct VP8Common ENTROPY_CONTEXT_PLANES *above_context; /* row of context for each plane */ ENTROPY_CONTEXT_PLANES left_context; /* (up to) 4 contexts "" */ - - /* keyframe block modes are predicted by their above, left neighbors */ - - vp8_prob kf_bmode_prob [VP8_BINTRAMODES] [VP8_BINTRAMODES] [VP8_BINTRAMODES-1]; - vp8_prob kf_ymode_prob [VP8_YMODES-1]; /* keyframe "" */ - vp8_prob kf_uv_mode_prob [VP8_UV_MODES-1]; - - FRAME_CONTEXT lfc; /* last frame entropy */ FRAME_CONTEXT fc; /* this frame entropy */ diff --git a/vp8/common/onyxd.h b/vp8/common/onyxd.h index 35a8b6e..fd7e051 100644 --- a/vp8/common/onyxd.h +++ b/vp8/common/onyxd.h @@ -22,6 +22,7 @@ extern "C" #include "ppflags.h" #include "vpx_ports/mem.h" #include "vpx/vpx_codec.h" +#include "vpx/vp8.h" struct VP8D_COMP; @@ -35,12 +36,6 @@ extern "C" int error_concealment; int input_fragments; } VP8D_CONFIG; - typedef enum - { - VP8_LAST_FLAG = 1, - VP8_GOLD_FLAG = 2, - VP8_ALT_FLAG = 4 - } VP8_REFFRAME; typedef enum { @@ -53,11 +48,13 @@ extern "C" int vp8dx_get_setting(struct VP8D_COMP* comp, VP8D_SETTING oxst); - int vp8dx_receive_compressed_data(struct VP8D_COMP* comp, unsigned long size, const unsigned char *dest, int64_t time_stamp); + int vp8dx_receive_compressed_data(struct VP8D_COMP* comp, + size_t size, const uint8_t *dest, + int64_t time_stamp); int 
vp8dx_get_raw_frame(struct VP8D_COMP* comp, YV12_BUFFER_CONFIG *sd, int64_t *time_stamp, int64_t *time_end_stamp, vp8_ppflags_t *flags); - vpx_codec_err_t vp8dx_get_reference(struct VP8D_COMP* comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd); - vpx_codec_err_t vp8dx_set_reference(struct VP8D_COMP* comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd); + vpx_codec_err_t vp8dx_get_reference(struct VP8D_COMP* comp, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd); + vpx_codec_err_t vp8dx_set_reference(struct VP8D_COMP* comp, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd); struct VP8D_COMP* vp8dx_create_decompressor(VP8D_CONFIG *oxcf); diff --git a/vp8/common/postproc.c b/vp8/common/postproc.c index ccf6ad7..80fa530 100644 --- a/vp8/common/postproc.c +++ b/vp8/common/postproc.c @@ -127,27 +127,24 @@ extern void vp8_blit_text(const char *msg, unsigned char *address, const int pit extern void vp8_blit_line(int x0, int x1, int y0, int y1, unsigned char *image, const int pitch); /*********************************************************************************************************** */ -void vp8_post_proc_down_and_across_c +void vp8_post_proc_down_and_across_mb_row_c ( unsigned char *src_ptr, unsigned char *dst_ptr, int src_pixels_per_line, int dst_pixels_per_line, - int rows, int cols, - int flimit + unsigned char *f, + int size ) { unsigned char *p_src, *p_dst; int row; int col; - int i; - int v; - int pitch = src_pixels_per_line; - unsigned char d[8]; - (void)dst_pixels_per_line; + unsigned char v; + unsigned char d[4]; - for (row = 0; row < rows; row++) + for (row = 0; row < size; row++) { /* post_proc_down for one row */ p_src = src_ptr; @@ -155,20 +152,23 @@ void vp8_post_proc_down_and_across_c for (col = 0; col < cols; col++) { + unsigned char p_above2 = p_src[col - 2 * src_pixels_per_line]; + unsigned char p_above1 = p_src[col - src_pixels_per_line]; + unsigned char p_below1 = p_src[col + src_pixels_per_line]; + 
unsigned char p_below2 = p_src[col + 2 * src_pixels_per_line]; - int kernel = 4; - int v = p_src[col]; + v = p_src[col]; - for (i = -2; i <= 2; i++) + if ((abs(v - p_above2) < f[col]) && (abs(v - p_above1) < f[col]) + && (abs(v - p_below1) < f[col]) && (abs(v - p_below2) < f[col])) { - if (abs(v - p_src[col+i*pitch]) > flimit) - goto down_skip_convolve; - - kernel += kernel5[2+i] * p_src[col+i*pitch]; + unsigned char k1, k2, k3; + k1 = (p_above2 + p_above1 + 1) >> 1; + k2 = (p_below2 + p_below1 + 1) >> 1; + k3 = (k1 + k2 + 1) >> 1; + v = (k3 + v + 1) >> 1; } - v = (kernel >> 3); - down_skip_convolve: p_dst[col] = v; } @@ -176,45 +176,38 @@ void vp8_post_proc_down_and_across_c p_src = dst_ptr; p_dst = dst_ptr; - for (i = -8; i<0; i++) - p_src[i]=p_src[0]; - - for (i = cols; i flimit) - goto across_skip_convolve; - - kernel += kernel5[2+i] * p_src[col+i]; + unsigned char k1, k2, k3; + k1 = (p_src[col - 2] + p_src[col - 1] + 1) >> 1; + k2 = (p_src[col + 2] + p_src[col + 1] + 1) >> 1; + k3 = (k1 + k2 + 1) >> 1; + v = (k3 + v + 1) >> 1; } - d[col&7] = (kernel >> 3); - across_skip_convolve: + d[col & 3] = v; if (col >= 2) - p_dst[col-2] = d[(col-2)&7]; + p_dst[col - 2] = d[(col - 2) & 3]; } /* handle the last two pixels */ - p_dst[col-2] = d[(col-2)&7]; - p_dst[col-1] = d[(col-1)&7]; - + p_dst[col - 2] = d[(col - 2) & 3]; + p_dst[col - 1] = d[(col - 1) & 3]; /* next row */ - src_ptr += pitch; - dst_ptr += pitch; + src_ptr += src_pixels_per_line; + dst_ptr += dst_pixels_per_line; } } @@ -240,8 +233,9 @@ void vp8_mbpost_proc_across_ip_c(unsigned char *src, int pitch, int rows, int co for (i = -8; i<0; i++) s[i]=s[0]; - // 17 avoids valgrind warning - we buffer values in c in d - // and only write them when we've read 8 ahead... + /* 17 avoids valgrind warning - we buffer values in c in d + * and only write them when we've read 8 ahead... 
+ */ for (i = cols; iy_buffer, post->y_buffer, source->y_stride, post->y_stride, source->y_height, source->y_width, ppl); - vp8_mbpost_proc_across_ip(post->y_buffer, post->y_stride, post->y_height, post->y_width, q2mbl(q)); - vp8_mbpost_proc_down(post->y_buffer, post->y_stride, post->y_height, post->y_width, q2mbl(q)); - - vp8_post_proc_down_and_across(source->u_buffer, post->u_buffer, source->uv_stride, post->uv_stride, source->uv_height, source->uv_width, ppl); - vp8_post_proc_down_and_across(source->v_buffer, post->v_buffer, source->uv_stride, post->uv_stride, source->uv_height, source->uv_width, ppl); - + vp8_mbpost_proc_across_ip(post->y_buffer, post->y_stride, post->y_height, + post->y_width, q2mbl(q)); + vp8_mbpost_proc_down(post->y_buffer, post->y_stride, post->y_height, + post->y_width, q2mbl(q)); } -void vp8_deblock(YV12_BUFFER_CONFIG *source, +void vp8_deblock(VP8_COMMON *cm, + YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *post, int q, int low_var_thresh, @@ -351,16 +332,64 @@ void vp8_deblock(YV12_BUFFER_CONFIG *source, { double level = 6.0e-05 * q * q * q - .0067 * q * q + .306 * q + .0065; int ppl = (int)(level + .5); + + const MODE_INFO *mode_info_context = cm->mi; + int mbr, mbc; + + /* The pixel thresholds are adjusted according to if or not the macroblock + * is a skipped block. 
*/ + unsigned char *ylimits = cm->pp_limits_buffer; + unsigned char *uvlimits = cm->pp_limits_buffer + 16 * cm->mb_cols; (void) low_var_thresh; (void) flag; - vp8_post_proc_down_and_across(source->y_buffer, post->y_buffer, source->y_stride, post->y_stride, source->y_height, source->y_width, ppl); - vp8_post_proc_down_and_across(source->u_buffer, post->u_buffer, source->uv_stride, post->uv_stride, source->uv_height, source->uv_width, ppl); - vp8_post_proc_down_and_across(source->v_buffer, post->v_buffer, source->uv_stride, post->uv_stride, source->uv_height, source->uv_width, ppl); + if (ppl > 0) + { + for (mbr = 0; mbr < cm->mb_rows; mbr++) + { + unsigned char *ylptr = ylimits; + unsigned char *uvlptr = uvlimits; + for (mbc = 0; mbc < cm->mb_cols; mbc++) + { + unsigned char mb_ppl; + + if (mode_info_context->mbmi.mb_skip_coeff) + mb_ppl = (unsigned char)ppl >> 1; + else + mb_ppl = (unsigned char)ppl; + + vpx_memset(ylptr, mb_ppl, 16); + vpx_memset(uvlptr, mb_ppl, 8); + + ylptr += 16; + uvlptr += 8; + mode_info_context++; + } + mode_info_context++; + + vp8_post_proc_down_and_across_mb_row( + source->y_buffer + 16 * mbr * source->y_stride, + post->y_buffer + 16 * mbr * post->y_stride, source->y_stride, + post->y_stride, source->y_width, ylimits, 16); + + vp8_post_proc_down_and_across_mb_row( + source->u_buffer + 8 * mbr * source->uv_stride, + post->u_buffer + 8 * mbr * post->uv_stride, source->uv_stride, + post->uv_stride, source->uv_width, uvlimits, 8); + vp8_post_proc_down_and_across_mb_row( + source->v_buffer + 8 * mbr * source->uv_stride, + post->v_buffer + 8 * mbr * post->uv_stride, source->uv_stride, + post->uv_stride, source->uv_width, uvlimits, 8); + } + } else + { + vp8_yv12_copy_frame(source, post); + } } #if !(CONFIG_TEMPORAL_DENOISING) -void vp8_de_noise(YV12_BUFFER_CONFIG *source, +void vp8_de_noise(VP8_COMMON *cm, + YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *post, int q, int low_var_thresh, @@ -368,33 +397,33 @@ void vp8_de_noise(YV12_BUFFER_CONFIG 
*source, { double level = 6.0e-05 * q * q * q - .0067 * q * q + .306 * q + .0065; int ppl = (int)(level + .5); + int mb_rows = source->y_width >> 4; + int mb_cols = source->y_height >> 4; + unsigned char *limits = cm->pp_limits_buffer;; + int mbr, mbc; (void) post; (void) low_var_thresh; (void) flag; - vp8_post_proc_down_and_across( - source->y_buffer + 2 * source->y_stride + 2, - source->y_buffer + 2 * source->y_stride + 2, - source->y_stride, - source->y_stride, - source->y_height - 4, - source->y_width - 4, - ppl); - vp8_post_proc_down_and_across( - source->u_buffer + 2 * source->uv_stride + 2, - source->u_buffer + 2 * source->uv_stride + 2, - source->uv_stride, - source->uv_stride, - source->uv_height - 4, - source->uv_width - 4, ppl); - vp8_post_proc_down_and_across( - source->v_buffer + 2 * source->uv_stride + 2, - source->v_buffer + 2 * source->uv_stride + 2, - source->uv_stride, - source->uv_stride, - source->uv_height - 4, - source->uv_width - 4, ppl); + vpx_memset(limits, (unsigned char)ppl, 16 * mb_cols); + /* TODO: The original code don't filter the 2 outer rows and columns. 
*/ + for (mbr = 0; mbr < mb_rows; mbr++) + { + vp8_post_proc_down_and_across_mb_row( + source->y_buffer + 16 * mbr * source->y_stride, + source->y_buffer + 16 * mbr * source->y_stride, + source->y_stride, source->y_stride, source->y_width, limits, 16); + + vp8_post_proc_down_and_across_mb_row( + source->u_buffer + 8 * mbr * source->uv_stride, + source->u_buffer + 8 * mbr * source->uv_stride, + source->uv_stride, source->uv_stride, source->uv_width, limits, 8); + vp8_post_proc_down_and_across_mb_row( + source->v_buffer + 8 * mbr * source->uv_stride, + source->v_buffer + 8 * mbr * source->uv_stride, + source->uv_stride, source->uv_stride, source->uv_width, limits, 8); + } } #endif @@ -441,7 +470,7 @@ static void fillrd(struct postproc_state *state, int q, int a) } - for (next = next; next < 256; next++) + for (; next < 256; next++) char_dist[next] = 0; } @@ -731,21 +760,21 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t oci->post_proc_buffer_int_used = 1; - // insure that postproc is set to all 0's so that post proc - // doesn't pull random data in from edge + /* insure that postproc is set to all 0's so that post proc + * doesn't pull random data in from edge + */ vpx_memset((&oci->post_proc_buffer_int)->buffer_alloc,128,(&oci->post_proc_buffer)->frame_size); } } -#if ARCH_X86||ARCH_X86_64 - vpx_reset_mmx_state(); -#endif + vp8_clear_system_state(); if ((flags & VP8D_MFQE) && oci->postproc_state.last_frame_valid && oci->current_video_frame >= 2 && - oci->base_qindex - oci->postproc_state.last_base_qindex >= 10) + oci->postproc_state.last_base_qindex < 60 && + oci->base_qindex - oci->postproc_state.last_base_qindex >= 20) { vp8_multiframe_quality_enhance(oci); if (((flags & VP8D_DEBLOCK) || (flags & VP8D_DEMACROBLOCK)) && @@ -754,12 +783,14 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t vp8_yv12_copy_frame(&oci->post_proc_buffer, &oci->post_proc_buffer_int); if (flags & VP8D_DEMACROBLOCK) { - 
vp8_deblock_and_de_macro_block(&oci->post_proc_buffer_int, &oci->post_proc_buffer, + vp8_deblock(oci, &oci->post_proc_buffer_int, &oci->post_proc_buffer, q + (deblock_level - 5) * 10, 1, 0); + vp8_de_mblock(&oci->post_proc_buffer, + q + (deblock_level - 5) * 10); } else if (flags & VP8D_DEBLOCK) { - vp8_deblock(&oci->post_proc_buffer_int, &oci->post_proc_buffer, + vp8_deblock(oci, &oci->post_proc_buffer_int, &oci->post_proc_buffer, q, 1, 0); } } @@ -768,13 +799,15 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t } else if (flags & VP8D_DEMACROBLOCK) { - vp8_deblock_and_de_macro_block(oci->frame_to_show, &oci->post_proc_buffer, - q + (deblock_level - 5) * 10, 1, 0); + vp8_deblock(oci, oci->frame_to_show, &oci->post_proc_buffer, + q + (deblock_level - 5) * 10, 1, 0); + vp8_de_mblock(&oci->post_proc_buffer, q + (deblock_level - 5) * 10); + oci->postproc_state.last_base_qindex = oci->base_qindex; } else if (flags & VP8D_DEBLOCK) { - vp8_deblock(oci->frame_to_show, &oci->post_proc_buffer, + vp8_deblock(oci, oci->frame_to_show, &oci->post_proc_buffer, q, 1, 0); oci->postproc_state.last_base_qindex = oci->base_qindex; } diff --git a/vp8/common/postproc.h b/vp8/common/postproc.h index 6ac788c..495a2c9 100644 --- a/vp8/common/postproc.h +++ b/vp8/common/postproc.h @@ -30,13 +30,15 @@ int vp8_post_proc_frame(struct VP8Common *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t *flags); -void vp8_de_noise(YV12_BUFFER_CONFIG *source, +void vp8_de_noise(struct VP8Common *oci, + YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *post, int q, int low_var_thresh, int flag); -void vp8_deblock(YV12_BUFFER_CONFIG *source, +void vp8_deblock(struct VP8Common *oci, + YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *post, int q, int low_var_thresh, diff --git a/vp8/common/ppc/systemdependent.c b/vp8/common/ppc/systemdependent.c index 7046a63..87f4cac 100644 --- a/vp8/common/ppc/systemdependent.c +++ b/vp8/common/ppc/systemdependent.c @@ -19,14 +19,14 @@ void 
(*vp8_short_idct4x4)(short *input, short *output, int pitch); void (*vp8_short_idct4x4_1)(short *input, short *output, int pitch); void (*vp8_dc_only_idct)(short input_dc, short *output, int pitch); -extern void (*vp8_post_proc_down_and_across)( +extern void (*vp8_post_proc_down_and_across_mb_row)( unsigned char *src_ptr, unsigned char *dst_ptr, int src_pixels_per_line, int dst_pixels_per_line, - int rows, int cols, - int flimit + unsigned char *f, + int size ); extern void (*vp8_mbpost_proc_down)(unsigned char *dst, int pitch, int rows, int cols, int flimit); @@ -34,15 +34,15 @@ extern void vp8_mbpost_proc_down_c(unsigned char *dst, int pitch, int rows, int extern void (*vp8_mbpost_proc_across_ip)(unsigned char *src, int pitch, int rows, int cols, int flimit); extern void vp8_mbpost_proc_across_ip_c(unsigned char *src, int pitch, int rows, int cols, int flimit); -extern void vp8_post_proc_down_and_across_c +extern void vp8_post_proc_down_and_across_mb_row_c ( unsigned char *src_ptr, unsigned char *dst_ptr, int src_pixels_per_line, int dst_pixels_per_line, - int rows, int cols, - int flimit + unsigned char *f, + int size ); void vp8_plane_add_noise_c(unsigned char *Start, unsigned int Width, unsigned int Height, int Pitch, int q, int a); @@ -158,7 +158,7 @@ void vp8_machine_specific_config(void) vp8_lf_mbhsimple = loop_filter_mbhs_ppc; vp8_lf_bhsimple = loop_filter_bhs_ppc; - vp8_post_proc_down_and_across = vp8_post_proc_down_and_across_c; + vp8_post_proc_down_and_across_mb_row = vp8_post_proc_down_and_across_mb_row_c; vp8_mbpost_proc_down = vp8_mbpost_proc_down_c; vp8_mbpost_proc_across_ip = vp8_mbpost_proc_across_ip_c; vp8_plane_add_noise = vp8_plane_add_noise_c; diff --git a/vp8/common/quant_common.c b/vp8/common/quant_common.c index e9833fe..05f9210 100644 --- a/vp8/common/quant_common.c +++ b/vp8/common/quant_common.c @@ -109,7 +109,10 @@ int vp8_ac2quant(int QIndex, int Delta) else if (QIndex < 0) QIndex = 0; - retval = (ac_qlookup[ QIndex ] * 155) / 100; + 
/* For all x in [0..284], x*155/100 is bitwise equal to (x*101581) >> 16. + * The smallest precision for that is '(x*6349) >> 12' but 16 is a good + * word size. */ + retval = (ac_qlookup[ QIndex ] * 101581) >> 16; if (retval < 8) retval = 8; diff --git a/vp8/common/reconintra4x4.c b/vp8/common/reconintra4x4.c index dcc35ec..7bb8d0a 100644 --- a/vp8/common/reconintra4x4.c +++ b/vp8/common/reconintra4x4.c @@ -13,11 +13,11 @@ #include "vpx_rtcd.h" #include "blockd.h" -void vp8_intra4x4_predict_d_c(unsigned char *Above, - unsigned char *yleft, int left_stride, - int b_mode, - unsigned char *dst, int dst_stride, - unsigned char top_left) +void vp8_intra4x4_predict_c(unsigned char *Above, + unsigned char *yleft, int left_stride, + B_PREDICTION_MODE b_mode, + unsigned char *dst, int dst_stride, + unsigned char top_left) { int i, r, c; @@ -290,19 +290,8 @@ void vp8_intra4x4_predict_d_c(unsigned char *Above, } break; + default: + break; } } - -void vp8_intra4x4_predict_c(unsigned char *src, int src_stride, - int b_mode, - unsigned char *dst, int dst_stride) -{ - unsigned char *Above = src - src_stride; - - vp8_intra4x4_predict_d_c(Above, - src - 1, src_stride, - b_mode, - dst, dst_stride, - Above[-1]); -} diff --git a/vp8/common/rtcd.c b/vp8/common/rtcd.c index 232640d..01dad46 100644 --- a/vp8/common/rtcd.c +++ b/vp8/common/rtcd.c @@ -10,3 +10,96 @@ #include "vpx_config.h" #define RTCD_C #include "vpx_rtcd.h" + +#if CONFIG_MULTITHREAD && defined(_WIN32) +#include +#include +static void once(void (*func)(void)) +{ + static CRITICAL_SECTION *lock; + static LONG waiters; + static int done; + void *lock_ptr = &lock; + + /* If the initialization is complete, return early. This isn't just an + * optimization, it prevents races on the destruction of the global + * lock. + */ + if(done) + return; + + InterlockedIncrement(&waiters); + + /* Get a lock. We create one and try to make it the one-true-lock, + * throwing it away if we lost the race. 
+ */ + + { + /* Scope to protect access to new_lock */ + CRITICAL_SECTION *new_lock = malloc(sizeof(CRITICAL_SECTION)); + InitializeCriticalSection(new_lock); + if (InterlockedCompareExchangePointer(lock_ptr, new_lock, NULL) != NULL) + { + DeleteCriticalSection(new_lock); + free(new_lock); + } + } + + /* At this point, we have a lock that can be synchronized on. We don't + * care which thread actually performed the allocation. + */ + + EnterCriticalSection(lock); + + if (!done) + { + func(); + done = 1; + } + + LeaveCriticalSection(lock); + + /* Last one out should free resources. The destructed objects are + * protected by checking if(done) above. + */ + if(!InterlockedDecrement(&waiters)) + { + DeleteCriticalSection(lock); + free(lock); + lock = NULL; + } +} + + +#elif CONFIG_MULTITHREAD && HAVE_PTHREAD_H +#include +static void once(void (*func)(void)) +{ + static pthread_once_t lock = PTHREAD_ONCE_INIT; + pthread_once(&lock, func); +} + + +#else +/* No-op version that performs no synchronization. vpx_rtcd() is idempotent, + * so as long as your platform provides atomic loads/stores of pointers + * no synchronization is strictly necessary. 
+ */ + +static void once(void (*func)(void)) +{ + static int done; + + if(!done) + { + func(); + done = 1; + } +} +#endif + + +void vpx_rtcd() +{ + once(setup_rtcd_internal); +} diff --git a/vp8/common/rtcd_defs.sh b/vp8/common/rtcd_defs.sh index 33bf08b..0f950f8 100644 --- a/vp8/common/rtcd_defs.sh +++ b/vp8/common/rtcd_defs.sh @@ -1,5 +1,7 @@ common_forward_decls() { cat < #include #include "vpx_config.h" #include "vpx/vpx_integer.h" -static -unsigned int sad_mx_n_c( - const unsigned char *src_ptr, - int src_stride, - const unsigned char *ref_ptr, - int ref_stride, - int max_sad, - int m, - int n) +static unsigned int sad_mx_n_c(const unsigned char *src_ptr, int src_stride, + const unsigned char *ref_ptr, int ref_stride, + unsigned int max_sad, int m, int n) { - int r, c; unsigned int sad = 0; @@ -48,298 +42,211 @@ unsigned int sad_mx_n_c( * implementations of these functions are not required to check it. */ -unsigned int vp8_sad16x16_c( - const unsigned char *src_ptr, - int src_stride, - const unsigned char *ref_ptr, - int ref_stride, - int max_sad) +unsigned int vp8_sad16x16_c(const unsigned char *src_ptr, int src_stride, + const unsigned char *ref_ptr, int ref_stride, + unsigned int max_sad) { - return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, max_sad, 16, 16); } - -unsigned int vp8_sad8x8_c( - const unsigned char *src_ptr, - int src_stride, - const unsigned char *ref_ptr, - int ref_stride, - int max_sad) +unsigned int vp8_sad8x8_c(const unsigned char *src_ptr, int src_stride, + const unsigned char *ref_ptr, int ref_stride, + unsigned int max_sad) { - return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, max_sad, 8, 8); } - -unsigned int vp8_sad16x8_c( - const unsigned char *src_ptr, - int src_stride, - const unsigned char *ref_ptr, - int ref_stride, - int max_sad) +unsigned int vp8_sad16x8_c(const unsigned char *src_ptr, int src_stride, + const unsigned char *ref_ptr, int ref_stride, + unsigned int max_sad) { - return sad_mx_n_c(src_ptr, 
src_stride, ref_ptr, ref_stride, max_sad, 16, 8); } - -unsigned int vp8_sad8x16_c( - const unsigned char *src_ptr, - int src_stride, - const unsigned char *ref_ptr, - int ref_stride, - int max_sad) +unsigned int vp8_sad8x16_c(const unsigned char *src_ptr, int src_stride, + const unsigned char *ref_ptr, int ref_stride, + unsigned int max_sad) { - return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, max_sad, 8, 16); } - -unsigned int vp8_sad4x4_c( - const unsigned char *src_ptr, - int src_stride, - const unsigned char *ref_ptr, - int ref_stride, - int max_sad) +unsigned int vp8_sad4x4_c(const unsigned char *src_ptr, int src_stride, + const unsigned char *ref_ptr, int ref_stride, + unsigned int max_sad) { - return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, max_sad, 4, 4); } -void vp8_sad16x16x3_c( - const unsigned char *src_ptr, - int src_stride, - const unsigned char *ref_ptr, - int ref_stride, - unsigned int *sad_array -) +void vp8_sad16x16x3_c(const unsigned char *src_ptr, int src_stride, + const unsigned char *ref_ptr, int ref_stride, + unsigned int *sad_array) { - sad_array[0] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr , ref_stride, 0x7fffffff); - sad_array[1] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff); - sad_array[2] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff); + sad_array[0] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX); + sad_array[1] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX); + sad_array[2] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX); } -void vp8_sad16x16x8_c( - const unsigned char *src_ptr, - int src_stride, - const unsigned char *ref_ptr, - int ref_stride, - unsigned short *sad_array -) +void vp8_sad16x16x8_c(const unsigned char *src_ptr, int src_stride, + const unsigned char *ref_ptr, int ref_stride, + unsigned short *sad_array) { - sad_array[0] = (unsigned short)vp8_sad16x16_c(src_ptr, 
src_stride, ref_ptr , ref_stride, 0x7fffffff); - sad_array[1] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff); - sad_array[2] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff); - sad_array[3] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 3 , ref_stride, 0x7fffffff); - sad_array[4] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, 0x7fffffff); - sad_array[5] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, 0x7fffffff); - sad_array[6] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 6 , ref_stride, 0x7fffffff); - sad_array[7] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, 0x7fffffff); + sad_array[0] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX); + sad_array[1] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX); + sad_array[2] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX); + sad_array[3] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 3, ref_stride, UINT_MAX); + sad_array[4] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, UINT_MAX); + sad_array[5] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, UINT_MAX); + sad_array[6] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 6, ref_stride, UINT_MAX); + sad_array[7] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, UINT_MAX); } -void vp8_sad16x8x3_c( - const unsigned char *src_ptr, - int src_stride, - const unsigned char *ref_ptr, - int ref_stride, - unsigned int *sad_array -) +void vp8_sad16x8x3_c(const unsigned char *src_ptr, int src_stride, + const unsigned char *ref_ptr, int ref_stride, + unsigned int *sad_array) { - sad_array[0] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr , 
ref_stride, 0x7fffffff); - sad_array[1] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff); - sad_array[2] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff); + sad_array[0] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX); + sad_array[1] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX); + sad_array[2] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX); } -void vp8_sad16x8x8_c( - const unsigned char *src_ptr, - int src_stride, - const unsigned char *ref_ptr, - int ref_stride, - unsigned short *sad_array -) +void vp8_sad16x8x8_c(const unsigned char *src_ptr, int src_stride, + const unsigned char *ref_ptr, int ref_stride, + unsigned short *sad_array) { - sad_array[0] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr , ref_stride, 0x7fffffff); - sad_array[1] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff); - sad_array[2] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff); - sad_array[3] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 3 , ref_stride, 0x7fffffff); - sad_array[4] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, 0x7fffffff); - sad_array[5] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, 0x7fffffff); - sad_array[6] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 6 , ref_stride, 0x7fffffff); - sad_array[7] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, 0x7fffffff); + sad_array[0] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX); + sad_array[1] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX); + sad_array[2] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX); + sad_array[3] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, 
ref_ptr + 3, ref_stride, UINT_MAX); + sad_array[4] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, UINT_MAX); + sad_array[5] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, UINT_MAX); + sad_array[6] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 6, ref_stride, UINT_MAX); + sad_array[7] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, UINT_MAX); } -void vp8_sad8x8x3_c( - const unsigned char *src_ptr, - int src_stride, - const unsigned char *ref_ptr, - int ref_stride, - unsigned int *sad_array -) +void vp8_sad8x8x3_c(const unsigned char *src_ptr, int src_stride, + const unsigned char *ref_ptr, int ref_stride, + unsigned int *sad_array) { - sad_array[0] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr , ref_stride, 0x7fffffff); - sad_array[1] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff); - sad_array[2] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff); + sad_array[0] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX); + sad_array[1] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX); + sad_array[2] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX); } -void vp8_sad8x8x8_c( - const unsigned char *src_ptr, - int src_stride, - const unsigned char *ref_ptr, - int ref_stride, - unsigned short *sad_array -) +void vp8_sad8x8x8_c(const unsigned char *src_ptr, int src_stride, + const unsigned char *ref_ptr, int ref_stride, + unsigned short *sad_array) { - sad_array[0] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr , ref_stride, 0x7fffffff); - sad_array[1] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff); - sad_array[2] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff); - sad_array[3] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 3 , ref_stride, 0x7fffffff); - 
sad_array[4] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, 0x7fffffff); - sad_array[5] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, 0x7fffffff); - sad_array[6] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 6 , ref_stride, 0x7fffffff); - sad_array[7] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, 0x7fffffff); + sad_array[0] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX); + sad_array[1] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX); + sad_array[2] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX); + sad_array[3] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 3, ref_stride, UINT_MAX); + sad_array[4] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, UINT_MAX); + sad_array[5] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, UINT_MAX); + sad_array[6] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 6, ref_stride, UINT_MAX); + sad_array[7] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, UINT_MAX); } -void vp8_sad8x16x3_c( - const unsigned char *src_ptr, - int src_stride, - const unsigned char *ref_ptr, - int ref_stride, - unsigned int *sad_array -) +void vp8_sad8x16x3_c(const unsigned char *src_ptr, int src_stride, + const unsigned char *ref_ptr, int ref_stride, + unsigned int *sad_array) { - sad_array[0] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr , ref_stride, 0x7fffffff); - sad_array[1] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff); - sad_array[2] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff); + sad_array[0] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX); + sad_array[1] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX); + sad_array[2] 
= vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX); } -void vp8_sad8x16x8_c( - const unsigned char *src_ptr, - int src_stride, - const unsigned char *ref_ptr, - int ref_stride, - unsigned short *sad_array -) +void vp8_sad8x16x8_c(const unsigned char *src_ptr, int src_stride, + const unsigned char *ref_ptr, int ref_stride, + unsigned short *sad_array) { - sad_array[0] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr , ref_stride, 0x7fffffff); - sad_array[1] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff); - sad_array[2] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff); - sad_array[3] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 3 , ref_stride, 0x7fffffff); - sad_array[4] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, 0x7fffffff); - sad_array[5] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, 0x7fffffff); - sad_array[6] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 6 , ref_stride, 0x7fffffff); - sad_array[7] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, 0x7fffffff); + sad_array[0] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX); + sad_array[1] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX); + sad_array[2] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX); + sad_array[3] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 3, ref_stride, UINT_MAX); + sad_array[4] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, UINT_MAX); + sad_array[5] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, UINT_MAX); + sad_array[6] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 6, ref_stride, UINT_MAX); + sad_array[7] = (unsigned 
short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, UINT_MAX); } -void vp8_sad4x4x3_c( - const unsigned char *src_ptr, - int src_stride, - const unsigned char *ref_ptr, - int ref_stride, - unsigned int *sad_array -) +void vp8_sad4x4x3_c(const unsigned char *src_ptr, int src_stride, + const unsigned char *ref_ptr, int ref_stride, + unsigned int *sad_array) { - sad_array[0] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr , ref_stride, 0x7fffffff); - sad_array[1] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff); - sad_array[2] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff); + sad_array[0] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX); + sad_array[1] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX); + sad_array[2] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX); } -void vp8_sad4x4x8_c( - const unsigned char *src_ptr, - int src_stride, - const unsigned char *ref_ptr, - int ref_stride, - unsigned short *sad_array -) +void vp8_sad4x4x8_c(const unsigned char *src_ptr, int src_stride, + const unsigned char *ref_ptr, int ref_stride, + unsigned short *sad_array) { - sad_array[0] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr , ref_stride, 0x7fffffff); - sad_array[1] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff); - sad_array[2] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff); - sad_array[3] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 3 , ref_stride, 0x7fffffff); - sad_array[4] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, 0x7fffffff); - sad_array[5] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, 0x7fffffff); - sad_array[6] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 6 , ref_stride, 0x7fffffff); - sad_array[7] = (unsigned short)vp8_sad4x4_c(src_ptr, 
src_stride, ref_ptr + 7, ref_stride, 0x7fffffff); + sad_array[0] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX); + sad_array[1] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX); + sad_array[2] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX); + sad_array[3] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 3, ref_stride, UINT_MAX); + sad_array[4] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, UINT_MAX); + sad_array[5] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, UINT_MAX); + sad_array[6] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 6, ref_stride, UINT_MAX); + sad_array[7] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, UINT_MAX); } -void vp8_sad16x16x4d_c( - const unsigned char *src_ptr, - int src_stride, - unsigned char *ref_ptr[], - int ref_stride, - unsigned int *sad_array -) +void vp8_sad16x16x4d_c(const unsigned char *src_ptr, int src_stride, + const unsigned char * const ref_ptr[], int ref_stride, + unsigned int *sad_array) { - sad_array[0] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr[0], ref_stride, 0x7fffffff); - sad_array[1] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr[1], ref_stride, 0x7fffffff); - sad_array[2] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr[2], ref_stride, 0x7fffffff); - sad_array[3] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr[3], ref_stride, 0x7fffffff); + sad_array[0] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr[0], ref_stride, UINT_MAX); + sad_array[1] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr[1], ref_stride, UINT_MAX); + sad_array[2] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr[2], ref_stride, UINT_MAX); + sad_array[3] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr[3], ref_stride, UINT_MAX); } -void vp8_sad16x8x4d_c( - const unsigned char *src_ptr, - int src_stride, - unsigned char *ref_ptr[], - 
int ref_stride, - unsigned int *sad_array -) +void vp8_sad16x8x4d_c(const unsigned char *src_ptr, int src_stride, + const unsigned char * const ref_ptr[], int ref_stride, + unsigned int *sad_array) { - sad_array[0] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr[0], ref_stride, 0x7fffffff); - sad_array[1] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr[1], ref_stride, 0x7fffffff); - sad_array[2] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr[2], ref_stride, 0x7fffffff); - sad_array[3] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr[3], ref_stride, 0x7fffffff); + sad_array[0] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr[0], ref_stride, UINT_MAX); + sad_array[1] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr[1], ref_stride, UINT_MAX); + sad_array[2] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr[2], ref_stride, UINT_MAX); + sad_array[3] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr[3], ref_stride, UINT_MAX); } -void vp8_sad8x8x4d_c( - const unsigned char *src_ptr, - int src_stride, - unsigned char *ref_ptr[], - int ref_stride, - unsigned int *sad_array -) +void vp8_sad8x8x4d_c(const unsigned char *src_ptr, int src_stride, + const unsigned char * const ref_ptr[], int ref_stride, + unsigned int *sad_array) { - sad_array[0] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr[0], ref_stride, 0x7fffffff); - sad_array[1] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr[1], ref_stride, 0x7fffffff); - sad_array[2] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr[2], ref_stride, 0x7fffffff); - sad_array[3] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr[3], ref_stride, 0x7fffffff); + sad_array[0] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr[0], ref_stride, UINT_MAX); + sad_array[1] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr[1], ref_stride, UINT_MAX); + sad_array[2] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr[2], ref_stride, UINT_MAX); + sad_array[3] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr[3], ref_stride, UINT_MAX); } -void vp8_sad8x16x4d_c( - const unsigned char *src_ptr, - int src_stride, - unsigned char 
*ref_ptr[], - int ref_stride, - unsigned int *sad_array -) +void vp8_sad8x16x4d_c(const unsigned char *src_ptr, int src_stride, + const unsigned char * const ref_ptr[], int ref_stride, + unsigned int *sad_array) { - sad_array[0] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr[0], ref_stride, 0x7fffffff); - sad_array[1] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr[1], ref_stride, 0x7fffffff); - sad_array[2] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr[2], ref_stride, 0x7fffffff); - sad_array[3] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr[3], ref_stride, 0x7fffffff); + sad_array[0] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr[0], ref_stride, UINT_MAX); + sad_array[1] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr[1], ref_stride, UINT_MAX); + sad_array[2] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr[2], ref_stride, UINT_MAX); + sad_array[3] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr[3], ref_stride, UINT_MAX); } -void vp8_sad4x4x4d_c( - const unsigned char *src_ptr, - int src_stride, - unsigned char *ref_ptr[], - int ref_stride, - unsigned int *sad_array -) +void vp8_sad4x4x4d_c(const unsigned char *src_ptr, int src_stride, + const unsigned char * const ref_ptr[], int ref_stride, + unsigned int *sad_array) { - sad_array[0] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr[0], ref_stride, 0x7fffffff); - sad_array[1] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr[1], ref_stride, 0x7fffffff); - sad_array[2] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr[2], ref_stride, 0x7fffffff); - sad_array[3] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr[3], ref_stride, 0x7fffffff); + sad_array[0] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr[0], ref_stride, UINT_MAX); + sad_array[1] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr[1], ref_stride, UINT_MAX); + sad_array[2] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr[2], ref_stride, UINT_MAX); + sad_array[3] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr[3], ref_stride, UINT_MAX); } /* Copy 2 macroblocks to a buffer */ -void vp8_copy32xn_c( - unsigned char 
*src_ptr, - int src_stride, - unsigned char *dst_ptr, - int dst_stride, - int height) +void vp8_copy32xn_c(unsigned char *src_ptr, int src_stride, + unsigned char *dst_ptr, int dst_stride, + int height) { int r; diff --git a/vp8/common/setupintrarecon.c b/vp8/common/setupintrarecon.c index 7976e25..60afe51 100644 --- a/vp8/common/setupintrarecon.c +++ b/vp8/common/setupintrarecon.c @@ -30,3 +30,10 @@ void vp8_setup_intra_recon(YV12_BUFFER_CONFIG *ybf) ybf->v_buffer[ybf->uv_stride *i - 1] = (unsigned char) 129; } + +void vp8_setup_intra_recon_top_line(YV12_BUFFER_CONFIG *ybf) +{ + vpx_memset(ybf->y_buffer - 1 - ybf->y_stride, 127, ybf->y_width + 5); + vpx_memset(ybf->u_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5); + vpx_memset(ybf->v_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5); +} diff --git a/vp8/common/setupintrarecon.h b/vp8/common/setupintrarecon.h index 5264fd0..e515c3a 100644 --- a/vp8/common/setupintrarecon.h +++ b/vp8/common/setupintrarecon.h @@ -11,3 +11,23 @@ #include "vpx_scale/yv12config.h" extern void vp8_setup_intra_recon(YV12_BUFFER_CONFIG *ybf); +extern void vp8_setup_intra_recon_top_line(YV12_BUFFER_CONFIG *ybf); + +static +void setup_intra_recon_left(unsigned char *y_buffer, + unsigned char *u_buffer, + unsigned char *v_buffer, + int y_stride, + int uv_stride) +{ + int i; + + for (i = 0; i < 16; i++) + y_buffer[y_stride *i] = (unsigned char) 129; + + for (i = 0; i < 8; i++) + u_buffer[uv_stride *i] = (unsigned char) 129; + + for (i = 0; i < 8; i++) + v_buffer[uv_stride *i] = (unsigned char) 129; +} diff --git a/vp8/common/variance.h b/vp8/common/variance.h index b77aa28..01193b8 100644 --- a/vp8/common/variance.h +++ b/vp8/common/variance.h @@ -12,14 +12,14 @@ #ifndef VARIANCE_H #define VARIANCE_H -typedef unsigned int(*vp8_sad_fn_t) - ( +#include "vpx_config.h" + +typedef unsigned int(*vp8_sad_fn_t)( const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, - int max_sad - ); + unsigned int 
max_sad); typedef void (*vp8_copy32xn_fn_t)( const unsigned char *src_ptr, @@ -48,7 +48,7 @@ typedef void (*vp8_sad_multi_d_fn_t) ( const unsigned char *src_ptr, int source_stride, - unsigned char *ref_ptr[4], + const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array ); diff --git a/vp8/common/variance_c.c b/vp8/common/variance_c.c index 996404d..da08aff 100644 --- a/vp8/common/variance_c.c +++ b/vp8/common/variance_c.c @@ -205,14 +205,14 @@ static void var_filter_block2d_bil_first_pass { for (j = 0; j < output_width; j++) { - // Apply bilinear filter + /* Apply bilinear filter */ output_ptr[j] = (((int)src_ptr[0] * vp8_filter[0]) + ((int)src_ptr[pixel_step] * vp8_filter[1]) + (VP8_FILTER_WEIGHT / 2)) >> VP8_FILTER_SHIFT; src_ptr++; } - // Next row... + /* Next row... */ src_ptr += src_pixels_per_line - output_width; output_ptr += output_width; } @@ -264,15 +264,15 @@ static void var_filter_block2d_bil_second_pass { for (j = 0; j < output_width; j++) { - // Apply filter - Temp = ((int)src_ptr[0] * vp8_filter[0]) + + /* Apply filter */ + Temp = ((int)src_ptr[0] * vp8_filter[0]) + ((int)src_ptr[pixel_step] * vp8_filter[1]) + (VP8_FILTER_WEIGHT / 2); output_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT); src_ptr++; } - // Next row... + /* Next row... 
*/ src_ptr += src_pixels_per_line - output_width; output_ptr += output_width; } @@ -292,15 +292,15 @@ unsigned int vp8_sub_pixel_variance4x4_c { unsigned char temp2[20*16]; const short *HFilter, *VFilter; - unsigned short FData3[5*4]; // Temp data bufffer used in filtering + unsigned short FData3[5*4]; /* Temp data bufffer used in filtering */ HFilter = vp8_bilinear_filters[xoffset]; VFilter = vp8_bilinear_filters[yoffset]; - // First filter 1d Horizontal + /* First filter 1d Horizontal */ var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 5, 4, HFilter); - // Now filter Verticaly + /* Now filter Verticaly */ var_filter_block2d_bil_second_pass(FData3, temp2, 4, 4, 4, 4, VFilter); return vp8_variance4x4_c(temp2, 4, dst_ptr, dst_pixels_per_line, sse); @@ -318,7 +318,7 @@ unsigned int vp8_sub_pixel_variance8x8_c unsigned int *sse ) { - unsigned short FData3[9*8]; // Temp data bufffer used in filtering + unsigned short FData3[9*8]; /* Temp data bufffer used in filtering */ unsigned char temp2[20*16]; const short *HFilter, *VFilter; @@ -342,7 +342,7 @@ unsigned int vp8_sub_pixel_variance16x16_c unsigned int *sse ) { - unsigned short FData3[17*16]; // Temp data bufffer used in filtering + unsigned short FData3[17*16]; /* Temp data bufffer used in filtering */ unsigned char temp2[20*16]; const short *HFilter, *VFilter; @@ -418,7 +418,7 @@ unsigned int vp8_sub_pixel_variance16x8_c unsigned int *sse ) { - unsigned short FData3[16*9]; // Temp data bufffer used in filtering + unsigned short FData3[16*9]; /* Temp data bufffer used in filtering */ unsigned char temp2[20*16]; const short *HFilter, *VFilter; @@ -442,7 +442,7 @@ unsigned int vp8_sub_pixel_variance8x16_c unsigned int *sse ) { - unsigned short FData3[9*16]; // Temp data bufffer used in filtering + unsigned short FData3[9*16]; /* Temp data bufffer used in filtering */ unsigned char temp2[20*16]; const short *HFilter, *VFilter; diff --git a/vp8/common/vp8_entropymodedata.h 
b/vp8/common/vp8_entropymodedata.h old mode 100755 new mode 100644 diff --git a/vp8/common/x86/dequantize_mmx.asm b/vp8/common/x86/dequantize_mmx.asm index de9eba8..4e551f0 100644 --- a/vp8/common/x86/dequantize_mmx.asm +++ b/vp8/common/x86/dequantize_mmx.asm @@ -13,7 +13,7 @@ ;void vp8_dequantize_b_impl_mmx(short *sq, short *dq, short *q) -global sym(vp8_dequantize_b_impl_mmx) +global sym(vp8_dequantize_b_impl_mmx) PRIVATE sym(vp8_dequantize_b_impl_mmx): push rbp mov rbp, rsp @@ -55,7 +55,7 @@ sym(vp8_dequantize_b_impl_mmx): ;short *dq, 1 ;unsigned char *dest, 2 ;int stride) 3 -global sym(vp8_dequant_idct_add_mmx) +global sym(vp8_dequant_idct_add_mmx) PRIVATE sym(vp8_dequant_idct_add_mmx): push rbp mov rbp, rsp diff --git a/vp8/common/x86/idctllm_mmx.asm b/vp8/common/x86/idctllm_mmx.asm index 0c9c205..96fa2c6 100644 --- a/vp8/common/x86/idctllm_mmx.asm +++ b/vp8/common/x86/idctllm_mmx.asm @@ -34,7 +34,7 @@ ;void vp8_short_idct4x4llm_mmx(short *input, unsigned char *pred, ;int pitch, unsigned char *dest,int stride) -global sym(vp8_short_idct4x4llm_mmx) +global sym(vp8_short_idct4x4llm_mmx) PRIVATE sym(vp8_short_idct4x4llm_mmx): push rbp mov rbp, rsp @@ -224,7 +224,7 @@ sym(vp8_short_idct4x4llm_mmx): ;int pred_stride, ;unsigned char *dst_ptr, ;int stride) -global sym(vp8_dc_only_idct_add_mmx) +global sym(vp8_dc_only_idct_add_mmx) PRIVATE sym(vp8_dc_only_idct_add_mmx): push rbp mov rbp, rsp diff --git a/vp8/common/x86/idctllm_mmx_test.cc b/vp8/common/x86/idctllm_mmx_test.cc deleted file mode 100755 index 8c11533..0000000 --- a/vp8/common/x86/idctllm_mmx_test.cc +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. 
All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - - extern "C" { - void vp8_short_idct4x4llm_mmx(short *input, unsigned char *pred_ptr, - int pred_stride, unsigned char *dst_ptr, - int dst_stride); -} - -#include "vp8/common/idctllm_test.h" - -namespace -{ - -INSTANTIATE_TEST_CASE_P(MMX, IDCTTest, - ::testing::Values(vp8_short_idct4x4llm_mmx)); - -} // namespace - -int main(int argc, char **argv) { - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/vp8/common/x86/idctllm_sse2.asm b/vp8/common/x86/idctllm_sse2.asm index abeb0b6..bf8e2c4 100644 --- a/vp8/common/x86/idctllm_sse2.asm +++ b/vp8/common/x86/idctllm_sse2.asm @@ -19,7 +19,7 @@ ; int dst_stride - 3 ; ) -global sym(vp8_idct_dequant_0_2x_sse2) +global sym(vp8_idct_dequant_0_2x_sse2) PRIVATE sym(vp8_idct_dequant_0_2x_sse2): push rbp mov rbp, rsp @@ -101,7 +101,7 @@ sym(vp8_idct_dequant_0_2x_sse2): ; unsigned char *dst - 2 ; int dst_stride - 3 ; ) -global sym(vp8_idct_dequant_full_2x_sse2) +global sym(vp8_idct_dequant_full_2x_sse2) PRIVATE sym(vp8_idct_dequant_full_2x_sse2): push rbp mov rbp, rsp @@ -358,7 +358,7 @@ sym(vp8_idct_dequant_full_2x_sse2): ; int dst_stride - 3 ; short *dc - 4 ; ) -global sym(vp8_idct_dequant_dc_0_2x_sse2) +global sym(vp8_idct_dequant_dc_0_2x_sse2) PRIVATE sym(vp8_idct_dequant_dc_0_2x_sse2): push rbp mov rbp, rsp @@ -434,7 +434,7 @@ sym(vp8_idct_dequant_dc_0_2x_sse2): ; int dst_stride - 3 ; short *dc - 4 ; ) -global sym(vp8_idct_dequant_dc_full_2x_sse2) +global sym(vp8_idct_dequant_dc_full_2x_sse2) PRIVATE sym(vp8_idct_dequant_dc_full_2x_sse2): push rbp mov rbp, rsp diff --git a/vp8/common/x86/iwalsh_mmx.asm b/vp8/common/x86/iwalsh_mmx.asm index 6582687..4aac094 100644 --- a/vp8/common/x86/iwalsh_mmx.asm +++ b/vp8/common/x86/iwalsh_mmx.asm @@ -12,7 +12,7 @@ %include "vpx_ports/x86_abi_support.asm" ;void vp8_short_inv_walsh4x4_mmx(short *input, short *output) -global 
sym(vp8_short_inv_walsh4x4_mmx) +global sym(vp8_short_inv_walsh4x4_mmx) PRIVATE sym(vp8_short_inv_walsh4x4_mmx): push rbp mov rbp, rsp diff --git a/vp8/common/x86/iwalsh_sse2.asm b/vp8/common/x86/iwalsh_sse2.asm index 51cb5e2..06e86a8 100644 --- a/vp8/common/x86/iwalsh_sse2.asm +++ b/vp8/common/x86/iwalsh_sse2.asm @@ -12,7 +12,7 @@ %include "vpx_ports/x86_abi_support.asm" ;void vp8_short_inv_walsh4x4_sse2(short *input, short *output) -global sym(vp8_short_inv_walsh4x4_sse2) +global sym(vp8_short_inv_walsh4x4_sse2) PRIVATE sym(vp8_short_inv_walsh4x4_sse2): push rbp mov rbp, rsp diff --git a/vp8/common/x86/loopfilter_block_sse2.asm b/vp8/common/x86/loopfilter_block_sse2.asm index 4918eb5..3d45c61 100644 --- a/vp8/common/x86/loopfilter_block_sse2.asm +++ b/vp8/common/x86/loopfilter_block_sse2.asm @@ -133,7 +133,7 @@ ; const char *limit, ; const char *thresh ;) -global sym(vp8_loop_filter_bh_y_sse2) +global sym(vp8_loop_filter_bh_y_sse2) PRIVATE sym(vp8_loop_filter_bh_y_sse2): %ifidn __OUTPUT_FORMAT__,x64 @@ -150,6 +150,7 @@ sym(vp8_loop_filter_bh_y_sse2): push rbp mov rbp, rsp + SAVE_XMM 11 push r12 push r13 mov thresh, arg(4) @@ -258,6 +259,7 @@ LF_FILTER xmm0, xmm1, xmm3, xmm8, xmm4, xmm2 %ifidn __OUTPUT_FORMAT__,x64 pop r13 pop r12 + RESTORE_XMM pop rbp %endif @@ -273,7 +275,7 @@ LF_FILTER xmm0, xmm1, xmm3, xmm8, xmm4, xmm2 ; const char *thresh ;) -global sym(vp8_loop_filter_bv_y_sse2) +global sym(vp8_loop_filter_bv_y_sse2) PRIVATE sym(vp8_loop_filter_bv_y_sse2): %ifidn __OUTPUT_FORMAT__,x64 diff --git a/vp8/common/x86/loopfilter_mmx.asm b/vp8/common/x86/loopfilter_mmx.asm index 697a5de..f388d24 100644 --- a/vp8/common/x86/loopfilter_mmx.asm +++ b/vp8/common/x86/loopfilter_mmx.asm @@ -21,7 +21,7 @@ ; const char *thresh, ; int count ;) -global sym(vp8_loop_filter_horizontal_edge_mmx) +global sym(vp8_loop_filter_horizontal_edge_mmx) PRIVATE sym(vp8_loop_filter_horizontal_edge_mmx): push rbp mov rbp, rsp @@ -233,7 +233,7 @@ sym(vp8_loop_filter_horizontal_edge_mmx): ; 
const char *thresh, ; int count ;) -global sym(vp8_loop_filter_vertical_edge_mmx) +global sym(vp8_loop_filter_vertical_edge_mmx) PRIVATE sym(vp8_loop_filter_vertical_edge_mmx): push rbp mov rbp, rsp @@ -603,7 +603,7 @@ sym(vp8_loop_filter_vertical_edge_mmx): ; const char *thresh, ; int count ;) -global sym(vp8_mbloop_filter_horizontal_edge_mmx) +global sym(vp8_mbloop_filter_horizontal_edge_mmx) PRIVATE sym(vp8_mbloop_filter_horizontal_edge_mmx): push rbp mov rbp, rsp @@ -920,7 +920,7 @@ sym(vp8_mbloop_filter_horizontal_edge_mmx): ; const char *thresh, ; int count ;) -global sym(vp8_mbloop_filter_vertical_edge_mmx) +global sym(vp8_mbloop_filter_vertical_edge_mmx) PRIVATE sym(vp8_mbloop_filter_vertical_edge_mmx): push rbp mov rbp, rsp @@ -1384,7 +1384,7 @@ sym(vp8_mbloop_filter_vertical_edge_mmx): ; int src_pixel_step, ; const char *blimit ;) -global sym(vp8_loop_filter_simple_horizontal_edge_mmx) +global sym(vp8_loop_filter_simple_horizontal_edge_mmx) PRIVATE sym(vp8_loop_filter_simple_horizontal_edge_mmx): push rbp mov rbp, rsp @@ -1500,7 +1500,7 @@ sym(vp8_loop_filter_simple_horizontal_edge_mmx): ; int src_pixel_step, ; const char *blimit ;) -global sym(vp8_loop_filter_simple_vertical_edge_mmx) +global sym(vp8_loop_filter_simple_vertical_edge_mmx) PRIVATE sym(vp8_loop_filter_simple_vertical_edge_mmx): push rbp mov rbp, rsp diff --git a/vp8/common/x86/loopfilter_sse2.asm b/vp8/common/x86/loopfilter_sse2.asm index 9944c33..a66753b 100644 --- a/vp8/common/x86/loopfilter_sse2.asm +++ b/vp8/common/x86/loopfilter_sse2.asm @@ -286,7 +286,7 @@ ; const char *limit, ; const char *thresh, ;) -global sym(vp8_loop_filter_horizontal_edge_sse2) +global sym(vp8_loop_filter_horizontal_edge_sse2) PRIVATE sym(vp8_loop_filter_horizontal_edge_sse2): push rbp mov rbp, rsp @@ -334,7 +334,7 @@ sym(vp8_loop_filter_horizontal_edge_sse2): ; const char *thresh, ; int count ;) -global sym(vp8_loop_filter_horizontal_edge_uv_sse2) +global sym(vp8_loop_filter_horizontal_edge_uv_sse2) PRIVATE 
sym(vp8_loop_filter_horizontal_edge_uv_sse2): push rbp mov rbp, rsp @@ -561,7 +561,7 @@ sym(vp8_loop_filter_horizontal_edge_uv_sse2): ; const char *limit, ; const char *thresh, ;) -global sym(vp8_mbloop_filter_horizontal_edge_sse2) +global sym(vp8_mbloop_filter_horizontal_edge_sse2) PRIVATE sym(vp8_mbloop_filter_horizontal_edge_sse2): push rbp mov rbp, rsp @@ -607,7 +607,7 @@ sym(vp8_mbloop_filter_horizontal_edge_sse2): ; const char *thresh, ; unsigned char *v ;) -global sym(vp8_mbloop_filter_horizontal_edge_uv_sse2) +global sym(vp8_mbloop_filter_horizontal_edge_uv_sse2) PRIVATE sym(vp8_mbloop_filter_horizontal_edge_uv_sse2): push rbp mov rbp, rsp @@ -928,7 +928,7 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2): ; const char *limit, ; const char *thresh, ;) -global sym(vp8_loop_filter_vertical_edge_sse2) +global sym(vp8_loop_filter_vertical_edge_sse2) PRIVATE sym(vp8_loop_filter_vertical_edge_sse2): push rbp mov rbp, rsp @@ -993,7 +993,7 @@ sym(vp8_loop_filter_vertical_edge_sse2): ; const char *thresh, ; unsigned char *v ;) -global sym(vp8_loop_filter_vertical_edge_uv_sse2) +global sym(vp8_loop_filter_vertical_edge_uv_sse2) PRIVATE sym(vp8_loop_filter_vertical_edge_uv_sse2): push rbp mov rbp, rsp @@ -1142,7 +1142,7 @@ sym(vp8_loop_filter_vertical_edge_uv_sse2): ; const char *limit, ; const char *thresh, ;) -global sym(vp8_mbloop_filter_vertical_edge_sse2) +global sym(vp8_mbloop_filter_vertical_edge_sse2) PRIVATE sym(vp8_mbloop_filter_vertical_edge_sse2): push rbp mov rbp, rsp @@ -1209,7 +1209,7 @@ sym(vp8_mbloop_filter_vertical_edge_sse2): ; const char *thresh, ; unsigned char *v ;) -global sym(vp8_mbloop_filter_vertical_edge_uv_sse2) +global sym(vp8_mbloop_filter_vertical_edge_uv_sse2) PRIVATE sym(vp8_mbloop_filter_vertical_edge_uv_sse2): push rbp mov rbp, rsp @@ -1269,7 +1269,7 @@ sym(vp8_mbloop_filter_vertical_edge_uv_sse2): ; int src_pixel_step, ; const char *blimit, ;) -global sym(vp8_loop_filter_simple_horizontal_edge_sse2) +global 
sym(vp8_loop_filter_simple_horizontal_edge_sse2) PRIVATE sym(vp8_loop_filter_simple_horizontal_edge_sse2): push rbp mov rbp, rsp @@ -1374,7 +1374,7 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2): ; int src_pixel_step, ; const char *blimit, ;) -global sym(vp8_loop_filter_simple_vertical_edge_sse2) +global sym(vp8_loop_filter_simple_vertical_edge_sse2) PRIVATE sym(vp8_loop_filter_simple_vertical_edge_sse2): push rbp ; save old base pointer value. mov rbp, rsp ; set new base pointer value. diff --git a/vp8/common/x86/mfqe_sse2.asm b/vp8/common/x86/mfqe_sse2.asm index 10d21f3..c1d2174 100644 --- a/vp8/common/x86/mfqe_sse2.asm +++ b/vp8/common/x86/mfqe_sse2.asm @@ -19,7 +19,7 @@ ; int dst_stride, ; int src_weight ;) -global sym(vp8_filter_by_weight16x16_sse2) +global sym(vp8_filter_by_weight16x16_sse2) PRIVATE sym(vp8_filter_by_weight16x16_sse2): push rbp mov rbp, rsp @@ -97,7 +97,7 @@ sym(vp8_filter_by_weight16x16_sse2): ; int dst_stride, ; int src_weight ;) -global sym(vp8_filter_by_weight8x8_sse2) +global sym(vp8_filter_by_weight8x8_sse2) PRIVATE sym(vp8_filter_by_weight8x8_sse2): push rbp mov rbp, rsp @@ -165,7 +165,7 @@ sym(vp8_filter_by_weight8x8_sse2): ; unsigned int *variance, 4 ; unsigned int *sad, 5 ;) -global sym(vp8_variance_and_sad_16x16_sse2) +global sym(vp8_variance_and_sad_16x16_sse2) PRIVATE sym(vp8_variance_and_sad_16x16_sse2): push rbp mov rbp, rsp diff --git a/vp8/common/x86/postproc_mmx.asm b/vp8/common/x86/postproc_mmx.asm index d24f740..966c586 100644 --- a/vp8/common/x86/postproc_mmx.asm +++ b/vp8/common/x86/postproc_mmx.asm @@ -14,275 +14,10 @@ %define VP8_FILTER_WEIGHT 128 %define VP8_FILTER_SHIFT 7 -;void vp8_post_proc_down_and_across_mmx -;( -; unsigned char *src_ptr, -; unsigned char *dst_ptr, -; int src_pixels_per_line, -; int dst_pixels_per_line, -; int rows, -; int cols, -; int flimit -;) -global sym(vp8_post_proc_down_and_across_mmx) -sym(vp8_post_proc_down_and_across_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 7 - 
GET_GOT rbx - push rsi - push rdi - ; end prolog - -%if ABI_IS_32BIT=1 && CONFIG_PIC=1 - ; move the global rd onto the stack, since we don't have enough registers - ; to do PIC addressing - movq mm0, [GLOBAL(rd)] - sub rsp, 8 - movq [rsp], mm0 -%define RD [rsp] -%else -%define RD [GLOBAL(rd)] -%endif - - push rbx - lea rbx, [GLOBAL(Blur)] - movd mm2, dword ptr arg(6) ;flimit - punpcklwd mm2, mm2 - punpckldq mm2, mm2 - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(1) ;dst_ptr - - movsxd rcx, DWORD PTR arg(4) ;rows - movsxd rax, DWORD PTR arg(2) ;src_pixels_per_line ; destination pitch? - pxor mm0, mm0 ; mm0 = 00000000 - -.nextrow: - - xor rdx, rdx ; clear out rdx for use as loop counter -.nextcol: - - pxor mm7, mm7 ; mm7 = 00000000 - movq mm6, [rbx + 32 ] ; mm6 = kernel 2 taps - movq mm3, [rsi] ; mm4 = r0 p0..p7 - punpcklbw mm3, mm0 ; mm3 = p0..p3 - movq mm1, mm3 ; mm1 = p0..p3 - pmullw mm3, mm6 ; mm3 *= kernel 2 modifiers - - movq mm6, [rbx + 48] ; mm6 = kernel 3 taps - movq mm5, [rsi + rax] ; mm4 = r1 p0..p7 - punpcklbw mm5, mm0 ; mm5 = r1 p0..p3 - pmullw mm6, mm5 ; mm6 *= p0..p3 * kernel 3 modifiers - paddusw mm3, mm6 ; mm3 += mm6 - - ; thresholding - movq mm7, mm1 ; mm7 = r0 p0..p3 - psubusw mm7, mm5 ; mm7 = r0 p0..p3 - r1 p0..p3 - psubusw mm5, mm1 ; mm5 = r1 p0..p3 - r0 p0..p3 - paddusw mm7, mm5 ; mm7 = abs(r0 p0..p3 - r1 p0..p3) - pcmpgtw mm7, mm2 - - movq mm6, [rbx + 64 ] ; mm6 = kernel 4 modifiers - movq mm5, [rsi + 2*rax] ; mm4 = r2 p0..p7 - punpcklbw mm5, mm0 ; mm5 = r2 p0..p3 - pmullw mm6, mm5 ; mm5 *= kernel 4 modifiers - paddusw mm3, mm6 ; mm3 += mm5 - - ; thresholding - movq mm6, mm1 ; mm6 = r0 p0..p3 - psubusw mm6, mm5 ; mm6 = r0 p0..p3 - r2 p0..p3 - psubusw mm5, mm1 ; mm5 = r2 p0..p3 - r2 p0..p3 - paddusw mm6, mm5 ; mm6 = abs(r0 p0..p3 - r2 p0..p3) - pcmpgtw mm6, mm2 - por mm7, mm6 ; accumulate thresholds - - - neg rax - movq mm6, [rbx ] ; kernel 0 taps - movq mm5, [rsi+2*rax] ; mm4 = r-2 p0..p7 - punpcklbw mm5, mm0 ; mm5 = r-2 p0..p3 - pmullw mm6, mm5 ; 
mm5 *= kernel 0 modifiers - paddusw mm3, mm6 ; mm3 += mm5 - - ; thresholding - movq mm6, mm1 ; mm6 = r0 p0..p3 - psubusw mm6, mm5 ; mm6 = p0..p3 - r-2 p0..p3 - psubusw mm5, mm1 ; mm5 = r-2 p0..p3 - p0..p3 - paddusw mm6, mm5 ; mm6 = abs(r0 p0..p3 - r-2 p0..p3) - pcmpgtw mm6, mm2 - por mm7, mm6 ; accumulate thresholds - - movq mm6, [rbx + 16] ; kernel 1 taps - movq mm4, [rsi+rax] ; mm4 = r-1 p0..p7 - punpcklbw mm4, mm0 ; mm4 = r-1 p0..p3 - pmullw mm6, mm4 ; mm4 *= kernel 1 modifiers. - paddusw mm3, mm6 ; mm3 += mm5 - - ; thresholding - movq mm6, mm1 ; mm6 = r0 p0..p3 - psubusw mm6, mm4 ; mm6 = p0..p3 - r-2 p0..p3 - psubusw mm4, mm1 ; mm5 = r-1 p0..p3 - p0..p3 - paddusw mm6, mm4 ; mm6 = abs(r0 p0..p3 - r-1 p0..p3) - pcmpgtw mm6, mm2 - por mm7, mm6 ; accumulate thresholds - - - paddusw mm3, RD ; mm3 += round value - psraw mm3, VP8_FILTER_SHIFT ; mm3 /= 128 - - pand mm1, mm7 ; mm1 select vals > thresh from source - pandn mm7, mm3 ; mm7 select vals < thresh from blurred result - paddusw mm1, mm7 ; combination - - packuswb mm1, mm0 ; pack to bytes - - movd [rdi], mm1 ; - neg rax ; pitch is positive - - - add rsi, 4 - add rdi, 4 - add rdx, 4 - - cmp edx, dword ptr arg(5) ;cols - jl .nextcol - ; done with the all cols, start the across filtering in place - sub rsi, rdx - sub rdi, rdx - - ; dup the first byte into the left border 8 times - movq mm1, [rdi] - punpcklbw mm1, mm1 - punpcklwd mm1, mm1 - punpckldq mm1, mm1 - - mov rdx, -8 - movq [rdi+rdx], mm1 - - ; dup the last byte into the right border - movsxd rdx, dword arg(5) - movq mm1, [rdi + rdx + -1] - punpcklbw mm1, mm1 - punpcklwd mm1, mm1 - punpckldq mm1, mm1 - movq [rdi+rdx], mm1 - - - push rax - xor rdx, rdx - mov rax, [rdi-4]; - -.acrossnextcol: - pxor mm7, mm7 ; mm7 = 00000000 - movq mm6, [rbx + 32 ] ; - movq mm4, [rdi+rdx] ; mm4 = p0..p7 - movq mm3, mm4 ; mm3 = p0..p7 - punpcklbw mm3, mm0 ; mm3 = p0..p3 - movq mm1, mm3 ; mm1 = p0..p3 - pmullw mm3, mm6 ; mm3 *= kernel 2 modifiers - - movq mm6, [rbx + 48] - psrlq 
mm4, 8 ; mm4 = p1..p7 - movq mm5, mm4 ; mm5 = p1..p7 - punpcklbw mm5, mm0 ; mm5 = p1..p4 - pmullw mm6, mm5 ; mm6 *= p1..p4 * kernel 3 modifiers - paddusw mm3, mm6 ; mm3 += mm6 - - ; thresholding - movq mm7, mm1 ; mm7 = p0..p3 - psubusw mm7, mm5 ; mm7 = p0..p3 - p1..p4 - psubusw mm5, mm1 ; mm5 = p1..p4 - p0..p3 - paddusw mm7, mm5 ; mm7 = abs(p0..p3 - p1..p4) - pcmpgtw mm7, mm2 - - movq mm6, [rbx + 64 ] - psrlq mm4, 8 ; mm4 = p2..p7 - movq mm5, mm4 ; mm5 = p2..p7 - punpcklbw mm5, mm0 ; mm5 = p2..p5 - pmullw mm6, mm5 ; mm5 *= kernel 4 modifiers - paddusw mm3, mm6 ; mm3 += mm5 - - ; thresholding - movq mm6, mm1 ; mm6 = p0..p3 - psubusw mm6, mm5 ; mm6 = p0..p3 - p1..p4 - psubusw mm5, mm1 ; mm5 = p1..p4 - p0..p3 - paddusw mm6, mm5 ; mm6 = abs(p0..p3 - p1..p4) - pcmpgtw mm6, mm2 - por mm7, mm6 ; accumulate thresholds - - - movq mm6, [rbx ] - movq mm4, [rdi+rdx-2] ; mm4 = p-2..p5 - movq mm5, mm4 ; mm5 = p-2..p5 - punpcklbw mm5, mm0 ; mm5 = p-2..p1 - pmullw mm6, mm5 ; mm5 *= kernel 0 modifiers - paddusw mm3, mm6 ; mm3 += mm5 - - ; thresholding - movq mm6, mm1 ; mm6 = p0..p3 - psubusw mm6, mm5 ; mm6 = p0..p3 - p1..p4 - psubusw mm5, mm1 ; mm5 = p1..p4 - p0..p3 - paddusw mm6, mm5 ; mm6 = abs(p0..p3 - p1..p4) - pcmpgtw mm6, mm2 - por mm7, mm6 ; accumulate thresholds - - movq mm6, [rbx + 16] - psrlq mm4, 8 ; mm4 = p-1..p5 - punpcklbw mm4, mm0 ; mm4 = p-1..p2 - pmullw mm6, mm4 ; mm4 *= kernel 1 modifiers. 
- paddusw mm3, mm6 ; mm3 += mm5 - - ; thresholding - movq mm6, mm1 ; mm6 = p0..p3 - psubusw mm6, mm4 ; mm6 = p0..p3 - p1..p4 - psubusw mm4, mm1 ; mm5 = p1..p4 - p0..p3 - paddusw mm6, mm4 ; mm6 = abs(p0..p3 - p1..p4) - pcmpgtw mm6, mm2 - por mm7, mm6 ; accumulate thresholds - - paddusw mm3, RD ; mm3 += round value - psraw mm3, VP8_FILTER_SHIFT ; mm3 /= 128 - - pand mm1, mm7 ; mm1 select vals > thresh from source - pandn mm7, mm3 ; mm7 select vals < thresh from blurred result - paddusw mm1, mm7 ; combination - - packuswb mm1, mm0 ; pack to bytes - mov DWORD PTR [rdi+rdx-4], eax ; store previous four bytes - movd eax, mm1 - - add rdx, 4 - cmp edx, dword ptr arg(5) ;cols - jl .acrossnextcol; - - mov DWORD PTR [rdi+rdx-4], eax - pop rax - - ; done with this rwo - add rsi,rax ; next line - movsxd rax, dword ptr arg(3) ;dst_pixels_per_line ; destination pitch? - add rdi,rax ; next destination - movsxd rax, dword ptr arg(2) ;src_pixels_per_line ; destination pitch? - - dec rcx ; decrement count - jnz .nextrow ; next row - pop rbx - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret -%undef RD - - ;void vp8_mbpost_proc_down_mmx(unsigned char *dst, ; int pitch, int rows, int cols,int flimit) extern sym(vp8_rv) -global sym(vp8_mbpost_proc_down_mmx) +global sym(vp8_mbpost_proc_down_mmx) PRIVATE sym(vp8_mbpost_proc_down_mmx): push rbp mov rbp, rsp @@ -510,7 +245,7 @@ sym(vp8_mbpost_proc_down_mmx): ; unsigned char bothclamp[16], ; unsigned int Width, unsigned int Height, int Pitch) extern sym(rand) -global sym(vp8_plane_add_noise_mmx) +global sym(vp8_plane_add_noise_mmx) PRIVATE sym(vp8_plane_add_noise_mmx): push rbp mov rbp, rsp diff --git a/vp8/common/x86/postproc_sse2.asm b/vp8/common/x86/postproc_sse2.asm index 966aafd..00f84a3 100644 --- a/vp8/common/x86/postproc_sse2.asm +++ b/vp8/common/x86/postproc_sse2.asm @@ -11,146 +11,159 @@ %include "vpx_ports/x86_abi_support.asm" -;void vp8_post_proc_down_and_across_xmm +;macro in deblock functions 
+%macro FIRST_2_ROWS 0 + movdqa xmm4, xmm0 + movdqa xmm6, xmm0 + movdqa xmm5, xmm1 + pavgb xmm5, xmm3 + + ;calculate absolute value + psubusb xmm4, xmm1 + psubusb xmm1, xmm0 + psubusb xmm6, xmm3 + psubusb xmm3, xmm0 + paddusb xmm4, xmm1 + paddusb xmm6, xmm3 + + ;get threshold + movdqa xmm2, flimit + pxor xmm1, xmm1 + movdqa xmm7, xmm2 + + ;get mask + psubusb xmm2, xmm4 + psubusb xmm7, xmm6 + pcmpeqb xmm2, xmm1 + pcmpeqb xmm7, xmm1 + por xmm7, xmm2 +%endmacro + +%macro SECOND_2_ROWS 0 + movdqa xmm6, xmm0 + movdqa xmm4, xmm0 + movdqa xmm2, xmm1 + pavgb xmm1, xmm3 + + ;calculate absolute value + psubusb xmm6, xmm2 + psubusb xmm2, xmm0 + psubusb xmm4, xmm3 + psubusb xmm3, xmm0 + paddusb xmm6, xmm2 + paddusb xmm4, xmm3 + + pavgb xmm5, xmm1 + + ;get threshold + movdqa xmm2, flimit + pxor xmm1, xmm1 + movdqa xmm3, xmm2 + + ;get mask + psubusb xmm2, xmm6 + psubusb xmm3, xmm4 + pcmpeqb xmm2, xmm1 + pcmpeqb xmm3, xmm1 + + por xmm7, xmm2 + por xmm7, xmm3 + + pavgb xmm5, xmm0 + + ;decide if or not to use filtered value + pand xmm0, xmm7 + pandn xmm7, xmm5 + paddusb xmm0, xmm7 +%endmacro + +%macro UPDATE_FLIMIT 0 + movdqa xmm2, XMMWORD PTR [rbx] + movdqa [rsp], xmm2 + add rbx, 16 +%endmacro + +;void vp8_post_proc_down_and_across_mb_row_sse2 ;( ; unsigned char *src_ptr, ; unsigned char *dst_ptr, ; int src_pixels_per_line, ; int dst_pixels_per_line, -; int rows, ; int cols, -; int flimit +; int *flimits, +; int size ;) -global sym(vp8_post_proc_down_and_across_xmm) -sym(vp8_post_proc_down_and_across_xmm): +global sym(vp8_post_proc_down_and_across_mb_row_sse2) PRIVATE +sym(vp8_post_proc_down_and_across_mb_row_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 7 SAVE_XMM 7 - GET_GOT rbx + push rbx push rsi push rdi ; end prolog - -%if ABI_IS_32BIT=1 && CONFIG_PIC=1 ALIGN_STACK 16, rax - ; move the global rd onto the stack, since we don't have enough registers - ; to do PIC addressing - movdqa xmm0, [GLOBAL(rd42)] sub rsp, 16 - movdqa [rsp], xmm0 -%define RD42 [rsp] -%else -%define 
RD42 [GLOBAL(rd42)] -%endif - - movd xmm2, dword ptr arg(6) ;flimit - punpcklwd xmm2, xmm2 - punpckldq xmm2, xmm2 - punpcklqdq xmm2, xmm2 + ; put flimit on stack + mov rbx, arg(5) ;flimits ptr + UPDATE_FLIMIT - mov rsi, arg(0) ;src_ptr - mov rdi, arg(1) ;dst_ptr +%define flimit [rsp] - movsxd rcx, DWORD PTR arg(4) ;rows - movsxd rax, DWORD PTR arg(2) ;src_pixels_per_line ; destination pitch? - pxor xmm0, xmm0 ; mm0 = 00000000 + mov rsi, arg(0) ;src_ptr + mov rdi, arg(1) ;dst_ptr + movsxd rax, DWORD PTR arg(2) ;src_pixels_per_line + movsxd rcx, DWORD PTR arg(6) ;rows in a macroblock .nextrow: - - xor rdx, rdx ; clear out rdx for use as loop counter + xor rdx, rdx ;col .nextcol: - movq xmm3, QWORD PTR [rsi] ; mm4 = r0 p0..p7 - punpcklbw xmm3, xmm0 ; mm3 = p0..p3 - movdqa xmm1, xmm3 ; mm1 = p0..p3 - psllw xmm3, 2 ; - - movq xmm5, QWORD PTR [rsi + rax] ; mm4 = r1 p0..p7 - punpcklbw xmm5, xmm0 ; mm5 = r1 p0..p3 - paddusw xmm3, xmm5 ; mm3 += mm6 - - ; thresholding - movdqa xmm7, xmm1 ; mm7 = r0 p0..p3 - psubusw xmm7, xmm5 ; mm7 = r0 p0..p3 - r1 p0..p3 - psubusw xmm5, xmm1 ; mm5 = r1 p0..p3 - r0 p0..p3 - paddusw xmm7, xmm5 ; mm7 = abs(r0 p0..p3 - r1 p0..p3) - pcmpgtw xmm7, xmm2 - - movq xmm5, QWORD PTR [rsi + 2*rax] ; mm4 = r2 p0..p7 - punpcklbw xmm5, xmm0 ; mm5 = r2 p0..p3 - paddusw xmm3, xmm5 ; mm3 += mm5 - - ; thresholding - movdqa xmm6, xmm1 ; mm6 = r0 p0..p3 - psubusw xmm6, xmm5 ; mm6 = r0 p0..p3 - r2 p0..p3 - psubusw xmm5, xmm1 ; mm5 = r2 p0..p3 - r2 p0..p3 - paddusw xmm6, xmm5 ; mm6 = abs(r0 p0..p3 - r2 p0..p3) - pcmpgtw xmm6, xmm2 - por xmm7, xmm6 ; accumulate thresholds + ;load current and next 2 rows + movdqu xmm0, XMMWORD PTR [rsi] + movdqu xmm1, XMMWORD PTR [rsi + rax] + movdqu xmm3, XMMWORD PTR [rsi + 2*rax] + FIRST_2_ROWS + ;load above 2 rows neg rax - movq xmm5, QWORD PTR [rsi+2*rax] ; mm4 = r-2 p0..p7 - punpcklbw xmm5, xmm0 ; mm5 = r-2 p0..p3 - paddusw xmm3, xmm5 ; mm3 += mm5 - - ; thresholding - movdqa xmm6, xmm1 ; mm6 = r0 p0..p3 - psubusw xmm6, xmm5 ; 
mm6 = p0..p3 - r-2 p0..p3 - psubusw xmm5, xmm1 ; mm5 = r-2 p0..p3 - p0..p3 - paddusw xmm6, xmm5 ; mm6 = abs(r0 p0..p3 - r-2 p0..p3) - pcmpgtw xmm6, xmm2 - por xmm7, xmm6 ; accumulate thresholds - - movq xmm4, QWORD PTR [rsi+rax] ; mm4 = r-1 p0..p7 - punpcklbw xmm4, xmm0 ; mm4 = r-1 p0..p3 - paddusw xmm3, xmm4 ; mm3 += mm5 - - ; thresholding - movdqa xmm6, xmm1 ; mm6 = r0 p0..p3 - psubusw xmm6, xmm4 ; mm6 = p0..p3 - r-2 p0..p3 - psubusw xmm4, xmm1 ; mm5 = r-1 p0..p3 - p0..p3 - paddusw xmm6, xmm4 ; mm6 = abs(r0 p0..p3 - r-1 p0..p3) - pcmpgtw xmm6, xmm2 - por xmm7, xmm6 ; accumulate thresholds - - - paddusw xmm3, RD42 ; mm3 += round value - psraw xmm3, 3 ; mm3 /= 8 - - pand xmm1, xmm7 ; mm1 select vals > thresh from source - pandn xmm7, xmm3 ; mm7 select vals < thresh from blurred result - paddusw xmm1, xmm7 ; combination + movdqu xmm1, XMMWORD PTR [rsi + 2*rax] + movdqu xmm3, XMMWORD PTR [rsi + rax] - packuswb xmm1, xmm0 ; pack to bytes - movq QWORD PTR [rdi], xmm1 ; + SECOND_2_ROWS - neg rax ; pitch is positive - add rsi, 8 - add rdi, 8 + movdqu XMMWORD PTR [rdi], xmm0 - add rdx, 8 - cmp edx, dword arg(5) ;cols + neg rax ; positive stride + add rsi, 16 + add rdi, 16 - jl .nextcol + add rdx, 16 + cmp edx, dword arg(4) ;cols + jge .downdone + UPDATE_FLIMIT + jmp .nextcol +.downdone: ; done with the all cols, start the across filtering in place sub rsi, rdx sub rdi, rdx + mov rbx, arg(5) ; flimits + UPDATE_FLIMIT ; dup the first byte into the left border 8 times movq mm1, [rdi] punpcklbw mm1, mm1 punpcklwd mm1, mm1 punpckldq mm1, mm1 - mov rdx, -8 movq [rdi+rdx], mm1 ; dup the last byte into the right border - movsxd rdx, dword arg(5) + movsxd rdx, dword arg(4) movq mm1, [rdi + rdx + -1] punpcklbw mm1, mm1 punpcklwd mm1, mm1 @@ -158,118 +171,69 @@ sym(vp8_post_proc_down_and_across_xmm): movq [rdi+rdx], mm1 xor rdx, rdx - movq mm0, QWORD PTR [rdi-8]; + movq mm0, QWORD PTR [rdi-16]; + movq mm1, QWORD PTR [rdi-8]; .acrossnextcol: - movq xmm7, QWORD PTR [rdi +rdx -2] - 
movd xmm4, DWORD PTR [rdi +rdx +6] - - pslldq xmm4, 8 - por xmm4, xmm7 - - movdqa xmm3, xmm4 - psrldq xmm3, 2 - punpcklbw xmm3, xmm0 ; mm3 = p0..p3 - movdqa xmm1, xmm3 ; mm1 = p0..p3 - psllw xmm3, 2 - - - movdqa xmm5, xmm4 - psrldq xmm5, 3 - punpcklbw xmm5, xmm0 ; mm5 = p1..p4 - paddusw xmm3, xmm5 ; mm3 += mm6 - - ; thresholding - movdqa xmm7, xmm1 ; mm7 = p0..p3 - psubusw xmm7, xmm5 ; mm7 = p0..p3 - p1..p4 - psubusw xmm5, xmm1 ; mm5 = p1..p4 - p0..p3 - paddusw xmm7, xmm5 ; mm7 = abs(p0..p3 - p1..p4) - pcmpgtw xmm7, xmm2 - - movdqa xmm5, xmm4 - psrldq xmm5, 4 - punpcklbw xmm5, xmm0 ; mm5 = p2..p5 - paddusw xmm3, xmm5 ; mm3 += mm5 - - ; thresholding - movdqa xmm6, xmm1 ; mm6 = p0..p3 - psubusw xmm6, xmm5 ; mm6 = p0..p3 - p1..p4 - psubusw xmm5, xmm1 ; mm5 = p1..p4 - p0..p3 - paddusw xmm6, xmm5 ; mm6 = abs(p0..p3 - p1..p4) - pcmpgtw xmm6, xmm2 - por xmm7, xmm6 ; accumulate thresholds - - - movdqa xmm5, xmm4 ; mm5 = p-2..p5 - punpcklbw xmm5, xmm0 ; mm5 = p-2..p1 - paddusw xmm3, xmm5 ; mm3 += mm5 - - ; thresholding - movdqa xmm6, xmm1 ; mm6 = p0..p3 - psubusw xmm6, xmm5 ; mm6 = p0..p3 - p1..p4 - psubusw xmm5, xmm1 ; mm5 = p1..p4 - p0..p3 - paddusw xmm6, xmm5 ; mm6 = abs(p0..p3 - p1..p4) - pcmpgtw xmm6, xmm2 - por xmm7, xmm6 ; accumulate thresholds - - psrldq xmm4, 1 ; mm4 = p-1..p5 - punpcklbw xmm4, xmm0 ; mm4 = p-1..p2 - paddusw xmm3, xmm4 ; mm3 += mm5 - - ; thresholding - movdqa xmm6, xmm1 ; mm6 = p0..p3 - psubusw xmm6, xmm4 ; mm6 = p0..p3 - p1..p4 - psubusw xmm4, xmm1 ; mm5 = p1..p4 - p0..p3 - paddusw xmm6, xmm4 ; mm6 = abs(p0..p3 - p1..p4) - pcmpgtw xmm6, xmm2 - por xmm7, xmm6 ; accumulate thresholds - - paddusw xmm3, RD42 ; mm3 += round value - psraw xmm3, 3 ; mm3 /= 8 - - pand xmm1, xmm7 ; mm1 select vals > thresh from source - pandn xmm7, xmm3 ; mm7 select vals < thresh from blurred result - paddusw xmm1, xmm7 ; combination - - packuswb xmm1, xmm0 ; pack to bytes - movq QWORD PTR [rdi+rdx-8], mm0 ; store previous four bytes - movdq2q mm0, xmm1 - - add rdx, 8 - 
cmp edx, dword arg(5) ;cols - jl .acrossnextcol; - - ; last 8 pixels - movq QWORD PTR [rdi+rdx-8], mm0 + movdqu xmm0, XMMWORD PTR [rdi + rdx] + movdqu xmm1, XMMWORD PTR [rdi + rdx -2] + movdqu xmm3, XMMWORD PTR [rdi + rdx -1] + + FIRST_2_ROWS + + movdqu xmm1, XMMWORD PTR [rdi + rdx +1] + movdqu xmm3, XMMWORD PTR [rdi + rdx +2] + + SECOND_2_ROWS + + movq QWORD PTR [rdi+rdx-16], mm0 ; store previous 8 bytes + movq QWORD PTR [rdi+rdx-8], mm1 ; store previous 8 bytes + movdq2q mm0, xmm0 + psrldq xmm0, 8 + movdq2q mm1, xmm0 + + add rdx, 16 + cmp edx, dword arg(4) ;cols + jge .acrossdone + UPDATE_FLIMIT + jmp .acrossnextcol +.acrossdone + ; last 16 pixels + movq QWORD PTR [rdi+rdx-16], mm0 + + cmp edx, dword arg(4) + jne .throw_last_8 + movq QWORD PTR [rdi+rdx-8], mm1 +.throw_last_8: ; done with this rwo - add rsi,rax ; next line - mov eax, dword arg(3) ;dst_pixels_per_line ; destination pitch? - add rdi,rax ; next destination - mov eax, dword arg(2) ;src_pixels_per_line ; destination pitch? + add rsi,rax ;next src line + mov eax, dword arg(3) ;dst_pixels_per_line + add rdi,rax ;next destination + mov eax, dword arg(2) ;src_pixels_per_line - dec rcx ; decrement count - jnz .nextrow ; next row + mov rbx, arg(5) ;flimits + UPDATE_FLIMIT -%if ABI_IS_32BIT=1 && CONFIG_PIC=1 - add rsp,16 + dec rcx ;decrement count + jnz .nextrow ;next row + + add rsp, 16 pop rsp -%endif ; begin epilog pop rdi pop rsi - RESTORE_GOT + pop rbx RESTORE_XMM UNSHADOW_ARGS pop rbp ret -%undef RD42 - +%undef flimit ;void vp8_mbpost_proc_down_xmm(unsigned char *dst, ; int pitch, int rows, int cols,int flimit) extern sym(vp8_rv) -global sym(vp8_mbpost_proc_down_xmm) +global sym(vp8_mbpost_proc_down_xmm) PRIVATE sym(vp8_mbpost_proc_down_xmm): push rbp mov rbp, rsp @@ -497,7 +461,7 @@ sym(vp8_mbpost_proc_down_xmm): ;void vp8_mbpost_proc_across_ip_xmm(unsigned char *src, ; int pitch, int rows, int cols,int flimit) -global sym(vp8_mbpost_proc_across_ip_xmm) +global sym(vp8_mbpost_proc_across_ip_xmm) 
PRIVATE sym(vp8_mbpost_proc_across_ip_xmm): push rbp mov rbp, rsp @@ -694,7 +658,7 @@ sym(vp8_mbpost_proc_across_ip_xmm): ; unsigned char bothclamp[16], ; unsigned int Width, unsigned int Height, int Pitch) extern sym(rand) -global sym(vp8_plane_add_noise_wmt) +global sym(vp8_plane_add_noise_wmt) PRIVATE sym(vp8_plane_add_noise_wmt): push rbp mov rbp, rsp @@ -753,7 +717,5 @@ sym(vp8_plane_add_noise_wmt): SECTION_RODATA align 16 -rd42: - times 8 dw 0x04 four8s: times 4 dd 8 diff --git a/vp8/common/x86/postproc_x86.c b/vp8/common/x86/postproc_x86.c index a25921b..3ec0106 100644 --- a/vp8/common/x86/postproc_x86.c +++ b/vp8/common/x86/postproc_x86.c @@ -18,4 +18,7 @@ extern int rand(void) { return __rand(); } +#else +/* ISO C forbids an empty translation unit. */ +int vp8_unused; #endif diff --git a/vp8/common/x86/recon_mmx.asm b/vp8/common/x86/recon_mmx.asm index 19c0faf..15e9871 100644 --- a/vp8/common/x86/recon_mmx.asm +++ b/vp8/common/x86/recon_mmx.asm @@ -18,7 +18,7 @@ ; unsigned char *dst, ; int dst_stride ; ) -global sym(vp8_copy_mem8x8_mmx) +global sym(vp8_copy_mem8x8_mmx) PRIVATE sym(vp8_copy_mem8x8_mmx): push rbp mov rbp, rsp @@ -81,7 +81,7 @@ sym(vp8_copy_mem8x8_mmx): ; unsigned char *dst, ; int dst_stride ; ) -global sym(vp8_copy_mem8x4_mmx) +global sym(vp8_copy_mem8x4_mmx) PRIVATE sym(vp8_copy_mem8x4_mmx): push rbp mov rbp, rsp @@ -125,7 +125,7 @@ sym(vp8_copy_mem8x4_mmx): ; unsigned char *dst, ; int dst_stride ; ) -global sym(vp8_copy_mem16x16_mmx) +global sym(vp8_copy_mem16x16_mmx) PRIVATE sym(vp8_copy_mem16x16_mmx): push rbp mov rbp, rsp diff --git a/vp8/common/x86/recon_sse2.asm b/vp8/common/x86/recon_sse2.asm index 7b6e3cf..1434bcd 100644 --- a/vp8/common/x86/recon_sse2.asm +++ b/vp8/common/x86/recon_sse2.asm @@ -17,7 +17,7 @@ ; unsigned char *dst, ; int dst_stride ; ) -global sym(vp8_copy_mem16x16_sse2) +global sym(vp8_copy_mem16x16_sse2) PRIVATE sym(vp8_copy_mem16x16_sse2): push rbp mov rbp, rsp @@ -123,7 +123,7 @@ sym(vp8_copy_mem16x16_sse2): ; 
unsigned char *left, ; int left_stride, ; ) -global sym(vp8_intra_pred_uv_dc_mmx2) +global sym(vp8_intra_pred_uv_dc_mmx2) PRIVATE sym(vp8_intra_pred_uv_dc_mmx2): push rbp mov rbp, rsp @@ -196,7 +196,7 @@ sym(vp8_intra_pred_uv_dc_mmx2): ; unsigned char *left, ; int left_stride, ; ) -global sym(vp8_intra_pred_uv_dctop_mmx2) +global sym(vp8_intra_pred_uv_dctop_mmx2) PRIVATE sym(vp8_intra_pred_uv_dctop_mmx2): push rbp mov rbp, rsp @@ -250,7 +250,7 @@ sym(vp8_intra_pred_uv_dctop_mmx2): ; unsigned char *left, ; int left_stride, ; ) -global sym(vp8_intra_pred_uv_dcleft_mmx2) +global sym(vp8_intra_pred_uv_dcleft_mmx2) PRIVATE sym(vp8_intra_pred_uv_dcleft_mmx2): push rbp mov rbp, rsp @@ -317,7 +317,7 @@ sym(vp8_intra_pred_uv_dcleft_mmx2): ; unsigned char *left, ; int left_stride, ; ) -global sym(vp8_intra_pred_uv_dc128_mmx) +global sym(vp8_intra_pred_uv_dc128_mmx) PRIVATE sym(vp8_intra_pred_uv_dc128_mmx): push rbp mov rbp, rsp @@ -357,7 +357,7 @@ sym(vp8_intra_pred_uv_dc128_mmx): ; int left_stride, ; ) %macro vp8_intra_pred_uv_tm 1 -global sym(vp8_intra_pred_uv_tm_%1) +global sym(vp8_intra_pred_uv_tm_%1) PRIVATE sym(vp8_intra_pred_uv_tm_%1): push rbp mov rbp, rsp @@ -437,7 +437,7 @@ vp8_intra_pred_uv_tm ssse3 ; unsigned char *left, ; int left_stride, ; ) -global sym(vp8_intra_pred_uv_ve_mmx) +global sym(vp8_intra_pred_uv_ve_mmx) PRIVATE sym(vp8_intra_pred_uv_ve_mmx): push rbp mov rbp, rsp @@ -479,7 +479,7 @@ sym(vp8_intra_pred_uv_ve_mmx): ; int left_stride ; ) %macro vp8_intra_pred_uv_ho 1 -global sym(vp8_intra_pred_uv_ho_%1) +global sym(vp8_intra_pred_uv_ho_%1) PRIVATE sym(vp8_intra_pred_uv_ho_%1): push rbp mov rbp, rsp @@ -577,7 +577,7 @@ vp8_intra_pred_uv_ho ssse3 ; unsigned char *left, ; int left_stride ; ) -global sym(vp8_intra_pred_y_dc_sse2) +global sym(vp8_intra_pred_y_dc_sse2) PRIVATE sym(vp8_intra_pred_y_dc_sse2): push rbp mov rbp, rsp @@ -683,7 +683,7 @@ sym(vp8_intra_pred_y_dc_sse2): ; unsigned char *left, ; int left_stride ; ) -global 
sym(vp8_intra_pred_y_dctop_sse2) +global sym(vp8_intra_pred_y_dctop_sse2) PRIVATE sym(vp8_intra_pred_y_dctop_sse2): push rbp mov rbp, rsp @@ -745,7 +745,7 @@ sym(vp8_intra_pred_y_dctop_sse2): ; unsigned char *left, ; int left_stride ; ) -global sym(vp8_intra_pred_y_dcleft_sse2) +global sym(vp8_intra_pred_y_dcleft_sse2) PRIVATE sym(vp8_intra_pred_y_dcleft_sse2): push rbp mov rbp, rsp @@ -838,7 +838,7 @@ sym(vp8_intra_pred_y_dcleft_sse2): ; unsigned char *left, ; int left_stride ; ) -global sym(vp8_intra_pred_y_dc128_sse2) +global sym(vp8_intra_pred_y_dc128_sse2) PRIVATE sym(vp8_intra_pred_y_dc128_sse2): push rbp mov rbp, rsp @@ -885,11 +885,12 @@ sym(vp8_intra_pred_y_dc128_sse2): ; int left_stride ; ) %macro vp8_intra_pred_y_tm 1 -global sym(vp8_intra_pred_y_tm_%1) +global sym(vp8_intra_pred_y_tm_%1) PRIVATE sym(vp8_intra_pred_y_tm_%1): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 5 + SAVE_XMM 7 push rsi push rdi GET_GOT rbx @@ -957,6 +958,7 @@ vp8_intra_pred_y_tm_%1_loop: RESTORE_GOT pop rdi pop rsi + RESTORE_XMM UNSHADOW_ARGS pop rbp ret @@ -972,7 +974,7 @@ vp8_intra_pred_y_tm ssse3 ; unsigned char *left, ; int left_stride ; ) -global sym(vp8_intra_pred_y_ve_sse2) +global sym(vp8_intra_pred_y_ve_sse2) PRIVATE sym(vp8_intra_pred_y_ve_sse2): push rbp mov rbp, rsp @@ -1020,7 +1022,7 @@ sym(vp8_intra_pred_y_ve_sse2): ; unsigned char *left, ; int left_stride, ; ) -global sym(vp8_intra_pred_y_ho_sse2) +global sym(vp8_intra_pred_y_ho_sse2) PRIVATE sym(vp8_intra_pred_y_ho_sse2): push rbp mov rbp, rsp diff --git a/vp8/common/x86/sad_mmx.asm b/vp8/common/x86/sad_mmx.asm index 407b399..592112f 100644 --- a/vp8/common/x86/sad_mmx.asm +++ b/vp8/common/x86/sad_mmx.asm @@ -11,11 +11,11 @@ %include "vpx_ports/x86_abi_support.asm" -global sym(vp8_sad16x16_mmx) -global sym(vp8_sad8x16_mmx) -global sym(vp8_sad8x8_mmx) -global sym(vp8_sad4x4_mmx) -global sym(vp8_sad16x8_mmx) +global sym(vp8_sad16x16_mmx) PRIVATE +global sym(vp8_sad8x16_mmx) PRIVATE +global sym(vp8_sad8x8_mmx) PRIVATE 
+global sym(vp8_sad4x4_mmx) PRIVATE +global sym(vp8_sad16x8_mmx) PRIVATE ;unsigned int vp8_sad16x16_mmx( ; unsigned char *src_ptr, diff --git a/vp8/common/x86/sad_sse2.asm b/vp8/common/x86/sad_sse2.asm index 0b01d7b..8d86abc 100644 --- a/vp8/common/x86/sad_sse2.asm +++ b/vp8/common/x86/sad_sse2.asm @@ -16,7 +16,7 @@ ; int src_stride, ; unsigned char *ref_ptr, ; int ref_stride) -global sym(vp8_sad16x16_wmt) +global sym(vp8_sad16x16_wmt) PRIVATE sym(vp8_sad16x16_wmt): push rbp mov rbp, rsp @@ -90,7 +90,7 @@ sym(vp8_sad16x16_wmt): ; unsigned char *ref_ptr, ; int ref_stride, ; int max_sad) -global sym(vp8_sad8x16_wmt) +global sym(vp8_sad8x16_wmt) PRIVATE sym(vp8_sad8x16_wmt): push rbp mov rbp, rsp @@ -115,7 +115,7 @@ sym(vp8_sad8x16_wmt): movq rax, mm7 cmp eax, arg(4) - jg .x8x16sad_wmt_early_exit + ja .x8x16sad_wmt_early_exit movq mm0, QWORD PTR [rsi] movq mm1, QWORD PTR [rdi] @@ -153,7 +153,7 @@ sym(vp8_sad8x16_wmt): ; int src_stride, ; unsigned char *ref_ptr, ; int ref_stride) -global sym(vp8_sad8x8_wmt) +global sym(vp8_sad8x8_wmt) PRIVATE sym(vp8_sad8x8_wmt): push rbp mov rbp, rsp @@ -176,7 +176,7 @@ sym(vp8_sad8x8_wmt): movq rax, mm7 cmp eax, arg(4) - jg .x8x8sad_wmt_early_exit + ja .x8x8sad_wmt_early_exit movq mm0, QWORD PTR [rsi] movq mm1, QWORD PTR [rdi] @@ -206,7 +206,7 @@ sym(vp8_sad8x8_wmt): ; int src_stride, ; unsigned char *ref_ptr, ; int ref_stride) -global sym(vp8_sad4x4_wmt) +global sym(vp8_sad4x4_wmt) PRIVATE sym(vp8_sad4x4_wmt): push rbp mov rbp, rsp @@ -261,7 +261,7 @@ sym(vp8_sad4x4_wmt): ; int src_stride, ; unsigned char *ref_ptr, ; int ref_stride) -global sym(vp8_sad16x8_wmt) +global sym(vp8_sad16x8_wmt) PRIVATE sym(vp8_sad16x8_wmt): push rbp mov rbp, rsp @@ -285,7 +285,7 @@ sym(vp8_sad16x8_wmt): movq rax, mm7 cmp eax, arg(4) - jg .x16x8sad_wmt_early_exit + ja .x16x8sad_wmt_early_exit movq mm0, QWORD PTR [rsi] movq mm2, QWORD PTR [rsi+8] @@ -335,7 +335,7 @@ sym(vp8_sad16x8_wmt): ; unsigned char *dst_ptr, ; int dst_stride, ; int height); -global 
sym(vp8_copy32xn_sse2) +global sym(vp8_copy32xn_sse2) PRIVATE sym(vp8_copy32xn_sse2): push rbp mov rbp, rsp diff --git a/vp8/common/x86/sad_sse3.asm b/vp8/common/x86/sad_sse3.asm index c2af3c8..f90a589 100644 --- a/vp8/common/x86/sad_sse3.asm +++ b/vp8/common/x86/sad_sse3.asm @@ -380,7 +380,7 @@ ; unsigned char *ref_ptr, ; int ref_stride, ; int *results) -global sym(vp8_sad16x16x3_sse3) +global sym(vp8_sad16x16x3_sse3) PRIVATE sym(vp8_sad16x16x3_sse3): STACK_FRAME_CREATE_X3 @@ -422,7 +422,7 @@ sym(vp8_sad16x16x3_sse3): ; unsigned char *ref_ptr, ; int ref_stride, ; int *results) -global sym(vp8_sad16x8x3_sse3) +global sym(vp8_sad16x8x3_sse3) PRIVATE sym(vp8_sad16x8x3_sse3): STACK_FRAME_CREATE_X3 @@ -460,7 +460,7 @@ sym(vp8_sad16x8x3_sse3): ; unsigned char *ref_ptr, ; int ref_stride, ; int *results) -global sym(vp8_sad8x16x3_sse3) +global sym(vp8_sad8x16x3_sse3) PRIVATE sym(vp8_sad8x16x3_sse3): STACK_FRAME_CREATE_X3 @@ -489,7 +489,7 @@ sym(vp8_sad8x16x3_sse3): ; unsigned char *ref_ptr, ; int ref_stride, ; int *results) -global sym(vp8_sad8x8x3_sse3) +global sym(vp8_sad8x8x3_sse3) PRIVATE sym(vp8_sad8x8x3_sse3): STACK_FRAME_CREATE_X3 @@ -514,7 +514,7 @@ sym(vp8_sad8x8x3_sse3): ; unsigned char *ref_ptr, ; int ref_stride, ; int *results) -global sym(vp8_sad4x4x3_sse3) +global sym(vp8_sad4x4x3_sse3) PRIVATE sym(vp8_sad4x4x3_sse3): STACK_FRAME_CREATE_X3 @@ -589,7 +589,7 @@ sym(vp8_sad4x4x3_sse3): ; int ref_stride, ; int max_sad) ;%define lddqu movdqu -global sym(vp8_sad16x16_sse3) +global sym(vp8_sad16x16_sse3) PRIVATE sym(vp8_sad16x16_sse3): STACK_FRAME_CREATE_X3 @@ -642,7 +642,7 @@ sym(vp8_sad16x16_sse3): ; unsigned char *dst_ptr, ; int dst_stride, ; int height); -global sym(vp8_copy32xn_sse3) +global sym(vp8_copy32xn_sse3) PRIVATE sym(vp8_copy32xn_sse3): STACK_FRAME_CREATE_X3 @@ -703,7 +703,7 @@ sym(vp8_copy32xn_sse3): ; unsigned char *ref_ptr_base, ; int ref_stride, ; int *results) -global sym(vp8_sad16x16x4d_sse3) +global sym(vp8_sad16x16x4d_sse3) PRIVATE 
sym(vp8_sad16x16x4d_sse3): STACK_FRAME_CREATE_X4 @@ -754,7 +754,7 @@ sym(vp8_sad16x16x4d_sse3): ; unsigned char *ref_ptr_base, ; int ref_stride, ; int *results) -global sym(vp8_sad16x8x4d_sse3) +global sym(vp8_sad16x8x4d_sse3) PRIVATE sym(vp8_sad16x8x4d_sse3): STACK_FRAME_CREATE_X4 @@ -801,7 +801,7 @@ sym(vp8_sad16x8x4d_sse3): ; unsigned char *ref_ptr, ; int ref_stride, ; int *results) -global sym(vp8_sad8x16x4d_sse3) +global sym(vp8_sad8x16x4d_sse3) PRIVATE sym(vp8_sad8x16x4d_sse3): STACK_FRAME_CREATE_X4 @@ -834,7 +834,7 @@ sym(vp8_sad8x16x4d_sse3): ; unsigned char *ref_ptr, ; int ref_stride, ; int *results) -global sym(vp8_sad8x8x4d_sse3) +global sym(vp8_sad8x8x4d_sse3) PRIVATE sym(vp8_sad8x8x4d_sse3): STACK_FRAME_CREATE_X4 @@ -863,7 +863,7 @@ sym(vp8_sad8x8x4d_sse3): ; unsigned char *ref_ptr, ; int ref_stride, ; int *results) -global sym(vp8_sad4x4x4d_sse3) +global sym(vp8_sad4x4x4d_sse3) PRIVATE sym(vp8_sad4x4x4d_sse3): STACK_FRAME_CREATE_X4 diff --git a/vp8/common/x86/sad_sse4.asm b/vp8/common/x86/sad_sse4.asm index 03ecec4..f7fccd7 100644 --- a/vp8/common/x86/sad_sse4.asm +++ b/vp8/common/x86/sad_sse4.asm @@ -161,7 +161,7 @@ ; const unsigned char *ref_ptr, ; int ref_stride, ; unsigned short *sad_array); -global sym(vp8_sad16x16x8_sse4) +global sym(vp8_sad16x16x8_sse4) PRIVATE sym(vp8_sad16x16x8_sse4): push rbp mov rbp, rsp @@ -203,7 +203,7 @@ sym(vp8_sad16x16x8_sse4): ; int ref_stride, ; unsigned short *sad_array ;); -global sym(vp8_sad16x8x8_sse4) +global sym(vp8_sad16x8x8_sse4) PRIVATE sym(vp8_sad16x8x8_sse4): push rbp mov rbp, rsp @@ -241,7 +241,7 @@ sym(vp8_sad16x8x8_sse4): ; int ref_stride, ; unsigned short *sad_array ;); -global sym(vp8_sad8x8x8_sse4) +global sym(vp8_sad8x8x8_sse4) PRIVATE sym(vp8_sad8x8x8_sse4): push rbp mov rbp, rsp @@ -279,7 +279,7 @@ sym(vp8_sad8x8x8_sse4): ; int ref_stride, ; unsigned short *sad_array ;); -global sym(vp8_sad8x16x8_sse4) +global sym(vp8_sad8x16x8_sse4) PRIVATE sym(vp8_sad8x16x8_sse4): push rbp mov rbp, rsp @@ -320,7 
+320,7 @@ sym(vp8_sad8x16x8_sse4): ; int ref_stride, ; unsigned short *sad_array ;); -global sym(vp8_sad4x4x8_sse4) +global sym(vp8_sad4x4x8_sse4) PRIVATE sym(vp8_sad4x4x8_sse4): push rbp mov rbp, rsp diff --git a/vp8/common/x86/sad_ssse3.asm b/vp8/common/x86/sad_ssse3.asm index 95b6c89..278fc06 100644 --- a/vp8/common/x86/sad_ssse3.asm +++ b/vp8/common/x86/sad_ssse3.asm @@ -152,7 +152,7 @@ ; unsigned char *ref_ptr, ; int ref_stride, ; int *results) -global sym(vp8_sad16x16x3_ssse3) +global sym(vp8_sad16x16x3_ssse3) PRIVATE sym(vp8_sad16x16x3_ssse3): push rbp mov rbp, rsp @@ -265,7 +265,7 @@ sym(vp8_sad16x16x3_ssse3): ; unsigned char *ref_ptr, ; int ref_stride, ; int *results) -global sym(vp8_sad16x8x3_ssse3) +global sym(vp8_sad16x8x3_ssse3) PRIVATE sym(vp8_sad16x8x3_ssse3): push rbp mov rbp, rsp diff --git a/vp8/common/x86/subpixel_mmx.asm b/vp8/common/x86/subpixel_mmx.asm index 5528fd0..47dd452 100644 --- a/vp8/common/x86/subpixel_mmx.asm +++ b/vp8/common/x86/subpixel_mmx.asm @@ -28,7 +28,7 @@ extern sym(vp8_bilinear_filters_x86_8) ; unsigned int output_width, ; short * vp8_filter ;) -global sym(vp8_filter_block1d_h6_mmx) +global sym(vp8_filter_block1d_h6_mmx) PRIVATE sym(vp8_filter_block1d_h6_mmx): push rbp mov rbp, rsp @@ -125,7 +125,7 @@ sym(vp8_filter_block1d_h6_mmx): ; unsigned int output_width, ; short * vp8_filter ;) -global sym(vp8_filter_block1dc_v6_mmx) +global sym(vp8_filter_block1dc_v6_mmx) PRIVATE sym(vp8_filter_block1dc_v6_mmx): push rbp mov rbp, rsp @@ -213,7 +213,7 @@ sym(vp8_filter_block1dc_v6_mmx): ; unsigned char *dst_ptr, ; int dst_pitch ;) -global sym(vp8_bilinear_predict8x8_mmx) +global sym(vp8_bilinear_predict8x8_mmx) PRIVATE sym(vp8_bilinear_predict8x8_mmx): push rbp mov rbp, rsp @@ -370,7 +370,7 @@ sym(vp8_bilinear_predict8x8_mmx): ; unsigned char *dst_ptr, ; int dst_pitch ;) -global sym(vp8_bilinear_predict8x4_mmx) +global sym(vp8_bilinear_predict8x4_mmx) PRIVATE sym(vp8_bilinear_predict8x4_mmx): push rbp mov rbp, rsp @@ -525,7 +525,7 @@ 
sym(vp8_bilinear_predict8x4_mmx): ; unsigned char *dst_ptr, ; int dst_pitch ;) -global sym(vp8_bilinear_predict4x4_mmx) +global sym(vp8_bilinear_predict4x4_mmx) PRIVATE sym(vp8_bilinear_predict4x4_mmx): push rbp mov rbp, rsp diff --git a/vp8/common/x86/subpixel_sse2.asm b/vp8/common/x86/subpixel_sse2.asm index cb550af..69f8d10 100644 --- a/vp8/common/x86/subpixel_sse2.asm +++ b/vp8/common/x86/subpixel_sse2.asm @@ -33,7 +33,7 @@ extern sym(vp8_bilinear_filters_x86_8) ; unsigned int output_width, ; short *vp8_filter ;) -global sym(vp8_filter_block1d8_h6_sse2) +global sym(vp8_filter_block1d8_h6_sse2) PRIVATE sym(vp8_filter_block1d8_h6_sse2): push rbp mov rbp, rsp @@ -153,7 +153,7 @@ sym(vp8_filter_block1d8_h6_sse2): ; even number. This function handles 8 pixels in horizontal direction, calculating ONE ; rows each iteration to take advantage of the 128 bits operations. ;*************************************************************************************/ -global sym(vp8_filter_block1d16_h6_sse2) +global sym(vp8_filter_block1d16_h6_sse2) PRIVATE sym(vp8_filter_block1d16_h6_sse2): push rbp mov rbp, rsp @@ -329,7 +329,7 @@ sym(vp8_filter_block1d16_h6_sse2): ; Notes: filter_block1d8_v6 applies a 6 tap filter vertically to the input pixels. The ; input pixel array has output_height rows. ;*************************************************************************************/ -global sym(vp8_filter_block1d8_v6_sse2) +global sym(vp8_filter_block1d8_v6_sse2) PRIVATE sym(vp8_filter_block1d8_v6_sse2): push rbp mov rbp, rsp @@ -424,7 +424,7 @@ sym(vp8_filter_block1d8_v6_sse2): ; Notes: filter_block1d16_v6 applies a 6 tap filter vertically to the input pixels. The ; input pixel array has output_height rows. 
;*************************************************************************************/ -global sym(vp8_filter_block1d16_v6_sse2) +global sym(vp8_filter_block1d16_v6_sse2) PRIVATE sym(vp8_filter_block1d16_v6_sse2): push rbp mov rbp, rsp @@ -534,7 +534,7 @@ sym(vp8_filter_block1d16_v6_sse2): ; const short *vp8_filter ;) ; First-pass filter only when yoffset==0 -global sym(vp8_filter_block1d8_h6_only_sse2) +global sym(vp8_filter_block1d8_h6_only_sse2) PRIVATE sym(vp8_filter_block1d8_h6_only_sse2): push rbp mov rbp, rsp @@ -647,7 +647,7 @@ sym(vp8_filter_block1d8_h6_only_sse2): ; const short *vp8_filter ;) ; First-pass filter only when yoffset==0 -global sym(vp8_filter_block1d16_h6_only_sse2) +global sym(vp8_filter_block1d16_h6_only_sse2) PRIVATE sym(vp8_filter_block1d16_h6_only_sse2): push rbp mov rbp, rsp @@ -812,7 +812,7 @@ sym(vp8_filter_block1d16_h6_only_sse2): ; const short *vp8_filter ;) ; Second-pass filter only when xoffset==0 -global sym(vp8_filter_block1d8_v6_only_sse2) +global sym(vp8_filter_block1d8_v6_only_sse2) PRIVATE sym(vp8_filter_block1d8_v6_only_sse2): push rbp mov rbp, rsp @@ -904,7 +904,7 @@ sym(vp8_filter_block1d8_v6_only_sse2): ; unsigned int output_height, ; unsigned int output_width ;) -global sym(vp8_unpack_block1d16_h6_sse2) +global sym(vp8_unpack_block1d16_h6_sse2) PRIVATE sym(vp8_unpack_block1d16_h6_sse2): push rbp mov rbp, rsp @@ -963,7 +963,7 @@ sym(vp8_unpack_block1d16_h6_sse2): ; int dst_pitch ;) extern sym(vp8_bilinear_filters_x86_8) -global sym(vp8_bilinear_predict16x16_sse2) +global sym(vp8_bilinear_predict16x16_sse2) PRIVATE sym(vp8_bilinear_predict16x16_sse2): push rbp mov rbp, rsp @@ -1231,7 +1231,7 @@ sym(vp8_bilinear_predict16x16_sse2): ; unsigned char *dst_ptr, ; int dst_pitch ;) -global sym(vp8_bilinear_predict8x8_sse2) +global sym(vp8_bilinear_predict8x8_sse2) PRIVATE sym(vp8_bilinear_predict8x8_sse2): push rbp mov rbp, rsp diff --git a/vp8/common/x86/subpixel_ssse3.asm b/vp8/common/x86/subpixel_ssse3.asm index 
6bca82b..c06f245 100644 --- a/vp8/common/x86/subpixel_ssse3.asm +++ b/vp8/common/x86/subpixel_ssse3.asm @@ -34,7 +34,7 @@ ; unsigned int output_height, ; unsigned int vp8_filter_index ;) -global sym(vp8_filter_block1d8_h6_ssse3) +global sym(vp8_filter_block1d8_h6_ssse3) PRIVATE sym(vp8_filter_block1d8_h6_ssse3): push rbp mov rbp, rsp @@ -177,7 +177,7 @@ vp8_filter_block1d8_h4_ssse3: ; unsigned int output_height, ; unsigned int vp8_filter_index ;) -global sym(vp8_filter_block1d16_h6_ssse3) +global sym(vp8_filter_block1d16_h6_ssse3) PRIVATE sym(vp8_filter_block1d16_h6_ssse3): push rbp mov rbp, rsp @@ -284,7 +284,7 @@ sym(vp8_filter_block1d16_h6_ssse3): ; unsigned int output_height, ; unsigned int vp8_filter_index ;) -global sym(vp8_filter_block1d4_h6_ssse3) +global sym(vp8_filter_block1d4_h6_ssse3) PRIVATE sym(vp8_filter_block1d4_h6_ssse3): push rbp mov rbp, rsp @@ -352,6 +352,7 @@ sym(vp8_filter_block1d4_h6_ssse3): pop rdi pop rsi RESTORE_GOT + RESTORE_XMM UNSHADOW_ARGS pop rbp ret @@ -413,7 +414,7 @@ sym(vp8_filter_block1d4_h6_ssse3): ; unsigned int output_height, ; unsigned int vp8_filter_index ;) -global sym(vp8_filter_block1d16_v6_ssse3) +global sym(vp8_filter_block1d16_v6_ssse3) PRIVATE sym(vp8_filter_block1d16_v6_ssse3): push rbp mov rbp, rsp @@ -601,7 +602,7 @@ sym(vp8_filter_block1d16_v6_ssse3): ; unsigned int output_height, ; unsigned int vp8_filter_index ;) -global sym(vp8_filter_block1d8_v6_ssse3) +global sym(vp8_filter_block1d8_v6_ssse3) PRIVATE sym(vp8_filter_block1d8_v6_ssse3): push rbp mov rbp, rsp @@ -741,7 +742,7 @@ sym(vp8_filter_block1d8_v6_ssse3): ; unsigned int output_height, ; unsigned int vp8_filter_index ;) -global sym(vp8_filter_block1d4_v6_ssse3) +global sym(vp8_filter_block1d4_v6_ssse3) PRIVATE sym(vp8_filter_block1d4_v6_ssse3): push rbp mov rbp, rsp @@ -880,7 +881,7 @@ sym(vp8_filter_block1d4_v6_ssse3): ; unsigned char *dst_ptr, ; int dst_pitch ;) -global sym(vp8_bilinear_predict16x16_ssse3) +global sym(vp8_bilinear_predict16x16_ssse3) 
PRIVATE sym(vp8_bilinear_predict16x16_ssse3): push rbp mov rbp, rsp @@ -1143,7 +1144,7 @@ sym(vp8_bilinear_predict16x16_ssse3): ; unsigned char *dst_ptr, ; int dst_pitch ;) -global sym(vp8_bilinear_predict8x8_ssse3) +global sym(vp8_bilinear_predict8x8_ssse3) PRIVATE sym(vp8_bilinear_predict8x8_ssse3): push rbp mov rbp, rsp diff --git a/vp8/common/x86/variance_impl_mmx.asm b/vp8/common/x86/variance_impl_mmx.asm index 2be8bbe..d9120d0 100644 --- a/vp8/common/x86/variance_impl_mmx.asm +++ b/vp8/common/x86/variance_impl_mmx.asm @@ -12,7 +12,7 @@ %include "vpx_ports/x86_abi_support.asm" ;unsigned int vp8_get_mb_ss_mmx( short *src_ptr ) -global sym(vp8_get_mb_ss_mmx) +global sym(vp8_get_mb_ss_mmx) PRIVATE sym(vp8_get_mb_ss_mmx): push rbp mov rbp, rsp @@ -72,7 +72,7 @@ sym(vp8_get_mb_ss_mmx): ; unsigned int *SSE, ; int *Sum ;) -global sym(vp8_get8x8var_mmx) +global sym(vp8_get8x8var_mmx) PRIVATE sym(vp8_get8x8var_mmx): push rbp mov rbp, rsp @@ -320,7 +320,7 @@ sym(vp8_get8x8var_mmx): ; unsigned int *SSE, ; int *Sum ;) -global sym(vp8_get4x4var_mmx) +global sym(vp8_get4x4var_mmx) PRIVATE sym(vp8_get4x4var_mmx): push rbp mov rbp, rsp @@ -433,7 +433,7 @@ sym(vp8_get4x4var_mmx): ; unsigned char *ref_ptr, ; int recon_stride ;) -global sym(vp8_get4x4sse_cs_mmx) +global sym(vp8_get4x4sse_cs_mmx) PRIVATE sym(vp8_get4x4sse_cs_mmx): push rbp mov rbp, rsp @@ -522,7 +522,7 @@ sym(vp8_get4x4sse_cs_mmx): ; int *sum, ; unsigned int *sumsquared ;) -global sym(vp8_filter_block2d_bil4x4_var_mmx) +global sym(vp8_filter_block2d_bil4x4_var_mmx) PRIVATE sym(vp8_filter_block2d_bil4x4_var_mmx): push rbp mov rbp, rsp @@ -667,7 +667,7 @@ sym(vp8_filter_block2d_bil4x4_var_mmx): ; int *sum, ; unsigned int *sumsquared ;) -global sym(vp8_filter_block2d_bil_var_mmx) +global sym(vp8_filter_block2d_bil_var_mmx) PRIVATE sym(vp8_filter_block2d_bil_var_mmx): push rbp mov rbp, rsp diff --git a/vp8/common/x86/variance_impl_sse2.asm b/vp8/common/x86/variance_impl_sse2.asm index 7629220..761433c 100644 --- 
a/vp8/common/x86/variance_impl_sse2.asm +++ b/vp8/common/x86/variance_impl_sse2.asm @@ -17,7 +17,7 @@ ;( ; short *src_ptr ;) -global sym(vp8_get_mb_ss_sse2) +global sym(vp8_get_mb_ss_sse2) PRIVATE sym(vp8_get_mb_ss_sse2): push rbp mov rbp, rsp @@ -80,7 +80,7 @@ sym(vp8_get_mb_ss_sse2): ; unsigned int * SSE, ; int * Sum ;) -global sym(vp8_get16x16var_sse2) +global sym(vp8_get16x16var_sse2) PRIVATE sym(vp8_get16x16var_sse2): push rbp mov rbp, rsp @@ -224,7 +224,7 @@ sym(vp8_get16x16var_sse2): ; unsigned int * SSE, ; int * Sum ;) -global sym(vp8_get8x8var_sse2) +global sym(vp8_get8x8var_sse2) PRIVATE sym(vp8_get8x8var_sse2): push rbp mov rbp, rsp @@ -413,7 +413,7 @@ sym(vp8_get8x8var_sse2): ; unsigned int *sumsquared;; ; ;) -global sym(vp8_filter_block2d_bil_var_sse2) +global sym(vp8_filter_block2d_bil_var_sse2) PRIVATE sym(vp8_filter_block2d_bil_var_sse2): push rbp mov rbp, rsp @@ -690,7 +690,7 @@ filter_block2d_bil_variance: ; int *sum, ; unsigned int *sumsquared ;) -global sym(vp8_half_horiz_vert_variance8x_h_sse2) +global sym(vp8_half_horiz_vert_variance8x_h_sse2) PRIVATE sym(vp8_half_horiz_vert_variance8x_h_sse2): push rbp mov rbp, rsp @@ -812,7 +812,7 @@ vp8_half_horiz_vert_variance8x_h_1: ; int *sum, ; unsigned int *sumsquared ;) -global sym(vp8_half_horiz_vert_variance16x_h_sse2) +global sym(vp8_half_horiz_vert_variance16x_h_sse2) PRIVATE sym(vp8_half_horiz_vert_variance16x_h_sse2): push rbp mov rbp, rsp @@ -928,7 +928,7 @@ vp8_half_horiz_vert_variance16x_h_1: ; int *sum, ; unsigned int *sumsquared ;) -global sym(vp8_half_vert_variance8x_h_sse2) +global sym(vp8_half_vert_variance8x_h_sse2) PRIVATE sym(vp8_half_vert_variance8x_h_sse2): push rbp mov rbp, rsp @@ -1035,7 +1035,7 @@ vp8_half_vert_variance8x_h_1: ; int *sum, ; unsigned int *sumsquared ;) -global sym(vp8_half_vert_variance16x_h_sse2) +global sym(vp8_half_vert_variance16x_h_sse2) PRIVATE sym(vp8_half_vert_variance16x_h_sse2): push rbp mov rbp, rsp @@ -1143,7 +1143,7 @@ vp8_half_vert_variance16x_h_1: ; 
int *sum, ; unsigned int *sumsquared ;) -global sym(vp8_half_horiz_variance8x_h_sse2) +global sym(vp8_half_horiz_variance8x_h_sse2) PRIVATE sym(vp8_half_horiz_variance8x_h_sse2): push rbp mov rbp, rsp @@ -1248,7 +1248,7 @@ vp8_half_horiz_variance8x_h_1: ; int *sum, ; unsigned int *sumsquared ;) -global sym(vp8_half_horiz_variance16x_h_sse2) +global sym(vp8_half_horiz_variance16x_h_sse2) PRIVATE sym(vp8_half_horiz_variance16x_h_sse2): push rbp mov rbp, rsp diff --git a/vp8/common/x86/variance_impl_ssse3.asm b/vp8/common/x86/variance_impl_ssse3.asm index 97e8b0e..686b4a9 100644 --- a/vp8/common/x86/variance_impl_ssse3.asm +++ b/vp8/common/x86/variance_impl_ssse3.asm @@ -29,7 +29,7 @@ ;) ;Note: The filter coefficient at offset=0 is 128. Since the second register ;for Pmaddubsw is signed bytes, we must calculate zero offset seperately. -global sym(vp8_filter_block2d_bil_var_ssse3) +global sym(vp8_filter_block2d_bil_var_ssse3) PRIVATE sym(vp8_filter_block2d_bil_var_ssse3): push rbp mov rbp, rsp diff --git a/vp8/common/x86/variance_sse2.c b/vp8/common/x86/variance_sse2.c index 2769a30..afd6429 100644 --- a/vp8/common/x86/variance_sse2.c +++ b/vp8/common/x86/variance_sse2.c @@ -332,8 +332,9 @@ unsigned int vp8_sub_pixel_variance16x16_wmt unsigned int xxsum0, xxsum1; - // note we could avoid these if statements if the calling function - // just called the appropriate functions inside. + /* note we could avoid these if statements if the calling function + * just called the appropriate functions inside. + */ if (xoffset == 4 && yoffset == 0) { vp8_half_horiz_variance16x_h_sse2( diff --git a/vp8/common/x86/variance_ssse3.c b/vp8/common/x86/variance_ssse3.c index 1be0d92..ba2055c 100644 --- a/vp8/common/x86/variance_ssse3.c +++ b/vp8/common/x86/variance_ssse3.c @@ -79,8 +79,9 @@ unsigned int vp8_sub_pixel_variance16x16_ssse3 int xsum0; unsigned int xxsum0; - // note we could avoid these if statements if the calling function - // just called the appropriate functions inside. 
+ /* note we could avoid these if statements if the calling function + * just called the appropriate functions inside. + */ if (xoffset == 4 && yoffset == 0) { vp8_half_horiz_variance16x_h_sse2( diff --git a/vp8/common/x86/vp8_asm_stubs.c b/vp8/common/x86/vp8_asm_stubs.c index 23a7fdc..3437a23 100644 --- a/vp8/common/x86/vp8_asm_stubs.c +++ b/vp8/common/x86/vp8_asm_stubs.c @@ -438,19 +438,35 @@ void vp8_sixtap_predict16x16_ssse3 { if (yoffset) { - vp8_filter_block1d16_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 16, 21, xoffset); - vp8_filter_block1d16_v6_ssse3(FData2 , 16, dst_ptr, dst_pitch, 16, yoffset); + vp8_filter_block1d16_h6_ssse3(src_ptr - (2 * src_pixels_per_line), + src_pixels_per_line, FData2, + 16, 21, xoffset); + vp8_filter_block1d16_v6_ssse3(FData2 , 16, dst_ptr, dst_pitch, + 16, yoffset); } else { /* First-pass only */ - vp8_filter_block1d16_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 16, xoffset); + vp8_filter_block1d16_h6_ssse3(src_ptr, src_pixels_per_line, + dst_ptr, dst_pitch, 16, xoffset); } } else { - /* Second-pass only */ - vp8_filter_block1d16_v6_ssse3(src_ptr - (2 * src_pixels_per_line) , src_pixels_per_line, dst_ptr, dst_pitch, 16, yoffset); + if (yoffset) + { + /* Second-pass only */ + vp8_filter_block1d16_v6_ssse3(src_ptr - (2 * src_pixels_per_line), + src_pixels_per_line, + dst_ptr, dst_pitch, 16, yoffset); + } + else + { + /* ssse3 second-pass only function couldn't handle (xoffset==0 && + * yoffset==0) case correctly. Add copy function here to guarantee + * six-tap function handles all possible offsets. 
*/ + vp8_copy_mem16x16(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch); + } } } @@ -470,18 +486,34 @@ void vp8_sixtap_predict8x8_ssse3 { if (yoffset) { - vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 8, 13, xoffset); - vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch, 8, yoffset); + vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line), + src_pixels_per_line, FData2, + 8, 13, xoffset); + vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch, + 8, yoffset); } else { - vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 8, xoffset); + vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line, + dst_ptr, dst_pitch, 8, xoffset); } } else { - /* Second-pass only */ - vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 8, yoffset); + if (yoffset) + { + /* Second-pass only */ + vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line), + src_pixels_per_line, + dst_ptr, dst_pitch, 8, yoffset); + } + else + { + /* ssse3 second-pass only function couldn't handle (xoffset==0 && + * yoffset==0) case correctly. Add copy function here to guarantee + * six-tap function handles all possible offsets. 
*/ + vp8_copy_mem8x8(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch); + } } } @@ -502,19 +534,35 @@ void vp8_sixtap_predict8x4_ssse3 { if (yoffset) { - vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 8, 9, xoffset); - vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch, 4, yoffset); + vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line), + src_pixels_per_line, FData2, + 8, 9, xoffset); + vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch, + 4, yoffset); } else { /* First-pass only */ - vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 4, xoffset); + vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line, + dst_ptr, dst_pitch, 4, xoffset); } } else { - /* Second-pass only */ - vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 4, yoffset); + if (yoffset) + { + /* Second-pass only */ + vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line), + src_pixels_per_line, + dst_ptr, dst_pitch, 4, yoffset); + } + else + { + /* ssse3 second-pass only function couldn't handle (xoffset==0 && + * yoffset==0) case correctly. Add copy function here to guarantee + * six-tap function handles all possible offsets. 
*/ + vp8_copy_mem8x4(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch); + } } } @@ -534,19 +582,48 @@ void vp8_sixtap_predict4x4_ssse3 { if (yoffset) { - vp8_filter_block1d4_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 4, 9, xoffset); - vp8_filter_block1d4_v6_ssse3(FData2, 4, dst_ptr, dst_pitch, 4, yoffset); + vp8_filter_block1d4_h6_ssse3(src_ptr - (2 * src_pixels_per_line), + src_pixels_per_line, + FData2, 4, 9, xoffset); + vp8_filter_block1d4_v6_ssse3(FData2, 4, dst_ptr, dst_pitch, + 4, yoffset); } else { - vp8_filter_block1d4_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 4, xoffset); + vp8_filter_block1d4_h6_ssse3(src_ptr, src_pixels_per_line, + dst_ptr, dst_pitch, 4, xoffset); } } else { - vp8_filter_block1d4_v6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 4, yoffset); + if (yoffset) + { + vp8_filter_block1d4_v6_ssse3(src_ptr - (2 * src_pixels_per_line), + src_pixels_per_line, + dst_ptr, dst_pitch, 4, yoffset); + } + else + { + /* ssse3 second-pass only function couldn't handle (xoffset==0 && + * yoffset==0) case correctly. Add copy function here to guarantee + * six-tap function handles all possible offsets. 
*/ + int r; + + for (r = 0; r < 4; r++) + { + #if !(CONFIG_FAST_UNALIGNED) + dst_ptr[0] = src_ptr[0]; + dst_ptr[1] = src_ptr[1]; + dst_ptr[2] = src_ptr[2]; + dst_ptr[3] = src_ptr[3]; + #else + *(uint32_t *)dst_ptr = *(uint32_t *)src_ptr ; + #endif + dst_ptr += dst_pitch; + src_ptr += src_pixels_per_line; + } + } } - } #endif diff --git a/vp8/decoder/dboolhuff.h b/vp8/decoder/dboolhuff.h index 880c185..1a08c05 100644 --- a/vp8/decoder/dboolhuff.h +++ b/vp8/decoder/dboolhuff.h @@ -55,7 +55,7 @@ void vp8dx_bool_decoder_fill(BOOL_DECODER *br); int loop_end, x; \ size_t bits_left = ((_bufend)-(_bufptr))*CHAR_BIT; \ \ - x = shift + CHAR_BIT - bits_left; \ + x = (int)(shift + CHAR_BIT - bits_left); \ loop_end = 0; \ if(x >= 0) \ { \ diff --git a/vp8/decoder/decodemv.c b/vp8/decoder/decodemv.c index 51e2420..8027a07 100644 --- a/vp8/decoder/decodemv.c +++ b/vp8/decoder/decodemv.c @@ -48,11 +48,11 @@ static MB_PREDICTION_MODE read_uv_mode(vp8_reader *bc, const vp8_prob *p) static void read_kf_modes(VP8D_COMP *pbi, MODE_INFO *mi) { - vp8_reader *const bc = & pbi->bc; + vp8_reader *const bc = & pbi->mbc[8]; const int mis = pbi->common.mode_info_stride; mi->mbmi.ref_frame = INTRA_FRAME; - mi->mbmi.mode = read_kf_ymode(bc, pbi->common.kf_ymode_prob); + mi->mbmi.mode = read_kf_ymode(bc, vp8_kf_ymode_prob); if (mi->mbmi.mode == B_PRED) { @@ -65,12 +65,12 @@ static void read_kf_modes(VP8D_COMP *pbi, MODE_INFO *mi) const B_PREDICTION_MODE L = left_block_mode(mi, i); mi->bmi[i].as_mode = - read_bmode(bc, pbi->common.kf_bmode_prob [A] [L]); + read_bmode(bc, vp8_kf_bmode_prob [A] [L]); } while (++i < 16); } - mi->mbmi.uv_mode = read_uv_mode(bc, pbi->common.kf_uv_mode_prob); + mi->mbmi.uv_mode = read_uv_mode(bc, vp8_kf_uv_mode_prob); } static int read_mvcomponent(vp8_reader *r, const MV_CONTEXT *mvc) @@ -150,7 +150,7 @@ static const unsigned char mbsplit_fill_offset[4][16] = { static void mb_mode_mv_init(VP8D_COMP *pbi) { - vp8_reader *const bc = & pbi->bc; + vp8_reader *const bc = & 
pbi->mbc[8]; MV_CONTEXT *const mvc = pbi->common.fc.mvc; #if CONFIG_ERROR_CONCEALMENT @@ -159,6 +159,9 @@ static void mb_mode_mv_init(VP8D_COMP *pbi) * outside the frame. */ pbi->mvs_corrupt_from_mb = UINT_MAX; #endif + /* Read the mb_no_coeff_skip flag */ + pbi->common.mb_no_coeff_skip = (int)vp8_read_bit(bc); + pbi->prob_skip_false = 0; if (pbi->common.mb_no_coeff_skip) pbi->prob_skip_false = (vp8_prob)vp8_read_literal(bc, 8); @@ -293,26 +296,24 @@ static void decode_split_mv(vp8_reader *const bc, MODE_INFO *mi, blockmv.as_mv.row += best_mv.as_mv.row; blockmv.as_mv.col = read_mvcomponent(bc, &mvc[1]) << 1; blockmv.as_mv.col += best_mv.as_mv.col; - - mbmi->need_to_clamp_mvs |= vp8_check_mv_bounds(&blockmv, - mb_to_left_edge, - mb_to_right_edge, - mb_to_top_edge, - mb_to_bottom_edge); } } else { blockmv.as_int = abovemv.as_int; - mbmi->need_to_clamp_mvs |= above_mb->mbmi.need_to_clamp_mvs; } } else { blockmv.as_int = leftmv.as_int; - mbmi->need_to_clamp_mvs |= left_mb->mbmi.need_to_clamp_mvs; } + mbmi->need_to_clamp_mvs |= vp8_check_mv_bounds(&blockmv, + mb_to_left_edge, + mb_to_right_edge, + mb_to_top_edge, + mb_to_bottom_edge); + { /* Fill (uniform) modes, mvs of jth subset. 
Must do it here because ensuing subsets can @@ -337,7 +338,7 @@ static void decode_split_mv(vp8_reader *const bc, MODE_INFO *mi, static void read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi) { - vp8_reader *const bc = & pbi->bc; + vp8_reader *const bc = & pbi->mbc[8]; mbmi->ref_frame = (MV_REFERENCE_FRAME) vp8_read(bc, pbi->prob_intra); if (mbmi->ref_frame) /* inter MB */ { @@ -595,14 +596,14 @@ static void decode_mb_mode_mvs(VP8D_COMP *pbi, MODE_INFO *mi, * By default on a key frame reset all MBs to segment 0 */ if (pbi->mb.update_mb_segmentation_map) - read_mb_features(&pbi->bc, &mi->mbmi, &pbi->mb); + read_mb_features(&pbi->mbc[8], &mi->mbmi, &pbi->mb); else if(pbi->common.frame_type == KEY_FRAME) mi->mbmi.segment_id = 0; /* Read the macroblock coeff skip flag if this feature is in use, * else default to 0 */ if (pbi->common.mb_no_coeff_skip) - mi->mbmi.mb_skip_coeff = vp8_read(&pbi->bc, pbi->prob_skip_false); + mi->mbmi.mb_skip_coeff = vp8_read(&pbi->mbc[8], pbi->prob_skip_false); else mi->mbmi.mb_skip_coeff = 0; @@ -644,7 +645,8 @@ void vp8_decode_mode_mvs(VP8D_COMP *pbi) #if CONFIG_ERROR_CONCEALMENT /* look for corruption. set mvs_corrupt_from_mb to the current * mb_num if the frame is corrupt from this macroblock. 
*/ - if (vp8dx_bool_error(&pbi->bc) && mb_num < pbi->mvs_corrupt_from_mb) + if (vp8dx_bool_error(&pbi->mbc[8]) && mb_num < + (int)pbi->mvs_corrupt_from_mb) { pbi->mvs_corrupt_from_mb = mb_num; /* no need to continue since the partition is corrupt from diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c index 62a068b..a4a00f6 100644 --- a/vp8/decoder/decodframe.c +++ b/vp8/decoder/decodframe.c @@ -177,7 +177,6 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, { short *DQC = xd->dequant_y1; int dst_stride = xd->dst.y_stride; - unsigned char *base_dst = xd->dst.y_buffer; /* clear out residual eob info */ if(xd->mode_info_context->mbmi.mb_skip_coeff) @@ -188,38 +187,29 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, for (i = 0; i < 16; i++) { BLOCKD *b = &xd->block[i]; - int b_mode = xd->mode_info_context->bmi[i].as_mode; - unsigned char *yabove; - unsigned char *yleft; - int left_stride; - unsigned char top_left; - - yabove = base_dst + b->offset - dst_stride; - yleft = base_dst + b->offset - 1; - left_stride = dst_stride; - top_left = yabove[-1]; - - // vp8_intra4x4_predict (base_dst + b->offset, dst_stride, b_mode, - // base_dst + b->offset, dst_stride ); - vp8_intra4x4_predict_d_c(yabove, yleft, left_stride, - b_mode, - base_dst + b->offset, dst_stride, - top_left); + unsigned char *dst = xd->dst.y_buffer + b->offset; + B_PREDICTION_MODE b_mode = + xd->mode_info_context->bmi[i].as_mode; + unsigned char *Above = dst - dst_stride; + unsigned char *yleft = dst - 1; + int left_stride = dst_stride; + unsigned char top_left = Above[-1]; + + vp8_intra4x4_predict(Above, yleft, left_stride, b_mode, + dst, dst_stride, top_left); if (xd->eobs[i]) { if (xd->eobs[i] > 1) { - vp8_dequant_idct_add - (b->qcoeff, DQC, - base_dst + b->offset, dst_stride); + vp8_dequant_idct_add(b->qcoeff, DQC, dst, dst_stride); } else { vp8_dc_only_idct_add (b->qcoeff[0] * DQC[0], - base_dst + b->offset, dst_stride, - base_dst + b->offset, dst_stride); 
+ dst, dst_stride, + dst, dst_stride); ((int *)b->qcoeff)[0] = 0; } } @@ -317,48 +307,253 @@ static int get_delta_q(vp8_reader *bc, int prev, int *q_update) FILE *vpxlog = 0; #endif +static void yv12_extend_frame_top_c(YV12_BUFFER_CONFIG *ybf) +{ + int i; + unsigned char *src_ptr1; + unsigned char *dest_ptr1; + + unsigned int Border; + int plane_stride; + + /***********/ + /* Y Plane */ + /***********/ + Border = ybf->border; + plane_stride = ybf->y_stride; + src_ptr1 = ybf->y_buffer - Border; + dest_ptr1 = src_ptr1 - (Border * plane_stride); + + for (i = 0; i < (int)Border; i++) + { + vpx_memcpy(dest_ptr1, src_ptr1, plane_stride); + dest_ptr1 += plane_stride; + } + + + /***********/ + /* U Plane */ + /***********/ + plane_stride = ybf->uv_stride; + Border /= 2; + src_ptr1 = ybf->u_buffer - Border; + dest_ptr1 = src_ptr1 - (Border * plane_stride); + + for (i = 0; i < (int)(Border); i++) + { + vpx_memcpy(dest_ptr1, src_ptr1, plane_stride); + dest_ptr1 += plane_stride; + } + + /***********/ + /* V Plane */ + /***********/ + + src_ptr1 = ybf->v_buffer - Border; + dest_ptr1 = src_ptr1 - (Border * plane_stride); + + for (i = 0; i < (int)(Border); i++) + { + vpx_memcpy(dest_ptr1, src_ptr1, plane_stride); + dest_ptr1 += plane_stride; + } +} + +static void yv12_extend_frame_bottom_c(YV12_BUFFER_CONFIG *ybf) +{ + int i; + unsigned char *src_ptr1, *src_ptr2; + unsigned char *dest_ptr2; + + unsigned int Border; + int plane_stride; + int plane_height; + + /***********/ + /* Y Plane */ + /***********/ + Border = ybf->border; + plane_stride = ybf->y_stride; + plane_height = ybf->y_height; + + src_ptr1 = ybf->y_buffer - Border; + src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; + dest_ptr2 = src_ptr2 + plane_stride; + + for (i = 0; i < (int)Border; i++) + { + vpx_memcpy(dest_ptr2, src_ptr2, plane_stride); + dest_ptr2 += plane_stride; + } + + + /***********/ + /* U Plane */ + /***********/ + plane_stride = ybf->uv_stride; + plane_height = ybf->uv_height; + 
Border /= 2; + + src_ptr1 = ybf->u_buffer - Border; + src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; + dest_ptr2 = src_ptr2 + plane_stride; + + for (i = 0; i < (int)(Border); i++) + { + vpx_memcpy(dest_ptr2, src_ptr2, plane_stride); + dest_ptr2 += plane_stride; + } + + /***********/ + /* V Plane */ + /***********/ + + src_ptr1 = ybf->v_buffer - Border; + src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; + dest_ptr2 = src_ptr2 + plane_stride; + + for (i = 0; i < (int)(Border); i++) + { + vpx_memcpy(dest_ptr2, src_ptr2, plane_stride); + dest_ptr2 += plane_stride; + } +} + +static void yv12_extend_frame_left_right_c(YV12_BUFFER_CONFIG *ybf, + unsigned char *y_src, + unsigned char *u_src, + unsigned char *v_src) +{ + int i; + unsigned char *src_ptr1, *src_ptr2; + unsigned char *dest_ptr1, *dest_ptr2; + + unsigned int Border; + int plane_stride; + int plane_height; + int plane_width; + + /***********/ + /* Y Plane */ + /***********/ + Border = ybf->border; + plane_stride = ybf->y_stride; + plane_height = 16; + plane_width = ybf->y_width; + + /* copy the left and right most columns out */ + src_ptr1 = y_src; + src_ptr2 = src_ptr1 + plane_width - 1; + dest_ptr1 = src_ptr1 - Border; + dest_ptr2 = src_ptr2 + 1; + + for (i = 0; i < plane_height; i++) + { + vpx_memset(dest_ptr1, src_ptr1[0], Border); + vpx_memset(dest_ptr2, src_ptr2[0], Border); + src_ptr1 += plane_stride; + src_ptr2 += plane_stride; + dest_ptr1 += plane_stride; + dest_ptr2 += plane_stride; + } + + /***********/ + /* U Plane */ + /***********/ + plane_stride = ybf->uv_stride; + plane_height = 8; + plane_width = ybf->uv_width; + Border /= 2; + + /* copy the left and right most columns out */ + src_ptr1 = u_src; + src_ptr2 = src_ptr1 + plane_width - 1; + dest_ptr1 = src_ptr1 - Border; + dest_ptr2 = src_ptr2 + 1; + + for (i = 0; i < plane_height; i++) + { + vpx_memset(dest_ptr1, src_ptr1[0], Border); + vpx_memset(dest_ptr2, src_ptr2[0], Border); + src_ptr1 += plane_stride; 
+ src_ptr2 += plane_stride; + dest_ptr1 += plane_stride; + dest_ptr2 += plane_stride; + } + + /***********/ + /* V Plane */ + /***********/ + + /* copy the left and right most columns out */ + src_ptr1 = v_src; + src_ptr2 = src_ptr1 + plane_width - 1; + dest_ptr1 = src_ptr1 - Border; + dest_ptr2 = src_ptr2 + 1; + + for (i = 0; i < plane_height; i++) + { + vpx_memset(dest_ptr1, src_ptr1[0], Border); + vpx_memset(dest_ptr2, src_ptr2[0], Border); + src_ptr1 += plane_stride; + src_ptr2 += plane_stride; + dest_ptr1 += plane_stride; + dest_ptr2 += plane_stride; + } +} + static void decode_mb_rows(VP8D_COMP *pbi) { VP8_COMMON *const pc = & pbi->common; MACROBLOCKD *const xd = & pbi->mb; + MODE_INFO *lf_mic = xd->mode_info_context; + int ibc = 0; int num_part = 1 << pc->multi_token_partition; int recon_yoffset, recon_uvoffset; int mb_row, mb_col; int mb_idx = 0; - int dst_fb_idx = pc->new_fb_idx; - int recon_y_stride = pc->yv12_fb[dst_fb_idx].y_stride; - int recon_uv_stride = pc->yv12_fb[dst_fb_idx].uv_stride; + + YV12_BUFFER_CONFIG *yv12_fb_new = pbi->dec_fb_ref[INTRA_FRAME]; + + int recon_y_stride = yv12_fb_new->y_stride; + int recon_uv_stride = yv12_fb_new->uv_stride; unsigned char *ref_buffer[MAX_REF_FRAMES][3]; unsigned char *dst_buffer[3]; + unsigned char *lf_dst[3]; + unsigned char *eb_dst[3]; int i; - int ref_fb_index[MAX_REF_FRAMES]; int ref_fb_corrupted[MAX_REF_FRAMES]; ref_fb_corrupted[INTRA_FRAME] = 0; - ref_fb_index[LAST_FRAME] = pc->lst_fb_idx; - ref_fb_index[GOLDEN_FRAME] = pc->gld_fb_idx; - ref_fb_index[ALTREF_FRAME] = pc->alt_fb_idx; - for(i = 1; i < MAX_REF_FRAMES; i++) { - ref_buffer[i][0] = pc->yv12_fb[ref_fb_index[i]].y_buffer; - ref_buffer[i][1] = pc->yv12_fb[ref_fb_index[i]].u_buffer; - ref_buffer[i][2] = pc->yv12_fb[ref_fb_index[i]].v_buffer; + YV12_BUFFER_CONFIG *this_fb = pbi->dec_fb_ref[i]; + + ref_buffer[i][0] = this_fb->y_buffer; + ref_buffer[i][1] = this_fb->u_buffer; + ref_buffer[i][2] = this_fb->v_buffer; - ref_fb_corrupted[i] = 
pc->yv12_fb[ref_fb_index[i]].corrupted; + ref_fb_corrupted[i] = this_fb->corrupted; } - dst_buffer[0] = pc->yv12_fb[dst_fb_idx].y_buffer; - dst_buffer[1] = pc->yv12_fb[dst_fb_idx].u_buffer; - dst_buffer[2] = pc->yv12_fb[dst_fb_idx].v_buffer; + /* Set up the buffer pointers */ + eb_dst[0] = lf_dst[0] = dst_buffer[0] = yv12_fb_new->y_buffer; + eb_dst[1] = lf_dst[1] = dst_buffer[1] = yv12_fb_new->u_buffer; + eb_dst[2] = lf_dst[2] = dst_buffer[2] = yv12_fb_new->v_buffer; xd->up_available = 0; + /* Initialize the loop filter for this frame. */ + if(pc->filter_level) + vp8_loop_filter_frame_init(pc, xd, pc->filter_level); + + vp8_setup_intra_recon_top_line(yv12_fb_new); + /* Decode the individual macro block */ for (mb_row = 0; mb_row < pc->mb_rows; mb_row++) { @@ -395,10 +590,14 @@ static void decode_mb_rows(VP8D_COMP *pbi) xd->recon_above[1] -= xd->dst.uv_stride; xd->recon_above[2] -= xd->dst.uv_stride; - //TODO: move to outside row loop + /* TODO: move to outside row loop */ xd->recon_left_stride[0] = xd->dst.y_stride; xd->recon_left_stride[1] = xd->dst.uv_stride; + setup_intra_recon_left(xd->recon_left[0], xd->recon_left[1], + xd->recon_left[2], xd->dst.y_stride, + xd->dst.uv_stride); + for (mb_col = 0; mb_col < pc->mb_cols; mb_col++) { /* Distance of Mb to the various image edges. 
@@ -460,26 +659,103 @@ static void decode_mb_rows(VP8D_COMP *pbi) xd->recon_left[1] += 8; xd->recon_left[2] += 8; - recon_yoffset += 16; recon_uvoffset += 8; ++xd->mode_info_context; /* next mb */ xd->above_context++; - } /* adjust to the next row of mbs */ - vp8_extend_mb_row( - &pc->yv12_fb[dst_fb_idx], - xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8 - ); + vp8_extend_mb_row(yv12_fb_new, xd->dst.y_buffer + 16, + xd->dst.u_buffer + 8, xd->dst.v_buffer + 8); ++xd->mode_info_context; /* skip prediction column */ xd->up_available = 1; + if(pc->filter_level) + { + if(mb_row > 0) + { + if (pc->filter_type == NORMAL_LOOPFILTER) + vp8_loop_filter_row_normal(pc, lf_mic, mb_row-1, + recon_y_stride, recon_uv_stride, + lf_dst[0], lf_dst[1], lf_dst[2]); + else + vp8_loop_filter_row_simple(pc, lf_mic, mb_row-1, + recon_y_stride, recon_uv_stride, + lf_dst[0], lf_dst[1], lf_dst[2]); + + if(mb_row > 1) + { + yv12_extend_frame_left_right_c(yv12_fb_new, + eb_dst[0], + eb_dst[1], + eb_dst[2]); + + eb_dst[0] += recon_y_stride * 16; + eb_dst[1] += recon_uv_stride * 8; + eb_dst[2] += recon_uv_stride * 8; + + if(mb_row == 2) + yv12_extend_frame_top_c(yv12_fb_new); + + } + + lf_dst[0] += recon_y_stride * 16; + lf_dst[1] += recon_uv_stride * 8; + lf_dst[2] += recon_uv_stride * 8; + lf_mic += pc->mb_cols; + lf_mic++; /* Skip border mb */ + } + } + else + { + if(mb_row > 0) + { + /**/ + yv12_extend_frame_left_right_c(yv12_fb_new, + eb_dst[0], + eb_dst[1], + eb_dst[2]); + + eb_dst[0] += recon_y_stride * 16; + eb_dst[1] += recon_uv_stride * 8; + eb_dst[2] += recon_uv_stride * 8; + + if(mb_row == 1) + yv12_extend_frame_top_c(yv12_fb_new); + } + } + } + + if(pc->filter_level) + { + if (pc->filter_type == NORMAL_LOOPFILTER) + vp8_loop_filter_row_normal(pc, lf_mic, mb_row-1, recon_y_stride, + recon_uv_stride, lf_dst[0], lf_dst[1], + lf_dst[2]); + else + vp8_loop_filter_row_simple(pc, lf_mic, mb_row-1, recon_y_stride, + recon_uv_stride, lf_dst[0], lf_dst[1], + lf_dst[2]); + + 
yv12_extend_frame_left_right_c(yv12_fb_new, + eb_dst[0], + eb_dst[1], + eb_dst[2]); + eb_dst[0] += recon_y_stride * 16; + eb_dst[1] += recon_uv_stride * 8; + eb_dst[2] += recon_uv_stride * 8; } + yv12_extend_frame_left_right_c(yv12_fb_new, + eb_dst[0], + eb_dst[1], + eb_dst[2]); + + yv12_extend_frame_bottom_c(yv12_fb_new); + } static unsigned int read_partition_size(const unsigned char *cx_size) @@ -519,13 +795,13 @@ static unsigned int read_available_partition_size( if (read_is_valid(partition_size_ptr, 3, first_fragment_end)) partition_size = read_partition_size(partition_size_ptr); else if (pbi->ec_active) - partition_size = bytes_left; + partition_size = (unsigned int)bytes_left; else vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, "Truncated partition size data"); } else - partition_size = bytes_left; + partition_size = (unsigned int)bytes_left; /* Validate the calculated partition length. If the buffer * described by the partition can't be fully read, then restrict @@ -534,7 +810,7 @@ static unsigned int read_available_partition_size( if (!read_is_valid(fragment_start, partition_size, fragment_end)) { if (pbi->ec_active) - partition_size = bytes_left; + partition_size = (unsigned int)bytes_left; else vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, "Truncated packet or corrupt partition " @@ -547,24 +823,18 @@ static unsigned int read_available_partition_size( static void setup_token_decoder(VP8D_COMP *pbi, const unsigned char* token_part_sizes) { - vp8_reader *bool_decoder = &pbi->bc2; + vp8_reader *bool_decoder = &pbi->mbc[0]; unsigned int partition_idx; - int fragment_idx; - int num_token_partitions; + unsigned int fragment_idx; + unsigned int num_token_partitions; const unsigned char *first_fragment_end = pbi->fragments[0] + pbi->fragment_sizes[0]; TOKEN_PARTITION multi_token_partition = - (TOKEN_PARTITION)vp8_read_literal(&pbi->bc, 2); - if (!vp8dx_bool_error(&pbi->bc)) + (TOKEN_PARTITION)vp8_read_literal(&pbi->mbc[8], 2); + if 
(!vp8dx_bool_error(&pbi->mbc[8])) pbi->common.multi_token_partition = multi_token_partition; num_token_partitions = 1 << pbi->common.multi_token_partition; - if (num_token_partitions > 1) - { - CHECK_MEM_ERROR(pbi->mbc, vpx_malloc(num_token_partitions * - sizeof(vp8_reader))); - bool_decoder = pbi->mbc; - } /* Check for partitions within the fragments and unpack the fragments * so that each fragment pointer points to its corresponding partition. */ @@ -580,10 +850,10 @@ static void setup_token_decoder(VP8D_COMP *pbi, /* Size of first partition + token partition sizes element */ ptrdiff_t ext_first_part_size = token_part_sizes - pbi->fragments[0] + 3 * (num_token_partitions - 1); - fragment_size -= ext_first_part_size; + fragment_size -= (unsigned int)ext_first_part_size; if (fragment_size > 0) { - pbi->fragment_sizes[0] = ext_first_part_size; + pbi->fragment_sizes[0] = (unsigned int)ext_first_part_size; /* The fragment contains an additional partition. Move to * next. */ fragment_idx++; @@ -602,8 +872,8 @@ static void setup_token_decoder(VP8D_COMP *pbi, fragment_end, fragment_idx - 1, num_token_partitions); - pbi->fragment_sizes[fragment_idx] = partition_size; - fragment_size -= partition_size; + pbi->fragment_sizes[fragment_idx] = (unsigned int)partition_size; + fragment_size -= (unsigned int)partition_size; assert(fragment_idx <= num_token_partitions); if (fragment_size > 0) { @@ -637,16 +907,6 @@ static void setup_token_decoder(VP8D_COMP *pbi, #endif } -static void stop_token_decoder(VP8D_COMP *pbi) -{ - VP8_COMMON *pc = &pbi->common; - - if (pc->multi_token_partition != ONE_PARTITION) - { - vpx_free(pbi->mbc); - pbi->mbc = NULL; - } -} static void init_frame(VP8D_COMP *pbi) { @@ -661,7 +921,6 @@ static void init_frame(VP8D_COMP *pbi) vp8_init_mbmode_probs(pc); vp8_default_coef_probs(pc); - vp8_kf_default_bmode_probs(pc->kf_bmode_prob); /* reset the segment feature data to 0 with delta coding (Default state). 
*/ vpx_memset(xd->segment_feature_data, 0, sizeof(xd->segment_feature_data)); @@ -685,13 +944,8 @@ static void init_frame(VP8D_COMP *pbi) } else { - if (!pc->use_bilinear_mc_filter) - pc->mcomp_filter_type = SIXTAP; - else - pc->mcomp_filter_type = BILINEAR; - /* To enable choice of different interploation filters */ - if (pc->mcomp_filter_type == SIXTAP) + if (!pc->use_bilinear_mc_filter) { xd->subpixel_predict = vp8_sixtap_predict4x4; xd->subpixel_predict8x4 = vp8_sixtap_predict8x4; @@ -725,7 +979,7 @@ static void init_frame(VP8D_COMP *pbi) int vp8_decode_frame(VP8D_COMP *pbi) { - vp8_reader *const bc = & pbi->bc; + vp8_reader *const bc = & pbi->mbc[8]; VP8_COMMON *const pc = & pbi->common; MACROBLOCKD *const xd = & pbi->mb; const unsigned char *data = pbi->fragments[0]; @@ -737,9 +991,11 @@ int vp8_decode_frame(VP8D_COMP *pbi) int corrupt_tokens = 0; int prev_independent_partitions = pbi->independent_partitions; + YV12_BUFFER_CONFIG *yv12_fb_new = pbi->dec_fb_ref[INTRA_FRAME]; + /* start with no corruption of current frame */ xd->corrupted = 0; - pc->yv12_fb[pc->new_fb_idx].corrupted = 0; + yv12_fb_new->corrupted = 0; if (data_end - data < 3) { @@ -774,11 +1030,9 @@ int vp8_decode_frame(VP8D_COMP *pbi) vp8_setup_version(pc); + if (pc->frame_type == KEY_FRAME) { - const int Width = pc->Width; - const int Height = pc->Height; - /* vet via sync code */ /* When error concealment is enabled we should only check the sync * code if we have enough bits available @@ -803,56 +1057,21 @@ int vp8_decode_frame(VP8D_COMP *pbi) } data += 7; - if (Width != pc->Width || Height != pc->Height) - { - int prev_mb_rows = pc->mb_rows; - - if (pc->Width <= 0) - { - pc->Width = Width; - vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, - "Invalid frame width"); - } - - if (pc->Height <= 0) - { - pc->Height = Height; - vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, - "Invalid frame height"); - } - - if (vp8_alloc_frame_buffers(pc, pc->Width, pc->Height)) - 
vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR, - "Failed to allocate frame buffers"); - -#if CONFIG_ERROR_CONCEALMENT - pbi->overlaps = NULL; - if (pbi->ec_enabled) - { - if (vp8_alloc_overlap_lists(pbi)) - vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR, - "Failed to allocate overlap lists " - "for error concealment"); - } -#endif - -#if CONFIG_MULTITHREAD - if (pbi->b_multithreaded_rd) - vp8mt_alloc_temp_buffers(pbi, pc->Width, prev_mb_rows); -#endif - } + } + else + { + vpx_memcpy(&xd->pre, yv12_fb_new, sizeof(YV12_BUFFER_CONFIG)); + vpx_memcpy(&xd->dst, yv12_fb_new, sizeof(YV12_BUFFER_CONFIG)); } } - - if ((!pbi->decoded_key_frame && pc->frame_type != KEY_FRAME) || - pc->Width == 0 || pc->Height == 0) + if ((!pbi->decoded_key_frame && pc->frame_type != KEY_FRAME)) { return -1; } init_frame(pbi); - if (vp8dx_start_decode(bc, data, data_end - data)) + if (vp8dx_start_decode(bc, data, (unsigned int)(data_end - data))) vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR, "Failed to allocate bool decoder 0"); if (pc->frame_type == KEY_FRAME) { @@ -961,7 +1180,7 @@ int vp8_decode_frame(VP8D_COMP *pbi) setup_token_decoder(pbi, data + first_partition_length_in_bytes); - xd->current_bc = &pbi->bc2; + xd->current_bc = &pbi->mbc[0]; /* Read the default quantizers. 
*/ { @@ -1094,26 +1313,9 @@ int vp8_decode_frame(VP8D_COMP *pbi) } } - vpx_memcpy(&xd->pre, &pc->yv12_fb[pc->lst_fb_idx], sizeof(YV12_BUFFER_CONFIG)); - vpx_memcpy(&xd->dst, &pc->yv12_fb[pc->new_fb_idx], sizeof(YV12_BUFFER_CONFIG)); - - /* set up frame new frame for intra coded blocks */ -#if CONFIG_MULTITHREAD - if (!(pbi->b_multithreaded_rd) || pc->multi_token_partition == ONE_PARTITION || !(pc->filter_level)) -#endif - vp8_setup_intra_recon(&pc->yv12_fb[pc->new_fb_idx]); - - vp8_setup_block_dptrs(xd); - - vp8_build_block_doffsets(xd); - /* clear out the coeff buffer */ vpx_memset(xd->qcoeff, 0, sizeof(xd->qcoeff)); - /* Read the mb_no_coeff_skip flag */ - pc->mb_no_coeff_skip = (int)vp8_read_bit(bc); - - vp8_decode_mode_mvs(pbi); #if CONFIG_ERROR_CONCEALMENT @@ -1132,9 +1334,9 @@ int vp8_decode_frame(VP8D_COMP *pbi) #if CONFIG_MULTITHREAD if (pbi->b_multithreaded_rd && pc->multi_token_partition != ONE_PARTITION) { - int i; + unsigned int i; vp8mt_decode_mb_rows(pbi, xd); - vp8_yv12_extend_frame_borders(&pc->yv12_fb[pc->new_fb_idx]); /*cm->frame_to_show);*/ + vp8_yv12_extend_frame_borders(yv12_fb_new); for (i = 0; i < pbi->decoding_thread_count; ++i) corrupt_tokens |= pbi->mb_row_di[i].mbd.corrupted; } @@ -1145,18 +1347,16 @@ int vp8_decode_frame(VP8D_COMP *pbi) corrupt_tokens |= xd->corrupted; } - stop_token_decoder(pbi); - /* Collect information about decoder corruption. */ /* 1. Check first boolean decoder for errors. */ - pc->yv12_fb[pc->new_fb_idx].corrupted = vp8dx_bool_error(bc); + yv12_fb_new->corrupted = vp8dx_bool_error(bc); /* 2. 
Check the macroblock information */ - pc->yv12_fb[pc->new_fb_idx].corrupted |= corrupt_tokens; + yv12_fb_new->corrupted |= corrupt_tokens; if (!pbi->decoded_key_frame) { if (pc->frame_type == KEY_FRAME && - !pc->yv12_fb[pc->new_fb_idx].corrupted) + !yv12_fb_new->corrupted) pbi->decoded_key_frame = 1; else vpx_internal_error(&pbi->common.error, VPX_CODEC_CORRUPT_FRAME, @@ -1165,13 +1365,6 @@ int vp8_decode_frame(VP8D_COMP *pbi) /* vpx_log("Decoder: Frame Decoded, Size Roughly:%d bytes \n",bc->pos+pbi->bc2.pos); */ - /* If this was a kf or Gf note the Q used */ - if ((pc->frame_type == KEY_FRAME) || - pc->refresh_golden_frame || pc->refresh_alt_ref_frame) - { - pc->last_kf_gf_q = pc->base_qindex; - } - if (pc->refresh_entropy_probs == 0) { vpx_memcpy(&pc->fc, &pc->lfc, sizeof(pc->fc)); diff --git a/vp8/decoder/detokenize.c b/vp8/decoder/detokenize.c index 0c39848..452ff6c 100644 --- a/vp8/decoder/detokenize.c +++ b/vp8/decoder/detokenize.c @@ -53,7 +53,8 @@ static const uint8_t kZigzag[16] = { #define NUM_PROBAS 11 #define NUM_CTX 3 -typedef const uint8_t (*ProbaArray)[NUM_CTX][NUM_PROBAS]; // for const-casting +/* for const-casting */ +typedef const uint8_t (*ProbaArray)[NUM_CTX][NUM_PROBAS]; static int GetSigned(BOOL_DECODER *br, int value_to_sign) { diff --git a/vp8/decoder/error_concealment.c b/vp8/decoder/error_concealment.c index 7750728..8b2e32b 100644 --- a/vp8/decoder/error_concealment.c +++ b/vp8/decoder/error_concealment.c @@ -51,12 +51,13 @@ int vp8_alloc_overlap_lists(VP8D_COMP *pbi) vpx_free(pbi->overlaps); pbi->overlaps = NULL; } + pbi->overlaps = vpx_calloc(pbi->common.mb_rows * pbi->common.mb_cols, sizeof(MB_OVERLAP)); + if (pbi->overlaps == NULL) return -1; - vpx_memset(pbi->overlaps, 0, - sizeof(MB_OVERLAP) * pbi->common.mb_rows * pbi->common.mb_cols); + return 0; } diff --git a/vp8/decoder/onyxd_if.c b/vp8/decoder/onyxd_if.c index c59ce25..8d6871b 100644 --- a/vp8/decoder/onyxd_if.c +++ b/vp8/decoder/onyxd_if.c @@ -80,6 +80,7 @@ struct VP8D_COMP * 
vp8dx_create_decompressor(VP8D_CONFIG *oxcf) #if CONFIG_ERROR_CONCEALMENT pbi->ec_enabled = oxcf->error_concealment; + pbi->overlaps = NULL; #else pbi->ec_enabled = 0; #endif @@ -99,6 +100,8 @@ struct VP8D_COMP * vp8dx_create_decompressor(VP8D_CONFIG *oxcf) */ pbi->independent_partitions = 0; + vp8_setup_block_dptrs(&pbi->mb); + return pbi; } @@ -117,21 +120,20 @@ void vp8dx_remove_decompressor(VP8D_COMP *pbi) vp8_de_alloc_overlap_lists(pbi); #endif vp8_remove_common(&pbi->common); - vpx_free(pbi->mbc); vpx_free(pbi); } -vpx_codec_err_t vp8dx_get_reference(VP8D_COMP *pbi, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd) +vpx_codec_err_t vp8dx_get_reference(VP8D_COMP *pbi, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd) { VP8_COMMON *cm = &pbi->common; int ref_fb_idx; - if (ref_frame_flag == VP8_LAST_FLAG) + if (ref_frame_flag == VP8_LAST_FRAME) ref_fb_idx = cm->lst_fb_idx; - else if (ref_frame_flag == VP8_GOLD_FLAG) + else if (ref_frame_flag == VP8_GOLD_FRAME) ref_fb_idx = cm->gld_fb_idx; - else if (ref_frame_flag == VP8_ALT_FLAG) + else if (ref_frame_flag == VP8_ALTR_FRAME) ref_fb_idx = cm->alt_fb_idx; else{ vpx_internal_error(&pbi->common.error, VPX_CODEC_ERROR, @@ -153,17 +155,17 @@ vpx_codec_err_t vp8dx_get_reference(VP8D_COMP *pbi, VP8_REFFRAME ref_frame_flag, } -vpx_codec_err_t vp8dx_set_reference(VP8D_COMP *pbi, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd) +vpx_codec_err_t vp8dx_set_reference(VP8D_COMP *pbi, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd) { VP8_COMMON *cm = &pbi->common; int *ref_fb_ptr = NULL; int free_fb; - if (ref_frame_flag == VP8_LAST_FLAG) + if (ref_frame_flag == VP8_LAST_FRAME) ref_fb_ptr = &cm->lst_fb_idx; - else if (ref_frame_flag == VP8_GOLD_FLAG) + else if (ref_frame_flag == VP8_GOLD_FRAME) ref_fb_ptr = &cm->gld_fb_idx; - else if (ref_frame_flag == VP8_ALT_FLAG) + else if (ref_frame_flag == VP8_ALTR_FRAME) ref_fb_ptr = &cm->alt_fb_idx; else{ vpx_internal_error(&pbi->common.error, 
VPX_CODEC_ERROR, @@ -279,28 +281,22 @@ static int swap_frame_buffers (VP8_COMMON *cm) return err; } -int vp8dx_receive_compressed_data(VP8D_COMP *pbi, unsigned long size, const unsigned char *source, int64_t time_stamp) +int vp8dx_receive_compressed_data(VP8D_COMP *pbi, size_t size, + const uint8_t *source, + int64_t time_stamp) { #if HAVE_NEON int64_t dx_store_reg[8]; #endif VP8_COMMON *cm = &pbi->common; - int retcode = 0; - - /*if(pbi->ready_for_new_data == 0) - return -1;*/ - - if (pbi == 0) - { - return -1; - } + int retcode = -1; pbi->common.error.error_code = VPX_CODEC_OK; if (pbi->num_fragments == 0) { /* New frame, reset fragment pointers and sizes */ - vpx_memset(pbi->fragments, 0, sizeof(pbi->fragments)); + vpx_memset((void*)pbi->fragments, 0, sizeof(pbi->fragments)); vpx_memset(pbi->fragment_sizes, 0, sizeof(pbi->fragment_sizes)); } if (pbi->input_fragments && !(source == NULL && size == 0)) @@ -381,20 +377,14 @@ int vp8dx_receive_compressed_data(VP8D_COMP *pbi, unsigned long size, const unsi cm->new_fb_idx = get_free_fb (cm); + /* setup reference frames for vp8_decode_frame */ + pbi->dec_fb_ref[INTRA_FRAME] = &cm->yv12_fb[cm->new_fb_idx]; + pbi->dec_fb_ref[LAST_FRAME] = &cm->yv12_fb[cm->lst_fb_idx]; + pbi->dec_fb_ref[GOLDEN_FRAME] = &cm->yv12_fb[cm->gld_fb_idx]; + pbi->dec_fb_ref[ALTREF_FRAME] = &cm->yv12_fb[cm->alt_fb_idx]; + if (setjmp(pbi->common.error.jmp)) { -#if HAVE_NEON -#if CONFIG_RUNTIME_CPU_DETECT - if (cm->cpu_caps & HAS_NEON) -#endif - { - vp8_pop_neon(dx_store_reg); - } -#endif - pbi->common.error.setjmp = 0; - - pbi->num_fragments = 0; - /* We do not know if the missing frame(s) was supposed to update * any of the reference buffers, but we act conservative and * mark only the last buffer as corrupted. 
@@ -403,7 +393,8 @@ int vp8dx_receive_compressed_data(VP8D_COMP *pbi, unsigned long size, const unsi if (cm->fb_idx_ref_cnt[cm->new_fb_idx] > 0) cm->fb_idx_ref_cnt[cm->new_fb_idx]--; - return -1; + + goto decode_exit; } pbi->common.error.setjmp = 1; @@ -412,68 +403,19 @@ int vp8dx_receive_compressed_data(VP8D_COMP *pbi, unsigned long size, const unsi if (retcode < 0) { -#if HAVE_NEON -#if CONFIG_RUNTIME_CPU_DETECT - if (cm->cpu_caps & HAS_NEON) -#endif - { - vp8_pop_neon(dx_store_reg); - } -#endif - pbi->common.error.error_code = VPX_CODEC_ERROR; - pbi->common.error.setjmp = 0; - pbi->num_fragments = 0; if (cm->fb_idx_ref_cnt[cm->new_fb_idx] > 0) cm->fb_idx_ref_cnt[cm->new_fb_idx]--; - return retcode; + + pbi->common.error.error_code = VPX_CODEC_ERROR; + goto decode_exit; } -#if CONFIG_MULTITHREAD - if (pbi->b_multithreaded_rd && cm->multi_token_partition != ONE_PARTITION) - { - if (swap_frame_buffers (cm)) - { -#if HAVE_NEON -#if CONFIG_RUNTIME_CPU_DETECT - if (cm->cpu_caps & HAS_NEON) -#endif - { - vp8_pop_neon(dx_store_reg); - } -#endif - pbi->common.error.error_code = VPX_CODEC_ERROR; - pbi->common.error.setjmp = 0; - pbi->num_fragments = 0; - return -1; - } - } else -#endif + if (swap_frame_buffers (cm)) { - if (swap_frame_buffers (cm)) - { -#if HAVE_NEON -#if CONFIG_RUNTIME_CPU_DETECT - if (cm->cpu_caps & HAS_NEON) -#endif - { - vp8_pop_neon(dx_store_reg); - } -#endif - pbi->common.error.error_code = VPX_CODEC_ERROR; - pbi->common.error.setjmp = 0; - pbi->num_fragments = 0; - return -1; - } - - if(cm->filter_level) - { - /* Apply the loop filter if appropriate. 
*/ - vp8_loop_filter_frame(cm, &pbi->mb); - } - vp8_yv12_extend_frame_borders(cm->frame_to_show); + pbi->common.error.error_code = VPX_CODEC_ERROR; + goto decode_exit; } - vp8_clear_system_state(); #if CONFIG_ERROR_CONCEALMENT @@ -498,49 +440,13 @@ int vp8dx_receive_compressed_data(VP8D_COMP *pbi, unsigned long size, const unsi } #endif - /*vp8_print_modes_and_motion_vectors( cm->mi, cm->mb_rows,cm->mb_cols, cm->current_video_frame);*/ - if (cm->show_frame) cm->current_video_frame++; pbi->ready_for_new_data = 0; pbi->last_time_stamp = time_stamp; - pbi->num_fragments = 0; - -#if 0 - { - int i; - int64_t earliest_time = pbi->dr[0].time_stamp; - int64_t latest_time = pbi->dr[0].time_stamp; - int64_t time_diff = 0; - int bytes = 0; - - pbi->dr[pbi->common.current_video_frame&0xf].size = pbi->bc.pos + pbi->bc2.pos + 4;; - pbi->dr[pbi->common.current_video_frame&0xf].time_stamp = time_stamp; - - for (i = 0; i < 16; i++) - { - - bytes += pbi->dr[i].size; - - if (pbi->dr[i].time_stamp < earliest_time) - earliest_time = pbi->dr[i].time_stamp; - - if (pbi->dr[i].time_stamp > latest_time) - latest_time = pbi->dr[i].time_stamp; - } - - time_diff = latest_time - earliest_time; - - if (time_diff > 0) - { - pbi->common.bitrate = 80000.00 * bytes / time_diff ; - pbi->common.framerate = 160000000.00 / time_diff ; - } - - } -#endif +decode_exit: #if HAVE_NEON #if CONFIG_RUNTIME_CPU_DETECT if (cm->cpu_caps & HAS_NEON) @@ -549,7 +455,9 @@ int vp8dx_receive_compressed_data(VP8D_COMP *pbi, unsigned long size, const unsi vp8_pop_neon(dx_store_reg); } #endif + pbi->common.error.setjmp = 0; + pbi->num_fragments = 0; return retcode; } int vp8dx_get_raw_frame(VP8D_COMP *pbi, YV12_BUFFER_CONFIG *sd, int64_t *time_stamp, int64_t *time_end_stamp, vp8_ppflags_t *flags) diff --git a/vp8/decoder/onyxd_int.h b/vp8/decoder/onyxd_int.h index 97cf0dc..0063beb 100644 --- a/vp8/decoder/onyxd_int.h +++ b/vp8/decoder/onyxd_int.h @@ -31,23 +31,18 @@ typedef struct typedef struct { MACROBLOCKD mbd; - int 
mb_row; } MB_ROW_DEC; -typedef struct -{ - int64_t time_stamp; - int size; -} DATARATE; - - typedef struct VP8D_COMP { DECLARE_ALIGNED(16, MACROBLOCKD, mb); + YV12_BUFFER_CONFIG *dec_fb_ref[NUM_YV12_BUFFERS]; + DECLARE_ALIGNED(16, VP8_COMMON, common); - vp8_reader bc, bc2; + /* the last partition will be used for the modes/mvs */ + vp8_reader mbc[MAX_PARTITIONS]; VP8D_CONFIG oxcf; @@ -62,7 +57,7 @@ typedef struct VP8D_COMP volatile int b_multithreaded_rd; int max_threads; int current_mb_col_main; - int decoding_thread_count; + unsigned int decoding_thread_count; int allocated_decoding_thread_count; int mt_baseline_filter_level[MAX_MB_SEGMENTS]; @@ -85,12 +80,9 @@ typedef struct VP8D_COMP /* end of threading data */ #endif - vp8_reader *mbc; int64_t last_time_stamp; int ready_for_new_data; - DATARATE dr[16]; - vp8_prob prob_intra; vp8_prob prob_last; vp8_prob prob_gf; diff --git a/vp8/decoder/threading.c b/vp8/decoder/threading.c index 47a0349..88c06be 100644 --- a/vp8/decoder/threading.c +++ b/vp8/decoder/threading.c @@ -24,10 +24,18 @@ #include "detokenize.h" #include "vp8/common/reconintra4x4.h" #include "vp8/common/reconinter.h" +#include "vp8/common/setupintrarecon.h" #if CONFIG_ERROR_CONCEALMENT #include "error_concealment.h" #endif +#define CALLOC_ARRAY(p, n) CHECK_MEM_ERROR((p), vpx_calloc(sizeof(*(p)), (n))) +#define CALLOC_ARRAY_ALIGNED(p, n, algn) do { \ + CHECK_MEM_ERROR((p), vpx_memalign((algn), sizeof(*(p)) * (n))); \ + memset((p), 0, (n) * sizeof(*(p))); \ +} while (0) + + extern void vp8_mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd); static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC *mbrd, int count) @@ -47,11 +55,9 @@ static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_D mbd->mode_info_stride = pc->mode_info_stride; mbd->frame_type = pc->frame_type; - mbd->pre = pc->yv12_fb[pc->lst_fb_idx]; - mbd->dst = pc->yv12_fb[pc->new_fb_idx]; + mbd->pre = xd->pre; + mbd->dst = xd->dst; - 
vp8_setup_block_dptrs(mbd); - vp8_build_block_doffsets(mbd); mbd->segmentation_enabled = xd->segmentation_enabled; mbd->mb_segement_abs_delta = xd->mb_segement_abs_delta; vpx_memcpy(mbd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data)); @@ -65,7 +71,7 @@ static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_D mbd->mode_ref_lf_delta_enabled = xd->mode_ref_lf_delta_enabled; mbd->mode_ref_lf_delta_update = xd->mode_ref_lf_delta_update; - mbd->current_bc = &pbi->bc2; + mbd->current_bc = &pbi->mbc[0]; vpx_memcpy(mbd->dequant_y1_dc, xd->dequant_y1_dc, sizeof(xd->dequant_y1_dc)); vpx_memcpy(mbd->dequant_y1, xd->dequant_y1, sizeof(xd->dequant_y1)); @@ -73,16 +79,18 @@ static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_D vpx_memcpy(mbd->dequant_uv, xd->dequant_uv, sizeof(xd->dequant_uv)); mbd->fullpixel_mask = 0xffffffff; - if(pc->full_pixel) + + if (pc->full_pixel) mbd->fullpixel_mask = 0xfffffff8; } - for (i=0; i< pc->mb_rows; i++) - pbi->mt_current_mb_col[i]=-1; + for (i = 0; i < pc->mb_rows; i++) + pbi->mt_current_mb_col[i] = -1; } -static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, unsigned int mb_idx) +static void mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, + unsigned int mb_idx) { MB_PREDICTION_MODE mode; int i; @@ -166,7 +174,6 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, unsigned int mb_i { short *DQC = xd->dequant_y1; int dst_stride = xd->dst.y_stride; - unsigned char *base_dst = xd->dst.y_buffer; /* clear out residual eob info */ if(xd->mode_info_context->mbmi.mb_skip_coeff) @@ -177,17 +184,19 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, unsigned int mb_i for (i = 0; i < 16; i++) { BLOCKD *b = &xd->block[i]; - int b_mode = xd->mode_info_context->bmi[i].as_mode; - unsigned char *yabove; + unsigned char *dst = xd->dst.y_buffer + b->offset; + B_PREDICTION_MODE b_mode = + xd->mode_info_context->bmi[i].as_mode; + 
unsigned char *Above; unsigned char *yleft; int left_stride; unsigned char top_left; /*Caution: For some b_mode, it needs 8 pixels (4 above + 4 above-right).*/ if (i < 4 && pbi->common.filter_level) - yabove = xd->recon_above[0] + b->offset; //i*4; + Above = xd->recon_above[0] + b->offset; else - yabove = (base_dst - dst_stride) + b->offset; + Above = dst - dst_stride; if (i%4==0 && pbi->common.filter_level) { @@ -196,34 +205,28 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, unsigned int mb_i } else { - yleft = (base_dst - 1) + b->offset; + yleft = dst - 1; left_stride = dst_stride; } if ((i==4 || i==8 || i==12) && pbi->common.filter_level) top_left = *(xd->recon_left[0] + i - 1); else - top_left = yabove[-1]; + top_left = Above[-1]; - vp8_intra4x4_predict_d_c(yabove, yleft, left_stride, - b_mode, - base_dst + b->offset, dst_stride, - top_left); + vp8_intra4x4_predict(Above, yleft, left_stride, + b_mode, dst, dst_stride, top_left); if (xd->eobs[i] ) { if (xd->eobs[i] > 1) { - vp8_dequant_idct_add - (b->qcoeff, DQC, - base_dst + b->offset, dst_stride); + vp8_dequant_idct_add(b->qcoeff, DQC, dst, dst_stride); } else { - vp8_dc_only_idct_add - (b->qcoeff[0] * DQC[0], - base_dst + b->offset, dst_stride, - base_dst + b->offset, dst_stride); + vp8_dc_only_idct_add(b->qcoeff[0] * DQC[0], + dst, dst_stride, dst, dst_stride); ((int *)b->qcoeff)[0] = 0; } } @@ -297,60 +300,44 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, unsigned int mb_i } } -typedef void (*init_current_bc_fn_t)(VP8D_COMP *pbi, MACROBLOCKD *xd, - int start_mb_row, int mb_row, int num_part); - -static void init_current_bc(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row, - int mb_row, int num_part) +static void mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row) { - (void) start_mb_row; - - xd->current_bc = &pbi->mbc[mb_row%num_part]; -} - -static void init_current_bc_threads(VP8D_COMP *pbi, MACROBLOCKD *xd, - int start_mb_row, int mb_row, int 
num_part) -{ - (void) xd; - pbi->mb_row_di[start_mb_row - 1].mb_row = mb_row; - pbi->mb_row_di[start_mb_row - 1].mbd.current_bc = &pbi->mbc[mb_row%num_part]; -} - - -static void decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row, - init_current_bc_fn_t init_current_bc_fn) -{ - volatile int *last_row_current_mb_col = NULL; + volatile const int *last_row_current_mb_col; + volatile int *current_mb_col; int mb_row; VP8_COMMON *pc = &pbi->common; - int nsync = pbi->sync_range; + const int nsync = pbi->sync_range; + const int first_row_no_sync_above = pc->mb_cols + nsync; int num_part = 1 << pbi->common.multi_token_partition; + int last_mb_row = start_mb_row; + + YV12_BUFFER_CONFIG *yv12_fb_new = pbi->dec_fb_ref[INTRA_FRAME]; + YV12_BUFFER_CONFIG *yv12_fb_lst = pbi->dec_fb_ref[LAST_FRAME]; + + int recon_y_stride = yv12_fb_new->y_stride; + int recon_uv_stride = yv12_fb_new->uv_stride; - int dst_fb_idx = pc->new_fb_idx; unsigned char *ref_buffer[MAX_REF_FRAMES][3]; unsigned char *dst_buffer[3]; int i; - int ref_fb_index[MAX_REF_FRAMES]; int ref_fb_corrupted[MAX_REF_FRAMES]; ref_fb_corrupted[INTRA_FRAME] = 0; - ref_fb_index[LAST_FRAME] = pc->lst_fb_idx; - ref_fb_index[GOLDEN_FRAME] = pc->gld_fb_idx; - ref_fb_index[ALTREF_FRAME] = pc->alt_fb_idx; - for(i = 1; i < MAX_REF_FRAMES; i++) { - ref_buffer[i][0] = pc->yv12_fb[ref_fb_index[i]].y_buffer; - ref_buffer[i][1] = pc->yv12_fb[ref_fb_index[i]].u_buffer; - ref_buffer[i][2] = pc->yv12_fb[ref_fb_index[i]].v_buffer; + YV12_BUFFER_CONFIG *this_fb = pbi->dec_fb_ref[i]; - ref_fb_corrupted[i] = pc->yv12_fb[ref_fb_index[i]].corrupted; + ref_buffer[i][0] = this_fb->y_buffer; + ref_buffer[i][1] = this_fb->u_buffer; + ref_buffer[i][2] = this_fb->v_buffer; + + ref_fb_corrupted[i] = this_fb->corrupted; } - dst_buffer[0] = pc->yv12_fb[dst_fb_idx].y_buffer; - dst_buffer[1] = pc->yv12_fb[dst_fb_idx].u_buffer; - dst_buffer[2] = pc->yv12_fb[dst_fb_idx].v_buffer; + dst_buffer[0] = yv12_fb_new->y_buffer; + dst_buffer[1] = 
yv12_fb_new->u_buffer; + dst_buffer[2] = yv12_fb_new->v_buffer; xd->up_available = (start_mb_row != 0); @@ -359,18 +346,20 @@ static void decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row, int i; int recon_yoffset, recon_uvoffset; int mb_col; - int ref_fb_idx = pc->lst_fb_idx; - int dst_fb_idx = pc->new_fb_idx; - int recon_y_stride = pc->yv12_fb[ref_fb_idx].y_stride; - int recon_uv_stride = pc->yv12_fb[ref_fb_idx].uv_stride; - int filter_level; loop_filter_info_n *lfi_n = &pc->lf_info; - init_current_bc_fn(pbi, xd, start_mb_row, mb_row, num_part); + /* save last row processed by this thread */ + last_mb_row = mb_row; + /* select bool coder for current partition */ + xd->current_bc = &pbi->mbc[mb_row%num_part]; if (mb_row > 0) last_row_current_mb_col = &pbi->mt_current_mb_col[mb_row -1]; + else + last_row_current_mb_col = &first_row_no_sync_above; + + current_mb_col = &pbi->mt_current_mb_col[mb_row]; recon_yoffset = mb_row * recon_y_stride * 16; recon_uvoffset = mb_row * recon_uv_stride * 8; @@ -394,7 +383,7 @@ static void decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row, xd->recon_left[1] = pbi->mt_uleft_col[mb_row]; xd->recon_left[2] = pbi->mt_vleft_col[mb_row]; - //TODO: move to outside row loop + /* TODO: move to outside row loop */ xd->recon_left_stride[0] = 1; xd->recon_left_stride[1] = 1; } @@ -412,16 +401,22 @@ static void decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row, xd->recon_above[1] -= xd->dst.uv_stride; xd->recon_above[2] -= xd->dst.uv_stride; - //TODO: move to outside row loop + /* TODO: move to outside row loop */ xd->recon_left_stride[0] = xd->dst.y_stride; xd->recon_left_stride[1] = xd->dst.uv_stride; + + setup_intra_recon_left(xd->recon_left[0], xd->recon_left[1], + xd->recon_left[2], xd->dst.y_stride, + xd->dst.uv_stride); } for (mb_col = 0; mb_col < pc->mb_cols; mb_col++) { - if ( mb_row > 0 && (mb_col & (nsync-1)) == 0) + *current_mb_col = mb_col - 1; + + if ((mb_col & (nsync - 1)) == 0) { - 
while (mb_col > (*last_row_current_mb_col - nsync) && *last_row_current_mb_col != pc->mb_cols - 1) + while (mb_col > (*last_row_current_mb_col - nsync)) { x86_pause_hint(); thread_sleep(0); @@ -477,7 +472,7 @@ static void decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row, /* propagate errors from reference frames */ xd->corrupted |= ref_fb_corrupted[xd->mode_info_context->mbmi.ref_frame]; - decode_macroblock(pbi, xd, 0); + mt_decode_macroblock(pbi, xd, 0); xd->left_available = 1; @@ -591,9 +586,6 @@ static void decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row, ++xd->mode_info_context; /* next mb */ xd->above_context++; - - /*pbi->mb_row_di[ithread].current_mb_col = mb_col;*/ - pbi->mt_current_mb_col[mb_row] = mb_col; } /* adjust to the next row of mbs */ @@ -601,8 +593,8 @@ static void decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row, { if(mb_row != pc->mb_rows-1) { - int lasty = pc->yv12_fb[ref_fb_idx].y_width + VP8BORDERINPIXELS; - int lastuv = (pc->yv12_fb[ref_fb_idx].y_width>>1) + (VP8BORDERINPIXELS>>1); + int lasty = yv12_fb_lst->y_width + VP8BORDERINPIXELS; + int lastuv = (yv12_fb_lst->y_width>>1) + (VP8BORDERINPIXELS>>1); for (i = 0; i < 4; i++) { @@ -611,8 +603,13 @@ static void decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row, pbi->mt_vabove_row[mb_row +1][lastuv + i] = pbi->mt_vabove_row[mb_row +1][lastuv -1]; } } - } else - vp8_extend_mb_row(&pc->yv12_fb[dst_fb_idx], xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8); + } + else + vp8_extend_mb_row(yv12_fb_new, xd->dst.y_buffer + 16, + xd->dst.u_buffer + 8, xd->dst.v_buffer + 8); + + /* last MB of row is ready just after extension is done */ + *current_mb_col = mb_col + nsync; ++xd->mode_info_context; /* skip prediction column */ xd->up_available = 1; @@ -620,6 +617,11 @@ static void decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row, /* since we have multithread */ xd->mode_info_context += xd->mode_info_stride * 
pbi->decoding_thread_count; } + + /* signal end of frame decoding if this thread processed the last mb_row */ + if (last_mb_row == (pc->mb_rows - 1)) + sem_post(&pbi->h_event_end_decoding); + } @@ -635,7 +637,6 @@ static THREAD_FUNCTION thread_decoding_proc(void *p_data) if (pbi->b_multithreaded_rd == 0) break; - /*if(WaitForSingleObject(pbi->h_event_start_decoding[ithread], INFINITE) == WAIT_OBJECT_0)*/ if (sem_wait(&pbi->h_event_start_decoding[ithread]) == 0) { if (pbi->b_multithreaded_rd == 0) @@ -643,21 +644,11 @@ static THREAD_FUNCTION thread_decoding_proc(void *p_data) else { MACROBLOCKD *xd = &mbrd->mbd; - xd->left_context = &mb_row_left_context; - decode_mb_rows(pbi, xd, ithread+1, init_current_bc_threads); + mt_decode_mb_rows(pbi, xd, ithread+1); } } - - /* add this to each frame */ - if ((mbrd->mb_row == pbi->common.mb_rows-1) || - ((mbrd->mb_row == pbi->common.mb_rows-2) && - (pbi->common.mb_rows % (pbi->decoding_thread_count+1))==1)) - { - /*SetEvent(pbi->h_event_end_decoding);*/ - sem_post(&pbi->h_event_end_decoding); - } } return 0 ; @@ -667,7 +658,7 @@ static THREAD_FUNCTION thread_decoding_proc(void *p_data) void vp8_decoder_create_threads(VP8D_COMP *pbi) { int core_count = 0; - int ithread; + unsigned int ithread; pbi->b_multithreaded_rd = 0; pbi->allocated_decoding_thread_count = 0; @@ -684,16 +675,17 @@ void vp8_decoder_create_threads(VP8D_COMP *pbi) pbi->b_multithreaded_rd = 1; pbi->decoding_thread_count = core_count - 1; - CHECK_MEM_ERROR(pbi->h_decoding_thread, vpx_malloc(sizeof(pthread_t) * pbi->decoding_thread_count)); - CHECK_MEM_ERROR(pbi->h_event_start_decoding, vpx_malloc(sizeof(sem_t) * pbi->decoding_thread_count)); - CHECK_MEM_ERROR(pbi->mb_row_di, vpx_memalign(32, sizeof(MB_ROW_DEC) * pbi->decoding_thread_count)); - vpx_memset(pbi->mb_row_di, 0, sizeof(MB_ROW_DEC) * pbi->decoding_thread_count); - CHECK_MEM_ERROR(pbi->de_thread_data, vpx_malloc(sizeof(DECODETHREAD_DATA) * pbi->decoding_thread_count)); + 
CALLOC_ARRAY(pbi->h_decoding_thread, pbi->decoding_thread_count); + CALLOC_ARRAY(pbi->h_event_start_decoding, pbi->decoding_thread_count); + CALLOC_ARRAY_ALIGNED(pbi->mb_row_di, pbi->decoding_thread_count, 32); + CALLOC_ARRAY(pbi->de_thread_data, pbi->decoding_thread_count); for (ithread = 0; ithread < pbi->decoding_thread_count; ithread++) { sem_init(&pbi->h_event_start_decoding[ithread], 0, 0); + vp8_setup_block_dptrs(&pbi->mb_row_di[ithread].mbd); + pbi->de_thread_data[ithread].ithread = ithread; pbi->de_thread_data[ithread].ptr1 = (void *)pbi; pbi->de_thread_data[ithread].ptr2 = (void *) &pbi->mb_row_di[ithread]; @@ -810,32 +802,32 @@ void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows) uv_width = width >>1; /* Allocate an int for each mb row. */ - CHECK_MEM_ERROR(pbi->mt_current_mb_col, vpx_malloc(sizeof(int) * pc->mb_rows)); + CALLOC_ARRAY(pbi->mt_current_mb_col, pc->mb_rows); /* Allocate memory for above_row buffers. */ - CHECK_MEM_ERROR(pbi->mt_yabove_row, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows)); - for (i=0; i< pc->mb_rows; i++) + CALLOC_ARRAY(pbi->mt_yabove_row, pc->mb_rows); + for (i = 0; i < pc->mb_rows; i++) CHECK_MEM_ERROR(pbi->mt_yabove_row[i], vpx_memalign(16,sizeof(unsigned char) * (width + (VP8BORDERINPIXELS<<1)))); - CHECK_MEM_ERROR(pbi->mt_uabove_row, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows)); - for (i=0; i< pc->mb_rows; i++) + CALLOC_ARRAY(pbi->mt_uabove_row, pc->mb_rows); + for (i = 0; i < pc->mb_rows; i++) CHECK_MEM_ERROR(pbi->mt_uabove_row[i], vpx_memalign(16,sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS))); - CHECK_MEM_ERROR(pbi->mt_vabove_row, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows)); - for (i=0; i< pc->mb_rows; i++) + CALLOC_ARRAY(pbi->mt_vabove_row, pc->mb_rows); + for (i = 0; i < pc->mb_rows; i++) CHECK_MEM_ERROR(pbi->mt_vabove_row[i], vpx_memalign(16,sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS))); /* Allocate memory for left_col buffers. 
*/ - CHECK_MEM_ERROR(pbi->mt_yleft_col, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows)); - for (i=0; i< pc->mb_rows; i++) + CALLOC_ARRAY(pbi->mt_yleft_col, pc->mb_rows); + for (i = 0; i < pc->mb_rows; i++) CHECK_MEM_ERROR(pbi->mt_yleft_col[i], vpx_calloc(sizeof(unsigned char) * 16, 1)); - CHECK_MEM_ERROR(pbi->mt_uleft_col, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows)); - for (i=0; i< pc->mb_rows; i++) + CALLOC_ARRAY(pbi->mt_uleft_col, pc->mb_rows); + for (i = 0; i < pc->mb_rows; i++) CHECK_MEM_ERROR(pbi->mt_uleft_col[i], vpx_calloc(sizeof(unsigned char) * 8, 1)); - CHECK_MEM_ERROR(pbi->mt_vleft_col, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows)); - for (i=0; i< pc->mb_rows; i++) + CALLOC_ARRAY(pbi->mt_vleft_col, pc->mb_rows); + for (i = 0; i < pc->mb_rows; i++) CHECK_MEM_ERROR(pbi->mt_vleft_col[i], vpx_calloc(sizeof(unsigned char) * 8, 1)); } } @@ -881,42 +873,46 @@ void vp8_decoder_remove_threads(VP8D_COMP *pbi) void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd) { VP8_COMMON *pc = &pbi->common; - int i; + unsigned int i; + int j; int filter_level = pc->filter_level; + YV12_BUFFER_CONFIG *yv12_fb_new = pbi->dec_fb_ref[INTRA_FRAME]; if (filter_level) { /* Set above_row buffer to 127 for decoding first MB row */ - vpx_memset(pbi->mt_yabove_row[0] + VP8BORDERINPIXELS-1, 127, pc->yv12_fb[pc->lst_fb_idx].y_width + 5); - vpx_memset(pbi->mt_uabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (pc->yv12_fb[pc->lst_fb_idx].y_width>>1) +5); - vpx_memset(pbi->mt_vabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (pc->yv12_fb[pc->lst_fb_idx].y_width>>1) +5); + vpx_memset(pbi->mt_yabove_row[0] + VP8BORDERINPIXELS-1, 127, yv12_fb_new->y_width + 5); + vpx_memset(pbi->mt_uabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (yv12_fb_new->y_width>>1) +5); + vpx_memset(pbi->mt_vabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (yv12_fb_new->y_width>>1) +5); - for (i=1; imb_rows; i++) + for (j=1; jmb_rows; j++) { - vpx_memset(pbi->mt_yabove_row[i] + VP8BORDERINPIXELS-1, (unsigned 
char)129, 1); - vpx_memset(pbi->mt_uabove_row[i] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1); - vpx_memset(pbi->mt_vabove_row[i] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1); + vpx_memset(pbi->mt_yabove_row[j] + VP8BORDERINPIXELS-1, (unsigned char)129, 1); + vpx_memset(pbi->mt_uabove_row[j] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1); + vpx_memset(pbi->mt_vabove_row[j] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1); } /* Set left_col to 129 initially */ - for (i=0; imb_rows; i++) + for (j=0; jmb_rows; j++) { - vpx_memset(pbi->mt_yleft_col[i], (unsigned char)129, 16); - vpx_memset(pbi->mt_uleft_col[i], (unsigned char)129, 8); - vpx_memset(pbi->mt_vleft_col[i], (unsigned char)129, 8); + vpx_memset(pbi->mt_yleft_col[j], (unsigned char)129, 16); + vpx_memset(pbi->mt_uleft_col[j], (unsigned char)129, 8); + vpx_memset(pbi->mt_vleft_col[j], (unsigned char)129, 8); } /* Initialize the loop filter for this frame. */ vp8_loop_filter_frame_init(pc, &pbi->mb, filter_level); } + else + vp8_setup_intra_recon_top_line(yv12_fb_new); setup_decoding_thread_data(pbi, xd, pbi->mb_row_di, pbi->decoding_thread_count); for (i = 0; i < pbi->decoding_thread_count; i++) sem_post(&pbi->h_event_start_decoding[i]); - decode_mb_rows(pbi, xd, 0, init_current_bc); + mt_decode_mb_rows(pbi, xd, 0); sem_wait(&pbi->h_event_end_decoding); /* add back for each frame */ } diff --git a/vp8/encoder/bitstream.c b/vp8/encoder/bitstream.c index 3824294..e666b6c 100644 --- a/vp8/encoder/bitstream.c +++ b/vp8/encoder/bitstream.c @@ -118,7 +118,7 @@ static void update_mbintra_mode_probs(VP8_COMP *cpi) update_mode( w, VP8_YMODES, vp8_ymode_encodings, vp8_ymode_tree, - Pnew, x->fc.ymode_prob, bct, (unsigned int *)cpi->ymode_count + Pnew, x->fc.ymode_prob, bct, (unsigned int *)cpi->mb.ymode_count ); } { @@ -127,7 +127,7 @@ static void update_mbintra_mode_probs(VP8_COMP *cpi) update_mode( w, VP8_UV_MODES, vp8_uv_mode_encodings, vp8_uv_mode_tree, - Pnew, x->fc.uv_mode_prob, bct, (unsigned int 
*)cpi->uv_mode_count + Pnew, x->fc.uv_mode_prob, bct, (unsigned int *)cpi->mb.uv_mode_count ); } } @@ -172,7 +172,7 @@ void vp8_pack_tokens_c(vp8_writer *w, const TOKENEXTRA *p, int xcount) while (p < stop) { const int t = p->Token; - const vp8_token *a = vp8_coef_encodings + t; + vp8_token *a = vp8_coef_encodings + t; const vp8_extra_bit_struct *b = vp8_extra_bits + t; int i = 0; const unsigned char *pp = p->context_tree; @@ -397,7 +397,7 @@ static void pack_tokens_into_partitions_c(VP8_COMP *cpi, unsigned char *cx_data, { const TOKENEXTRA *p = cpi->tplist[mb_row].start; const TOKENEXTRA *stop = cpi->tplist[mb_row].stop; - int tokens = stop - p; + int tokens = (int)(stop - p); vp8_pack_tokens_c(w, p, tokens); } @@ -416,7 +416,7 @@ static void pack_mb_row_tokens_c(VP8_COMP *cpi, vp8_writer *w) { const TOKENEXTRA *p = cpi->tplist[mb_row].start; const TOKENEXTRA *stop = cpi->tplist[mb_row].stop; - int tokens = stop - p; + int tokens = (int)(stop - p); vp8_pack_tokens_c(w, p, tokens); } @@ -461,7 +461,7 @@ static void write_mv static void write_mb_features(vp8_writer *w, const MB_MODE_INFO *mi, const MACROBLOCKD *x) { - // Encode the MB segment id. + /* Encode the MB segment id. */ if (x->segmentation_enabled && x->update_mb_segmentation_map) { switch (mi->segment_id) @@ -483,7 +483,7 @@ static void write_mb_features(vp8_writer *w, const MB_MODE_INFO *mi, const MACRO vp8_write(w, 1, x->mb_segment_tree_probs[2]); break; - // TRAP.. This should not happen + /* TRAP.. 
This should not happen */ default: vp8_write(w, 0, x->mb_segment_tree_probs[0]); vp8_write(w, 0, x->mb_segment_tree_probs[1]); @@ -493,11 +493,11 @@ static void write_mb_features(vp8_writer *w, const MB_MODE_INFO *mi, const MACRO } void vp8_convert_rfct_to_prob(VP8_COMP *const cpi) { - const int *const rfct = cpi->count_mb_ref_frame_usage; + const int *const rfct = cpi->mb.count_mb_ref_frame_usage; const int rf_intra = rfct[INTRA_FRAME]; const int rf_inter = rfct[LAST_FRAME] + rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME]; - // Calculate the probabilities used to code the ref frame based on useage + /* Calculate the probabilities used to code the ref frame based on usage */ if (!(cpi->prob_intra_coded = rf_intra * 255 / (rf_intra + rf_inter))) cpi->prob_intra_coded = 1; @@ -539,7 +539,7 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi) { int total_mbs = pc->mb_rows * pc->mb_cols; - prob_skip_false = (total_mbs - cpi->skip_true_count ) * 256 / total_mbs; + prob_skip_false = (total_mbs - cpi->mb.skip_true_count ) * 256 / total_mbs; if (prob_skip_false <= 1) prob_skip_false = 1; @@ -571,8 +571,10 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi) MACROBLOCKD *xd = &cpi->mb.e_mbd; - // Distance of Mb to the various image edges. - // These specified to 8th pel as they are always compared to MV values that are in 1/8th pel units + /* Distance of Mb to the various image edges. 
+ * These specified to 8th pel as they are always compared to MV + * values that are in 1/8th pel units + */ xd->mb_to_left_edge = -((mb_col * 16) << 3); xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3; xd->mb_to_top_edge = -((mb_row * 16)) << 3; @@ -728,7 +730,7 @@ static void write_kfmodes(VP8_COMP *cpi) { int total_mbs = c->mb_rows * c->mb_cols; - prob_skip_false = (total_mbs - cpi->skip_true_count ) * 256 / total_mbs; + prob_skip_false = (total_mbs - cpi->mb.skip_true_count ) * 256 / total_mbs; if (prob_skip_false <= 1) prob_skip_false = 1; @@ -754,7 +756,7 @@ static void write_kfmodes(VP8_COMP *cpi) if (c->mb_no_coeff_skip) vp8_encode_bool(bc, m->mbmi.mb_skip_coeff, prob_skip_false); - kfwrite_ymode(bc, ym, c->kf_ymode_prob); + kfwrite_ymode(bc, ym, vp8_kf_ymode_prob); if (ym == B_PRED) { @@ -771,15 +773,15 @@ static void write_kfmodes(VP8_COMP *cpi) ++intra_mode_stats [A] [L] [bm]; #endif - write_bmode(bc, bm, c->kf_bmode_prob [A] [L]); + write_bmode(bc, bm, vp8_kf_bmode_prob [A] [L]); } while (++i < 16); } - write_uv_mode(bc, (m++)->mbmi.uv_mode, c->kf_uv_mode_prob); + write_uv_mode(bc, (m++)->mbmi.uv_mode, vp8_kf_uv_mode_prob); } - m++; // skip L prediction border + m++; /* skip L prediction border */ } } @@ -849,6 +851,7 @@ static int prob_update_savings(const unsigned int *ct, static int independent_coef_context_savings(VP8_COMP *cpi) { + MACROBLOCK *const x = & cpi->mb; int savings = 0; int i = 0; do @@ -865,7 +868,7 @@ static int independent_coef_context_savings(VP8_COMP *cpi) */ probs = (const unsigned int (*)[MAX_ENTROPY_TOKENS]) - cpi->coef_counts[i][j]; + x->coef_counts[i][j]; /* Reset to default probabilities at key frames */ if (cpi->common.frame_type == KEY_FRAME) @@ -878,9 +881,6 @@ static int independent_coef_context_savings(VP8_COMP *cpi) /* at every context */ /* calc probs and branch cts for this frame only */ - //vp8_prob new_p [ENTROPY_NODES]; - //unsigned int branch_ct [ENTROPY_NODES] [2]; - int t = 0; /* token/prob index */ 
vp8_tree_probs_from_distribution( @@ -927,6 +927,7 @@ static int independent_coef_context_savings(VP8_COMP *cpi) static int default_coef_context_savings(VP8_COMP *cpi) { + MACROBLOCK *const x = & cpi->mb; int savings = 0; int i = 0; do @@ -940,16 +941,13 @@ static int default_coef_context_savings(VP8_COMP *cpi) /* at every context */ /* calc probs and branch cts for this frame only */ - //vp8_prob new_p [ENTROPY_NODES]; - //unsigned int branch_ct [ENTROPY_NODES] [2]; - int t = 0; /* token/prob index */ vp8_tree_probs_from_distribution( MAX_ENTROPY_TOKENS, vp8_coef_encodings, vp8_coef_tree, cpi->frame_coef_probs [i][j][k], cpi->frame_branch_ct [i][j][k], - cpi->coef_counts [i][j][k], + x->coef_counts [i][j][k], 256, 1 ); @@ -998,13 +996,13 @@ int vp8_estimate_entropy_savings(VP8_COMP *cpi) { int savings = 0; - const int *const rfct = cpi->count_mb_ref_frame_usage; + const int *const rfct = cpi->mb.count_mb_ref_frame_usage; const int rf_intra = rfct[INTRA_FRAME]; const int rf_inter = rfct[LAST_FRAME] + rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME]; int new_intra, new_last, new_garf, oldtotal, newtotal; int ref_frame_cost[MAX_REF_FRAMES]; - vp8_clear_system_state(); //__asm emms; + vp8_clear_system_state(); if (cpi->common.frame_type != KEY_FRAME) { @@ -1026,7 +1024,7 @@ int vp8_estimate_entropy_savings(VP8_COMP *cpi) rfct[ALTREF_FRAME] * ref_frame_cost[ALTREF_FRAME]; - // old costs + /* old costs */ vp8_calc_ref_frame_costs(ref_frame_cost,cpi->prob_intra_coded, cpi->prob_last_coded,cpi->prob_gf_coded); @@ -1078,7 +1076,7 @@ void vp8_update_coef_probs(VP8_COMP *cpi) #endif int savings = 0; - vp8_clear_system_state(); //__asm emms; + vp8_clear_system_state(); do { @@ -1110,21 +1108,15 @@ void vp8_update_coef_probs(VP8_COMP *cpi) } do { - //note: use result from vp8_estimate_entropy_savings, so no need to call vp8_tree_probs_from_distribution here. + /* note: use result from vp8_estimate_entropy_savings, so no + * need to call vp8_tree_probs_from_distribution here. 
+ */ + /* at every context */ /* calc probs and branch cts for this frame only */ - //vp8_prob new_p [ENTROPY_NODES]; - //unsigned int branch_ct [ENTROPY_NODES] [2]; - int t = 0; /* token/prob index */ - //vp8_tree_probs_from_distribution( - // MAX_ENTROPY_TOKENS, vp8_coef_encodings, vp8_coef_tree, - // new_p, branch_ct, (unsigned int *)cpi->coef_counts [i][j][k], - // 256, 1 - // ); - do { const vp8_prob newp = cpi->frame_coef_probs [i][j][k][t]; @@ -1295,19 +1287,16 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest Sectionbits[active_section = 1] += sizeof(VP8_HEADER) * 8 * 256; #endif - //vp8_kf_default_bmode_probs() is called in vp8_setup_key_frame() once for each - //K frame before encode frame. pc->kf_bmode_prob doesn't get changed anywhere - //else. No need to call it again here. --yw - //vp8_kf_default_bmode_probs( pc->kf_bmode_prob); - - // every keyframe send startcode, width, height, scale factor, clamp and color type + /* every keyframe send startcode, width, height, scale factor, clamp + * and color type + */ if (oh.type == KEY_FRAME) { int v; validate_buffer(cx_data, 7, cx_data_end, &cpi->common.error); - // Start / synch code + /* Start / synch code */ cx_data[0] = 0x9D; cx_data[1] = 0x01; cx_data[2] = 0x2a; @@ -1326,7 +1315,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest vp8_start_encode(bc, cx_data, cx_data_end); - // signal clr type + /* signal clr type */ vp8_write_bit(bc, pc->clr_type); vp8_write_bit(bc, pc->clamp_type); @@ -1335,13 +1324,13 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest vp8_start_encode(bc, cx_data, cx_data_end); - // Signal whether or not Segmentation is enabled + /* Signal whether or not Segmentation is enabled */ vp8_write_bit(bc, xd->segmentation_enabled); - // Indicate which features are enabled + /* Indicate which features are enabled */ if (xd->segmentation_enabled) { - // Signal whether or not the segmentation 
map is being updated. + /* Signal whether or not the segmentation map is being updated. */ vp8_write_bit(bc, xd->update_mb_segmentation_map); vp8_write_bit(bc, xd->update_mb_segmentation_data); @@ -1351,15 +1340,15 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest vp8_write_bit(bc, xd->mb_segement_abs_delta); - // For each segmentation feature (Quant and loop filter level) + /* For each segmentation feature (Quant and loop filter level) */ for (i = 0; i < MB_LVL_MAX; i++) { - // For each of the segments + /* For each of the segments */ for (j = 0; j < MAX_MB_SEGMENTS; j++) { Data = xd->segment_feature_data[i][j]; - // Frame level data + /* Frame level data */ if (Data) { vp8_write_bit(bc, 1); @@ -1384,7 +1373,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest if (xd->update_mb_segmentation_map) { - // Write the probs used to decode the segment id for each macro block. + /* Write the probs used to decode the segment id for each mb */ for (i = 0; i < MB_FEATURE_TREE_PROBS; i++) { int Data = xd->mb_segment_tree_probs[i]; @@ -1400,17 +1389,18 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest } } - // Code to determine whether or not to update the scan order. vp8_write_bit(bc, pc->filter_type); vp8_write_literal(bc, pc->filter_level, 6); vp8_write_literal(bc, pc->sharpness_level, 3); - // Write out loop filter deltas applied at the MB level based on mode or ref frame (if they are enabled). + /* Write out loop filter deltas applied at the MB level based on mode + * or ref frame (if they are enabled). 
+ */ vp8_write_bit(bc, xd->mode_ref_lf_delta_enabled); if (xd->mode_ref_lf_delta_enabled) { - // Do the deltas need to be updated + /* Do the deltas need to be updated */ int send_update = xd->mode_ref_lf_delta_update || cpi->oxcf.error_resilient_mode; @@ -1419,12 +1409,12 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest { int Data; - // Send update + /* Send update */ for (i = 0; i < MAX_REF_LF_DELTAS; i++) { Data = xd->ref_lf_deltas[i]; - // Frame level data + /* Frame level data */ if (xd->ref_lf_deltas[i] != xd->last_ref_lf_deltas[i] || cpi->oxcf.error_resilient_mode) { @@ -1434,20 +1424,20 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest if (Data > 0) { vp8_write_literal(bc, (Data & 0x3F), 6); - vp8_write_bit(bc, 0); // sign + vp8_write_bit(bc, 0); /* sign */ } else { Data = -Data; vp8_write_literal(bc, (Data & 0x3F), 6); - vp8_write_bit(bc, 1); // sign + vp8_write_bit(bc, 1); /* sign */ } } else vp8_write_bit(bc, 0); } - // Send update + /* Send update */ for (i = 0; i < MAX_MODE_LF_DELTAS; i++) { Data = xd->mode_lf_deltas[i]; @@ -1461,13 +1451,13 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest if (Data > 0) { vp8_write_literal(bc, (Data & 0x3F), 6); - vp8_write_bit(bc, 0); // sign + vp8_write_bit(bc, 0); /* sign */ } else { Data = -Data; vp8_write_literal(bc, (Data & 0x3F), 6); - vp8_write_bit(bc, 1); // sign + vp8_write_bit(bc, 1); /* sign */ } } else @@ -1476,34 +1466,42 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest } } - //signal here is multi token partition is enabled + /* signal here is multi token partition is enabled */ vp8_write_literal(bc, pc->multi_token_partition, 2); - // Frame Qbaseline quantizer index + /* Frame Qbaseline quantizer index */ vp8_write_literal(bc, pc->base_qindex, 7); - // Transmit Dc, Second order and Uv quantizer delta information + /* Transmit Dc, Second order and Uv quantizer delta 
information */ put_delta_q(bc, pc->y1dc_delta_q); put_delta_q(bc, pc->y2dc_delta_q); put_delta_q(bc, pc->y2ac_delta_q); put_delta_q(bc, pc->uvdc_delta_q); put_delta_q(bc, pc->uvac_delta_q); - // When there is a key frame all reference buffers are updated using the new key frame + /* When there is a key frame all reference buffers are updated using + * the new key frame + */ if (pc->frame_type != KEY_FRAME) { - // Should the GF or ARF be updated using the transmitted frame or buffer + /* Should the GF or ARF be updated using the transmitted frame + * or buffer + */ vp8_write_bit(bc, pc->refresh_golden_frame); vp8_write_bit(bc, pc->refresh_alt_ref_frame); - // If not being updated from current frame should either GF or ARF be updated from another buffer + /* If not being updated from current frame should either GF or ARF + * be updated from another buffer + */ if (!pc->refresh_golden_frame) vp8_write_literal(bc, pc->copy_buffer_to_gf, 2); if (!pc->refresh_alt_ref_frame) vp8_write_literal(bc, pc->copy_buffer_to_arf, 2); - // Indicate reference frame sign bias for Golden and ARF frames (always 0 for last frame buffer) + /* Indicate reference frame sign bias for Golden and ARF frames + * (always 0 for last frame buffer) + */ vp8_write_bit(bc, pc->ref_frame_sign_bias[GOLDEN_FRAME]); vp8_write_bit(bc, pc->ref_frame_sign_bias[ALTREF_FRAME]); } @@ -1532,14 +1530,14 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest #endif - vp8_clear_system_state(); //__asm emms; + vp8_clear_system_state(); #if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING pack_coef_probs(cpi); #else if (pc->refresh_entropy_probs == 0) { - // save a copy for later refresh + /* save a copy for later refresh */ vpx_memcpy(&cpi->common.lfc, &cpi->common.fc, sizeof(cpi->common.fc)); } @@ -1550,7 +1548,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest active_section = 2; #endif - // Write out the mb_no_coeff_skip flag + /* Write out the 
mb_no_coeff_skip flag */ vp8_write_bit(bc, pc->mb_no_coeff_skip); if (pc->frame_type == KEY_FRAME) diff --git a/vp8/encoder/block.h b/vp8/encoder/block.h index 6165d04..a30f888 100644 --- a/vp8/encoder/block.h +++ b/vp8/encoder/block.h @@ -18,7 +18,10 @@ #include "vp8/common/entropy.h" #include "vpx_ports/mem.h" -// motion search site +#define MAX_MODES 20 +#define MAX_ERROR_BINS 1024 + +/* motion search site */ typedef struct { MV mv; @@ -27,11 +30,11 @@ typedef struct typedef struct block { - // 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries + /* 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries */ short *src_diff; short *coeff; - // 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries + /* 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries */ short *quant; short *quant_fast; unsigned char *quant_shift; @@ -39,7 +42,7 @@ typedef struct block short *zrun_zbin_boost; short *round; - // Zbin Over Quant value + /* Zbin Over Quant value */ short zbin_extra; unsigned char **base_src; @@ -59,12 +62,12 @@ typedef struct typedef struct macroblock { - DECLARE_ALIGNED(16, short, src_diff[400]); // 16x16 Y 8x8 U 8x8 V 4x4 2nd Y - DECLARE_ALIGNED(16, short, coeff[400]); // 16x16 Y 8x8 U 8x8 V 4x4 2nd Y + DECLARE_ALIGNED(16, short, src_diff[400]); /* 25 blocks Y,U,V,Y2 */ + DECLARE_ALIGNED(16, short, coeff[400]); /* 25 blocks Y,U,V,Y2 */ DECLARE_ALIGNED(16, unsigned char, thismb[256]); unsigned char *thismb_ptr; - // 16 Y blocks, 4 U blocks, 4 V blocks, 1 DC 2nd order block each with 16 entries + /* 16 Y, 4 U, 4 V, 1 DC 2nd order block */ BLOCK block[25]; YV12_BUFFER_CONFIG src; @@ -90,16 +93,18 @@ typedef struct macroblock signed int act_zbin_adj; signed int last_act_zbin_adj; - int mvcosts[2][MVvals+1]; int *mvcost[2]; - int mvsadcosts[2][MVfpvals+1]; int *mvsadcost[2]; - int mbmode_cost[2][MB_MODE_COUNT]; - int intra_uv_mode_cost[2][MB_MODE_COUNT]; - unsigned int bmode_costs[10][10][10]; - unsigned int inter_bmode_costs[B_MODE_COUNT]; - - // These 
define limits to motion vector components to prevent them from extending outside the UMV borders + int (*mbmode_cost)[MB_MODE_COUNT]; + int (*intra_uv_mode_cost)[MB_MODE_COUNT]; + int (*bmode_costs)[10][10]; + int *inter_bmode_costs; + int (*token_costs)[COEF_BANDS][PREV_COEF_CONTEXTS] + [MAX_ENTROPY_TOKENS]; + + /* These define limits to motion vector components to prevent + * them from extending outside the UMV borders. + */ int mv_col_min; int mv_col_max; int mv_row_min; @@ -107,18 +112,45 @@ typedef struct macroblock int skip; - int encode_breakout; + unsigned int encode_breakout; - //char * gf_active_ptr; signed char *gf_active_ptr; unsigned char *active_ptr; MV_CONTEXT *mvc; - unsigned int token_costs[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; int optimize; int q_index; +#if CONFIG_TEMPORAL_DENOISING + MB_PREDICTION_MODE best_sse_inter_mode; + int_mv best_sse_mv; + MV_REFERENCE_FRAME best_reference_frame; + MV_REFERENCE_FRAME best_zeromv_reference_frame; + unsigned char need_to_clamp_best_mvs; +#endif + + int skip_true_count; + unsigned int coef_counts [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; + unsigned int MVcount [2] [MVvals]; /* (row,col) MV cts this frame */ + int ymode_count [VP8_YMODES]; /* intra MB type cts this frame */ + int uv_mode_count[VP8_UV_MODES]; /* intra MB type cts this frame */ + int64_t prediction_error; + int64_t intra_error; + int count_mb_ref_frame_usage[MAX_REF_FRAMES]; + + int rd_thresh_mult[MAX_MODES]; + int rd_threshes[MAX_MODES]; + unsigned int mbs_tested_so_far; + unsigned int mode_test_hit_counts[MAX_MODES]; + int zbin_mode_boost_enabled; + int zbin_mode_boost; + int last_zbin_mode_boost; + + int last_zbin_over_quant; + int zbin_over_quant; + int error_bins[MAX_ERROR_BINS]; + void (*short_fdct4x4)(short *input, short *output, int pitch); void (*short_fdct8x4)(short *input, short *output, int pitch); void (*short_walsh4x4)(short *input, short *output, int pitch); diff --git 
a/vp8/encoder/boolhuff.h b/vp8/encoder/boolhuff.h index fb6cbaf..8309063 100644 --- a/vp8/encoder/boolhuff.h +++ b/vp8/encoder/boolhuff.h @@ -32,7 +32,7 @@ typedef struct unsigned char *buffer_end; struct vpx_internal_error_info *error; - // Variables used to track bit costs without outputing to the bitstream + /* Variables used to track bit costs without outputing to the bitstream */ unsigned int measure_cost; unsigned long bit_counter; } BOOL_CODER; diff --git a/vp8/encoder/denoising.c b/vp8/encoder/denoising.c index 09ed9dd..f3faa22 100644 --- a/vp8/encoder/denoising.c +++ b/vp8/encoder/denoising.c @@ -15,198 +15,293 @@ #include "vpx_mem/vpx_mem.h" #include "vpx_rtcd.h" -static const unsigned int NOISE_MOTION_THRESHOLD = 20*20; -static const unsigned int NOISE_DIFF2_THRESHOLD = 75; -// SSE_DIFF_THRESHOLD is selected as ~95% confidence assuming var(noise) ~= 100. -static const unsigned int SSE_DIFF_THRESHOLD = 16*16*20; -static const unsigned int SSE_THRESHOLD = 16*16*40; +static const unsigned int NOISE_MOTION_THRESHOLD = 25 * 25; +/* SSE_DIFF_THRESHOLD is selected as ~95% confidence assuming + * var(noise) ~= 100. + */ +static const unsigned int SSE_DIFF_THRESHOLD = 16 * 16 * 20; +static const unsigned int SSE_THRESHOLD = 16 * 16 * 40; -static uint8_t blend(uint8_t state, uint8_t sample, uint8_t factor_q8) -{ - return (uint8_t)( - (((uint16_t)factor_q8 * ((uint16_t)state) + // Q8 - (uint16_t)(256 - factor_q8) * ((uint16_t)sample)) + 128) // Q8 - >> 8); -} +/* + * The filter function was modified to reduce the computational complexity. + * Step 1: + * Instead of applying tap coefficients for each pixel, we calculated the + * pixel adjustments vs. pixel diff value ahead of time. 
+ * adjustment = filtered_value - current_raw + * = (filter_coefficient * diff + 128) >> 8 + * where + * filter_coefficient = (255 << 8) / (256 + ((absdiff * 330) >> 3)); + * filter_coefficient += filter_coefficient / + * (3 + motion_magnitude_adjustment); + * filter_coefficient is clamped to 0 ~ 255. + * + * Step 2: + * The adjustment vs. diff curve becomes flat very quick when diff increases. + * This allowed us to use only several levels to approximate the curve without + * changing the filtering algorithm too much. + * The adjustments were further corrected by checking the motion magnitude. + * The levels used are: + * diff adjustment w/o motion correction adjustment w/ motion correction + * [-255, -16] -6 -7 + * [-15, -8] -4 -5 + * [-7, -4] -3 -4 + * [-3, 3] diff diff + * [4, 7] 3 4 + * [8, 15] 4 5 + * [16, 255] 6 7 + */ -static unsigned int denoiser_motion_compensate(YV12_BUFFER_CONFIG* src, - YV12_BUFFER_CONFIG* dst, - MACROBLOCK* x, - unsigned int best_sse, - unsigned int zero_mv_sse, - int recon_yoffset, - int recon_uvoffset) +int vp8_denoiser_filter_c(YV12_BUFFER_CONFIG *mc_running_avg, + YV12_BUFFER_CONFIG *running_avg, MACROBLOCK *signal, + unsigned int motion_magnitude, int y_offset, + int uv_offset) { - MACROBLOCKD filter_xd = x->e_mbd; - int mv_col; - int mv_row; - int sse_diff = zero_mv_sse - best_sse; - // Compensate the running average. - filter_xd.pre.y_buffer = src->y_buffer + recon_yoffset; - filter_xd.pre.u_buffer = src->u_buffer + recon_uvoffset; - filter_xd.pre.v_buffer = src->v_buffer + recon_uvoffset; - // Write the compensated running average to the destination buffer. - filter_xd.dst.y_buffer = dst->y_buffer + recon_yoffset; - filter_xd.dst.u_buffer = dst->u_buffer + recon_uvoffset; - filter_xd.dst.v_buffer = dst->v_buffer + recon_uvoffset; - // Use the best MV for the compensation. 
- filter_xd.mode_info_context->mbmi.ref_frame = LAST_FRAME; - filter_xd.mode_info_context->mbmi.mode = filter_xd.best_sse_inter_mode; - filter_xd.mode_info_context->mbmi.mv = filter_xd.best_sse_mv; - filter_xd.mode_info_context->mbmi.need_to_clamp_mvs = - filter_xd.need_to_clamp_best_mvs; - mv_col = filter_xd.best_sse_mv.as_mv.col; - mv_row = filter_xd.best_sse_mv.as_mv.row; - if (filter_xd.mode_info_context->mbmi.mode <= B_PRED || - (mv_row*mv_row + mv_col*mv_col <= NOISE_MOTION_THRESHOLD && - sse_diff < SSE_DIFF_THRESHOLD)) - { - // Handle intra blocks as referring to last frame with zero motion and - // let the absolute pixel difference affect the filter factor. - // Also consider small amount of motion as being random walk due to noise, - // if it doesn't mean that we get a much bigger error. - // Note that any changes to the mode info only affects the denoising. - filter_xd.mode_info_context->mbmi.ref_frame = LAST_FRAME; - filter_xd.mode_info_context->mbmi.mode = ZEROMV; - filter_xd.mode_info_context->mbmi.mv.as_int = 0; - x->e_mbd.best_sse_inter_mode = ZEROMV; - x->e_mbd.best_sse_mv.as_int = 0; - best_sse = zero_mv_sse; - } - if (!x->skip) - { - vp8_build_inter_predictors_mb(&filter_xd); - } - else - { - vp8_build_inter16x16_predictors_mb(&filter_xd, - filter_xd.dst.y_buffer, - filter_xd.dst.u_buffer, - filter_xd.dst.v_buffer, - filter_xd.dst.y_stride, - filter_xd.dst.uv_stride); - } - return best_sse; -} + unsigned char *sig = signal->thismb; + int sig_stride = 16; + unsigned char *mc_running_avg_y = mc_running_avg->y_buffer + y_offset; + int mc_avg_y_stride = mc_running_avg->y_stride; + unsigned char *running_avg_y = running_avg->y_buffer + y_offset; + int avg_y_stride = running_avg->y_stride; + int r, c, i; + int sum_diff = 0; + int adj_val[3] = {3, 4, 6}; -static void denoiser_filter(YV12_BUFFER_CONFIG* mc_running_avg, - YV12_BUFFER_CONFIG* running_avg, - MACROBLOCK* signal, - unsigned int motion_magnitude2, - int y_offset, - int uv_offset) -{ - unsigned 
char* sig = signal->thismb; - int sig_stride = 16; - unsigned char* mc_running_avg_y = mc_running_avg->y_buffer + y_offset; - int mc_avg_y_stride = mc_running_avg->y_stride; - unsigned char* running_avg_y = running_avg->y_buffer + y_offset; - int avg_y_stride = running_avg->y_stride; - int r, c; - for (r = 0; r < 16; r++) - { - for (c = 0; c < 16; c++) + /* If motion_magnitude is small, making the denoiser more aggressive by + * increasing the adjustment for each level. */ + if (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) + { + for (i = 0; i < 3; i++) + adj_val[i] += 1; + } + + for (r = 0; r < 16; ++r) { - int diff; - int absdiff = 0; - unsigned int filter_coefficient; - absdiff = sig[c] - mc_running_avg_y[c]; - absdiff = absdiff > 0 ? absdiff : -absdiff; - assert(absdiff >= 0 && absdiff < 256); - filter_coefficient = (255 << 8) / (256 + ((absdiff * 330) >> 3)); - // Allow some additional filtering of static blocks, or blocks with very - // small motion vectors. - filter_coefficient += filter_coefficient / (3 + (motion_magnitude2 >> 3)); - filter_coefficient = filter_coefficient > 255 ? 255 : filter_coefficient; - - running_avg_y[c] = blend(mc_running_avg_y[c], sig[c], filter_coefficient); - diff = sig[c] - running_avg_y[c]; - - if (diff * diff < NOISE_DIFF2_THRESHOLD) - { - // Replace with mean to suppress the noise. - sig[c] = running_avg_y[c]; - } - else - { - // Replace the filter state with the signal since the change in this - // pixel isn't classified as noise. - running_avg_y[c] = sig[c]; - } + for (c = 0; c < 16; ++c) + { + int diff = 0; + int adjustment = 0; + int absdiff = 0; + + diff = mc_running_avg_y[c] - sig[c]; + absdiff = abs(diff); + + /* When |diff| < 4, use pixel value from last denoised raw. 
*/ + if (absdiff <= 3) + { + running_avg_y[c] = mc_running_avg_y[c]; + sum_diff += diff; + } + else + { + if (absdiff >= 4 && absdiff <= 7) + adjustment = adj_val[0]; + else if (absdiff >= 8 && absdiff <= 15) + adjustment = adj_val[1]; + else + adjustment = adj_val[2]; + + if (diff > 0) + { + if ((sig[c] + adjustment) > 255) + running_avg_y[c] = 255; + else + running_avg_y[c] = sig[c] + adjustment; + + sum_diff += adjustment; + } + else + { + if ((sig[c] - adjustment) < 0) + running_avg_y[c] = 0; + else + running_avg_y[c] = sig[c] - adjustment; + + sum_diff -= adjustment; + } + } + } + + /* Update pointers for next iteration. */ + sig += sig_stride; + mc_running_avg_y += mc_avg_y_stride; + running_avg_y += avg_y_stride; } - sig += sig_stride; - mc_running_avg_y += mc_avg_y_stride; - running_avg_y += avg_y_stride; - } + + if (abs(sum_diff) > SUM_DIFF_THRESHOLD) + return COPY_BLOCK; + + vp8_copy_mem16x16(running_avg->y_buffer + y_offset, avg_y_stride, + signal->thismb, sig_stride); + return FILTER_BLOCK; } int vp8_denoiser_allocate(VP8_DENOISER *denoiser, int width, int height) { - assert(denoiser); - denoiser->yv12_running_avg.flags = 0; - if (vp8_yv12_alloc_frame_buffer(&(denoiser->yv12_running_avg), width, - height, VP8BORDERINPIXELS) < 0) - { - vp8_denoiser_free(denoiser); - return 1; - } - denoiser->yv12_mc_running_avg.flags = 0; - if (vp8_yv12_alloc_frame_buffer(&(denoiser->yv12_mc_running_avg), width, - height, VP8BORDERINPIXELS) < 0) - { - vp8_denoiser_free(denoiser); - return 1; - } - vpx_memset(denoiser->yv12_running_avg.buffer_alloc, 0, - denoiser->yv12_running_avg.frame_size); - vpx_memset(denoiser->yv12_mc_running_avg.buffer_alloc, 0, - denoiser->yv12_mc_running_avg.frame_size); - return 0; + int i; + assert(denoiser); + + for (i = 0; i < MAX_REF_FRAMES; i++) + { + denoiser->yv12_running_avg[i].flags = 0; + + if (vp8_yv12_alloc_frame_buffer(&(denoiser->yv12_running_avg[i]), width, + height, VP8BORDERINPIXELS) + < 0) + { + vp8_denoiser_free(denoiser); + 
return 1; + } + vpx_memset(denoiser->yv12_running_avg[i].buffer_alloc, 0, + denoiser->yv12_running_avg[i].frame_size); + + } + denoiser->yv12_mc_running_avg.flags = 0; + + if (vp8_yv12_alloc_frame_buffer(&(denoiser->yv12_mc_running_avg), width, + height, VP8BORDERINPIXELS) < 0) + { + vp8_denoiser_free(denoiser); + return 1; + } + + vpx_memset(denoiser->yv12_mc_running_avg.buffer_alloc, 0, + denoiser->yv12_mc_running_avg.frame_size); + return 0; } void vp8_denoiser_free(VP8_DENOISER *denoiser) { - assert(denoiser); - vp8_yv12_de_alloc_frame_buffer(&denoiser->yv12_running_avg); - vp8_yv12_de_alloc_frame_buffer(&denoiser->yv12_mc_running_avg); + int i; + assert(denoiser); + + for (i = 0; i < MAX_REF_FRAMES ; i++) + { + vp8_yv12_de_alloc_frame_buffer(&denoiser->yv12_running_avg[i]); + } + vp8_yv12_de_alloc_frame_buffer(&denoiser->yv12_mc_running_avg); } + void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser, MACROBLOCK *x, unsigned int best_sse, unsigned int zero_mv_sse, int recon_yoffset, - int recon_uvoffset) { - int mv_row; - int mv_col; - unsigned int motion_magnitude2; - // Motion compensate the running average. - best_sse = denoiser_motion_compensate(&denoiser->yv12_running_avg, - &denoiser->yv12_mc_running_avg, - x, - best_sse, - zero_mv_sse, - recon_yoffset, - recon_uvoffset); - - mv_row = x->e_mbd.best_sse_mv.as_mv.row; - mv_col = x->e_mbd.best_sse_mv.as_mv.col; - motion_magnitude2 = mv_row*mv_row + mv_col*mv_col; - if (best_sse > SSE_THRESHOLD || - motion_magnitude2 > 8 * NOISE_MOTION_THRESHOLD) - { - // No filtering of this block since it differs too much from the predictor, - // or the motion vector magnitude is considered too big. - vp8_copy_mem16x16(x->thismb, 16, - denoiser->yv12_running_avg.y_buffer + recon_yoffset, - denoiser->yv12_running_avg.y_stride); - return; - } - // Filter. 
- denoiser_filter(&denoiser->yv12_mc_running_avg, - &denoiser->yv12_running_avg, - x, - motion_magnitude2, - recon_yoffset, - recon_uvoffset); + int recon_uvoffset) +{ + int mv_row; + int mv_col; + unsigned int motion_magnitude2; + + MV_REFERENCE_FRAME frame = x->best_reference_frame; + MV_REFERENCE_FRAME zero_frame = x->best_zeromv_reference_frame; + + enum vp8_denoiser_decision decision = FILTER_BLOCK; + + if (zero_frame) + { + YV12_BUFFER_CONFIG *src = &denoiser->yv12_running_avg[frame]; + YV12_BUFFER_CONFIG *dst = &denoiser->yv12_mc_running_avg; + YV12_BUFFER_CONFIG saved_pre,saved_dst; + MB_MODE_INFO saved_mbmi; + MACROBLOCKD *filter_xd = &x->e_mbd; + MB_MODE_INFO *mbmi = &filter_xd->mode_info_context->mbmi; + int mv_col; + int mv_row; + int sse_diff = zero_mv_sse - best_sse; + + saved_mbmi = *mbmi; + + /* Use the best MV for the compensation. */ + mbmi->ref_frame = x->best_reference_frame; + mbmi->mode = x->best_sse_inter_mode; + mbmi->mv = x->best_sse_mv; + mbmi->need_to_clamp_mvs = x->need_to_clamp_best_mvs; + mv_col = x->best_sse_mv.as_mv.col; + mv_row = x->best_sse_mv.as_mv.row; + + if (frame == INTRA_FRAME || + ((unsigned int)(mv_row *mv_row + mv_col *mv_col) + <= NOISE_MOTION_THRESHOLD && + sse_diff < (int)SSE_DIFF_THRESHOLD)) + { + /* + * Handle intra blocks as referring to last frame with zero motion + * and let the absolute pixel difference affect the filter factor. + * Also consider small amount of motion as being random walk due + * to noise, if it doesn't mean that we get a much bigger error. + * Note that any changes to the mode info only affects the + * denoising. + */ + mbmi->ref_frame = + x->best_zeromv_reference_frame; + + src = &denoiser->yv12_running_avg[zero_frame]; + + mbmi->mode = ZEROMV; + mbmi->mv.as_int = 0; + x->best_sse_inter_mode = ZEROMV; + x->best_sse_mv.as_int = 0; + best_sse = zero_mv_sse; + } + + saved_pre = filter_xd->pre; + saved_dst = filter_xd->dst; + + /* Compensate the running average. 
*/ + filter_xd->pre.y_buffer = src->y_buffer + recon_yoffset; + filter_xd->pre.u_buffer = src->u_buffer + recon_uvoffset; + filter_xd->pre.v_buffer = src->v_buffer + recon_uvoffset; + /* Write the compensated running average to the destination buffer. */ + filter_xd->dst.y_buffer = dst->y_buffer + recon_yoffset; + filter_xd->dst.u_buffer = dst->u_buffer + recon_uvoffset; + filter_xd->dst.v_buffer = dst->v_buffer + recon_uvoffset; + + if (!x->skip) + { + vp8_build_inter_predictors_mb(filter_xd); + } + else + { + vp8_build_inter16x16_predictors_mb(filter_xd, + filter_xd->dst.y_buffer, + filter_xd->dst.u_buffer, + filter_xd->dst.v_buffer, + filter_xd->dst.y_stride, + filter_xd->dst.uv_stride); + } + filter_xd->pre = saved_pre; + filter_xd->dst = saved_dst; + *mbmi = saved_mbmi; + + } + + mv_row = x->best_sse_mv.as_mv.row; + mv_col = x->best_sse_mv.as_mv.col; + motion_magnitude2 = mv_row * mv_row + mv_col * mv_col; + if (best_sse > SSE_THRESHOLD || motion_magnitude2 + > 8 * NOISE_MOTION_THRESHOLD) + { + decision = COPY_BLOCK; + } + + if (decision == FILTER_BLOCK) + { + /* Filter. */ + decision = vp8_denoiser_filter(&denoiser->yv12_mc_running_avg, + &denoiser->yv12_running_avg[INTRA_FRAME], + x, + motion_magnitude2, + recon_yoffset, recon_uvoffset); + } + if (decision == COPY_BLOCK) + { + /* No filtering of this block; it differs too much from the predictor, + * or the motion vector magnitude is considered too big. 
+ */ + vp8_copy_mem16x16( + x->thismb, 16, + denoiser->yv12_running_avg[INTRA_FRAME].y_buffer + recon_yoffset, + denoiser->yv12_running_avg[INTRA_FRAME].y_stride); + } } diff --git a/vp8/encoder/denoising.h b/vp8/encoder/denoising.h index 343531b..b025f5c 100644 --- a/vp8/encoder/denoising.h +++ b/vp8/encoder/denoising.h @@ -13,10 +13,19 @@ #include "block.h" +#define SUM_DIFF_THRESHOLD (16 * 16 * 2) +#define MOTION_MAGNITUDE_THRESHOLD (8*3) + +enum vp8_denoiser_decision +{ + COPY_BLOCK, + FILTER_BLOCK +}; + typedef struct vp8_denoiser { - YV12_BUFFER_CONFIG yv12_running_avg; - YV12_BUFFER_CONFIG yv12_mc_running_avg; + YV12_BUFFER_CONFIG yv12_running_avg[MAX_REF_FRAMES]; + YV12_BUFFER_CONFIG yv12_mc_running_avg; } VP8_DENOISER; int vp8_denoiser_allocate(VP8_DENOISER *denoiser, int width, int height); @@ -30,4 +39,4 @@ void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser, int recon_yoffset, int recon_uvoffset); -#endif // VP8_ENCODER_DENOISING_H_ +#endif /* VP8_ENCODER_DENOISING_H_ */ diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c index 8233873..d1b647b 100644 --- a/vp8/encoder/encodeframe.c +++ b/vp8/encoder/encodeframe.c @@ -33,7 +33,7 @@ #endif #include "encodeframe.h" -extern void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) ; +extern void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t) ; extern void vp8_calc_ref_frame_costs(int *ref_frame_cost, int prob_intra, int prob_last, @@ -45,7 +45,6 @@ extern void vp8_auto_select_speed(VP8_COMP *cpi); extern void vp8cx_init_mbrthread_data(VP8_COMP *cpi, MACROBLOCK *x, MB_ROW_COMP *mbr_ei, - int mb_row, int count); static void adjust_act_zbin( VP8_COMP *cpi, MACROBLOCK *x ); @@ -77,7 +76,7 @@ static const unsigned char VP8_VAR_OFFS[16]= }; -// Original activity measure from Tim T's code. +/* Original activity measure from Tim T's code. 
*/ static unsigned int tt_activity_measure( VP8_COMP *cpi, MACROBLOCK *x ) { unsigned int act; @@ -100,7 +99,7 @@ static unsigned int tt_activity_measure( VP8_COMP *cpi, MACROBLOCK *x ) return act; } -// Stub for alternative experimental activity measures. +/* Stub for alternative experimental activity measures. */ static unsigned int alt_activity_measure( VP8_COMP *cpi, MACROBLOCK *x, int use_dc_pred ) { @@ -108,8 +107,9 @@ static unsigned int alt_activity_measure( VP8_COMP *cpi, } -// Measure the activity of the current macroblock -// What we measure here is TBD so abstracted to this function +/* Measure the activity of the current macroblock + * What we measure here is TBD so abstracted to this function + */ #define ALT_ACT_MEASURE 1 static unsigned int mb_activity_measure( VP8_COMP *cpi, MACROBLOCK *x, int mb_row, int mb_col) @@ -120,12 +120,12 @@ static unsigned int mb_activity_measure( VP8_COMP *cpi, MACROBLOCK *x, { int use_dc_pred = (mb_col || mb_row) && (!mb_col || !mb_row); - // Or use and alternative. + /* Or use and alternative. */ mb_activity = alt_activity_measure( cpi, x, use_dc_pred ); } else { - // Original activity measure from Tim T's code. + /* Original activity measure from Tim T's code. 
*/ mb_activity = tt_activity_measure( cpi, x ); } @@ -135,36 +135,36 @@ static unsigned int mb_activity_measure( VP8_COMP *cpi, MACROBLOCK *x, return mb_activity; } -// Calculate an "average" mb activity value for the frame +/* Calculate an "average" mb activity value for the frame */ #define ACT_MEDIAN 0 static void calc_av_activity( VP8_COMP *cpi, int64_t activity_sum ) { #if ACT_MEDIAN - // Find median: Simple n^2 algorithm for experimentation + /* Find median: Simple n^2 algorithm for experimentation */ { unsigned int median; unsigned int i,j; unsigned int * sortlist; unsigned int tmp; - // Create a list to sort to + /* Create a list to sort to */ CHECK_MEM_ERROR(sortlist, vpx_calloc(sizeof(unsigned int), cpi->common.MBs)); - // Copy map to sort list + /* Copy map to sort list */ vpx_memcpy( sortlist, cpi->mb_activity_map, sizeof(unsigned int) * cpi->common.MBs ); - // Ripple each value down to its correct position + /* Ripple each value down to its correct position */ for ( i = 1; i < cpi->common.MBs; i ++ ) { for ( j = i; j > 0; j -- ) { if ( sortlist[j] < sortlist[j-1] ) { - // Swap values + /* Swap values */ tmp = sortlist[j-1]; sortlist[j-1] = sortlist[j]; sortlist[j] = tmp; @@ -174,7 +174,7 @@ static void calc_av_activity( VP8_COMP *cpi, int64_t activity_sum ) } } - // Even number MBs so estimate median as mean of two either side. + /* Even number MBs so estimate median as mean of two either side. 
*/ median = ( 1 + sortlist[cpi->common.MBs >> 1] + sortlist[(cpi->common.MBs >> 1) + 1] ) >> 1; @@ -183,14 +183,14 @@ static void calc_av_activity( VP8_COMP *cpi, int64_t activity_sum ) vpx_free(sortlist); } #else - // Simple mean for now + /* Simple mean for now */ cpi->activity_avg = (unsigned int)(activity_sum/cpi->common.MBs); #endif if (cpi->activity_avg < VP8_ACTIVITY_AVG_MIN) cpi->activity_avg = VP8_ACTIVITY_AVG_MIN; - // Experimental code: return fixed value normalized for several clips + /* Experimental code: return fixed value normalized for several clips */ if ( ALT_ACT_MEASURE ) cpi->activity_avg = 100000; } @@ -199,7 +199,7 @@ static void calc_av_activity( VP8_COMP *cpi, int64_t activity_sum ) #define OUTPUT_NORM_ACT_STATS 0 #if USE_ACT_INDEX -// Calculate and activity index for each mb +/* Calculate and activity index for each mb */ static void calc_activity_index( VP8_COMP *cpi, MACROBLOCK *x ) { VP8_COMMON *const cm = & cpi->common; @@ -214,19 +214,19 @@ static void calc_activity_index( VP8_COMP *cpi, MACROBLOCK *x ) fprintf(f, "\n%12d\n", cpi->activity_avg ); #endif - // Reset pointers to start of activity map + /* Reset pointers to start of activity map */ x->mb_activity_ptr = cpi->mb_activity_map; - // Calculate normalized mb activity number. + /* Calculate normalized mb activity number. 
*/ for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) { - // for each macroblock col in image + /* for each macroblock col in image */ for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) { - // Read activity from the map + /* Read activity from the map */ act = *(x->mb_activity_ptr); - // Calculate a normalized activity number + /* Calculate a normalized activity number */ a = act + 4*cpi->activity_avg; b = 4*act + cpi->activity_avg; @@ -238,7 +238,7 @@ static void calc_activity_index( VP8_COMP *cpi, MACROBLOCK *x ) #if OUTPUT_NORM_ACT_STATS fprintf(f, " %6d", *(x->mb_activity_ptr)); #endif - // Increment activity map pointers + /* Increment activity map pointers */ x->mb_activity_ptr++; } @@ -255,8 +255,9 @@ static void calc_activity_index( VP8_COMP *cpi, MACROBLOCK *x ) } #endif -// Loop through all MBs. Note activity of each, average activity and -// calculate a normalized activity for each +/* Loop through all MBs. Note activity of each, average activity and + * calculate a normalized activity for each + */ static void build_activity_map( VP8_COMP *cpi ) { MACROBLOCK *const x = & cpi->mb; @@ -273,15 +274,15 @@ static void build_activity_map( VP8_COMP *cpi ) unsigned int mb_activity; int64_t activity_sum = 0; - // for each macroblock row in image + /* for each macroblock row in image */ for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) { #if ALT_ACT_MEASURE - // reset above block coeffs + /* reset above block coeffs */ xd->up_available = (mb_row != 0); recon_yoffset = (mb_row * recon_y_stride * 16); #endif - // for each macroblock col in image + /* for each macroblock col in image */ for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) { #if ALT_ACT_MEASURE @@ -289,48 +290,48 @@ static void build_activity_map( VP8_COMP *cpi ) xd->left_available = (mb_col != 0); recon_yoffset += 16; #endif - //Copy current mb to a buffer + /* Copy current mb to a buffer */ vp8_copy_mem16x16(x->src.y_buffer, x->src.y_stride, x->thismb, 16); - // measure activity + /* measure activity */ 
mb_activity = mb_activity_measure( cpi, x, mb_row, mb_col ); - // Keep frame sum + /* Keep frame sum */ activity_sum += mb_activity; - // Store MB level activity details. + /* Store MB level activity details. */ *x->mb_activity_ptr = mb_activity; - // Increment activity map pointer + /* Increment activity map pointer */ x->mb_activity_ptr++; - // adjust to the next column of source macroblocks + /* adjust to the next column of source macroblocks */ x->src.y_buffer += 16; } - // adjust to the next row of mbs + /* adjust to the next row of mbs */ x->src.y_buffer += 16 * x->src.y_stride - 16 * cm->mb_cols; #if ALT_ACT_MEASURE - //extend the recon for intra prediction + /* extend the recon for intra prediction */ vp8_extend_mb_row(new_yv12, xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8); #endif } - // Calculate an "average" MB activity + /* Calculate an "average" MB activity */ calc_av_activity(cpi, activity_sum); #if USE_ACT_INDEX - // Calculate an activity index number of each mb + /* Calculate an activity index number of each mb */ calc_activity_index( cpi, x ); #endif } -// Macroblock activity masking +/* Macroblock activity masking */ void vp8_activity_masking(VP8_COMP *cpi, MACROBLOCK *x) { #if USE_ACT_INDEX @@ -342,7 +343,7 @@ void vp8_activity_masking(VP8_COMP *cpi, MACROBLOCK *x) int64_t b; int64_t act = *(x->mb_activity_ptr); - // Apply the masking to the RD multiplier. + /* Apply the masking to the RD multiplier. 
*/ a = act + (2*cpi->activity_avg); b = (2*act) + cpi->activity_avg; @@ -351,7 +352,7 @@ void vp8_activity_masking(VP8_COMP *cpi, MACROBLOCK *x) x->errorperbit += (x->errorperbit==0); #endif - // Activity based Zbin adjustment + /* Activity based Zbin adjustment */ adjust_act_zbin(cpi, x); } @@ -398,7 +399,7 @@ void encode_mb_row(VP8_COMP *cpi, w = &cpi->bc[1]; #endif - // reset above block coeffs + /* reset above block coeffs */ xd->above_context = cm->above_context; xd->up_available = (mb_row != 0); @@ -406,37 +407,41 @@ void encode_mb_row(VP8_COMP *cpi, recon_uvoffset = (mb_row * recon_uv_stride * 8); cpi->tplist[mb_row].start = *tp; - //printf("Main mb_row = %d\n", mb_row); + /* printf("Main mb_row = %d\n", mb_row); */ - // Distance of Mb to the top & bottom edges, specified in 1/8th pel - // units as they are always compared to values that are in 1/8th pel units + /* Distance of Mb to the top & bottom edges, specified in 1/8th pel + * units as they are always compared to values that are in 1/8th pel + */ xd->mb_to_top_edge = -((mb_row * 16) << 3); xd->mb_to_bottom_edge = ((cm->mb_rows - 1 - mb_row) * 16) << 3; - // Set up limit values for vertical motion vector components - // to prevent them extending beyond the UMV borders + /* Set up limit values for vertical motion vector components + * to prevent them extending beyond the UMV borders + */ x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16)); x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16) + (VP8BORDERINPIXELS - 16); - // Set the mb activity pointer to the start of the row. + /* Set the mb activity pointer to the start of the row. 
*/ x->mb_activity_ptr = &cpi->mb_activity_map[map_index]; - // for each macroblock col in image + /* for each macroblock col in image */ for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) { #if (CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING) *tp = cpi->tok; #endif - // Distance of Mb to the left & right edges, specified in - // 1/8th pel units as they are always compared to values - // that are in 1/8th pel units + /* Distance of Mb to the left & right edges, specified in + * 1/8th pel units as they are always compared to values + * that are in 1/8th pel units + */ xd->mb_to_left_edge = -((mb_col * 16) << 3); xd->mb_to_right_edge = ((cm->mb_cols - 1 - mb_col) * 16) << 3; - // Set up limit values for horizontal motion vector components - // to prevent them extending beyond the UMV borders + /* Set up limit values for horizontal motion vector components + * to prevent them extending beyond the UMV borders + */ x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16)); x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16) + (VP8BORDERINPIXELS - 16); @@ -449,13 +454,13 @@ void encode_mb_row(VP8_COMP *cpi, x->rddiv = cpi->RDDIV; x->rdmult = cpi->RDMULT; - //Copy current mb to a buffer + /* Copy current mb to a buffer */ vp8_copy_mem16x16(x->src.y_buffer, x->src.y_stride, x->thismb, 16); #if CONFIG_MULTITHREAD if (cpi->b_multi_threaded != 0) { - *current_mb_col = mb_col - 1; // set previous MB done + *current_mb_col = mb_col - 1; /* set previous MB done */ if ((mb_col & (nsync - 1)) == 0) { @@ -471,11 +476,13 @@ void encode_mb_row(VP8_COMP *cpi, if(cpi->oxcf.tuning == VP8_TUNE_SSIM) vp8_activity_masking(cpi, x); - // Is segmentation enabled - // MB level adjustment to quantizer + /* Is segmentation enabled */ + /* MB level adjustment to quantizer */ if (xd->segmentation_enabled) { - // Code to set segment id in xd->mbmi.segment_id for current MB (with range checking) + /* Code to set segment id in xd->mbmi.segment_id for current MB + * (with range checking) + */ if 
(cpi->segmentation_map[map_index+mb_col] <= 3) xd->mode_info_context->mbmi.segment_id = cpi->segmentation_map[map_index+mb_col]; else @@ -484,7 +491,8 @@ void encode_mb_row(VP8_COMP *cpi, vp8cx_mb_init_quantizer(cpi, x, 1); } else - xd->mode_info_context->mbmi.segment_id = 0; // Set to Segment 0 by default + /* Set to Segment 0 by default */ + xd->mode_info_context->mbmi.segment_id = 0; x->active_ptr = cpi->active_map + map_index + mb_col; @@ -514,21 +522,25 @@ void encode_mb_row(VP8_COMP *cpi, #endif - // Count of last ref frame 0,0 usage - if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)) - cpi->inter_zz_count ++; - - // Special case code for cyclic refresh - // If cyclic update enabled then copy xd->mbmi.segment_id; (which may have been updated based on mode - // during vp8cx_encode_inter_macroblock()) back into the global segmentation map + /* Special case code for cyclic refresh + * If cyclic update enabled then copy xd->mbmi.segment_id; (which + * may have been updated based on mode during + * vp8cx_encode_inter_macroblock()) back into the global + * segmentation map + */ if ((cpi->current_layer == 0) && - (cpi->cyclic_refresh_mode_enabled && xd->segmentation_enabled)) + (cpi->cyclic_refresh_mode_enabled && + xd->segmentation_enabled)) { cpi->segmentation_map[map_index+mb_col] = xd->mode_info_context->mbmi.segment_id; - // If the block has been refreshed mark it as clean (the magnitude of the -ve influences how long it will be before we consider another refresh): - // Else if it was coded (last frame 0,0) and has not already been refreshed then mark it as a candidate for cleanup next time (marked 0) - // else mark it as dirty (1). 
+ /* If the block has been refreshed mark it as clean (the + * magnitude of the -ve influences how long it will be before + * we consider another refresh): + * Else if it was coded (last frame 0,0) and has not already + * been refreshed then mark it as a candidate for cleanup + * next time (marked 0) else mark it as dirty (1). + */ if (xd->mode_info_context->mbmi.segment_id) cpi->cyclic_refresh_map[map_index+mb_col] = -1; else if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)) @@ -551,13 +563,13 @@ void encode_mb_row(VP8_COMP *cpi, pack_tokens(w, tp_start, tok_count); } #endif - // Increment pointer into gf usage flags structure. + /* Increment pointer into gf usage flags structure. */ x->gf_active_ptr++; - // Increment the activity mask pointers. + /* Increment the activity mask pointers. */ x->mb_activity_ptr++; - // adjust to the next column of macroblocks + /* adjust to the next column of macroblocks */ x->src.y_buffer += 16; x->src.u_buffer += 8; x->src.v_buffer += 8; @@ -565,16 +577,16 @@ void encode_mb_row(VP8_COMP *cpi, recon_yoffset += 16; recon_uvoffset += 8; - // Keep track of segment usage + /* Keep track of segment usage */ segment_counts[xd->mode_info_context->mbmi.segment_id] ++; - // skip to next mb + /* skip to next mb */ xd->mode_info_context++; x->partition_info++; xd->above_context++; } - //extend the recon for intra prediction + /* extend the recon for intra prediction */ vp8_extend_mb_row( &cm->yv12_fb[dst_fb_idx], xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, @@ -585,7 +597,7 @@ void encode_mb_row(VP8_COMP *cpi, *current_mb_col = rightmost_col; #endif - // this is to account for the border + /* this is to account for the border */ xd->mode_info_context++; x->partition_info++; } @@ -596,10 +608,10 @@ static void init_encode_frame_mb_context(VP8_COMP *cpi) VP8_COMMON *const cm = & cpi->common; MACROBLOCKD *const xd = & x->e_mbd; - // GF active flags data structure + /* GF active flags data 
structure */ x->gf_active_ptr = (signed char *)cpi->gf_active_flags; - // Activity map pointer + /* Activity map pointer */ x->mb_activity_ptr = cpi->mb_activity_map; x->act_zbin_adj = 0; @@ -611,48 +623,42 @@ static void init_encode_frame_mb_context(VP8_COMP *cpi) xd->frame_type = cm->frame_type; - // reset intra mode contexts + /* reset intra mode contexts */ if (cm->frame_type == KEY_FRAME) vp8_init_mbmode_probs(cm); - // Copy data over into macro block data structures. + /* Copy data over into macro block data structures. */ x->src = * cpi->Source; xd->pre = cm->yv12_fb[cm->lst_fb_idx]; xd->dst = cm->yv12_fb[cm->new_fb_idx]; - // set up frame for intra coded blocks + /* set up frame for intra coded blocks */ vp8_setup_intra_recon(&cm->yv12_fb[cm->new_fb_idx]); vp8_build_block_offsets(x); - vp8_setup_block_dptrs(&x->e_mbd); - - vp8_setup_block_ptrs(x); - xd->mode_info_context->mbmi.mode = DC_PRED; xd->mode_info_context->mbmi.uv_mode = DC_PRED; xd->left_context = &cm->left_context; - vp8_zero(cpi->count_mb_ref_frame_usage) - vp8_zero(cpi->ymode_count) - vp8_zero(cpi->uv_mode_count) - x->mvc = cm->fc.mvc; vpx_memset(cm->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * cm->mb_cols); - // Special case treatment when GF and ARF are not sensible options for reference - if (cpi->ref_frame_flags == VP8_LAST_FLAG) + /* Special case treatment when GF and ARF are not sensible options + * for reference + */ + if (cpi->ref_frame_flags == VP8_LAST_FRAME) vp8_calc_ref_frame_costs(x->ref_frame_cost, cpi->prob_intra_coded,255,128); else if ((cpi->oxcf.number_of_layers > 1) && - (cpi->ref_frame_flags == VP8_GOLD_FLAG)) + (cpi->ref_frame_flags == VP8_GOLD_FRAME)) vp8_calc_ref_frame_costs(x->ref_frame_cost, cpi->prob_intra_coded,1,255); else if ((cpi->oxcf.number_of_layers > 1) && - (cpi->ref_frame_flags == VP8_ALT_FLAG)) + (cpi->ref_frame_flags == VP8_ALTR_FRAME)) vp8_calc_ref_frame_costs(x->ref_frame_cost, cpi->prob_intra_coded,1,1); else @@ -664,6 +670,43 @@ static void 
init_encode_frame_mb_context(VP8_COMP *cpi) xd->fullpixel_mask = 0xffffffff; if(cm->full_pixel) xd->fullpixel_mask = 0xfffffff8; + + vp8_zero(x->coef_counts); + vp8_zero(x->ymode_count); + vp8_zero(x->uv_mode_count) + x->prediction_error = 0; + x->intra_error = 0; + vp8_zero(x->count_mb_ref_frame_usage); +} + +static void sum_coef_counts(MACROBLOCK *x, MACROBLOCK *x_thread) +{ + int i = 0; + do + { + int j = 0; + do + { + int k = 0; + do + { + /* at every context */ + + /* calc probs and branch cts for this frame only */ + int t = 0; /* token/prob index */ + + do + { + x->coef_counts [i][j][k][t] += + x_thread->coef_counts [i][j][k][t]; + } + while (++t < ENTROPY_NODES); + } + while (++k < PREV_COEF_CONTEXTS); + } + while (++j < COEF_BANDS); + } + while (++i < BLOCK_TYPES); } void vp8_encode_frame(VP8_COMP *cpi) @@ -676,7 +719,7 @@ void vp8_encode_frame(VP8_COMP *cpi) int segment_counts[MAX_MB_SEGMENTS]; int totalrate; #if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING - BOOL_CODER * bc = &cpi->bc[1]; // bc[0] is for control partition + BOOL_CODER * bc = &cpi->bc[1]; /* bc[0] is for control partition */ const int num_part = (1 << cm->multi_token_partition); #endif @@ -691,8 +734,8 @@ void vp8_encode_frame(VP8_COMP *cpi) vp8_auto_select_speed(cpi); } - // Functions setup for all frame types so we can use MC in AltRef - if (cm->mcomp_filter_type == SIXTAP) + /* Functions setup for all frame types so we can use MC in AltRef */ + if(!cm->use_bilinear_mc_filter) { xd->subpixel_predict = vp8_sixtap_predict4x4; xd->subpixel_predict8x4 = vp8_sixtap_predict8x4; @@ -707,43 +750,36 @@ void vp8_encode_frame(VP8_COMP *cpi) xd->subpixel_predict16x16 = vp8_bilinear_predict16x16; } - // Reset frame count of inter 0,0 motion vector usage. 
- cpi->inter_zz_count = 0; - - cpi->prediction_error = 0; - cpi->intra_error = 0; - cpi->skip_true_count = 0; + cpi->mb.skip_true_count = 0; cpi->tok_count = 0; #if 0 - // Experimental code + /* Experimental code */ cpi->frame_distortion = 0; cpi->last_mb_distortion = 0; #endif xd->mode_info_context = cm->mi; - vp8_zero(cpi->MVcount); - - vp8_zero(cpi->coef_counts); + vp8_zero(cpi->mb.MVcount); vp8cx_frame_init_quantizer(cpi); - vp8_initialize_rd_consts(cpi, + vp8_initialize_rd_consts(cpi, x, vp8_dc_quant(cm->base_qindex, cm->y1dc_delta_q)); vp8cx_initialize_me_consts(cpi, cm->base_qindex); if(cpi->oxcf.tuning == VP8_TUNE_SSIM) { - // Initialize encode frame context. + /* Initialize encode frame context. */ init_encode_frame_mb_context(cpi); - // Build a frame level activity map + /* Build a frame level activity map */ build_activity_map(cpi); } - // re-init encode frame context. + /* re-init encode frame context. */ init_encode_frame_mb_context(cpi); #if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING @@ -768,7 +804,8 @@ void vp8_encode_frame(VP8_COMP *cpi) { int i; - vp8cx_init_mbrthread_data(cpi, x, cpi->mb_row_ei, 1, cpi->encoding_thread_count); + vp8cx_init_mbrthread_data(cpi, x, cpi->mb_row_ei, + cpi->encoding_thread_count); for (i = 0; i < cm->mb_rows; i++) cpi->mt_current_mb_col[i] = -1; @@ -790,7 +827,7 @@ void vp8_encode_frame(VP8_COMP *cpi) encode_mb_row(cpi, cm, mb_row, x, xd, &tp, segment_counts, &totalrate); - // adjust to the next row of mbs + /* adjust to the next row of mbs */ x->src.y_buffer += 16 * x->src.y_stride * (cpi->encoding_thread_count + 1) - 16 * cm->mb_cols; x->src.u_buffer += 8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols; x->src.v_buffer += 8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols; @@ -809,7 +846,8 @@ void vp8_encode_frame(VP8_COMP *cpi) for (mb_row = 0; mb_row < cm->mb_rows; mb_row ++) { - cpi->tok_count += cpi->tplist[mb_row].stop - cpi->tplist[mb_row].start; + 
cpi->tok_count += (unsigned int) + (cpi->tplist[mb_row].stop - cpi->tplist[mb_row].start); } if (xd->segmentation_enabled) @@ -829,14 +867,50 @@ void vp8_encode_frame(VP8_COMP *cpi) for (i = 0; i < cpi->encoding_thread_count; i++) { + int mode_count; + int c_idx; totalrate += cpi->mb_row_ei[i].totalrate; + + cpi->mb.skip_true_count += cpi->mb_row_ei[i].mb.skip_true_count; + + for(mode_count = 0; mode_count < VP8_YMODES; mode_count++) + cpi->mb.ymode_count[mode_count] += + cpi->mb_row_ei[i].mb.ymode_count[mode_count]; + + for(mode_count = 0; mode_count < VP8_UV_MODES; mode_count++) + cpi->mb.uv_mode_count[mode_count] += + cpi->mb_row_ei[i].mb.uv_mode_count[mode_count]; + + for(c_idx = 0; c_idx < MVvals; c_idx++) + { + cpi->mb.MVcount[0][c_idx] += + cpi->mb_row_ei[i].mb.MVcount[0][c_idx]; + cpi->mb.MVcount[1][c_idx] += + cpi->mb_row_ei[i].mb.MVcount[1][c_idx]; + } + + cpi->mb.prediction_error += + cpi->mb_row_ei[i].mb.prediction_error; + cpi->mb.intra_error += cpi->mb_row_ei[i].mb.intra_error; + + for(c_idx = 0; c_idx < MAX_REF_FRAMES; c_idx++) + cpi->mb.count_mb_ref_frame_usage[c_idx] += + cpi->mb_row_ei[i].mb.count_mb_ref_frame_usage[c_idx]; + + for(c_idx = 0; c_idx < MAX_ERROR_BINS; c_idx++) + cpi->mb.error_bins[c_idx] += + cpi->mb_row_ei[i].mb.error_bins[c_idx]; + + /* add up counts for each thread */ + sum_coef_counts(x, &cpi->mb_row_ei[i].mb); } } else #endif { - // for each macroblock row in image + + /* for each macroblock row in image */ for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) { vp8_zero(cm->left_context) @@ -847,13 +921,13 @@ void vp8_encode_frame(VP8_COMP *cpi) encode_mb_row(cpi, cm, mb_row, x, xd, &tp, segment_counts, &totalrate); - // adjust to the next row of mbs + /* adjust to the next row of mbs */ x->src.y_buffer += 16 * x->src.y_stride - 16 * cm->mb_cols; x->src.u_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols; x->src.v_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols; } - cpi->tok_count = tp - cpi->tok; + cpi->tok_count = (unsigned 
int)(tp - cpi->tok); } #if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING @@ -873,12 +947,13 @@ void vp8_encode_frame(VP8_COMP *cpi) // Work out the segment probabilities if segmentation is enabled - if (xd->segmentation_enabled) + // and needs to be updated + if (xd->segmentation_enabled && xd->update_mb_segmentation_map) { int tot_count; int i; - // Set to defaults + /* Set to defaults */ vpx_memset(xd->mb_segment_tree_probs, 255 , sizeof(xd->mb_segment_tree_probs)); tot_count = segment_counts[0] + segment_counts[1] + segment_counts[2] + segment_counts[3]; @@ -899,7 +974,7 @@ void vp8_encode_frame(VP8_COMP *cpi) if (tot_count > 0) xd->mb_segment_tree_probs[2] = (segment_counts[2] * 255) / tot_count; - // Zero probabilities not allowed + /* Zero probabilities not allowed */ for (i = 0; i < MB_FEATURE_TREE_PROBS; i ++) { if (xd->mb_segment_tree_probs[i] == 0) @@ -908,10 +983,10 @@ void vp8_encode_frame(VP8_COMP *cpi) } } - // 256 rate units to the bit - cpi->projected_frame_size = totalrate >> 8; // projected_frame_size in units of BYTES + /* projected_frame_size in units of BYTES */ + cpi->projected_frame_size = totalrate >> 8; - // Make a note of the percentage MBs coded Intra. + /* Make a note of the percentage MBs coded Intra. 
*/ if (cm->frame_type == KEY_FRAME) { cpi->this_frame_percent_intra = 100; @@ -920,50 +995,23 @@ void vp8_encode_frame(VP8_COMP *cpi) { int tot_modes; - tot_modes = cpi->count_mb_ref_frame_usage[INTRA_FRAME] - + cpi->count_mb_ref_frame_usage[LAST_FRAME] - + cpi->count_mb_ref_frame_usage[GOLDEN_FRAME] - + cpi->count_mb_ref_frame_usage[ALTREF_FRAME]; + tot_modes = cpi->mb.count_mb_ref_frame_usage[INTRA_FRAME] + + cpi->mb.count_mb_ref_frame_usage[LAST_FRAME] + + cpi->mb.count_mb_ref_frame_usage[GOLDEN_FRAME] + + cpi->mb.count_mb_ref_frame_usage[ALTREF_FRAME]; if (tot_modes) - cpi->this_frame_percent_intra = cpi->count_mb_ref_frame_usage[INTRA_FRAME] * 100 / tot_modes; - - } - -#if 0 - { - int cnt = 0; - int flag[2] = {0, 0}; - - for (cnt = 0; cnt < MVPcount; cnt++) - { - if (cm->fc.pre_mvc[0][cnt] != cm->fc.mvc[0][cnt]) - { - flag[0] = 1; - vpx_memcpy(cm->fc.pre_mvc[0], cm->fc.mvc[0], MVPcount); - break; - } - } - - for (cnt = 0; cnt < MVPcount; cnt++) - { - if (cm->fc.pre_mvc[1][cnt] != cm->fc.mvc[1][cnt]) - { - flag[1] = 1; - vpx_memcpy(cm->fc.pre_mvc[1], cm->fc.mvc[1], MVPcount); - break; - } - } + cpi->this_frame_percent_intra = + cpi->mb.count_mb_ref_frame_usage[INTRA_FRAME] * 100 / tot_modes; - if (flag[0] || flag[1]) - vp8_build_component_cost_table(cpi->mb.mvcost, (const MV_CONTEXT *) cm->fc.mvc, flag); } -#endif #if ! CONFIG_REALTIME_ONLY - // Adjust the projected reference frame usage probability numbers to reflect - // what we have just seen. This may be useful when we make multiple iterations - // of the recode loop rather than continuing to use values from the previous frame. + /* Adjust the projected reference frame usage probability numbers to + * reflect what we have just seen. This may be useful when we make + * multiple iterations of the recode loop rather than continuing to use + * values from the previous frame. 
+ */ if ((cm->frame_type != KEY_FRAME) && ((cpi->oxcf.number_of_layers > 1) || (!cm->refresh_alt_ref_frame && !cm->refresh_golden_frame))) { @@ -1017,16 +1065,13 @@ void vp8_build_block_offsets(MACROBLOCK *x) vp8_build_block_doffsets(&x->e_mbd); - // y blocks + /* y blocks */ x->thismb_ptr = &x->thismb[0]; for (br = 0; br < 4; br++) { for (bc = 0; bc < 4; bc++) { BLOCK *this_block = &x->block[block]; - //this_block->base_src = &x->src.y_buffer; - //this_block->src_stride = x->src.y_stride; - //this_block->src = 4 * br * this_block->src_stride + 4 * bc; this_block->base_src = &x->thismb_ptr; this_block->src_stride = 16; this_block->src = 4 * br * 16 + 4 * bc; @@ -1034,7 +1079,7 @@ void vp8_build_block_offsets(MACROBLOCK *x) } } - // u blocks + /* u blocks */ for (br = 0; br < 2; br++) { for (bc = 0; bc < 2; bc++) @@ -1047,7 +1092,7 @@ void vp8_build_block_offsets(MACROBLOCK *x) } } - // v blocks + /* v blocks */ for (br = 0; br < 2; br++) { for (bc = 0; bc < 2; bc++) @@ -1087,13 +1132,14 @@ static void sum_intra_stats(VP8_COMP *cpi, MACROBLOCK *x) #endif - ++cpi->ymode_count[m]; - ++cpi->uv_mode_count[uvm]; + ++x->ymode_count[m]; + ++x->uv_mode_count[uvm]; } -// Experimental stub function to create a per MB zbin adjustment based on -// some previously calculated measure of MB activity. +/* Experimental stub function to create a per MB zbin adjustment based on + * some previously calculated measure of MB activity. + */ static void adjust_act_zbin( VP8_COMP *cpi, MACROBLOCK *x ) { #if USE_ACT_INDEX @@ -1103,7 +1149,7 @@ static void adjust_act_zbin( VP8_COMP *cpi, MACROBLOCK *x ) int64_t b; int64_t act = *(x->mb_activity_ptr); - // Apply the masking to the RD multiplier. + /* Apply the masking to the RD multiplier. 
*/ a = act + 4*cpi->activity_avg; b = 4*act + cpi->activity_avg; @@ -1114,15 +1160,16 @@ static void adjust_act_zbin( VP8_COMP *cpi, MACROBLOCK *x ) #endif } -int vp8cx_encode_intra_macroblock(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t) +int vp8cx_encode_intra_macroblock(VP8_COMP *cpi, MACROBLOCK *x, + TOKENEXTRA **t) { MACROBLOCKD *xd = &x->e_mbd; int rate; if (cpi->sf.RD && cpi->compressor_speed != 2) - vp8_rd_pick_intra_mode(cpi, x, &rate); + vp8_rd_pick_intra_mode(x, &rate); else - vp8_pick_intra_mode(cpi, x, &rate); + vp8_pick_intra_mode(x, &rate); if(cpi->oxcf.tuning == VP8_TUNE_SSIM) { @@ -1139,7 +1186,7 @@ int vp8cx_encode_intra_macroblock(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t) sum_intra_stats(cpi, x); - vp8_tokenize_mb(cpi, &x->e_mbd, t); + vp8_tokenize_mb(cpi, x, t); if (xd->mode_info_context->mbmi.mode != B_PRED) vp8_inverse_transform_mby(xd); @@ -1176,25 +1223,27 @@ int vp8cx_encode_inter_macroblock x->encode_breakout = cpi->oxcf.encode_breakout; #if CONFIG_TEMPORAL_DENOISING - // Reset the best sse mode/mv for each macroblock. - x->e_mbd.best_sse_inter_mode = 0; - x->e_mbd.best_sse_mv.as_int = 0; - x->e_mbd.need_to_clamp_best_mvs = 0; + /* Reset the best sse mode/mv for each macroblock. */ + x->best_reference_frame = INTRA_FRAME; + x->best_zeromv_reference_frame = INTRA_FRAME; + x->best_sse_inter_mode = 0; + x->best_sse_mv.as_int = 0; + x->need_to_clamp_best_mvs = 0; #endif if (cpi->sf.RD) { - int zbin_mode_boost_enabled = cpi->zbin_mode_boost_enabled; + int zbin_mode_boost_enabled = x->zbin_mode_boost_enabled; /* Are we using the fast quantizer for the mode selection? 
*/ if(cpi->sf.use_fastquant_for_pick) { - cpi->mb.quantize_b = vp8_fast_quantize_b; - cpi->mb.quantize_b_pair = vp8_fast_quantize_b_pair; + x->quantize_b = vp8_fast_quantize_b; + x->quantize_b_pair = vp8_fast_quantize_b_pair; /* the fast quantizer does not use zbin_extra, so * do not recalculate */ - cpi->zbin_mode_boost_enabled = 0; + x->zbin_mode_boost_enabled = 0; } vp8_rd_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate, &distortion, &intra_error); @@ -1202,12 +1251,12 @@ int vp8cx_encode_inter_macroblock /* switch back to the regular quantizer for the encode */ if (cpi->sf.improved_quant) { - cpi->mb.quantize_b = vp8_regular_quantize_b; - cpi->mb.quantize_b_pair = vp8_regular_quantize_b_pair; + x->quantize_b = vp8_regular_quantize_b; + x->quantize_b_pair = vp8_regular_quantize_b_pair; } /* restore cpi->zbin_mode_boost_enabled */ - cpi->zbin_mode_boost_enabled = zbin_mode_boost_enabled; + x->zbin_mode_boost_enabled = zbin_mode_boost_enabled; } else @@ -1216,28 +1265,28 @@ int vp8cx_encode_inter_macroblock &distortion, &intra_error, mb_row, mb_col); } - cpi->prediction_error += distortion; - cpi->intra_error += intra_error; + x->prediction_error += distortion; + x->intra_error += intra_error; if(cpi->oxcf.tuning == VP8_TUNE_SSIM) { - // Adjust the zbin based on this MB rate. + /* Adjust the zbin based on this MB rate. 
*/ adjust_act_zbin( cpi, x ); } #if 0 - // Experimental RD code + /* Experimental RD code */ cpi->frame_distortion += distortion; cpi->last_mb_distortion = distortion; #endif - // MB level adjutment to quantizer setup + /* MB level adjutment to quantizer setup */ if (xd->segmentation_enabled) { - // If cyclic update enabled + /* If cyclic update enabled */ if (cpi->current_layer == 0 && cpi->cyclic_refresh_mode_enabled) { - // Clear segment_id back to 0 if not coded (last frame 0,0) + /* Clear segment_id back to 0 if not coded (last frame 0,0) */ if ((xd->mode_info_context->mbmi.segment_id == 1) && ((xd->mode_info_context->mbmi.ref_frame != LAST_FRAME) || (xd->mode_info_context->mbmi.mode != ZEROMV))) { @@ -1250,24 +1299,25 @@ int vp8cx_encode_inter_macroblock } { - // Experimental code. Special case for gf and arf zeromv modes. - // Increase zbin size to supress noise - cpi->zbin_mode_boost = 0; - if (cpi->zbin_mode_boost_enabled) + /* Experimental code. Special case for gf and arf zeromv modes. 
+ * Increase zbin size to supress noise + */ + x->zbin_mode_boost = 0; + if (x->zbin_mode_boost_enabled) { if ( xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME ) { if (xd->mode_info_context->mbmi.mode == ZEROMV) { if (xd->mode_info_context->mbmi.ref_frame != LAST_FRAME) - cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST; + x->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST; else - cpi->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST; + x->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST; } else if (xd->mode_info_context->mbmi.mode == SPLITMV) - cpi->zbin_mode_boost = 0; + x->zbin_mode_boost = 0; else - cpi->zbin_mode_boost = MV_ZBIN_BOOST; + x->zbin_mode_boost = MV_ZBIN_BOOST; } } @@ -1277,7 +1327,7 @@ int vp8cx_encode_inter_macroblock vp8_update_zbin_extra(cpi, x); } - cpi->count_mb_ref_frame_usage[xd->mode_info_context->mbmi.ref_frame] ++; + x->count_mb_ref_frame_usage[xd->mode_info_context->mbmi.ref_frame] ++; if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) { @@ -1322,7 +1372,7 @@ int vp8cx_encode_inter_macroblock if (!x->skip) { - vp8_tokenize_mb(cpi, xd, t); + vp8_tokenize_mb(cpi, x, t); if (xd->mode_info_context->mbmi.mode != B_PRED) vp8_inverse_transform_mby(xd); @@ -1339,12 +1389,12 @@ int vp8cx_encode_inter_macroblock if (cpi->common.mb_no_coeff_skip) { - cpi->skip_true_count ++; + x->skip_true_count ++; vp8_fix_contexts(xd); } else { - vp8_stuff_mb(cpi, xd, t); + vp8_stuff_mb(cpi, x, t); } } diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c index 1f445b7..340dd63 100644 --- a/vp8/encoder/encodeintra.c +++ b/vp8/encoder/encodeintra.c @@ -54,10 +54,13 @@ void vp8_encode_intra4x4block(MACROBLOCK *x, int ib) BLOCKD *b = &x->e_mbd.block[ib]; BLOCK *be = &x->block[ib]; int dst_stride = x->e_mbd.dst.y_stride; - unsigned char *base_dst = x->e_mbd.dst.y_buffer; + unsigned char *dst = x->e_mbd.dst.y_buffer + b->offset; + unsigned char *Above = dst - dst_stride; + unsigned char *yleft = dst - 1; + unsigned char top_left = Above[-1]; - vp8_intra4x4_predict(base_dst + 
b->offset, dst_stride, - b->bmi.as_mode, b->predictor, 16); + vp8_intra4x4_predict(Above, yleft, dst_stride, b->bmi.as_mode, + b->predictor, 16, top_left); vp8_subtract_b(be, b, 16); @@ -67,14 +70,11 @@ void vp8_encode_intra4x4block(MACROBLOCK *x, int ib) if (*b->eob > 1) { - vp8_short_idct4x4llm(b->dqcoeff, - b->predictor, 16, base_dst + b->offset, dst_stride); + vp8_short_idct4x4llm(b->dqcoeff, b->predictor, 16, dst, dst_stride); } else { - vp8_dc_only_idct_add - (b->dqcoeff[0], b->predictor, 16, base_dst + b->offset, - dst_stride); + vp8_dc_only_idct_add(b->dqcoeff[0], b->predictor, 16, dst, dst_stride); } } diff --git a/vp8/encoder/encodemb.c b/vp8/encoder/encodemb.c index f89e4f7..7d494f2 100644 --- a/vp8/encoder/encodemb.c +++ b/vp8/encoder/encodemb.c @@ -137,10 +137,10 @@ void vp8_transform_intra_mby(MACROBLOCK *x) &x->block[i].coeff[0], 32); } - // build dc block from 16 y dc values + /* build dc block from 16 y dc values */ build_dcblock(x); - // do 2nd order transform on the dc block + /* do 2nd order transform on the dc block */ x->short_walsh4x4(&x->block[24].src_diff[0], &x->block[24].coeff[0], 8); @@ -157,7 +157,7 @@ static void transform_mb(MACROBLOCK *x) &x->block[i].coeff[0], 32); } - // build dc block from 16 y dc values + /* build dc block from 16 y dc values */ if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV) build_dcblock(x); @@ -167,7 +167,7 @@ static void transform_mb(MACROBLOCK *x) &x->block[i].coeff[0], 16); } - // do 2nd order transform on the dc block + /* do 2nd order transform on the dc block */ if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV) x->short_walsh4x4(&x->block[24].src_diff[0], &x->block[24].coeff[0], 8); @@ -185,7 +185,7 @@ static void transform_mby(MACROBLOCK *x) &x->block[i].coeff[0], 32); } - // build dc block from 16 y dc values + /* build dc block from 16 y dc values */ if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV) { build_dcblock(x); @@ -208,7 +208,7 @@ struct vp8_token_state{ short qc; }; -// TODO: 
experiments to find optimal multiple numbers +/* TODO: experiments to find optimal multiple numbers */ #define Y1_RD_MULT 4 #define UV_RD_MULT 2 #define Y2_RD_MULT 16 diff --git a/vp8/encoder/encodemv.c b/vp8/encoder/encodemv.c index 0145f6d..0c43d06 100644 --- a/vp8/encoder/encodemv.c +++ b/vp8/encoder/encodemv.c @@ -29,15 +29,15 @@ static void encode_mvcomponent( const vp8_prob *p = mvc->prob; const int x = v < 0 ? -v : v; - if (x < mvnum_short) // Small + if (x < mvnum_short) /* Small */ { vp8_write(w, 0, p [mvpis_short]); vp8_treed_write(w, vp8_small_mvtree, p + MVPshort, x, 3); if (!x) - return; // no sign bit + return; /* no sign bit */ } - else // Large + else /* Large */ { int i = 0; @@ -100,7 +100,7 @@ void vp8_encode_motion_vector(vp8_writer *w, const MV *mv, const MV_CONTEXT *mvc static unsigned int cost_mvcomponent(const int v, const struct mv_context *mvc) { const vp8_prob *p = mvc->prob; - const int x = v; //v<0? -v:v; + const int x = v; unsigned int cost; if (x < mvnum_short) @@ -132,12 +132,12 @@ static unsigned int cost_mvcomponent(const int v, const struct mv_context *mvc) cost += vp8_cost_bit(p [MVPbits + 3], (x >> 3) & 1); } - return cost; // + vp8_cost_bit( p [MVPsign], v < 0); + return cost; /* + vp8_cost_bit( p [MVPsign], v < 0); */ } void vp8_build_component_cost_table(int *mvcost[2], const MV_CONTEXT *mvc, int mvc_flag[2]) { - int i = 1; //-mv_max; + int i = 1; unsigned int cost0 = 0; unsigned int cost1 = 0; @@ -151,7 +151,6 @@ void vp8_build_component_cost_table(int *mvcost[2], const MV_CONTEXT *mvc, int m do { - //mvcost [0] [i] = cost_mvcomponent( i, &mvc[0]); cost0 = cost_mvcomponent(i, &mvc[0]); mvcost [0] [i] = cost0 + vp8_cost_zero(mvc[0].prob[MVPsign]); @@ -168,7 +167,6 @@ void vp8_build_component_cost_table(int *mvcost[2], const MV_CONTEXT *mvc, int m do { - //mvcost [1] [i] = cost_mvcomponent( i, mvc[1]); cost1 = cost_mvcomponent(i, &mvc[1]); mvcost [1] [i] = cost1 + vp8_cost_zero(mvc[1].prob[MVPsign]); @@ -179,10 +177,10 @@ void 
vp8_build_component_cost_table(int *mvcost[2], const MV_CONTEXT *mvc, int m } -// Motion vector probability table update depends on benefit. -// Small correction allows for the fact that an update to an MV probability -// may have benefit in subsequent frames as well as the current one. - +/* Motion vector probability table update depends on benefit. + * Small correction allows for the fact that an update to an MV probability + * may have benefit in subsequent frames as well as the current one. + */ #define MV_PROB_UPDATE_CORRECTION -1 @@ -254,22 +252,22 @@ static void write_component_probs( vp8_zero(short_bct) - //j=0 + /* j=0 */ { const int c = events [mv_max]; - is_short_ct [0] += c; // Short vector - short_ct [0] += c; // Magnitude distribution + is_short_ct [0] += c; /* Short vector */ + short_ct [0] += c; /* Magnitude distribution */ } - //j: 1 ~ mv_max (1023) + /* j: 1 ~ mv_max (1023) */ { int j = 1; do { - const int c1 = events [mv_max + j]; //positive - const int c2 = events [mv_max - j]; //negative + const int c1 = events [mv_max + j]; /* positive */ + const int c2 = events [mv_max - j]; /* negative */ const int c = c1 + c2; int a = j; @@ -278,13 +276,13 @@ static void write_component_probs( if (a < mvnum_short) { - is_short_ct [0] += c; // Short vector - short_ct [a] += c; // Magnitude distribution + is_short_ct [0] += c; /* Short vector */ + short_ct [a] += c; /* Magnitude distribution */ } else { int k = mvlong_width - 1; - is_short_ct [1] += c; // Long vector + is_short_ct [1] += c; /* Long vector */ /* bit 3 not always encoded. 
*/ do @@ -296,43 +294,6 @@ static void write_component_probs( while (++j <= mv_max); } - /* - { - int j = -mv_max; - do - { - - const int c = events [mv_max + j]; - int a = j; - - if( j < 0) - { - sign_ct [1] += c; - a = -j; - } - else if( j) - sign_ct [0] += c; - - if( a < mvnum_short) - { - is_short_ct [0] += c; // Short vector - short_ct [a] += c; // Magnitude distribution - } - else - { - int k = mvlong_width - 1; - is_short_ct [1] += c; // Long vector - - // bit 3 not always encoded. - - do - bit_ct [k] [(a >> k) & 1] += c; - while( --k >= 0); - } - } while( ++j <= mv_max); - } - */ - calc_prob(Pnew + mvpis_short, is_short_ct); calc_prob(Pnew + MVPsign, sign_ct); @@ -402,10 +363,12 @@ void vp8_write_mvprobs(VP8_COMP *cpi) active_section = 4; #endif write_component_probs( - w, &mvc[0], &vp8_default_mv_context[0], &vp8_mv_update_probs[0], cpi->MVcount[0], 0, &flags[0] + w, &mvc[0], &vp8_default_mv_context[0], &vp8_mv_update_probs[0], + cpi->mb.MVcount[0], 0, &flags[0] ); write_component_probs( - w, &mvc[1], &vp8_default_mv_context[1], &vp8_mv_update_probs[1], cpi->MVcount[1], 1, &flags[1] + w, &mvc[1], &vp8_default_mv_context[1], &vp8_mv_update_probs[1], + cpi->mb.MVcount[1], 1, &flags[1] ); if (flags[0] || flags[1]) diff --git a/vp8/encoder/ethreading.c b/vp8/encoder/ethreading.c index 2a2cb2f..d4b17ce 100644 --- a/vp8/encoder/ethreading.c +++ b/vp8/encoder/ethreading.c @@ -17,12 +17,6 @@ #if CONFIG_MULTITHREAD -extern int vp8cx_encode_inter_macroblock(VP8_COMP *cpi, MACROBLOCK *x, - TOKENEXTRA **t, - int recon_yoffset, int recon_uvoffset, - int mb_row, int mb_col); -extern int vp8cx_encode_intra_macroblock(VP8_COMP *cpi, MACROBLOCK *x, - TOKENEXTRA **t); extern void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip); extern void vp8_loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm); @@ -39,7 +33,7 @@ static THREAD_FUNCTION thread_loopfilter(void *p_data) if (sem_wait(&cpi->h_event_start_lpf) == 0) { - if (cpi->b_multi_threaded == 0) // we're 
shutting down + if (cpi->b_multi_threaded == 0) /* we're shutting down */ break; vp8_loopfilter_frame(cpi, cm); @@ -59,17 +53,14 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) MB_ROW_COMP *mbri = (MB_ROW_COMP *)(((ENCODETHREAD_DATA *)p_data)->ptr2); ENTROPY_CONTEXT_PLANES mb_row_left_context; - const int nsync = cpi->mt_sync_range; - //printf("Started thread %d\n", ithread); - while (1) { if (cpi->b_multi_threaded == 0) break; - //if(WaitForSingleObject(cpi->h_event_mbrencoding[ithread], INFINITE) == WAIT_OBJECT_0) if (sem_wait(&cpi->h_event_start_encoding[ithread]) == 0) { + const int nsync = cpi->mt_sync_range; VP8_COMMON *cm = &cpi->common; int mb_row; MACROBLOCK *x = &mbri->mb; @@ -83,7 +74,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) int *segment_counts = mbri->segment_counts; int *totalrate = &mbri->totalrate; - if (cpi->b_multi_threaded == 0) // we're shutting down + if (cpi->b_multi_threaded == 0) /* we're shutting down */ break; for (mb_row = ithread + 1; mb_row < cm->mb_rows; mb_row += (cpi->encoding_thread_count + 1)) @@ -108,7 +99,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) last_row_current_mb_col = &cpi->mt_current_mb_col[mb_row - 1]; - // reset above block coeffs + /* reset above block coeffs */ xd->above_context = cm->above_context; xd->left_context = &mb_row_left_context; @@ -118,10 +109,10 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) recon_yoffset = (mb_row * recon_y_stride * 16); recon_uvoffset = (mb_row * recon_uv_stride * 8); - // Set the mb activity pointer to the start of the row. + /* Set the mb activity pointer to the start of the row. */ x->mb_activity_ptr = &cpi->mb_activity_map[map_index]; - // for each macroblock col in image + /* for each macroblock col in image */ for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) { *current_mb_col = mb_col - 1; @@ -139,14 +130,18 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) tp = tp_start; #endif - // Distance of Mb to the various image edges. 
- // These specified to 8th pel as they are always compared to values that are in 1/8th pel units + /* Distance of Mb to the various image edges. + * These specified to 8th pel as they are always compared + * to values that are in 1/8th pel units + */ xd->mb_to_left_edge = -((mb_col * 16) << 3); xd->mb_to_right_edge = ((cm->mb_cols - 1 - mb_col) * 16) << 3; xd->mb_to_top_edge = -((mb_row * 16) << 3); xd->mb_to_bottom_edge = ((cm->mb_rows - 1 - mb_row) * 16) << 3; - // Set up limit values for motion vectors used to prevent them extending outside the UMV borders + /* Set up limit values for motion vectors used to prevent + * them extending outside the UMV borders + */ x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16)); x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16) + (VP8BORDERINPIXELS - 16); x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16)); @@ -160,17 +155,19 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) x->rddiv = cpi->RDDIV; x->rdmult = cpi->RDMULT; - //Copy current mb to a buffer + /* Copy current mb to a buffer */ vp8_copy_mem16x16(x->src.y_buffer, x->src.y_stride, x->thismb, 16); if (cpi->oxcf.tuning == VP8_TUNE_SSIM) vp8_activity_masking(cpi, x); - // Is segmentation enabled - // MB level adjustment to quantizer + /* Is segmentation enabled */ + /* MB level adjustment to quantizer */ if (xd->segmentation_enabled) { - // Code to set segment id in xd->mbmi.segment_id for current MB (with range checking) + /* Code to set segment id in xd->mbmi.segment_id for + * current MB (with range checking) + */ if (cpi->segmentation_map[map_index + mb_col] <= 3) xd->mode_info_context->mbmi.segment_id = cpi->segmentation_map[map_index + mb_col]; else @@ -179,7 +176,8 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) vp8cx_mb_init_quantizer(cpi, x, 1); } else - xd->mode_info_context->mbmi.segment_id = 0; // Set to Segment 0 by default + /* Set to Segment 0 by default */ + xd->mode_info_context->mbmi.segment_id = 0; x->active_ptr = 
cpi->active_map + map_index + mb_col; @@ -209,21 +207,28 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) #endif - // Count of last ref frame 0,0 usage - if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)) - cpi->inter_zz_count++; - - // Special case code for cyclic refresh - // If cyclic update enabled then copy xd->mbmi.segment_id; (which may have been updated based on mode - // during vp8cx_encode_inter_macroblock()) back into the global segmentation map - if (cpi->cyclic_refresh_mode_enabled && xd->segmentation_enabled) + /* Special case code for cyclic refresh + * If cyclic update enabled then copy + * xd->mbmi.segment_id; (which may have been updated + * based on mode during + * vp8cx_encode_inter_macroblock()) back into the + * global segmentation map + */ + if ((cpi->current_layer == 0) && + (cpi->cyclic_refresh_mode_enabled && + xd->segmentation_enabled)) { const MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi; cpi->segmentation_map[map_index + mb_col] = mbmi->segment_id; - // If the block has been refreshed mark it as clean (the magnitude of the -ve influences how long it will be before we consider another refresh): - // Else if it was coded (last frame 0,0) and has not already been refreshed then mark it as a candidate for cleanup next time (marked 0) - // else mark it as dirty (1). + /* If the block has been refreshed mark it as clean + * (the magnitude of the -ve influences how long it + * will be before we consider another refresh): + * Else if it was coded (last frame 0,0) and has + * not already been refreshed then mark it as a + * candidate for cleanup next time (marked 0) else + * mark it as dirty (1). 
+ */ if (mbmi->segment_id) cpi->cyclic_refresh_map[map_index + mb_col] = -1; else if ((mbmi->mode == ZEROMV) && (mbmi->ref_frame == LAST_FRAME)) @@ -246,13 +251,13 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) #else cpi->tplist[mb_row].stop = tp; #endif - // Increment pointer into gf usage flags structure. + /* Increment pointer into gf usage flags structure. */ x->gf_active_ptr++; - // Increment the activity mask pointers. + /* Increment the activity mask pointers. */ x->mb_activity_ptr++; - // adjust to the next column of macroblocks + /* adjust to the next column of macroblocks */ x->src.y_buffer += 16; x->src.u_buffer += 8; x->src.v_buffer += 8; @@ -260,10 +265,10 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) recon_yoffset += 16; recon_uvoffset += 8; - // Keep track of segment usage + /* Keep track of segment usage */ segment_counts[xd->mode_info_context->mbmi.segment_id]++; - // skip to next mb + /* skip to next mb */ xd->mode_info_context++; x->partition_info++; xd->above_context++; @@ -276,7 +281,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) *current_mb_col = mb_col + nsync; - // this is to account for the border + /* this is to account for the border */ xd->mode_info_context++; x->partition_info++; @@ -296,7 +301,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) } } - //printf("exit thread %d\n", ithread); + /* printf("exit thread %d\n", ithread); */ return 0; } @@ -336,21 +341,16 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc) z->src.v_buffer = x->src.v_buffer; */ + z->mvcost[0] = x->mvcost[0]; + z->mvcost[1] = x->mvcost[1]; + z->mvsadcost[0] = x->mvsadcost[0]; + z->mvsadcost[1] = x->mvsadcost[1]; - vpx_memcpy(z->mvcosts, x->mvcosts, sizeof(x->mvcosts)); - z->mvcost[0] = &z->mvcosts[0][mv_max+1]; - z->mvcost[1] = &z->mvcosts[1][mv_max+1]; - z->mvsadcost[0] = &z->mvsadcosts[0][mvfp_max+1]; - z->mvsadcost[1] = &z->mvsadcosts[1][mvfp_max+1]; - - - vpx_memcpy(z->token_costs, x->token_costs, 
sizeof(x->token_costs)); - vpx_memcpy(z->inter_bmode_costs, x->inter_bmode_costs, sizeof(x->inter_bmode_costs)); - //memcpy(z->mvcosts, x->mvcosts, sizeof(x->mvcosts)); - //memcpy(z->mvcost, x->mvcost, sizeof(x->mvcost)); - vpx_memcpy(z->mbmode_cost, x->mbmode_cost, sizeof(x->mbmode_cost)); - vpx_memcpy(z->intra_uv_mode_cost, x->intra_uv_mode_cost, sizeof(x->intra_uv_mode_cost)); - vpx_memcpy(z->bmode_costs, x->bmode_costs, sizeof(x->bmode_costs)); + z->token_costs = x->token_costs; + z->inter_bmode_costs = x->inter_bmode_costs; + z->mbmode_cost = x->mbmode_cost; + z->intra_uv_mode_cost = x->intra_uv_mode_cost; + z->bmode_costs = x->bmode_costs; for (i = 0; i < 25; i++) { @@ -358,17 +358,15 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc) z->block[i].quant_fast = x->block[i].quant_fast; z->block[i].quant_shift = x->block[i].quant_shift; z->block[i].zbin = x->block[i].zbin; - z->block[i].zrun_zbin_boost = x->block[i].zrun_zbin_boost; + z->block[i].zrun_zbin_boost = x->block[i].zrun_zbin_boost; z->block[i].round = x->block[i].round; - z->q_index = x->q_index; - z->act_zbin_adj = x->act_zbin_adj; - z->last_act_zbin_adj = x->last_act_zbin_adj; - /* - z->block[i].src = x->block[i].src; - */ - z->block[i].src_stride = x->block[i].src_stride; + z->block[i].src_stride = x->block[i].src_stride; } + z->q_index = x->q_index; + z->act_zbin_adj = x->act_zbin_adj; + z->last_act_zbin_adj = x->last_act_zbin_adj; + { MACROBLOCKD *xd = &x->e_mbd; MACROBLOCKD *zd = &z->e_mbd; @@ -400,9 +398,11 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc) zd->subpixel_predict16x16 = xd->subpixel_predict16x16; zd->segmentation_enabled = xd->segmentation_enabled; zd->mb_segement_abs_delta = xd->mb_segement_abs_delta; - vpx_memcpy(zd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data)); + vpx_memcpy(zd->segment_feature_data, xd->segment_feature_data, + sizeof(xd->segment_feature_data)); - vpx_memcpy(zd->dequant_y1_dc, 
xd->dequant_y1_dc, sizeof(xd->dequant_y1_dc)); + vpx_memcpy(zd->dequant_y1_dc, xd->dequant_y1_dc, + sizeof(xd->dequant_y1_dc)); vpx_memcpy(zd->dequant_y1, xd->dequant_y1, sizeof(xd->dequant_y1)); vpx_memcpy(zd->dequant_y2, xd->dequant_y2, sizeof(xd->dequant_y2)); vpx_memcpy(zd->dequant_uv, xd->dequant_uv, sizeof(xd->dequant_uv)); @@ -418,13 +418,23 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc) zd->block[i].dequant = zd->dequant_uv; zd->block[24].dequant = zd->dequant_y2; #endif + + + vpx_memcpy(z->rd_threshes, x->rd_threshes, sizeof(x->rd_threshes)); + vpx_memcpy(z->rd_thresh_mult, x->rd_thresh_mult, + sizeof(x->rd_thresh_mult)); + + z->zbin_over_quant = x->zbin_over_quant; + z->zbin_mode_boost_enabled = x->zbin_mode_boost_enabled; + z->zbin_mode_boost = x->zbin_mode_boost; + + vpx_memset(z->error_bins, 0, sizeof(z->error_bins)); } } void vp8cx_init_mbrthread_data(VP8_COMP *cpi, MACROBLOCK *x, MB_ROW_COMP *mbr_ei, - int mb_row, int count ) { @@ -432,7 +442,6 @@ void vp8cx_init_mbrthread_data(VP8_COMP *cpi, VP8_COMMON *const cm = & cpi->common; MACROBLOCKD *const xd = & x->e_mbd; int i; - (void) mb_row; for (i = 0; i < count; i++) { @@ -465,10 +474,6 @@ void vp8cx_init_mbrthread_data(VP8_COMP *cpi, vp8_build_block_offsets(mb); - vp8_setup_block_dptrs(mbd); - - vp8_setup_block_ptrs(mb); - mbd->left_context = &cm->left_context; mb->mvc = cm->fc.mvc; @@ -477,10 +482,19 @@ void vp8cx_init_mbrthread_data(VP8_COMP *cpi, mbd->fullpixel_mask = 0xffffffff; if(cm->full_pixel) mbd->fullpixel_mask = 0xfffffff8; + + vp8_zero(mb->coef_counts); + vp8_zero(x->ymode_count); + mb->skip_true_count = 0; + vp8_zero(mb->MVcount); + mb->prediction_error = 0; + mb->intra_error = 0; + vp8_zero(mb->count_mb_ref_frame_usage); + mb->mbs_tested_so_far = 0; } } -void vp8cx_create_encoder_threads(VP8_COMP *cpi) +int vp8cx_create_encoder_threads(VP8_COMP *cpi) { const VP8_COMMON * cm = &cpi->common; @@ -492,6 +506,7 @@ void vp8cx_create_encoder_threads(VP8_COMP *cpi) { int 
ithread; int th_count = cpi->oxcf.multi_threaded - 1; + int rc = 0; /* don't allocate more threads than cores available */ if (cpi->oxcf.multi_threaded > cm->processor_core_count) @@ -505,16 +520,17 @@ void vp8cx_create_encoder_threads(VP8_COMP *cpi) } if(th_count == 0) - return; - - CHECK_MEM_ERROR(cpi->h_encoding_thread, vpx_malloc(sizeof(pthread_t) * th_count)); - CHECK_MEM_ERROR(cpi->h_event_start_encoding, vpx_malloc(sizeof(sem_t) * th_count)); - CHECK_MEM_ERROR(cpi->mb_row_ei, vpx_memalign(32, sizeof(MB_ROW_COMP) * th_count)); + return 0; + + CHECK_MEM_ERROR(cpi->h_encoding_thread, + vpx_malloc(sizeof(pthread_t) * th_count)); + CHECK_MEM_ERROR(cpi->h_event_start_encoding, + vpx_malloc(sizeof(sem_t) * th_count)); + CHECK_MEM_ERROR(cpi->mb_row_ei, + vpx_memalign(32, sizeof(MB_ROW_COMP) * th_count)); vpx_memset(cpi->mb_row_ei, 0, sizeof(MB_ROW_COMP) * th_count); CHECK_MEM_ERROR(cpi->en_thread_data, vpx_malloc(sizeof(ENCODETHREAD_DATA) * th_count)); - CHECK_MEM_ERROR(cpi->mt_current_mb_col, - vpx_malloc(sizeof(*cpi->mt_current_mb_col) * cm->mb_rows)); sem_init(&cpi->h_event_end_encoding, 0, 0); @@ -528,16 +544,45 @@ void vp8cx_create_encoder_threads(VP8_COMP *cpi) for (ithread = 0; ithread < th_count; ithread++) { - ENCODETHREAD_DATA * ethd = &cpi->en_thread_data[ithread]; + ENCODETHREAD_DATA *ethd = &cpi->en_thread_data[ithread]; + + /* Setup block ptrs and offsets */ + vp8_setup_block_ptrs(&cpi->mb_row_ei[ithread].mb); + vp8_setup_block_dptrs(&cpi->mb_row_ei[ithread].mb.e_mbd); sem_init(&cpi->h_event_start_encoding[ithread], 0, 0); + ethd->ithread = ithread; ethd->ptr1 = (void *)cpi; ethd->ptr2 = (void *)&cpi->mb_row_ei[ithread]; - pthread_create(&cpi->h_encoding_thread[ithread], 0, thread_encoding_proc, ethd); + rc = pthread_create(&cpi->h_encoding_thread[ithread], 0, + thread_encoding_proc, ethd); + if(rc) + break; } + if(rc) + { + /* shutdown other threads */ + cpi->b_multi_threaded = 0; + for(--ithread; ithread >= 0; ithread--) + { + 
pthread_join(cpi->h_encoding_thread[ithread], 0); + sem_destroy(&cpi->h_event_start_encoding[ithread]); + } + sem_destroy(&cpi->h_event_end_encoding); + + /* free thread related resources */ + vpx_free(cpi->h_event_start_encoding); + vpx_free(cpi->h_encoding_thread); + vpx_free(cpi->mb_row_ei); + vpx_free(cpi->en_thread_data); + + return -1; + } + + { LPFTHREAD_DATA * lpfthd = &cpi->lpf_thread_data; @@ -545,24 +590,47 @@ void vp8cx_create_encoder_threads(VP8_COMP *cpi) sem_init(&cpi->h_event_end_lpf, 0, 0); lpfthd->ptr1 = (void *)cpi; - pthread_create(&cpi->h_filter_thread, 0, thread_loopfilter, lpfthd); + rc = pthread_create(&cpi->h_filter_thread, 0, thread_loopfilter, + lpfthd); + + if(rc) + { + /* shutdown other threads */ + cpi->b_multi_threaded = 0; + for(--ithread; ithread >= 0; ithread--) + { + sem_post(&cpi->h_event_start_encoding[ithread]); + pthread_join(cpi->h_encoding_thread[ithread], 0); + sem_destroy(&cpi->h_event_start_encoding[ithread]); + } + sem_destroy(&cpi->h_event_end_encoding); + sem_destroy(&cpi->h_event_end_lpf); + sem_destroy(&cpi->h_event_start_lpf); + + /* free thread related resources */ + vpx_free(cpi->h_event_start_encoding); + vpx_free(cpi->h_encoding_thread); + vpx_free(cpi->mb_row_ei); + vpx_free(cpi->en_thread_data); + + return -2; + } } } - + return 0; } void vp8cx_remove_encoder_threads(VP8_COMP *cpi) { if (cpi->b_multi_threaded) { - //shutdown other threads + /* shutdown other threads */ cpi->b_multi_threaded = 0; { int i; for (i = 0; i < cpi->encoding_thread_count; i++) { - //SetEvent(cpi->h_event_mbrencoding[i]); sem_post(&cpi->h_event_start_encoding[i]); pthread_join(cpi->h_encoding_thread[i], 0); @@ -577,12 +645,11 @@ void vp8cx_remove_encoder_threads(VP8_COMP *cpi) sem_destroy(&cpi->h_event_end_lpf); sem_destroy(&cpi->h_event_start_lpf); - //free thread related resources + /* free thread related resources */ vpx_free(cpi->h_event_start_encoding); vpx_free(cpi->h_encoding_thread); vpx_free(cpi->mb_row_ei); 
vpx_free(cpi->en_thread_data); - vpx_free(cpi->mt_current_mb_col); } } #endif diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c index 8de1a6a..30bf8a6 100644 --- a/vp8/encoder/firstpass.c +++ b/vp8/encoder/firstpass.c @@ -30,14 +30,12 @@ #include "encodemv.h" #include "encodeframe.h" -//#define OUTPUT_FPF 1 +/* #define OUTPUT_FPF 1 */ extern void vp8cx_frame_init_quantizer(VP8_COMP *cpi); extern void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv); extern void vp8_alloc_compressor_data(VP8_COMP *cpi); -//#define GFQ_ADJUSTMENT (40 + ((15*Q)/10)) -//#define GFQ_ADJUSTMENT (80 + ((15*Q)/10)) #define GFQ_ADJUSTMENT vp8_gf_boost_qadjustment[Q] extern int vp8_kf_boost_qadjustment[QINDEX_RANGE]; @@ -77,7 +75,9 @@ static const int cq_level[QINDEX_RANGE] = static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame); -// Resets the first pass file to the given position using a relative seek from the current position +/* Resets the first pass file to the given position using a relative seek + * from the current position + */ static void reset_fpf_position(VP8_COMP *cpi, FIRSTPASS_STATS *Position) { cpi->twopass.stats_in = Position; @@ -92,14 +92,14 @@ static int lookup_next_frame_stats(VP8_COMP *cpi, FIRSTPASS_STATS *next_frame) return 1; } -// Read frame stats at an offset from the current position +/* Read frame stats at an offset from the current position */ static int read_frame_stats( VP8_COMP *cpi, FIRSTPASS_STATS *frame_stats, int offset ) { FIRSTPASS_STATS * fps_ptr = cpi->twopass.stats_in; - // Check legality of offset + /* Check legality of offset */ if ( offset >= 0 ) { if ( &fps_ptr[offset] >= cpi->twopass.stats_in_end ) @@ -136,7 +136,7 @@ static void output_stats(const VP8_COMP *cpi, pkt.data.twopass_stats.sz = sizeof(FIRSTPASS_STATS); vpx_codec_pkt_list_add(pktlist, &pkt); -// TEMP debug code +/* TEMP debug code */ #if OUTPUT_FPF { @@ -257,7 +257,9 @@ static void avg_stats(FIRSTPASS_STATS *section) 
section->duration /= section->count; } -// Calculate a modified Error used in distributing bits between easier and harder frames +/* Calculate a modified Error used in distributing bits between easier + * and harder frames + */ static double calculate_modified_err(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) { double av_err = ( cpi->twopass.total_stats.ssim_weighted_pred_err / @@ -315,7 +317,9 @@ static double simple_weight(YV12_BUFFER_CONFIG *source) unsigned char *src = source->y_buffer; double sum_weights = 0.0; - // Loop throught the Y plane raw examining levels and creating a weight for the image + /* Loop throught the Y plane raw examining levels and creating a weight + * for the image + */ i = source->y_height; do { @@ -335,41 +339,52 @@ static double simple_weight(YV12_BUFFER_CONFIG *source) } -// This function returns the current per frame maximum bitrate target +/* This function returns the current per frame maximum bitrate target */ static int frame_max_bits(VP8_COMP *cpi) { - // Max allocation for a single frame based on the max section guidelines passed in and how many bits are left + /* Max allocation for a single frame based on the max section guidelines + * passed in and how many bits are left + */ int max_bits; - // For CBR we need to also consider buffer fullness. - // If we are running below the optimal level then we need to gradually tighten up on max_bits. + /* For CBR we need to also consider buffer fullness. + * If we are running below the optimal level then we need to gradually + * tighten up on max_bits. 
+ */ if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { double buffer_fullness_ratio = (double)cpi->buffer_level / DOUBLE_DIVIDE_CHECK((double)cpi->oxcf.optimal_buffer_level); - // For CBR base this on the target average bits per frame plus the maximum sedction rate passed in by the user + /* For CBR base this on the target average bits per frame plus the + * maximum sedction rate passed in by the user + */ max_bits = (int)(cpi->av_per_frame_bandwidth * ((double)cpi->oxcf.two_pass_vbrmax_section / 100.0)); - // If our buffer is below the optimum level + /* If our buffer is below the optimum level */ if (buffer_fullness_ratio < 1.0) { - // The lower of max_bits / 4 or cpi->av_per_frame_bandwidth / 4. + /* The lower of max_bits / 4 or cpi->av_per_frame_bandwidth / 4. */ int min_max_bits = ((cpi->av_per_frame_bandwidth >> 2) < (max_bits >> 2)) ? cpi->av_per_frame_bandwidth >> 2 : max_bits >> 2; max_bits = (int)(max_bits * buffer_fullness_ratio); + /* Lowest value we will set ... which should allow the buffer to + * refill. + */ if (max_bits < min_max_bits) - max_bits = min_max_bits; // Lowest value we will set ... which should allow the buffer to refil. 
+ max_bits = min_max_bits; } } - // VBR + /* VBR */ else { - // For VBR base this on the bits and frames left plus the two_pass_vbrmax_section rate passed in by the user + /* For VBR base this on the bits and frames left plus the + * two_pass_vbrmax_section rate passed in by the user + */ max_bits = (int)(((double)cpi->twopass.bits_left / (cpi->twopass.total_stats.count - (double)cpi->common.current_video_frame)) * ((double)cpi->oxcf.two_pass_vbrmax_section / 100.0)); } - // Trap case where we are out of bits + /* Trap case where we are out of bits */ if (max_bits < 0) max_bits = 0; @@ -403,13 +418,13 @@ static void zz_motion_search( VP8_COMP *cpi, MACROBLOCK * x, unsigned char *ref_ptr; int ref_stride = x->e_mbd.pre.y_stride; - // Set up pointers for this macro block raw buffer + /* Set up pointers for this macro block raw buffer */ raw_ptr = (unsigned char *)(raw_buffer->y_buffer + recon_yoffset + d->offset); vp8_mse16x16 ( src_ptr, src_stride, raw_ptr, raw_stride, (unsigned int *)(raw_motion_err)); - // Set up pointers for this macro block recon buffer + /* Set up pointers for this macro block recon buffer */ xd->pre.y_buffer = recon_buffer->y_buffer + recon_yoffset; ref_ptr = (unsigned char *)(xd->pre.y_buffer + d->offset ); vp8_mse16x16 ( src_ptr, src_stride, ref_ptr, ref_stride, @@ -430,19 +445,19 @@ static void first_pass_motion_search(VP8_COMP *cpi, MACROBLOCK *x, int_mv ref_mv_full; int tmp_err; - int step_param = 3; //3; // Dont search over full range for first pass - int further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param; //3; + int step_param = 3; /* Dont search over full range for first pass */ + int further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param; int n; vp8_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[BLOCK_16X16]; int new_mv_mode_penalty = 256; - // override the default variance function to use MSE + /* override the default variance function to use MSE */ v_fn_ptr.vf = vp8_mse16x16; - // Set up pointers for this macro block recon buffer + /* 
Set up pointers for this macro block recon buffer */ xd->pre.y_buffer = recon_buffer->y_buffer + recon_yoffset; - // Initial step/diamond search centred on best mv + /* Initial step/diamond search centred on best mv */ tmp_mv.as_int = 0; ref_mv_full.as_mv.col = ref_mv->as_mv.col>>3; ref_mv_full.as_mv.row = ref_mv->as_mv.row>>3; @@ -459,7 +474,7 @@ static void first_pass_motion_search(VP8_COMP *cpi, MACROBLOCK *x, best_mv->col = tmp_mv.as_mv.col; } - // Further step/diamond searches as necessary + /* Further step/diamond searches as necessary */ n = num00; num00 = 0; @@ -520,7 +535,7 @@ void vp8_first_pass(VP8_COMP *cpi) zero_ref_mv.as_int = 0; - vp8_clear_system_state(); //__asm emms; + vp8_clear_system_state(); x->src = * cpi->Source; xd->pre = *lst_yv12; @@ -530,44 +545,55 @@ void vp8_first_pass(VP8_COMP *cpi) xd->mode_info_context = cm->mi; - vp8_build_block_offsets(x); - - vp8_setup_block_dptrs(&x->e_mbd); + if(!cm->use_bilinear_mc_filter) + { + xd->subpixel_predict = vp8_sixtap_predict4x4; + xd->subpixel_predict8x4 = vp8_sixtap_predict8x4; + xd->subpixel_predict8x8 = vp8_sixtap_predict8x8; + xd->subpixel_predict16x16 = vp8_sixtap_predict16x16; + } + else + { + xd->subpixel_predict = vp8_bilinear_predict4x4; + xd->subpixel_predict8x4 = vp8_bilinear_predict8x4; + xd->subpixel_predict8x8 = vp8_bilinear_predict8x8; + xd->subpixel_predict16x16 = vp8_bilinear_predict16x16; + } - vp8_setup_block_ptrs(x); + vp8_build_block_offsets(x); - // set up frame new frame for intra coded blocks + /* set up frame new frame for intra coded blocks */ vp8_setup_intra_recon(new_yv12); vp8cx_frame_init_quantizer(cpi); - // Initialise the MV cost table to the defaults - //if( cm->current_video_frame == 0) - //if ( 0 ) + /* Initialise the MV cost table to the defaults */ { int flag[2] = {1, 1}; - vp8_initialize_rd_consts(cpi, vp8_dc_quant(cm->base_qindex, cm->y1dc_delta_q)); + vp8_initialize_rd_consts(cpi, x, vp8_dc_quant(cm->base_qindex, cm->y1dc_delta_q)); vpx_memcpy(cm->fc.mvc, 
vp8_default_mv_context, sizeof(vp8_default_mv_context)); vp8_build_component_cost_table(cpi->mb.mvcost, (const MV_CONTEXT *) cm->fc.mvc, flag); } - // for each macroblock row in image + /* for each macroblock row in image */ for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) { int_mv best_ref_mv; best_ref_mv.as_int = 0; - // reset above block coeffs + /* reset above block coeffs */ xd->up_available = (mb_row != 0); recon_yoffset = (mb_row * recon_y_stride * 16); recon_uvoffset = (mb_row * recon_uv_stride * 8); - // Set up limit values for motion vectors to prevent them extending outside the UMV borders + /* Set up limit values for motion vectors to prevent them extending + * outside the UMV borders + */ x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16)); x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16) + (VP8BORDERINPIXELS - 16); - // for each macroblock col in image + /* for each macroblock col in image */ for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) { int this_error; @@ -579,26 +605,33 @@ void vp8_first_pass(VP8_COMP *cpi) xd->dst.v_buffer = new_yv12->v_buffer + recon_uvoffset; xd->left_available = (mb_col != 0); - //Copy current mb to a buffer + /* Copy current mb to a buffer */ vp8_copy_mem16x16(x->src.y_buffer, x->src.y_stride, x->thismb, 16); - // do intra 16x16 prediction + /* do intra 16x16 prediction */ this_error = vp8_encode_intra(cpi, x, use_dc_pred); - // "intrapenalty" below deals with situations where the intra and inter error scores are very low (eg a plain black frame) - // We do not have special cases in first pass for 0,0 and nearest etc so all inter modes carry an overhead cost estimate fot the mv. - // When the error score is very low this causes us to pick all or lots of INTRA modes and throw lots of key frames. - // This penalty adds a cost matching that of a 0,0 mv to the intra case. 
+ /* "intrapenalty" below deals with situations where the intra + * and inter error scores are very low (eg a plain black frame) + * We do not have special cases in first pass for 0,0 and + * nearest etc so all inter modes carry an overhead cost + * estimate fot the mv. When the error score is very low this + * causes us to pick all or lots of INTRA modes and throw lots + * of key frames. This penalty adds a cost matching that of a + * 0,0 mv to the intra case. + */ this_error += intrapenalty; - // Cumulative intra error total + /* Cumulative intra error total */ intra_error += (int64_t)this_error; - // Set up limit values for motion vectors to prevent them extending outside the UMV borders + /* Set up limit values for motion vectors to prevent them + * extending outside the UMV borders + */ x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16)); x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16) + (VP8BORDERINPIXELS - 16); - // Other than for the first frame do a motion search + /* Other than for the first frame do a motion search */ if (cm->current_video_frame > 0) { BLOCKD *d = &x->e_mbd.block[0]; @@ -607,7 +640,7 @@ void vp8_first_pass(VP8_COMP *cpi) int motion_error = INT_MAX; int raw_motion_error = INT_MAX; - // Simple 0,0 motion with no mv overhead + /* Simple 0,0 motion with no mv overhead */ zz_motion_search( cpi, x, cpi->last_frame_unscaled_source, &raw_motion_error, lst_yv12, &motion_error, recon_yoffset ); @@ -617,13 +650,16 @@ void vp8_first_pass(VP8_COMP *cpi) if (raw_motion_error < cpi->oxcf.encode_breakout) goto skip_motion_search; - // Test last reference frame using the previous best mv as the - // starting point (best reference) for the search + /* Test last reference frame using the previous best mv as the + * starting point (best reference) for the search + */ first_pass_motion_search(cpi, x, &best_ref_mv, &d->bmi.mv.as_mv, lst_yv12, &motion_error, recon_yoffset); - // If the current best reference mv is not centred on 0,0 then do a 0,0 
based search as well + /* If the current best reference mv is not centred on 0,0 + * then do a 0,0 based search as well + */ if (best_ref_mv.as_int) { tmp_err = INT_MAX; @@ -638,7 +674,9 @@ void vp8_first_pass(VP8_COMP *cpi) } } - // Experimental search in a second reference frame ((0,0) based only) + /* Experimental search in a second reference frame ((0,0) + * based only) + */ if (cm->current_video_frame > 1) { first_pass_motion_search(cpi, x, &zero_ref_mv, &tmp_mv, gld_yv12, &gf_motion_error, recon_yoffset); @@ -646,19 +684,9 @@ void vp8_first_pass(VP8_COMP *cpi) if ((gf_motion_error < motion_error) && (gf_motion_error < this_error)) { second_ref_count++; - //motion_error = gf_motion_error; - //d->bmi.mv.as_mv.row = tmp_mv.row; - //d->bmi.mv.as_mv.col = tmp_mv.col; } - /*else - { - xd->pre.y_buffer = cm->last_frame.y_buffer + recon_yoffset; - xd->pre.u_buffer = cm->last_frame.u_buffer + recon_uvoffset; - xd->pre.v_buffer = cm->last_frame.v_buffer + recon_uvoffset; - }*/ - - // Reset to last frame as reference buffer + /* Reset to last frame as reference buffer */ xd->pre.y_buffer = lst_yv12->y_buffer + recon_yoffset; xd->pre.u_buffer = lst_yv12->u_buffer + recon_uvoffset; xd->pre.v_buffer = lst_yv12->v_buffer + recon_uvoffset; @@ -670,10 +698,11 @@ skip_motion_search: if (motion_error <= this_error) { - // Keep a count of cases where the inter and intra were - // very close and very low. This helps with scene cut - // detection for example in cropped clips with black bars - // at the sides or top and bottom. + /* Keep a count of cases where the inter and intra were + * very close and very low. This helps with scene cut + * detection for example in cropped clips with black bars + * at the sides or top and bottom. 
+ */ if( (((this_error-intrapenalty) * 9) <= (motion_error*10)) && (this_error < (2*intrapenalty)) ) @@ -696,17 +725,17 @@ skip_motion_search: best_ref_mv.as_int = d->bmi.mv.as_int; - // Was the vector non-zero + /* Was the vector non-zero */ if (d->bmi.mv.as_int) { mvcount++; - // Was it different from the last non zero vector + /* Was it different from the last non zero vector */ if ( d->bmi.mv.as_int != lastmv_as_int ) new_mv_count++; lastmv_as_int = d->bmi.mv.as_int; - // Does the Row vector point inwards or outwards + /* Does the Row vector point inwards or outwards */ if (mb_row < cm->mb_rows / 2) { if (d->bmi.mv.as_mv.row > 0) @@ -722,7 +751,7 @@ skip_motion_search: sum_in_vectors--; } - // Does the Row vector point inwards or outwards + /* Does the Row vector point inwards or outwards */ if (mb_col < cm->mb_cols / 2) { if (d->bmi.mv.as_mv.col > 0) @@ -743,7 +772,7 @@ skip_motion_search: coded_error += (int64_t)this_error; - // adjust to the next column of macroblocks + /* adjust to the next column of macroblocks */ x->src.y_buffer += 16; x->src.u_buffer += 8; x->src.v_buffer += 8; @@ -752,25 +781,25 @@ skip_motion_search: recon_uvoffset += 8; } - // adjust to the next row of mbs + /* adjust to the next row of mbs */ x->src.y_buffer += 16 * x->src.y_stride - 16 * cm->mb_cols; x->src.u_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols; x->src.v_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols; - //extend the recon for intra prediction + /* extend the recon for intra prediction */ vp8_extend_mb_row(new_yv12, xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8); - vp8_clear_system_state(); //__asm emms; + vp8_clear_system_state(); } - vp8_clear_system_state(); //__asm emms; + vp8_clear_system_state(); { double weight = 0.0; FIRSTPASS_STATS fps; fps.frame = cm->current_video_frame ; - fps.intra_error = intra_error >> 8; - fps.coded_error = coded_error >> 8; + fps.intra_error = (double)(intra_error >> 8); + fps.coded_error = (double)(coded_error >> 
8); weight = simple_weight(cpi->Source); @@ -809,12 +838,13 @@ skip_motion_search: fps.pcnt_motion = 1.0 * (double)mvcount / cpi->common.MBs; } - // TODO: handle the case when duration is set to 0, or something less - // than the full time between subsequent cpi->source_time_stamp s . - fps.duration = cpi->source->ts_end - - cpi->source->ts_start; + /* TODO: handle the case when duration is set to 0, or something less + * than the full time between subsequent cpi->source_time_stamps + */ + fps.duration = (double)(cpi->source->ts_end + - cpi->source->ts_start); - // don't want to do output stats with a stack variable! + /* don't want to do output stats with a stack variable! */ memcpy(&cpi->twopass.this_frame_stats, &fps, sizeof(FIRSTPASS_STATS)); @@ -822,7 +852,9 @@ skip_motion_search: accumulate_stats(&cpi->twopass.total_stats, &fps); } - // Copy the previous Last Frame into the GF buffer if specific conditions for doing so are met + /* Copy the previous Last Frame into the GF buffer if specific + * conditions for doing so are met + */ if ((cm->current_video_frame > 0) && (cpi->twopass.this_frame_stats.pcnt_inter > 0.20) && ((cpi->twopass.this_frame_stats.intra_error / cpi->twopass.this_frame_stats.coded_error) > 2.0)) @@ -830,18 +862,22 @@ skip_motion_search: vp8_yv12_copy_frame(lst_yv12, gld_yv12); } - // swap frame pointers so last frame refers to the frame we just compressed + /* swap frame pointers so last frame refers to the frame we just + * compressed + */ vp8_swap_yv12_buffer(lst_yv12, new_yv12); vp8_yv12_extend_frame_borders(lst_yv12); - // Special case for the first frame. Copy into the GF buffer as a second reference. + /* Special case for the first frame. Copy into the GF buffer as a + * second reference. 
+ */ if (cm->current_video_frame == 0) { vp8_yv12_copy_frame(lst_yv12, gld_yv12); } - // use this to see what the first pass reconstruction looks like + /* use this to see what the first pass reconstruction looks like */ if (0) { char filename[512]; @@ -853,7 +889,8 @@ skip_motion_search: else recon_file = fopen(filename, "ab"); - if(fwrite(lst_yv12->buffer_alloc, lst_yv12->frame_size, 1, recon_file)); + (void) fwrite(lst_yv12->buffer_alloc, lst_yv12->frame_size, 1, + recon_file); fclose(recon_file); } @@ -862,11 +899,10 @@ skip_motion_search: } extern const int vp8_bits_per_mb[2][QINDEX_RANGE]; -// Estimate a cost per mb attributable to overheads such as the coding of -// modes and motion vectors. -// Currently simplistic in its assumptions for testing. -// - +/* Estimate a cost per mb attributable to overheads such as the coding of + * modes and motion vectors. + * Currently simplistic in its assumptions for testing. + */ static double bitcost( double prob ) { @@ -890,12 +926,14 @@ static int64_t estimate_modemvcost(VP8_COMP *cpi, motion_cost = bitcost(av_pct_motion); intra_cost = bitcost(av_intra); - // Estimate of extra bits per mv overhead for mbs - // << 9 is the normalization to the (bits * 512) used in vp8_bits_per_mb + /* Estimate of extra bits per mv overhead for mbs + * << 9 is the normalization to the (bits * 512) used in vp8_bits_per_mb + */ mv_cost = ((int)(fpstats->new_mv_count / fpstats->count) * 8) << 9; - // Crude estimate of overhead cost from modes - // << 9 is the normalization to (bits * 512) used in vp8_bits_per_mb + /* Crude estimate of overhead cost from modes + * << 9 is the normalization to (bits * 512) used in vp8_bits_per_mb + */ mode_cost = (int)( ( ((av_pct_inter - av_pct_motion) * zz_cost) + (av_pct_motion * motion_cost) + @@ -914,17 +952,17 @@ static double calc_correction_factor( double err_per_mb, double error_term = err_per_mb / err_devisor; double correction_factor; - // Adjustment based on Q to power term. 
+ /* Adjustment based on Q to power term. */ power_term = pt_low + (Q * 0.01); power_term = (power_term > pt_high) ? pt_high : power_term; - // Adjustments to error term - // TBD + /* Adjustments to error term */ + /* TBD */ - // Calculate correction factor + /* Calculate correction factor */ correction_factor = pow(error_term, power_term); - // Clip range + /* Clip range */ correction_factor = (correction_factor < 0.05) ? 0.05 : (correction_factor > 5.0) ? 5.0 : correction_factor; @@ -948,15 +986,16 @@ static int estimate_max_q(VP8_COMP *cpi, int overhead_bits_per_mb; if (section_target_bandwitdh <= 0) - return cpi->twopass.maxq_max_limit; // Highest value allowed + return cpi->twopass.maxq_max_limit; /* Highest value allowed */ target_norm_bits_per_mb = (section_target_bandwitdh < (1 << 20)) ? (512 * section_target_bandwitdh) / num_mbs : 512 * (section_target_bandwitdh / num_mbs); - // Calculate a corrective factor based on a rolling ratio of bits spent - // vs target bits + /* Calculate a corrective factor based on a rolling ratio of bits spent + * vs target bits + */ if ((cpi->rolling_target_bits > 0) && (cpi->active_worst_quality < cpi->worst_quality)) { @@ -977,8 +1016,9 @@ static int estimate_max_q(VP8_COMP *cpi, ? 10.0 : cpi->twopass.est_max_qcorrection_factor; } - // Corrections for higher compression speed settings - // (reduced compression expected) + /* Corrections for higher compression speed settings + * (reduced compression expected) + */ if ((cpi->compressor_speed == 3) || (cpi->compressor_speed == 1)) { if (cpi->oxcf.cpu_used <= 5) @@ -987,18 +1027,20 @@ static int estimate_max_q(VP8_COMP *cpi, speed_correction = 1.25; } - // Estimate of overhead bits per mb - // Correction to overhead bits for min allowed Q. + /* Estimate of overhead bits per mb */ + /* Correction to overhead bits for min allowed Q. 
*/ overhead_bits_per_mb = overhead_bits / num_mbs; - overhead_bits_per_mb *= pow( 0.98, (double)cpi->twopass.maxq_min_limit ); + overhead_bits_per_mb = (int)(overhead_bits_per_mb * + pow( 0.98, (double)cpi->twopass.maxq_min_limit )); - // Try and pick a max Q that will be high enough to encode the - // content at the given rate. + /* Try and pick a max Q that will be high enough to encode the + * content at the given rate. + */ for (Q = cpi->twopass.maxq_min_limit; Q < cpi->twopass.maxq_max_limit; Q++) { int bits_per_mb_at_this_q; - // Error per MB based correction factor + /* Error per MB based correction factor */ err_correction_factor = calc_correction_factor(err_per_mb, 150.0, 0.40, 0.90, Q); @@ -1010,27 +1052,29 @@ static int estimate_max_q(VP8_COMP *cpi, * cpi->twopass.section_max_qfactor * (double)bits_per_mb_at_this_q); - // Mode and motion overhead - // As Q rises in real encode loop rd code will force overhead down - // We make a crude adjustment for this here as *.98 per Q step. + /* Mode and motion overhead */ + /* As Q rises in real encode loop rd code will force overhead down + * We make a crude adjustment for this here as *.98 per Q step. + */ overhead_bits_per_mb = (int)((double)overhead_bits_per_mb * 0.98); if (bits_per_mb_at_this_q <= target_norm_bits_per_mb) break; } - // Restriction on active max q for constrained quality mode. + /* Restriction on active max q for constrained quality mode. */ if ( (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) && (Q < cpi->cq_target_quality) ) { Q = cpi->cq_target_quality; } - // Adjust maxq_min_limit and maxq_max_limit limits based on - // averaga q observed in clip for non kf/gf.arf frames - // Give average a chance to settle though. + /* Adjust maxq_min_limit and maxq_max_limit limits based on + * average q observed in clip for non kf/gf.arf frames + * Give average a chance to settle though. 
+ */ if ( (cpi->ni_frames > - ((unsigned int)cpi->twopass.total_stats.count >> 8)) && + ((int)cpi->twopass.total_stats.count >> 8)) && (cpi->ni_frames > 150) ) { cpi->twopass.maxq_max_limit = ((cpi->ni_av_qi + 32) < cpi->worst_quality) @@ -1042,8 +1086,9 @@ static int estimate_max_q(VP8_COMP *cpi, return Q; } -// For cq mode estimate a cq level that matches the observed -// complexity and data rate. +/* For cq mode estimate a cq level that matches the observed + * complexity and data rate. + */ static int estimate_cq( VP8_COMP *cpi, FIRSTPASS_STATS * fpstats, int section_target_bandwitdh, @@ -1072,11 +1117,12 @@ static int estimate_cq( VP8_COMP *cpi, ? (512 * section_target_bandwitdh) / num_mbs : 512 * (section_target_bandwitdh / num_mbs); - // Estimate of overhead bits per mb + /* Estimate of overhead bits per mb */ overhead_bits_per_mb = overhead_bits / num_mbs; - // Corrections for higher compression speed settings - // (reduced compression expected) + /* Corrections for higher compression speed settings + * (reduced compression expected) + */ if ((cpi->compressor_speed == 3) || (cpi->compressor_speed == 1)) { if (cpi->oxcf.cpu_used <= 5) @@ -1085,19 +1131,19 @@ static int estimate_cq( VP8_COMP *cpi, speed_correction = 1.25; } - // II ratio correction factor for clip as a whole + /* II ratio correction factor for clip as a whole */ clip_iiratio = cpi->twopass.total_stats.intra_error / DOUBLE_DIVIDE_CHECK(cpi->twopass.total_stats.coded_error); clip_iifactor = 1.0 - ((clip_iiratio - 10.0) * 0.025); if (clip_iifactor < 0.80) clip_iifactor = 0.80; - // Try and pick a Q that can encode the content at the given rate. + /* Try and pick a Q that can encode the content at the given rate. 
*/ for (Q = 0; Q < MAXQ; Q++) { int bits_per_mb_at_this_q; - // Error per MB based correction factor + /* Error per MB based correction factor */ err_correction_factor = calc_correction_factor(err_per_mb, 100.0, 0.40, 0.90, Q); @@ -1110,16 +1156,17 @@ static int estimate_cq( VP8_COMP *cpi, clip_iifactor * (double)bits_per_mb_at_this_q); - // Mode and motion overhead - // As Q rises in real encode loop rd code will force overhead down - // We make a crude adjustment for this here as *.98 per Q step. + /* Mode and motion overhead */ + /* As Q rises in real encode loop rd code will force overhead down + * We make a crude adjustment for this here as *.98 per Q step. + */ overhead_bits_per_mb = (int)((double)overhead_bits_per_mb * 0.98); if (bits_per_mb_at_this_q <= target_norm_bits_per_mb) break; } - // Clip value to range "best allowed to (worst allowed - 1)" + /* Clip value to range "best allowed to (worst allowed - 1)" */ Q = cq_level[Q]; if ( Q >= cpi->worst_quality ) Q = cpi->worst_quality - 1; @@ -1141,7 +1188,9 @@ static int estimate_q(VP8_COMP *cpi, double section_err, int section_target_band target_norm_bits_per_mb = (section_target_bandwitdh < (1 << 20)) ? (512 * section_target_bandwitdh) / num_mbs : 512 * (section_target_bandwitdh / num_mbs); - // Corrections for higher compression speed settings (reduced compression expected) + /* Corrections for higher compression speed settings + * (reduced compression expected) + */ if ((cpi->compressor_speed == 3) || (cpi->compressor_speed == 1)) { if (cpi->oxcf.cpu_used <= 5) @@ -1150,12 +1199,12 @@ static int estimate_q(VP8_COMP *cpi, double section_err, int section_target_band speed_correction = 1.25; } - // Try and pick a Q that can encode the content at the given rate. + /* Try and pick a Q that can encode the content at the given rate. 
*/ for (Q = 0; Q < MAXQ; Q++) { int bits_per_mb_at_this_q; - // Error per MB based correction factor + /* Error per MB based correction factor */ err_correction_factor = calc_correction_factor(err_per_mb, 150.0, 0.40, 0.90, Q); @@ -1172,7 +1221,7 @@ static int estimate_q(VP8_COMP *cpi, double section_err, int section_target_band return Q; } -// Estimate a worst case Q for a KF group +/* Estimate a worst case Q for a KF group */ static int estimate_kf_group_q(VP8_COMP *cpi, double section_err, int section_target_bandwitdh, double group_iiratio) { int Q; @@ -1192,12 +1241,14 @@ static int estimate_kf_group_q(VP8_COMP *cpi, double section_err, int section_ta double combined_correction_factor; - // Trap special case where the target is <= 0 + /* Trap special case where the target is <= 0 */ if (target_norm_bits_per_mb <= 0) return MAXQ * 2; - // Calculate a corrective factor based on a rolling ratio of bits spent vs target bits - // This is clamped to the range 0.1 to 10.0 + /* Calculate a corrective factor based on a rolling ratio of bits spent + * vs target bits + * This is clamped to the range 0.1 to 10.0 + */ if (cpi->long_rolling_target_bits <= 0) current_spend_ratio = 10.0; else @@ -1206,14 +1257,19 @@ static int estimate_kf_group_q(VP8_COMP *cpi, double section_err, int section_ta current_spend_ratio = (current_spend_ratio > 10.0) ? 10.0 : (current_spend_ratio < 0.1) ? 0.1 : current_spend_ratio; } - // Calculate a correction factor based on the quality of prediction in the sequence as indicated by intra_inter error score ratio (IIRatio) - // The idea here is to favour subsampling in the hardest sections vs the easyest. + /* Calculate a correction factor based on the quality of prediction in + * the sequence as indicated by intra_inter error score ratio (IIRatio) + * The idea here is to favour subsampling in the hardest sections vs + * the easyest. 
+ */ iiratio_correction_factor = 1.0 - ((group_iiratio - 6.0) * 0.1); if (iiratio_correction_factor < 0.5) iiratio_correction_factor = 0.5; - // Corrections for higher compression speed settings (reduced compression expected) + /* Corrections for higher compression speed settings + * (reduced compression expected) + */ if ((cpi->compressor_speed == 3) || (cpi->compressor_speed == 1)) { if (cpi->oxcf.cpu_used <= 5) @@ -1222,13 +1278,15 @@ static int estimate_kf_group_q(VP8_COMP *cpi, double section_err, int section_ta speed_correction = 1.25; } - // Combine the various factors calculated above + /* Combine the various factors calculated above */ combined_correction_factor = speed_correction * iiratio_correction_factor * current_spend_ratio; - // Try and pick a Q that should be high enough to encode the content at the given rate. + /* Try and pick a Q that should be high enough to encode the content at + * the given rate. + */ for (Q = 0; Q < MAXQ; Q++) { - // Error per MB based correction factor + /* Error per MB based correction factor */ err_correction_factor = calc_correction_factor(err_per_mb, 150.0, pow_lowq, pow_highq, Q); @@ -1241,7 +1299,9 @@ static int estimate_kf_group_q(VP8_COMP *cpi, double section_err, int section_ta break; } - // If we could not hit the target even at Max Q then estimate what Q would have bee required + /* If we could not hit the target even at Max Q then estimate what Q + * would have been required + */ while ((bits_per_mb_at_this_q > target_norm_bits_per_mb) && (Q < (MAXQ * 2))) { @@ -1280,30 +1340,34 @@ void vp8_init_second_pass(VP8_COMP *cpi) cpi->twopass.total_stats = *cpi->twopass.stats_in_end; cpi->twopass.total_left_stats = cpi->twopass.total_stats; - // each frame can have a different duration, as the frame rate in the source - // isn't guaranteed to be constant. The frame rate prior to the first frame - // encoded in the second pass is a guess. However the sum duration is not. 
- // Its calculated based on the actual durations of all frames from the first - // pass. + /* each frame can have a different duration, as the frame rate in the + * source isn't guaranteed to be constant. The frame rate prior to + * the first frame encoded in the second pass is a guess. However the + * sum duration is not. Its calculated based on the actual durations of + * all frames from the first pass. + */ vp8_new_frame_rate(cpi, 10000000.0 * cpi->twopass.total_stats.count / cpi->twopass.total_stats.duration); cpi->output_frame_rate = cpi->frame_rate; cpi->twopass.bits_left = (int64_t)(cpi->twopass.total_stats.duration * cpi->oxcf.target_bandwidth / 10000000.0) ; cpi->twopass.bits_left -= (int64_t)(cpi->twopass.total_stats.duration * two_pass_min_rate / 10000000.0); - // Calculate a minimum intra value to be used in determining the IIratio - // scores used in the second pass. We have this minimum to make sure - // that clips that are static but "low complexity" in the intra domain - // are still boosted appropriately for KF/GF/ARF + /* Calculate a minimum intra value to be used in determining the IIratio + * scores used in the second pass. 
We have this minimum to make sure + * that clips that are static but "low complexity" in the intra domain + * are still boosted appropriately for KF/GF/ARF + */ cpi->twopass.kf_intra_err_min = KF_MB_INTRA_MIN * cpi->common.MBs; cpi->twopass.gf_intra_err_min = GF_MB_INTRA_MIN * cpi->common.MBs; - // Scan the first pass file and calculate an average Intra / Inter error score ratio for the sequence + /* Scan the first pass file and calculate an average Intra / Inter error + * score ratio for the sequence + */ { double sum_iiratio = 0.0; double IIRatio; - start_pos = cpi->twopass.stats_in; // Note starting "file" position + start_pos = cpi->twopass.stats_in; /* Note starting "file" position */ while (input_stats(cpi, &this_frame) != EOF) { @@ -1314,14 +1378,15 @@ void vp8_init_second_pass(VP8_COMP *cpi) cpi->twopass.avg_iiratio = sum_iiratio / DOUBLE_DIVIDE_CHECK((double)cpi->twopass.total_stats.count); - // Reset file position + /* Reset file position */ reset_fpf_position(cpi, start_pos); } - // Scan the first pass file and calculate a modified total error based upon the bias/power function - // used to allocate bits + /* Scan the first pass file and calculate a modified total error based + * upon the bias/power function used to allocate bits + */ { - start_pos = cpi->twopass.stats_in; // Note starting "file" position + start_pos = cpi->twopass.stats_in; /* Note starting "file" position */ cpi->twopass.modified_error_total = 0.0; cpi->twopass.modified_error_used = 0.0; @@ -1332,7 +1397,7 @@ void vp8_init_second_pass(VP8_COMP *cpi) } cpi->twopass.modified_error_left = cpi->twopass.modified_error_total; - reset_fpf_position(cpi, start_pos); // Reset file position + reset_fpf_position(cpi, start_pos); /* Reset file position */ } } @@ -1341,23 +1406,24 @@ void vp8_end_second_pass(VP8_COMP *cpi) { } -// This function gives and estimate of how badly we believe -// the prediction quality is decaying from frame to frame. 
+/* This function gives and estimate of how badly we believe the prediction + * quality is decaying from frame to frame. + */ static double get_prediction_decay_rate(VP8_COMP *cpi, FIRSTPASS_STATS *next_frame) { double prediction_decay_rate; double motion_decay; double motion_pct = next_frame->pcnt_motion; - // Initial basis is the % mbs inter coded + /* Initial basis is the % mbs inter coded */ prediction_decay_rate = next_frame->pcnt_inter; - // High % motion -> somewhat higher decay rate + /* High % motion -> somewhat higher decay rate */ motion_decay = (1.0 - (motion_pct / 20.0)); if (motion_decay < prediction_decay_rate) prediction_decay_rate = motion_decay; - // Adjustment to decay rate based on speed of motion + /* Adjustment to decay rate based on speed of motion */ { double this_mv_rabs; double this_mv_cabs; @@ -1377,9 +1443,10 @@ static double get_prediction_decay_rate(VP8_COMP *cpi, FIRSTPASS_STATS *next_fra return prediction_decay_rate; } -// Function to test for a condition where a complex transition is followed -// by a static section. For example in slide shows where there is a fade -// between slides. This is to help with more optimal kf and gf positioning. +/* Function to test for a condition where a complex transition is followed + * by a static section. For example in slide shows where there is a fade + * between slides. This is to help with more optimal kf and gf positioning. + */ static int detect_transition_to_still( VP8_COMP *cpi, int frame_interval, @@ -1389,9 +1456,10 @@ static int detect_transition_to_still( { int trans_to_still = 0; - // Break clause to detect very still sections after motion - // For example a static image after a fade or other transition - // instead of a clean scene cut. + /* Break clause to detect very still sections after motion + * For example a static image after a fade or other transition + * instead of a clean scene cut. 
+ */ if ( (frame_interval > MIN_GF_INTERVAL) && (loop_decay_rate >= 0.999) && (decay_accumulator < 0.9) ) @@ -1401,8 +1469,7 @@ static int detect_transition_to_still( FIRSTPASS_STATS tmp_next_frame; double decay_rate; - // Look ahead a few frames to see if static condition - // persists... + /* Look ahead a few frames to see if static condition persists... */ for ( j = 0; j < still_interval; j++ ) { if (EOF == input_stats(cpi, &tmp_next_frame)) @@ -1412,10 +1479,10 @@ static int detect_transition_to_still( if ( decay_rate < 0.999 ) break; } - // Reset file position + /* Reset file position */ reset_fpf_position(cpi, position); - // Only if it does do we signal a transition to still + /* Only if it does do we signal a transition to still */ if ( j == still_interval ) trans_to_still = 1; } @@ -1423,24 +1490,26 @@ static int detect_transition_to_still( return trans_to_still; } -// This function detects a flash through the high relative pcnt_second_ref -// score in the frame following a flash frame. The offset passed in should -// reflect this +/* This function detects a flash through the high relative pcnt_second_ref + * score in the frame following a flash frame. The offset passed in should + * reflect this + */ static int detect_flash( VP8_COMP *cpi, int offset ) { FIRSTPASS_STATS next_frame; int flash_detected = 0; - // Read the frame data. - // The return is 0 (no flash detected) if not a valid frame + /* Read the frame data. */ + /* The return is 0 (no flash detected) if not a valid frame */ if ( read_frame_stats(cpi, &next_frame, offset) != EOF ) { - // What we are looking for here is a situation where there is a - // brief break in prediction (such as a flash) but subsequent frames - // are reasonably well predicted by an earlier (pre flash) frame. - // The recovery after a flash is indicated by a high pcnt_second_ref - // comapred to pcnt_inter. 
+ /* What we are looking for here is a situation where there is a + * brief break in prediction (such as a flash) but subsequent frames + * are reasonably well predicted by an earlier (pre flash) frame. + * The recovery after a flash is indicated by a high pcnt_second_ref + * comapred to pcnt_inter. + */ if ( (next_frame.pcnt_second_ref > next_frame.pcnt_inter) && (next_frame.pcnt_second_ref >= 0.5 ) ) { @@ -1461,7 +1530,7 @@ static int detect_flash( VP8_COMP *cpi, int offset ) return flash_detected; } -// Update the motion related elements to the GF arf boost calculation +/* Update the motion related elements to the GF arf boost calculation */ static void accumulate_frame_motion_stats( VP8_COMP *cpi, FIRSTPASS_STATS * this_frame, @@ -1470,22 +1539,22 @@ static void accumulate_frame_motion_stats( double * abs_mv_in_out_accumulator, double * mv_ratio_accumulator ) { - //double this_frame_mv_in_out; double this_frame_mvr_ratio; double this_frame_mvc_ratio; double motion_pct; - // Accumulate motion stats. + /* Accumulate motion stats. */ motion_pct = this_frame->pcnt_motion; - // Accumulate Motion In/Out of frame stats + /* Accumulate Motion In/Out of frame stats */ *this_frame_mv_in_out = this_frame->mv_in_out_count * motion_pct; *mv_in_out_accumulator += this_frame->mv_in_out_count * motion_pct; *abs_mv_in_out_accumulator += fabs(this_frame->mv_in_out_count * motion_pct); - // Accumulate a measure of how uniform (or conversely how random) - // the motion field is. (A ratio of absmv / mv) + /* Accumulate a measure of how uniform (or conversely how random) + * the motion field is. (A ratio of absmv / mv) + */ if (motion_pct > 0.05) { this_frame_mvr_ratio = fabs(this_frame->mvr_abs) / @@ -1507,7 +1576,7 @@ static void accumulate_frame_motion_stats( } } -// Calculate a baseline boost number for the current frame. +/* Calculate a baseline boost number for the current frame. 
*/ static double calc_frame_boost( VP8_COMP *cpi, FIRSTPASS_STATS * this_frame, @@ -1515,7 +1584,7 @@ static double calc_frame_boost( { double frame_boost; - // Underlying boost factor is based on inter intra error ratio + /* Underlying boost factor is based on inter intra error ratio */ if (this_frame->intra_error > cpi->twopass.gf_intra_err_min) frame_boost = (IIFACTOR * this_frame->intra_error / DOUBLE_DIVIDE_CHECK(this_frame->coded_error)); @@ -1523,17 +1592,18 @@ static double calc_frame_boost( frame_boost = (IIFACTOR * cpi->twopass.gf_intra_err_min / DOUBLE_DIVIDE_CHECK(this_frame->coded_error)); - // Increase boost for frames where new data coming into frame - // (eg zoom out). Slightly reduce boost if there is a net balance - // of motion out of the frame (zoom in). - // The range for this_frame_mv_in_out is -1.0 to +1.0 + /* Increase boost for frames where new data coming into frame + * (eg zoom out). Slightly reduce boost if there is a net balance + * of motion out of the frame (zoom in). 
+ * The range for this_frame_mv_in_out is -1.0 to +1.0 + */ if (this_frame_mv_in_out > 0.0) frame_boost += frame_boost * (this_frame_mv_in_out * 2.0); - // In extreme case boost is halved + /* In extreme case boost is halved */ else frame_boost += frame_boost * (this_frame_mv_in_out / 2.0); - // Clip to maximum + /* Clip to maximum */ if (frame_boost > GF_RMAX) frame_boost = GF_RMAX; @@ -1561,26 +1631,27 @@ static int calc_arf_boost( double r; int flash_detected = 0; - // Search forward from the proposed arf/next gf position + /* Search forward from the proposed arf/next gf position */ for ( i = 0; i < f_frames; i++ ) { if ( read_frame_stats(cpi, &this_frame, (i+offset)) == EOF ) break; - // Update the motion related elements to the boost calculation + /* Update the motion related elements to the boost calculation */ accumulate_frame_motion_stats( cpi, &this_frame, &this_frame_mv_in_out, &mv_in_out_accumulator, &abs_mv_in_out_accumulator, &mv_ratio_accumulator ); - // Calculate the baseline boost number for this frame + /* Calculate the baseline boost number for this frame */ r = calc_frame_boost( cpi, &this_frame, this_frame_mv_in_out ); - // We want to discount the the flash frame itself and the recovery - // frame that follows as both will have poor scores. + /* We want to discount the the flash frame itself and the recovery + * frame that follows as both will have poor scores. + */ flash_detected = detect_flash(cpi, (i+offset)) || detect_flash(cpi, (i+offset+1)); - // Cumulative effect of prediction quality decay + /* Cumulative effect of prediction quality decay */ if ( !flash_detected ) { decay_accumulator = @@ -1591,7 +1662,7 @@ static int calc_arf_boost( } boost_score += (decay_accumulator * r); - // Break out conditions. + /* Break out conditions. 
*/ if ( (!flash_detected) && ((mv_ratio_accumulator > 100.0) || (abs_mv_in_out_accumulator > 3.0) || @@ -1603,7 +1674,7 @@ static int calc_arf_boost( *f_boost = (int)(boost_score * 100.0) >> 4; - // Reset for backward looking loop + /* Reset for backward looking loop */ boost_score = 0.0; mv_ratio_accumulator = 0.0; decay_accumulator = 1.0; @@ -1611,26 +1682,27 @@ static int calc_arf_boost( mv_in_out_accumulator = 0.0; abs_mv_in_out_accumulator = 0.0; - // Search forward from the proposed arf/next gf position + /* Search forward from the proposed arf/next gf position */ for ( i = -1; i >= -b_frames; i-- ) { if ( read_frame_stats(cpi, &this_frame, (i+offset)) == EOF ) break; - // Update the motion related elements to the boost calculation + /* Update the motion related elements to the boost calculation */ accumulate_frame_motion_stats( cpi, &this_frame, &this_frame_mv_in_out, &mv_in_out_accumulator, &abs_mv_in_out_accumulator, &mv_ratio_accumulator ); - // Calculate the baseline boost number for this frame + /* Calculate the baseline boost number for this frame */ r = calc_frame_boost( cpi, &this_frame, this_frame_mv_in_out ); - // We want to discount the the flash frame itself and the recovery - // frame that follows as both will have poor scores. + /* We want to discount the the flash frame itself and the recovery + * frame that follows as both will have poor scores. + */ flash_detected = detect_flash(cpi, (i+offset)) || detect_flash(cpi, (i+offset+1)); - // Cumulative effect of prediction quality decay + /* Cumulative effect of prediction quality decay */ if ( !flash_detected ) { decay_accumulator = @@ -1642,7 +1714,7 @@ static int calc_arf_boost( boost_score += (decay_accumulator * r); - // Break out conditions. + /* Break out conditions. */ if ( (!flash_detected) && ((mv_ratio_accumulator > 100.0) || (abs_mv_in_out_accumulator > 3.0) || @@ -1657,7 +1729,7 @@ static int calc_arf_boost( } #endif -// Analyse and define a gf/arf group . 
+/* Analyse and define a gf/arf group . */ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) { FIRSTPASS_STATS next_frame; @@ -1673,14 +1745,14 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) double mv_ratio_accumulator = 0.0; double decay_accumulator = 1.0; - double loop_decay_rate = 1.00; // Starting decay rate + double loop_decay_rate = 1.00; /* Starting decay rate */ double this_frame_mv_in_out = 0.0; double mv_in_out_accumulator = 0.0; double abs_mv_in_out_accumulator = 0.0; double mod_err_per_mb_accumulator = 0.0; - int max_bits = frame_max_bits(cpi); // Max for a single frame + int max_bits = frame_max_bits(cpi); /* Max for a single frame */ unsigned int allow_alt_ref = cpi->oxcf.play_alternate && cpi->oxcf.lag_in_frames; @@ -1693,37 +1765,40 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) cpi->twopass.gf_group_bits = 0; cpi->twopass.gf_decay_rate = 0; - vp8_clear_system_state(); //__asm emms; + vp8_clear_system_state(); start_pos = cpi->twopass.stats_in; - vpx_memset(&next_frame, 0, sizeof(next_frame)); // assure clean + vpx_memset(&next_frame, 0, sizeof(next_frame)); /* assure clean */ - // Load stats for the current frame. + /* Load stats for the current frame. */ mod_frame_err = calculate_modified_err(cpi, this_frame); - // Note the error of the frame at the start of the group (this will be - // the GF frame error if we code a normal gf + /* Note the error of the frame at the start of the group (this will be + * the GF frame error if we code a normal gf + */ gf_first_frame_err = mod_frame_err; - // Special treatment if the current frame is a key frame (which is also - // a gf). If it is then its error score (and hence bit allocation) need - // to be subtracted out from the calculation for the GF group + /* Special treatment if the current frame is a key frame (which is also + * a gf). 
If it is then its error score (and hence bit allocation) need + * to be subtracted out from the calculation for the GF group + */ if (cpi->common.frame_type == KEY_FRAME) gf_group_err -= gf_first_frame_err; - // Scan forward to try and work out how many frames the next gf group - // should contain and what level of boost is appropriate for the GF - // or ARF that will be coded with the group + /* Scan forward to try and work out how many frames the next gf group + * should contain and what level of boost is appropriate for the GF + * or ARF that will be coded with the group + */ i = 0; while (((i < cpi->twopass.static_scene_max_gf_interval) || ((cpi->twopass.frames_to_key - i) < MIN_GF_INTERVAL)) && (i < cpi->twopass.frames_to_key)) { - i++; // Increment the loop counter + i++; - // Accumulate error score of frames in this gf group + /* Accumulate error score of frames in this gf group */ mod_frame_err = calculate_modified_err(cpi, this_frame); gf_group_err += mod_frame_err; @@ -1734,19 +1809,20 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) if (EOF == input_stats(cpi, &next_frame)) break; - // Test for the case where there is a brief flash but the prediction - // quality back to an earlier frame is then restored. + /* Test for the case where there is a brief flash but the prediction + * quality back to an earlier frame is then restored. 
+ */ flash_detected = detect_flash(cpi, 0); - // Update the motion related elements to the boost calculation + /* Update the motion related elements to the boost calculation */ accumulate_frame_motion_stats( cpi, &next_frame, &this_frame_mv_in_out, &mv_in_out_accumulator, &abs_mv_in_out_accumulator, &mv_ratio_accumulator ); - // Calculate a baseline boost number for this frame + /* Calculate a baseline boost number for this frame */ r = calc_frame_boost( cpi, &next_frame, this_frame_mv_in_out ); - // Cumulative effect of prediction quality decay + /* Cumulative effect of prediction quality decay */ if ( !flash_detected ) { loop_decay_rate = get_prediction_decay_rate(cpi, &next_frame); @@ -1756,8 +1832,9 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) } boost_score += (decay_accumulator * r); - // Break clause to detect very still sections after motion - // For example a staic image after a fade or other transition. + /* Break clause to detect very still sections after motion + * For example a staic image after a fade or other transition. + */ if ( detect_transition_to_still( cpi, i, 5, loop_decay_rate, decay_accumulator ) ) @@ -1767,14 +1844,14 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) break; } - // Break out conditions. + /* Break out conditions. 
*/ if ( - // Break at cpi->max_gf_interval unless almost totally static + /* Break at cpi->max_gf_interval unless almost totally static */ (i >= cpi->max_gf_interval && (decay_accumulator < 0.995)) || ( - // Dont break out with a very short interval + /* Dont break out with a very short interval */ (i > MIN_GF_INTERVAL) && - // Dont break out very close to a key frame + /* Dont break out very close to a key frame */ ((cpi->twopass.frames_to_key - i) >= MIN_GF_INTERVAL) && ((boost_score > 20.0) || (next_frame.pcnt_inter < 0.75)) && (!flash_detected) && @@ -1796,15 +1873,15 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) cpi->twopass.gf_decay_rate = (i > 0) ? (int)(100.0 * (1.0 - decay_accumulator)) / i : 0; - // When using CBR apply additional buffer related upper limits + /* When using CBR apply additional buffer related upper limits */ if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { double max_boost; - // For cbr apply buffer related limits + /* For cbr apply buffer related limits */ if (cpi->drop_frames_allowed) { - int df_buffer_level = cpi->oxcf.drop_frames_water_mark * + int64_t df_buffer_level = cpi->oxcf.drop_frames_water_mark * (cpi->oxcf.optimal_buffer_level / 100); if (cpi->buffer_level > df_buffer_level) @@ -1825,7 +1902,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) boost_score = max_boost; } - // Dont allow conventional gf too near the next kf + /* Dont allow conventional gf too near the next kf */ if ((cpi->twopass.frames_to_key - i) < MIN_GF_INTERVAL) { while (i < cpi->twopass.frames_to_key) @@ -1846,14 +1923,14 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) cpi->gfu_boost = (int)(boost_score * 100.0) >> 4; #if NEW_BOOST - // Alterrnative boost calculation for alt ref + /* Alterrnative boost calculation for alt ref */ alt_boost = calc_arf_boost( cpi, 0, (i-1), (i-1), &f_boost, &b_boost ); #endif - // Should we use the alternate refernce frame + /* Should we 
use the alternate refernce frame */ if (allow_alt_ref && (i >= MIN_GF_INTERVAL) && - // dont use ARF very near next kf + /* dont use ARF very near next kf */ (i <= (cpi->twopass.frames_to_key - MIN_GF_INTERVAL)) && #if NEW_BOOST ((next_frame.pcnt_inter > 0.75) || @@ -1883,7 +1960,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) cpi->gfu_boost = alt_boost; #endif - // Estimate the bits to be allocated to the group as a whole + /* Estimate the bits to be allocated to the group as a whole */ if ((cpi->twopass.kf_group_bits > 0) && (cpi->twopass.kf_group_error_left > 0)) { @@ -1893,7 +1970,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) else group_bits = 0; - // Boost for arf frame + /* Boost for arf frame */ #if NEW_BOOST Boost = (alt_boost * GFQ_ADJUSTMENT) / 100; #else @@ -1901,7 +1978,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) #endif Boost += (i * 50); - // Set max and minimum boost and hence minimum allocation + /* Set max and minimum boost and hence minimum allocation */ if (Boost > ((cpi->baseline_gf_interval + 1) * 200)) Boost = ((cpi->baseline_gf_interval + 1) * 200); else if (Boost < 125) @@ -1909,24 +1986,27 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) allocation_chunks = (i * 100) + Boost; - // Normalize Altboost and allocations chunck down to prevent overflow + /* Normalize Altboost and allocations chunck down to prevent overflow */ while (Boost > 1000) { Boost /= 2; allocation_chunks /= 2; } - // Calculate the number of bits to be spent on the arf based on the - // boost number + /* Calculate the number of bits to be spent on the arf based on the + * boost number + */ arf_frame_bits = (int)((double)Boost * (group_bits / (double)allocation_chunks)); - // Estimate if there are enough bits available to make worthwhile use - // of an arf. + /* Estimate if there are enough bits available to make worthwhile use + * of an arf. 
+ */ tmp_q = estimate_q(cpi, mod_frame_err, (int)arf_frame_bits); - // Only use an arf if it is likely we will be able to code - // it at a lower Q than the surrounding frames. + /* Only use an arf if it is likely we will be able to code + * it at a lower Q than the surrounding frames. + */ if (tmp_q < cpi->worst_quality) { int half_gf_int; @@ -1936,42 +2016,46 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) cpi->source_alt_ref_pending = 1; - // For alt ref frames the error score for the end frame of the - // group (the alt ref frame) should not contribute to the group - // total and hence the number of bit allocated to the group. - // Rather it forms part of the next group (it is the GF at the - // start of the next group) - // gf_group_err -= mod_frame_err; - - // For alt ref frames alt ref frame is technically part of the - // GF frame for the next group but we always base the error - // calculation and bit allocation on the current group of frames. - - // Set the interval till the next gf or arf. - // For ARFs this is the number of frames to be coded before the - // future frame that is coded as an ARF. - // The future frame itself is part of the next group + /* + * For alt ref frames the error score for the end frame of the + * group (the alt ref frame) should not contribute to the group + * total and hence the number of bit allocated to the group. + * Rather it forms part of the next group (it is the GF at the + * start of the next group) + * gf_group_err -= mod_frame_err; + * + * For alt ref frames alt ref frame is technically part of the + * GF frame for the next group but we always base the error + * calculation and bit allocation on the current group of frames. + * + * Set the interval till the next gf or arf. + * For ARFs this is the number of frames to be coded before the + * future frame that is coded as an ARF. 
+ * The future frame itself is part of the next group + */ cpi->baseline_gf_interval = i; - // Define the arnr filter width for this group of frames: - // We only filter frames that lie within a distance of half - // the GF interval from the ARF frame. We also have to trap - // cases where the filter extends beyond the end of clip. - // Note: this_frame->frame has been updated in the loop - // so it now points at the ARF frame. + /* + * Define the arnr filter width for this group of frames: + * We only filter frames that lie within a distance of half + * the GF interval from the ARF frame. We also have to trap + * cases where the filter extends beyond the end of clip. + * Note: this_frame->frame has been updated in the loop + * so it now points at the ARF frame. + */ half_gf_int = cpi->baseline_gf_interval >> 1; - frames_after_arf = cpi->twopass.total_stats.count - - this_frame->frame - 1; + frames_after_arf = (int)(cpi->twopass.total_stats.count - + this_frame->frame - 1); switch (cpi->oxcf.arnr_type) { - case 1: // Backward filter + case 1: /* Backward filter */ frames_fwd = 0; if (frames_bwd > half_gf_int) frames_bwd = half_gf_int; break; - case 2: // Forward filter + case 2: /* Forward filter */ if (frames_fwd > half_gf_int) frames_fwd = half_gf_int; if (frames_fwd > frames_after_arf) @@ -1979,7 +2063,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) frames_bwd = 0; break; - case 3: // Centered filter + case 3: /* Centered filter */ default: frames_fwd >>= 1; if (frames_fwd > frames_after_arf) @@ -1989,8 +2073,9 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) frames_bwd = frames_fwd; - // For even length filter there is one more frame backward - // than forward: e.g. len=6 ==> bbbAff, len=7 ==> bbbAfff. + /* For even length filter there is one more frame backward + * than forward: e.g. len=6 ==> bbbAff, len=7 ==> bbbAfff. 
+ */ if (frames_bwd < half_gf_int) frames_bwd += (cpi->oxcf.arnr_max_frames+1) & 0x1; break; @@ -2010,12 +2095,14 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) cpi->baseline_gf_interval = i; } - // Now decide how many bits should be allocated to the GF group as a - // proportion of those remaining in the kf group. - // The final key frame group in the clip is treated as a special case - // where cpi->twopass.kf_group_bits is tied to cpi->twopass.bits_left. - // This is also important for short clips where there may only be one - // key frame. + /* + * Now decide how many bits should be allocated to the GF group as a + * proportion of those remaining in the kf group. + * The final key frame group in the clip is treated as a special case + * where cpi->twopass.kf_group_bits is tied to cpi->twopass.bits_left. + * This is also important for short clips where there may only be one + * key frame. + */ if (cpi->twopass.frames_to_key >= (int)(cpi->twopass.total_stats.count - cpi->common.current_video_frame)) { @@ -2023,7 +2110,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) (cpi->twopass.bits_left > 0) ? cpi->twopass.bits_left : 0; } - // Calculate the bits to be allocated to the group as a whole + /* Calculate the bits to be allocated to the group as a whole */ if ((cpi->twopass.kf_group_bits > 0) && (cpi->twopass.kf_group_error_left > 0)) { @@ -2034,31 +2121,32 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) else cpi->twopass.gf_group_bits = 0; - cpi->twopass.gf_group_bits = + cpi->twopass.gf_group_bits = (int)( (cpi->twopass.gf_group_bits < 0) ? 0 : (cpi->twopass.gf_group_bits > cpi->twopass.kf_group_bits) - ? cpi->twopass.kf_group_bits : cpi->twopass.gf_group_bits; + ? 
cpi->twopass.kf_group_bits : cpi->twopass.gf_group_bits); - // Clip cpi->twopass.gf_group_bits based on user supplied data rate - // variability limit (cpi->oxcf.two_pass_vbrmax_section) + /* Clip cpi->twopass.gf_group_bits based on user supplied data rate + * variability limit (cpi->oxcf.two_pass_vbrmax_section) + */ if (cpi->twopass.gf_group_bits > max_bits * cpi->baseline_gf_interval) cpi->twopass.gf_group_bits = max_bits * cpi->baseline_gf_interval; - // Reset the file position + /* Reset the file position */ reset_fpf_position(cpi, start_pos); - // Update the record of error used so far (only done once per gf group) + /* Update the record of error used so far (only done once per gf group) */ cpi->twopass.modified_error_used += gf_group_err; - // Assign bits to the arf or gf. + /* Assign bits to the arf or gf. */ for (i = 0; i <= (cpi->source_alt_ref_pending && cpi->common.frame_type != KEY_FRAME); i++) { int Boost; int allocation_chunks; int Q = (cpi->oxcf.fixed_q < 0) ? cpi->last_q[INTER_FRAME] : cpi->oxcf.fixed_q; int gf_bits; - // For ARF frames + /* For ARF frames */ if (cpi->source_alt_ref_pending && i == 0) { #if NEW_BOOST @@ -2068,7 +2156,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) #endif Boost += (cpi->baseline_gf_interval * 50); - // Set max and minimum boost and hence minimum allocation + /* Set max and minimum boost and hence minimum allocation */ if (Boost > ((cpi->baseline_gf_interval + 1) * 200)) Boost = ((cpi->baseline_gf_interval + 1) * 200); else if (Boost < 125) @@ -2077,13 +2165,13 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) allocation_chunks = ((cpi->baseline_gf_interval + 1) * 100) + Boost; } - // Else for standard golden frames + /* Else for standard golden frames */ else { - // boost based on inter / intra ratio of subsequent frames + /* boost based on inter / intra ratio of subsequent frames */ Boost = (cpi->gfu_boost * GFQ_ADJUSTMENT) / 100; - // Set max and minimum boost 
and hence minimum allocation + /* Set max and minimum boost and hence minimum allocation */ if (Boost > (cpi->baseline_gf_interval * 150)) Boost = (cpi->baseline_gf_interval * 150); else if (Boost < 125) @@ -2093,22 +2181,24 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) (cpi->baseline_gf_interval * 100) + (Boost - 100); } - // Normalize Altboost and allocations chunck down to prevent overflow + /* Normalize Altboost and allocations chunck down to prevent overflow */ while (Boost > 1000) { Boost /= 2; allocation_chunks /= 2; } - // Calculate the number of bits to be spent on the gf or arf based on - // the boost number + /* Calculate the number of bits to be spent on the gf or arf based on + * the boost number + */ gf_bits = (int)((double)Boost * (cpi->twopass.gf_group_bits / (double)allocation_chunks)); - // If the frame that is to be boosted is simpler than the average for - // the gf/arf group then use an alternative calculation - // based on the error score of the frame itself + /* If the frame that is to be boosted is simpler than the average for + * the gf/arf group then use an alternative calculation + * based on the error score of the frame itself + */ if (mod_frame_err < gf_group_err / (double)cpi->baseline_gf_interval) { double alt_gf_grp_bits; @@ -2127,9 +2217,10 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) gf_bits = alt_gf_bits; } } - // Else if it is harder than other frames in the group make sure it at - // least receives an allocation in keeping with its relative error - // score, otherwise it may be worse off than an "un-boosted" frame + /* Else if it is harder than other frames in the group make sure it at + * least receives an allocation in keeping with its relative error + * score, otherwise it may be worse off than an "un-boosted" frame + */ else { int alt_gf_bits = @@ -2143,18 +2234,19 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) } } - // Apply an additional 
limit for CBR + /* Apply an additional limit for CBR */ if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { - if (cpi->twopass.gf_bits > (cpi->buffer_level >> 1)) - cpi->twopass.gf_bits = cpi->buffer_level >> 1; + if (cpi->twopass.gf_bits > (int)(cpi->buffer_level >> 1)) + cpi->twopass.gf_bits = (int)(cpi->buffer_level >> 1); } - // Dont allow a negative value for gf_bits + /* Dont allow a negative value for gf_bits */ if (gf_bits < 0) gf_bits = 0; - gf_bits += cpi->min_frame_bandwidth; // Add in minimum for a frame + /* Add in minimum for a frame */ + gf_bits += cpi->min_frame_bandwidth; if (i == 0) { @@ -2162,33 +2254,39 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) } if (i == 1 || (!cpi->source_alt_ref_pending && (cpi->common.frame_type != KEY_FRAME))) { - cpi->per_frame_bandwidth = gf_bits; // Per frame bit target for this frame + /* Per frame bit target for this frame */ + cpi->per_frame_bandwidth = gf_bits; } } { - // Adjust KF group bits and error remainin - cpi->twopass.kf_group_error_left -= gf_group_err; + /* Adjust KF group bits and error remainin */ + cpi->twopass.kf_group_error_left -= (int64_t)gf_group_err; cpi->twopass.kf_group_bits -= cpi->twopass.gf_group_bits; if (cpi->twopass.kf_group_bits < 0) cpi->twopass.kf_group_bits = 0; - // Note the error score left in the remaining frames of the group. - // For normal GFs we want to remove the error score for the first frame of the group (except in Key frame case where this has already happened) + /* Note the error score left in the remaining frames of the group. 
+ * For normal GFs we want to remove the error score for the first + * frame of the group (except in Key frame case where this has + * already happened) + */ if (!cpi->source_alt_ref_pending && cpi->common.frame_type != KEY_FRAME) - cpi->twopass.gf_group_error_left = gf_group_err - gf_first_frame_err; + cpi->twopass.gf_group_error_left = (int)(gf_group_err - + gf_first_frame_err); else - cpi->twopass.gf_group_error_left = gf_group_err; + cpi->twopass.gf_group_error_left = (int) gf_group_err; cpi->twopass.gf_group_bits -= cpi->twopass.gf_bits - cpi->min_frame_bandwidth; if (cpi->twopass.gf_group_bits < 0) cpi->twopass.gf_group_bits = 0; - // This condition could fail if there are two kfs very close together - // despite (MIN_GF_INTERVAL) and would cause a devide by 0 in the - // calculation of cpi->twopass.alt_extra_bits. + /* This condition could fail if there are two kfs very close together + * despite (MIN_GF_INTERVAL) and would cause a devide by 0 in the + * calculation of cpi->twopass.alt_extra_bits. 
+ */ if ( cpi->baseline_gf_interval >= 3 ) { #if NEW_BOOST @@ -2217,7 +2315,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) cpi->twopass.alt_extra_bits = 0; } - // Adjustments based on a measure of complexity of the section + /* Adjustments based on a measure of complexity of the section */ if (cpi->common.frame_type != KEY_FRAME) { FIRSTPASS_STATS sectionstats; @@ -2234,47 +2332,45 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) avg_stats(§ionstats); - cpi->twopass.section_intra_rating = - sectionstats.intra_error / - DOUBLE_DIVIDE_CHECK(sectionstats.coded_error); + cpi->twopass.section_intra_rating = (unsigned int) + (sectionstats.intra_error / + DOUBLE_DIVIDE_CHECK(sectionstats.coded_error)); Ratio = sectionstats.intra_error / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error); - //if( (Ratio > 11) ) //&& (sectionstats.pcnt_second_ref < .20) ) - //{ cpi->twopass.section_max_qfactor = 1.0 - ((Ratio - 10.0) * 0.025); if (cpi->twopass.section_max_qfactor < 0.80) cpi->twopass.section_max_qfactor = 0.80; - //} - //else - // cpi->twopass.section_max_qfactor = 1.0; - reset_fpf_position(cpi, start_pos); } } -// Allocate bits to a normal frame that is neither a gf an arf or a key frame. +/* Allocate bits to a normal frame that is neither a gf an arf or a key frame. 
*/ static void assign_std_frame_bits(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) { - int target_frame_size; // gf_group_error_left + int target_frame_size; double modified_err; - double err_fraction; // What portion of the remaining GF group error is used by this frame + double err_fraction; - int max_bits = frame_max_bits(cpi); // Max for a single frame + int max_bits = frame_max_bits(cpi); /* Max for a single frame */ - // Calculate modified prediction error used in bit allocation + /* Calculate modified prediction error used in bit allocation */ modified_err = calculate_modified_err(cpi, this_frame); + /* What portion of the remaining GF group error is used by this frame */ if (cpi->twopass.gf_group_error_left > 0) - err_fraction = modified_err / cpi->twopass.gf_group_error_left; // What portion of the remaining GF group error is used by this frame + err_fraction = modified_err / cpi->twopass.gf_group_error_left; else err_fraction = 0.0; - target_frame_size = (int)((double)cpi->twopass.gf_group_bits * err_fraction); // How many of those bits available for allocation should we give it? + /* How many of those bits available for allocation should we give it? */ + target_frame_size = (int)((double)cpi->twopass.gf_group_bits * err_fraction); - // Clip to target size to 0 - max_bits (or cpi->twopass.gf_group_bits) at the top end. + /* Clip to target size to 0 - max_bits (or cpi->twopass.gf_group_bits) + * at the top end. 
+ */ if (target_frame_size < 0) target_frame_size = 0; else @@ -2286,22 +2382,25 @@ static void assign_std_frame_bits(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) target_frame_size = cpi->twopass.gf_group_bits; } - cpi->twopass.gf_group_error_left -= modified_err; // Adjust error remaining - cpi->twopass.gf_group_bits -= target_frame_size; // Adjust bits remaining + /* Adjust error and bits remaining */ + cpi->twopass.gf_group_error_left -= (int)modified_err; + cpi->twopass.gf_group_bits -= target_frame_size; if (cpi->twopass.gf_group_bits < 0) cpi->twopass.gf_group_bits = 0; - target_frame_size += cpi->min_frame_bandwidth; // Add in the minimum number of bits that is set aside for every frame. + /* Add in the minimum number of bits that is set aside for every frame. */ + target_frame_size += cpi->min_frame_bandwidth; - // Every other frame gets a few extra bits + /* Every other frame gets a few extra bits */ if ( (cpi->common.frames_since_golden & 0x01) && (cpi->frames_till_gf_update_due > 0) ) { target_frame_size += cpi->twopass.alt_extra_bits; } - cpi->per_frame_bandwidth = target_frame_size; // Per frame bit target for this frame + /* Per frame bit target for this frame */ + cpi->per_frame_bandwidth = target_frame_size; } void vp8_second_pass(VP8_COMP *cpi) @@ -2330,20 +2429,25 @@ void vp8_second_pass(VP8_COMP *cpi) this_frame_intra_error = this_frame.intra_error; this_frame_coded_error = this_frame.coded_error; - // keyframe and section processing ! + /* keyframe and section processing ! */ if (cpi->twopass.frames_to_key == 0) { - // Define next KF group and assign bits to it + /* Define next KF group and assign bits to it */ vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame)); find_next_key_frame(cpi, &this_frame_copy); - // Special case: Error error_resilient_mode mode does not make much sense for two pass but with its current meaning but this code is designed to stop - // outlandish behaviour if someone does set it when using two pass. 
It effectively disables GF groups. - // This is temporary code till we decide what should really happen in this case. + /* Special case: Error error_resilient_mode mode does not make much + * sense for two pass but with its current meaning but this code is + * designed to stop outlandish behaviour if someone does set it when + * using two pass. It effectively disables GF groups. This is + * temporary code till we decide what should really happen in this + * case. + */ if (cpi->oxcf.error_resilient_mode) { - cpi->twopass.gf_group_bits = cpi->twopass.kf_group_bits; - cpi->twopass.gf_group_error_left = cpi->twopass.kf_group_error_left; + cpi->twopass.gf_group_bits = (int)cpi->twopass.kf_group_bits; + cpi->twopass.gf_group_error_left = + (int)cpi->twopass.kf_group_error_left; cpi->baseline_gf_interval = cpi->twopass.frames_to_key; cpi->frames_till_gf_update_due = cpi->baseline_gf_interval; cpi->source_alt_ref_pending = 0; @@ -2351,19 +2455,25 @@ void vp8_second_pass(VP8_COMP *cpi) } - // Is this a GF / ARF (Note that a KF is always also a GF) + /* Is this a GF / ARF (Note that a KF is always also a GF) */ if (cpi->frames_till_gf_update_due == 0) { - // Define next gf group and assign bits to it + /* Define next gf group and assign bits to it */ vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame)); define_gf_group(cpi, &this_frame_copy); - // If we are going to code an altref frame at the end of the group and the current frame is not a key frame.... - // If the previous group used an arf this frame has already benefited from that arf boost and it should not be given extra bits - // If the previous group was NOT coded using arf we may want to apply some boost to this GF as well + /* If we are going to code an altref frame at the end of the group + * and the current frame is not a key frame.... 
If the previous + * group used an arf this frame has already benefited from that arf + * boost and it should not be given extra bits If the previous + * group was NOT coded using arf we may want to apply some boost to + * this GF as well + */ if (cpi->source_alt_ref_pending && (cpi->common.frame_type != KEY_FRAME)) { - // Assign a standard frames worth of bits from those allocated to the GF group + /* Assign a standard frames worth of bits from those allocated + * to the GF group + */ int bak = cpi->per_frame_bandwidth; vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame)); assign_std_frame_bits(cpi, &this_frame_copy); @@ -2371,59 +2481,64 @@ void vp8_second_pass(VP8_COMP *cpi) } } - // Otherwise this is an ordinary frame + /* Otherwise this is an ordinary frame */ else { - // Special case: Error error_resilient_mode mode does not make much sense for two pass but with its current meaning but this code is designed to stop - // outlandish behaviour if someone does set it when using two pass. It effectively disables GF groups. - // This is temporary code till we decide what should really happen in this case. + /* Special case: Error error_resilient_mode mode does not make much + * sense for two pass but with its current meaning but this code is + * designed to stop outlandish behaviour if someone does set it + * when using two pass. It effectively disables GF groups. This is + * temporary code till we decide what should really happen in this + * case. 
+ */ if (cpi->oxcf.error_resilient_mode) { cpi->frames_till_gf_update_due = cpi->twopass.frames_to_key; if (cpi->common.frame_type != KEY_FRAME) { - // Assign bits from those allocated to the GF group + /* Assign bits from those allocated to the GF group */ vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame)); assign_std_frame_bits(cpi, &this_frame_copy); } } else { - // Assign bits from those allocated to the GF group + /* Assign bits from those allocated to the GF group */ vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame)); assign_std_frame_bits(cpi, &this_frame_copy); } } - // Keep a globally available copy of this and the next frame's iiratio. - cpi->twopass.this_iiratio = this_frame_intra_error / - DOUBLE_DIVIDE_CHECK(this_frame_coded_error); + /* Keep a globally available copy of this and the next frame's iiratio. */ + cpi->twopass.this_iiratio = (unsigned int)(this_frame_intra_error / + DOUBLE_DIVIDE_CHECK(this_frame_coded_error)); { FIRSTPASS_STATS next_frame; if ( lookup_next_frame_stats(cpi, &next_frame) != EOF ) { - cpi->twopass.next_iiratio = next_frame.intra_error / - DOUBLE_DIVIDE_CHECK(next_frame.coded_error); + cpi->twopass.next_iiratio = (unsigned int)(next_frame.intra_error / + DOUBLE_DIVIDE_CHECK(next_frame.coded_error)); } } - // Set nominal per second bandwidth for this frame - cpi->target_bandwidth = cpi->per_frame_bandwidth * cpi->output_frame_rate; + /* Set nominal per second bandwidth for this frame */ + cpi->target_bandwidth = (int) + (cpi->per_frame_bandwidth * cpi->output_frame_rate); if (cpi->target_bandwidth < 0) cpi->target_bandwidth = 0; - // Account for mv, mode and other overheads. - overhead_bits = estimate_modemvcost( + /* Account for mv, mode and other overheads. */ + overhead_bits = (int)estimate_modemvcost( cpi, &cpi->twopass.total_left_stats ); - // Special case code for first frame. + /* Special case code for first frame. 
*/ if (cpi->common.current_video_frame == 0) { cpi->twopass.est_max_qcorrection_factor = 1.0; - // Set a cq_level in constrained quality mode. + /* Set a cq_level in constrained quality mode. */ if ( cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY ) { int est_cq; @@ -2439,7 +2554,7 @@ void vp8_second_pass(VP8_COMP *cpi) cpi->cq_target_quality = est_cq; } - // guess at maxq needed in 2nd pass + /* guess at maxq needed in 2nd pass */ cpi->twopass.maxq_max_limit = cpi->worst_quality; cpi->twopass.maxq_min_limit = cpi->best_quality; @@ -2449,11 +2564,12 @@ void vp8_second_pass(VP8_COMP *cpi) (int)(cpi->twopass.bits_left / frames_left), overhead_bits ); - // Limit the maxq value returned subsequently. - // This increases the risk of overspend or underspend if the initial - // estimate for the clip is bad, but helps prevent excessive - // variation in Q, especially near the end of a clip - // where for example a small overspend may cause Q to crash + /* Limit the maxq value returned subsequently. + * This increases the risk of overspend or underspend if the initial + * estimate for the clip is bad, but helps prevent excessive + * variation in Q, especially near the end of a clip + * where for example a small overspend may cause Q to crash + */ cpi->twopass.maxq_max_limit = ((tmp_q + 32) < cpi->worst_quality) ? (tmp_q + 32) : cpi->worst_quality; cpi->twopass.maxq_min_limit = ((tmp_q - 32) > cpi->best_quality) @@ -2463,10 +2579,11 @@ void vp8_second_pass(VP8_COMP *cpi) cpi->ni_av_qi = tmp_q; } - // The last few frames of a clip almost always have to few or too many - // bits and for the sake of over exact rate control we dont want to make - // radical adjustments to the allowed quantizer range just to use up a - // few surplus bits or get beneath the target rate. 
+ /* The last few frames of a clip almost always have to few or too many + * bits and for the sake of over exact rate control we dont want to make + * radical adjustments to the allowed quantizer range just to use up a + * few surplus bits or get beneath the target rate. + */ else if ( (cpi->common.current_video_frame < (((unsigned int)cpi->twopass.total_stats.count * 255)>>8)) && ((cpi->common.current_video_frame + cpi->baseline_gf_interval) < @@ -2481,7 +2598,7 @@ void vp8_second_pass(VP8_COMP *cpi) (int)(cpi->twopass.bits_left / frames_left), overhead_bits ); - // Move active_worst_quality but in a damped way + /* Move active_worst_quality but in a damped way */ if (tmp_q > cpi->active_worst_quality) cpi->active_worst_quality ++; else if (tmp_q < cpi->active_worst_quality) @@ -2493,7 +2610,7 @@ void vp8_second_pass(VP8_COMP *cpi) cpi->twopass.frames_to_key --; - // Update the total stats remaining sturcture + /* Update the total stats remaining sturcture */ subtract_stats(&cpi->twopass.total_left_stats, &this_frame ); } @@ -2502,8 +2619,9 @@ static int test_candidate_kf(VP8_COMP *cpi, FIRSTPASS_STATS *last_frame, FIRSTP { int is_viable_kf = 0; - // Does the frame satisfy the primary criteria of a key frame - // If so, then examine how well it predicts subsequent frames + /* Does the frame satisfy the primary criteria of a key frame + * If so, then examine how well it predicts subsequent frames + */ if ((this_frame->pcnt_second_ref < 0.10) && (next_frame->pcnt_second_ref < 0.10) && ((this_frame->pcnt_inter < 0.05) || @@ -2530,10 +2648,10 @@ static int test_candidate_kf(VP8_COMP *cpi, FIRSTPASS_STATS *last_frame, FIRSTP vpx_memcpy(&local_next_frame, next_frame, sizeof(*next_frame)); - // Note the starting file position so we can reset to it + /* Note the starting file position so we can reset to it */ start_pos = cpi->twopass.stats_in; - // Examine how well the key frame predicts subsequent frames + /* Examine how well the key frame predicts subsequent frames */ 
for (i = 0 ; i < 16; i++) { next_iiratio = (IIKFACTOR1 * local_next_frame.intra_error / DOUBLE_DIVIDE_CHECK(local_next_frame.coded_error)) ; @@ -2541,18 +2659,16 @@ static int test_candidate_kf(VP8_COMP *cpi, FIRSTPASS_STATS *last_frame, FIRSTP if (next_iiratio > RMAX) next_iiratio = RMAX; - // Cumulative effect of decay in prediction quality + /* Cumulative effect of decay in prediction quality */ if (local_next_frame.pcnt_inter > 0.85) decay_accumulator = decay_accumulator * local_next_frame.pcnt_inter; else decay_accumulator = decay_accumulator * ((0.85 + local_next_frame.pcnt_inter) / 2.0); - //decay_accumulator = decay_accumulator * local_next_frame.pcnt_inter; - - // Keep a running total + /* Keep a running total */ boost_score += (decay_accumulator * next_iiratio); - // Test various breakout clauses + /* Test various breakout clauses */ if ((local_next_frame.pcnt_inter < 0.05) || (next_iiratio < 1.5) || (((local_next_frame.pcnt_inter - @@ -2567,17 +2683,19 @@ static int test_candidate_kf(VP8_COMP *cpi, FIRSTPASS_STATS *last_frame, FIRSTP old_boost_score = boost_score; - // Get the next frame details + /* Get the next frame details */ if (EOF == input_stats(cpi, &local_next_frame)) break; } - // If there is tolerable prediction for at least the next 3 frames then break out else discard this pottential key frame and move on + /* If there is tolerable prediction for at least the next 3 frames + * then break out else discard this pottential key frame and move on + */ if (boost_score > 5.0 && (i > 3)) is_viable_kf = 1; else { - // Reset the file position + /* Reset the file position */ reset_fpf_position(cpi, start_pos); is_viable_kf = 0; @@ -2605,65 +2723,71 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) double kf_group_coded_err = 0.0; double recent_loop_decay[8] = {1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0}; - vpx_memset(&next_frame, 0, sizeof(next_frame)); // assure clean + vpx_memset(&next_frame, 0, sizeof(next_frame)); - 
vp8_clear_system_state(); //__asm emms; + vp8_clear_system_state(); start_position = cpi->twopass.stats_in; cpi->common.frame_type = KEY_FRAME; - // is this a forced key frame by interval + /* is this a forced key frame by interval */ cpi->this_key_frame_forced = cpi->next_key_frame_forced; - // Clear the alt ref active flag as this can never be active on a key frame + /* Clear the alt ref active flag as this can never be active on a key + * frame + */ cpi->source_alt_ref_active = 0; - // Kf is always a gf so clear frames till next gf counter + /* Kf is always a gf so clear frames till next gf counter */ cpi->frames_till_gf_update_due = 0; cpi->twopass.frames_to_key = 1; - // Take a copy of the initial frame details + /* Take a copy of the initial frame details */ vpx_memcpy(&first_frame, this_frame, sizeof(*this_frame)); - cpi->twopass.kf_group_bits = 0; // Total bits avaialable to kf group - cpi->twopass.kf_group_error_left = 0; // Group modified error score. + cpi->twopass.kf_group_bits = 0; + cpi->twopass.kf_group_error_left = 0; kf_mod_err = calculate_modified_err(cpi, this_frame); - // find the next keyframe + /* find the next keyframe */ i = 0; while (cpi->twopass.stats_in < cpi->twopass.stats_in_end) { - // Accumulate kf group error + /* Accumulate kf group error */ kf_group_err += calculate_modified_err(cpi, this_frame); - // These figures keep intra and coded error counts for all frames including key frames in the group. - // The effect of the key frame itself can be subtracted out using the first_frame data collected above + /* These figures keep intra and coded error counts for all frames + * including key frames in the group. 
The effect of the key frame + * itself can be subtracted out using the first_frame data + * collected above + */ kf_group_intra_err += this_frame->intra_error; kf_group_coded_err += this_frame->coded_error; - // load a the next frame's stats + /* load a the next frame's stats */ vpx_memcpy(&last_frame, this_frame, sizeof(*this_frame)); input_stats(cpi, this_frame); - // Provided that we are not at the end of the file... + /* Provided that we are not at the end of the file... */ if (cpi->oxcf.auto_key && lookup_next_frame_stats(cpi, &next_frame) != EOF) { - // Normal scene cut check + /* Normal scene cut check */ if ( ( i >= MIN_GF_INTERVAL ) && test_candidate_kf(cpi, &last_frame, this_frame, &next_frame) ) { break; } - // How fast is prediction quality decaying + /* How fast is prediction quality decaying */ loop_decay_rate = get_prediction_decay_rate(cpi, &next_frame); - // We want to know something about the recent past... rather than - // as used elsewhere where we are concened with decay in prediction - // quality since the last GF or KF. + /* We want to know something about the recent past... rather than + * as used elsewhere where we are concened with decay in prediction + * quality since the last GF or KF. + */ recent_loop_decay[i%8] = loop_decay_rate; decay_accumulator = 1.0; for (j = 0; j < 8; j++) @@ -2671,8 +2795,9 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) decay_accumulator = decay_accumulator * recent_loop_decay[j]; } - // Special check for transition or high motion followed by a - // to a static scene. + /* Special check for transition or high motion followed by a + * static scene. 
+ */ if ( detect_transition_to_still( cpi, i, (cpi->key_frame_frequency-i), loop_decay_rate, @@ -2682,11 +2807,12 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) } - // Step on to the next frame + /* Step on to the next frame */ cpi->twopass.frames_to_key ++; - // If we don't have a real key frame within the next two - // forcekeyframeevery intervals then break out of the loop. + /* If we don't have a real key frame within the next two + * forcekeyframeevery intervals then break out of the loop. + */ if (cpi->twopass.frames_to_key >= 2 *(int)cpi->key_frame_frequency) break; } else @@ -2695,10 +2821,11 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) i++; } - // If there is a max kf interval set by the user we must obey it. - // We already breakout of the loop above at 2x max. - // This code centers the extra kf if the actual natural - // interval is between 1x and 2x + /* If there is a max kf interval set by the user we must obey it. + * We already breakout of the loop above at 2x max. 
+ * This code centers the extra kf if the actual natural + * interval is between 1x and 2x + */ if (cpi->oxcf.auto_key && cpi->twopass.frames_to_key > (int)cpi->key_frame_frequency ) { @@ -2707,29 +2834,29 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) cpi->twopass.frames_to_key /= 2; - // Copy first frame details + /* Copy first frame details */ vpx_memcpy(&tmp_frame, &first_frame, sizeof(first_frame)); - // Reset to the start of the group + /* Reset to the start of the group */ reset_fpf_position(cpi, start_position); kf_group_err = 0; kf_group_intra_err = 0; kf_group_coded_err = 0; - // Rescan to get the correct error data for the forced kf group + /* Rescan to get the correct error data for the forced kf group */ for( i = 0; i < cpi->twopass.frames_to_key; i++ ) { - // Accumulate kf group errors + /* Accumulate kf group errors */ kf_group_err += calculate_modified_err(cpi, &tmp_frame); kf_group_intra_err += tmp_frame.intra_error; kf_group_coded_err += tmp_frame.coded_error; - // Load a the next frame's stats + /* Load a the next frame's stats */ input_stats(cpi, &tmp_frame); } - // Reset to the start of the group + /* Reset to the start of the group */ reset_fpf_position(cpi, current_pos); cpi->next_key_frame_forced = 1; @@ -2737,58 +2864,63 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) else cpi->next_key_frame_forced = 0; - // Special case for the last frame of the file + /* Special case for the last frame of the file */ if (cpi->twopass.stats_in >= cpi->twopass.stats_in_end) { - // Accumulate kf group error + /* Accumulate kf group error */ kf_group_err += calculate_modified_err(cpi, this_frame); - // These figures keep intra and coded error counts for all frames including key frames in the group. 
- // The effect of the key frame itself can be subtracted out using the first_frame data collected above + /* These figures keep intra and coded error counts for all frames + * including key frames in the group. The effect of the key frame + * itself can be subtracted out using the first_frame data + * collected above + */ kf_group_intra_err += this_frame->intra_error; kf_group_coded_err += this_frame->coded_error; } - // Calculate the number of bits that should be assigned to the kf group. + /* Calculate the number of bits that should be assigned to the kf group. */ if ((cpi->twopass.bits_left > 0) && (cpi->twopass.modified_error_left > 0.0)) { - // Max for a single normal frame (not key frame) + /* Max for a single normal frame (not key frame) */ int max_bits = frame_max_bits(cpi); - // Maximum bits for the kf group + /* Maximum bits for the kf group */ int64_t max_grp_bits; - // Default allocation based on bits left and relative - // complexity of the section + /* Default allocation based on bits left and relative + * complexity of the section + */ cpi->twopass.kf_group_bits = (int64_t)( cpi->twopass.bits_left * ( kf_group_err / cpi->twopass.modified_error_left )); - // Clip based on maximum per frame rate defined by the user. + /* Clip based on maximum per frame rate defined by the user. */ max_grp_bits = (int64_t)max_bits * (int64_t)cpi->twopass.frames_to_key; if (cpi->twopass.kf_group_bits > max_grp_bits) cpi->twopass.kf_group_bits = max_grp_bits; - // Additional special case for CBR if buffer is getting full. + /* Additional special case for CBR if buffer is getting full. */ if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { - int opt_buffer_lvl = cpi->oxcf.optimal_buffer_level; - int buffer_lvl = cpi->buffer_level; + int64_t opt_buffer_lvl = cpi->oxcf.optimal_buffer_level; + int64_t buffer_lvl = cpi->buffer_level; - // If the buffer is near or above the optimal and this kf group is - // not being allocated much then increase the allocation a bit. 
+ /* If the buffer is near or above the optimal and this kf group is + * not being allocated much then increase the allocation a bit. + */ if (buffer_lvl >= opt_buffer_lvl) { - int high_water_mark = (opt_buffer_lvl + + int64_t high_water_mark = (opt_buffer_lvl + cpi->oxcf.maximum_buffer_size) >> 1; int64_t av_group_bits; - // Av bits per frame * number of frames + /* Av bits per frame * number of frames */ av_group_bits = (int64_t)cpi->av_per_frame_bandwidth * (int64_t)cpi->twopass.frames_to_key; - // We are at or above the maximum. + /* We are at or above the maximum. */ if (cpi->buffer_level >= high_water_mark) { int64_t min_group_bits; @@ -2800,7 +2932,7 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) if (cpi->twopass.kf_group_bits < min_group_bits) cpi->twopass.kf_group_bits = min_group_bits; } - // We are above optimal but below the maximum + /* We are above optimal but below the maximum */ else if (cpi->twopass.kf_group_bits < av_group_bits) { int64_t bits_below_av = av_group_bits - @@ -2817,13 +2949,15 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) else cpi->twopass.kf_group_bits = 0; - // Reset the first pass file position + /* Reset the first pass file position */ reset_fpf_position(cpi, start_position); - // determine how big to make this keyframe based on how well the subsequent frames use inter blocks + /* determine how big to make this keyframe based on how well the + * subsequent frames use inter blocks + */ decay_accumulator = 1.0; boost_score = 0.0; - loop_decay_rate = 1.00; // Starting decay rate + loop_decay_rate = 1.00; /* Starting decay rate */ for (i = 0 ; i < cpi->twopass.frames_to_key ; i++) { @@ -2842,7 +2976,7 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) if (r > RMAX) r = RMAX; - // How fast is prediction quality decaying + /* How fast is prediction quality decaying */ loop_decay_rate = get_prediction_decay_rate(cpi, &next_frame); 
decay_accumulator = decay_accumulator * loop_decay_rate; @@ -2875,31 +3009,26 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) avg_stats(§ionstats); - cpi->twopass.section_intra_rating = - sectionstats.intra_error - / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error); + cpi->twopass.section_intra_rating = (unsigned int) + (sectionstats.intra_error + / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error)); Ratio = sectionstats.intra_error / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error); - // if( (Ratio > 11) ) //&& (sectionstats.pcnt_second_ref < .20) ) - //{ cpi->twopass.section_max_qfactor = 1.0 - ((Ratio - 10.0) * 0.025); if (cpi->twopass.section_max_qfactor < 0.80) cpi->twopass.section_max_qfactor = 0.80; - - //} - //else - // cpi->twopass.section_max_qfactor = 1.0; } - // When using CBR apply additional buffer fullness related upper limits + /* When using CBR apply additional buffer fullness related upper limits */ if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { double max_boost; if (cpi->drop_frames_allowed) { - int df_buffer_level = cpi->oxcf.drop_frames_water_mark * (cpi->oxcf.optimal_buffer_level / 100); + int df_buffer_level = (int)(cpi->oxcf.drop_frames_water_mark + * (cpi->oxcf.optimal_buffer_level / 100)); if (cpi->buffer_level > df_buffer_level) max_boost = ((double)((cpi->buffer_level - df_buffer_level) * 2 / 3) * 16.0) / DOUBLE_DIVIDE_CHECK((double)cpi->av_per_frame_bandwidth); @@ -2919,18 +3048,18 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) boost_score = max_boost; } - // Reset the first pass file position + /* Reset the first pass file position */ reset_fpf_position(cpi, start_position); - // Work out how many bits to allocate for the key frame itself + /* Work out how many bits to allocate for the key frame itself */ if (1) { - int kf_boost = boost_score; + int kf_boost = (int)boost_score; int allocation_chunks; int Counter = cpi->twopass.frames_to_key; int alt_kf_bits; 
YV12_BUFFER_CONFIG *lst_yv12 = &cpi->common.yv12_fb[cpi->common.lst_fb_idx]; - // Min boost based on kf interval + /* Min boost based on kf interval */ #if 0 while ((kf_boost < 48) && (Counter > 0)) @@ -2948,32 +3077,33 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) if (kf_boost > 48) kf_boost = 48; } - // bigger frame sizes need larger kf boosts, smaller frames smaller boosts... + /* bigger frame sizes need larger kf boosts, smaller frames smaller + * boosts... + */ if ((lst_yv12->y_width * lst_yv12->y_height) > (320 * 240)) kf_boost += 2 * (lst_yv12->y_width * lst_yv12->y_height) / (320 * 240); else if ((lst_yv12->y_width * lst_yv12->y_height) < (320 * 240)) kf_boost -= 4 * (320 * 240) / (lst_yv12->y_width * lst_yv12->y_height); - kf_boost = (int)((double)kf_boost * 100.0) >> 4; // Scale 16 to 100 - - // Adjustment to boost based on recent average q - //kf_boost = kf_boost * vp8_kf_boost_qadjustment[cpi->ni_av_qi] / 100; - - if (kf_boost < 250) // Min KF boost + /* Min KF boost */ + kf_boost = (int)((double)kf_boost * 100.0) >> 4; /* Scale 16 to 100 */ + if (kf_boost < 250) kf_boost = 250; - // We do three calculations for kf size. - // The first is based on the error score for the whole kf group. - // The second (optionaly) on the key frames own error if this is - // smaller than the average for the group. - // The final one insures that the frame receives at least the - // allocation it would have received based on its own error score vs - // the error score remaining - // Special case if the sequence appears almost totaly static - // as measured by the decay accumulator. In this case we want to - // spend almost all of the bits on the key frame. - // cpi->twopass.frames_to_key-1 because key frame itself is taken - // care of by kf_boost. + /* + * We do three calculations for kf size. + * The first is based on the error score for the whole kf group. 
+ * The second (optionaly) on the key frames own error if this is + * smaller than the average for the group. + * The final one insures that the frame receives at least the + * allocation it would have received based on its own error score vs + * the error score remaining + * Special case if the sequence appears almost totaly static + * as measured by the decay accumulator. In this case we want to + * spend almost all of the bits on the key frame. + * cpi->twopass.frames_to_key-1 because key frame itself is taken + * care of by kf_boost. + */ if ( decay_accumulator >= 0.99 ) { allocation_chunks = @@ -2985,7 +3115,7 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) ((cpi->twopass.frames_to_key - 1) * 100) + kf_boost; } - // Normalize Altboost and allocations chunck down to prevent overflow + /* Normalize Altboost and allocations chunck down to prevent overflow */ while (kf_boost > 1000) { kf_boost /= 2; @@ -2994,20 +3124,21 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) cpi->twopass.kf_group_bits = (cpi->twopass.kf_group_bits < 0) ? 0 : cpi->twopass.kf_group_bits; - // Calculate the number of bits to be spent on the key frame + /* Calculate the number of bits to be spent on the key frame */ cpi->twopass.kf_bits = (int)((double)kf_boost * ((double)cpi->twopass.kf_group_bits / (double)allocation_chunks)); - // Apply an additional limit for CBR + /* Apply an additional limit for CBR */ if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { - if (cpi->twopass.kf_bits > ((3 * cpi->buffer_level) >> 2)) - cpi->twopass.kf_bits = (3 * cpi->buffer_level) >> 2; + if (cpi->twopass.kf_bits > (int)((3 * cpi->buffer_level) >> 2)) + cpi->twopass.kf_bits = (int)((3 * cpi->buffer_level) >> 2); } - // If the key frame is actually easier than the average for the - // kf group (which does sometimes happen... 
eg a blank intro frame) - // Then use an alternate calculation based on the kf error score - // which should give a smaller key frame. + /* If the key frame is actually easier than the average for the + * kf group (which does sometimes happen... eg a blank intro frame) + * Then use an alternate calculation based on the kf error score + * which should give a smaller key frame. + */ if (kf_mod_err < kf_group_err / cpi->twopass.frames_to_key) { double alt_kf_grp_bits = @@ -3023,9 +3154,10 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) cpi->twopass.kf_bits = alt_kf_bits; } } - // Else if it is much harder than other frames in the group make sure - // it at least receives an allocation in keeping with its relative - // error score + /* Else if it is much harder than other frames in the group make sure + * it at least receives an allocation in keeping with its relative + * error score + */ else { alt_kf_bits = @@ -3040,17 +3172,23 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) } cpi->twopass.kf_group_bits -= cpi->twopass.kf_bits; - cpi->twopass.kf_bits += cpi->min_frame_bandwidth; // Add in the minimum frame allowance + /* Add in the minimum frame allowance */ + cpi->twopass.kf_bits += cpi->min_frame_bandwidth; + + /* Peer frame bit target for this frame */ + cpi->per_frame_bandwidth = cpi->twopass.kf_bits; - cpi->per_frame_bandwidth = cpi->twopass.kf_bits; // Peer frame bit target for this frame - cpi->target_bandwidth = cpi->twopass.kf_bits * cpi->output_frame_rate; // Convert to a per second bitrate + /* Convert to a per second bitrate */ + cpi->target_bandwidth = (int)(cpi->twopass.kf_bits * + cpi->output_frame_rate); } - // Note the total error score of the kf group minus the key frame itself + /* Note the total error score of the kf group minus the key frame itself */ cpi->twopass.kf_group_error_left = (int)(kf_group_err - kf_mod_err); - // Adjust the count of total modified error left. 
- // The count of bits left is adjusted elsewhere based on real coded frame sizes + /* Adjust the count of total modified error left. The count of bits left + * is adjusted elsewhere based on real coded frame sizes + */ cpi->twopass.modified_error_left -= kf_group_err; if (cpi->oxcf.allow_spatial_resampling) @@ -3063,7 +3201,7 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) int new_width = cpi->oxcf.Width; int new_height = cpi->oxcf.Height; - int projected_buffer_level = cpi->buffer_level; + int projected_buffer_level = (int)cpi->buffer_level; int tmp_q; double projected_bits_perframe; @@ -3076,40 +3214,47 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) if ((cpi->common.Width != cpi->oxcf.Width) || (cpi->common.Height != cpi->oxcf.Height)) last_kf_resampled = 1; - // Set back to unscaled by defaults + /* Set back to unscaled by defaults */ cpi->common.horiz_scale = NORMAL; cpi->common.vert_scale = NORMAL; - // Calculate Average bits per frame. - //av_bits_per_frame = cpi->twopass.bits_left/(double)(cpi->twopass.total_stats.count - cpi->common.current_video_frame); + /* Calculate Average bits per frame. */ av_bits_per_frame = cpi->oxcf.target_bandwidth / DOUBLE_DIVIDE_CHECK((double)cpi->frame_rate); - //if ( av_bits_per_frame < 0.0 ) - // av_bits_per_frame = 0.0 - // CBR... Use the clip average as the target for deciding resample + /* CBR... 
Use the clip average as the target for deciding resample */ if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { bits_per_frame = av_bits_per_frame; } - // In VBR we want to avoid downsampling in easy section unless we are under extreme pressure - // So use the larger of target bitrate for this sectoion or average bitrate for sequence + /* In VBR we want to avoid downsampling in easy section unless we + * are under extreme pressure So use the larger of target bitrate + * for this section or average bitrate for sequence + */ else { - bits_per_frame = cpi->twopass.kf_group_bits / cpi->twopass.frames_to_key; // This accounts for how hard the section is... + /* This accounts for how hard the section is... */ + bits_per_frame = (double) + (cpi->twopass.kf_group_bits / cpi->twopass.frames_to_key); - if (bits_per_frame < av_bits_per_frame) // Dont turn to resampling in easy sections just because they have been assigned a small number of bits + /* Dont turn to resampling in easy sections just because they + * have been assigned a small number of bits + */ + if (bits_per_frame < av_bits_per_frame) bits_per_frame = av_bits_per_frame; } - // bits_per_frame should comply with our minimum + /* bits_per_frame should comply with our minimum */ if (bits_per_frame < (cpi->oxcf.target_bandwidth * cpi->oxcf.two_pass_vbrmin_section / 100)) bits_per_frame = (cpi->oxcf.target_bandwidth * cpi->oxcf.two_pass_vbrmin_section / 100); - // Work out if spatial resampling is necessary - kf_q = estimate_kf_group_q(cpi, err_per_frame, bits_per_frame, group_iiratio); + /* Work out if spatial resampling is necessary */ + kf_q = estimate_kf_group_q(cpi, err_per_frame, + (int)bits_per_frame, group_iiratio); - // If we project a required Q higher than the maximum allowed Q then make a guess at the actual size of frames in this section + /* If we project a required Q higher than the maximum allowed Q then + * make a guess at the actual size of frames in this section + */ projected_bits_perframe = 
bits_per_frame; tmp_q = kf_q; @@ -3119,8 +3264,11 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) tmp_q--; } - // Guess at buffer level at the end of the section - projected_buffer_level = cpi->buffer_level - (int)((projected_bits_perframe - av_bits_per_frame) * cpi->twopass.frames_to_key); + /* Guess at buffer level at the end of the section */ + projected_buffer_level = (int) + (cpi->buffer_level - (int) + ((projected_bits_perframe - av_bits_per_frame) * + cpi->twopass.frames_to_key)); if (0) { @@ -3129,15 +3277,17 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) fclose(f); } - // The trigger for spatial resampling depends on the various parameters such as whether we are streaming (CBR) or VBR. + /* The trigger for spatial resampling depends on the various + * parameters such as whether we are streaming (CBR) or VBR. + */ if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { - // Trigger resample if we are projected to fall below down sample level or - // resampled last time and are projected to remain below the up sample level + /* Trigger resample if we are projected to fall below down + * sample level or resampled last time and are projected to + * remain below the up sample level + */ if ((projected_buffer_level < (cpi->oxcf.resample_down_water_mark * cpi->oxcf.optimal_buffer_level / 100)) || (last_kf_resampled && (projected_buffer_level < (cpi->oxcf.resample_up_water_mark * cpi->oxcf.optimal_buffer_level / 100)))) - //( ((cpi->buffer_level < (cpi->oxcf.resample_down_water_mark * cpi->oxcf.optimal_buffer_level / 100))) && - // ((projected_buffer_level < (cpi->oxcf.resample_up_water_mark * cpi->oxcf.optimal_buffer_level / 100))) )) resample_trigger = 1; else resample_trigger = 0; @@ -3147,9 +3297,15 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) int64_t clip_bits = (int64_t)(cpi->twopass.total_stats.count * cpi->oxcf.target_bandwidth / 
DOUBLE_DIVIDE_CHECK((double)cpi->frame_rate)); int64_t over_spend = cpi->oxcf.starting_buffer_level - cpi->buffer_level; - if ((last_kf_resampled && (kf_q > cpi->worst_quality)) || // If triggered last time the threshold for triggering again is reduced - ((kf_q > cpi->worst_quality) && // Projected Q higher than allowed and ... - (over_spend > clip_bits / 20))) // ... Overspend > 5% of total bits + /* If triggered last time the threshold for triggering again is + * reduced: + * + * Projected Q higher than allowed and Overspend > 5% of total + * bits + */ + if ((last_kf_resampled && (kf_q > cpi->worst_quality)) || + ((kf_q > cpi->worst_quality) && + (over_spend > clip_bits / 20))) resample_trigger = 1; else resample_trigger = 0; @@ -3171,13 +3327,19 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) new_width = ((hs - 1) + (cpi->oxcf.Width * hr)) / hs; new_height = ((vs - 1) + (cpi->oxcf.Height * vr)) / vs; - // Reducing the area to 1/4 does not reduce the complexity (err_per_frame) to 1/4... - // effective_sizeratio attempts to provide a crude correction for this + /* Reducing the area to 1/4 does not reduce the complexity + * (err_per_frame) to 1/4... 
effective_sizeratio attempts + * to provide a crude correction for this + */ effective_size_ratio = (double)(new_width * new_height) / (double)(cpi->oxcf.Width * cpi->oxcf.Height); effective_size_ratio = (1.0 + (3.0 * effective_size_ratio)) / 4.0; - // Now try again and see what Q we get with the smaller image size - kf_q = estimate_kf_group_q(cpi, err_per_frame * effective_size_ratio, bits_per_frame, group_iiratio); + /* Now try again and see what Q we get with the smaller + * image size + */ + kf_q = estimate_kf_group_q(cpi, + err_per_frame * effective_size_ratio, + (int)bits_per_frame, group_iiratio); if (0) { diff --git a/vp8/encoder/lookahead.c b/vp8/encoder/lookahead.c index 4c92281..ce2ce08 100644 --- a/vp8/encoder/lookahead.c +++ b/vp8/encoder/lookahead.c @@ -118,10 +118,11 @@ vp8_lookahead_push(struct lookahead_ctx *ctx, ctx->sz++; buf = pop(ctx, &ctx->write_idx); - // Only do this partial copy if the following conditions are all met: - // 1. Lookahead queue has has size of 1. - // 2. Active map is provided. - // 3. This is not a key frame, golden nor altref frame. + /* Only do this partial copy if the following conditions are all met: + * 1. Lookahead queue has has size of 1. + * 2. Active map is provided. + * 3. This is not a key frame, golden nor altref frame. + */ if (ctx->max_sz == 1 && active_map && !flags) { for (row = 0; row < mb_rows; ++row) @@ -130,18 +131,18 @@ vp8_lookahead_push(struct lookahead_ctx *ctx, while (1) { - // Find the first active macroblock in this row. + /* Find the first active macroblock in this row. */ for (; col < mb_cols; ++col) { if (active_map[col]) break; } - // No more active macroblock in this row. + /* No more active macroblock in this row. */ if (col == mb_cols) break; - // Find the end of active region in this row. + /* Find the end of active region in this row. 
*/ active_end = col; for (; active_end < mb_cols; ++active_end) @@ -150,13 +151,13 @@ vp8_lookahead_push(struct lookahead_ctx *ctx, break; } - // Only copy this active region. + /* Only copy this active region. */ vp8_copy_and_extend_frame_with_rect(src, &buf->img, row << 4, col << 4, 16, (active_end - col) << 4); - // Start again from the end of this active region. + /* Start again from the end of this active region. */ col = active_end; } diff --git a/vp8/encoder/mcomp.c b/vp8/encoder/mcomp.c index 67e4f7e..b08c7a5 100644 --- a/vp8/encoder/mcomp.c +++ b/vp8/encoder/mcomp.c @@ -25,26 +25,35 @@ static int mv_mode_cts [4] [2]; int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int Weight) { - // MV costing is based on the distribution of vectors in the previous frame and as such will tend to - // over state the cost of vectors. In addition coding a new vector can have a knock on effect on the - // cost of subsequent vectors and the quality of prediction from NEAR and NEAREST for subsequent blocks. - // The "Weight" parameter allows, to a limited extent, for some account to be taken of these factors. + /* MV costing is based on the distribution of vectors in the previous + * frame and as such will tend to over state the cost of vectors. In + * addition coding a new vector can have a knock on effect on the cost + * of subsequent vectors and the quality of prediction from NEAR and + * NEAREST for subsequent blocks. The "Weight" parameter allows, to a + * limited extent, for some account to be taken of these factors. 
+ */ return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1] + mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1]) * Weight) >> 7; } static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int error_per_bit) { - return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1] + - mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1]) - * error_per_bit + 128) >> 8; + /* Ignore mv costing if mvcost is NULL */ + if (mvcost) + return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1] + + mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1]) + * error_per_bit + 128) >> 8; + return 0; } static int mvsad_err_cost(int_mv *mv, int_mv *ref, int *mvsadcost[2], int error_per_bit) { /* Calculate sad error cost on full pixel basis. */ - return ((mvsadcost[0][(mv->as_mv.row - ref->as_mv.row)] + - mvsadcost[1][(mv->as_mv.col - ref->as_mv.col)]) - * error_per_bit + 128) >> 8; + /* Ignore mv costing if mvsadcost is NULL */ + if (mvsadcost) + return ((mvsadcost[0][(mv->as_mv.row - ref->as_mv.row)] + + mvsadcost[1][(mv->as_mv.col - ref->as_mv.col)]) + * error_per_bit + 128) >> 8; + return 0; } void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride) @@ -53,7 +62,7 @@ void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride) int search_site_count = 0; - // Generate offsets for 4 search sites per step. + /* Generate offsets for 4 search sites per step. */ Len = MAX_FIRST_STEP; x->ss[search_site_count].mv.col = 0; x->ss[search_site_count].mv.row = 0; @@ -63,31 +72,31 @@ void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride) while (Len > 0) { - // Compute offsets for search sites. + /* Compute offsets for search sites. */ x->ss[search_site_count].mv.col = 0; x->ss[search_site_count].mv.row = -Len; x->ss[search_site_count].offset = -Len * stride; search_site_count++; - // Compute offsets for search sites. + /* Compute offsets for search sites. 
*/ x->ss[search_site_count].mv.col = 0; x->ss[search_site_count].mv.row = Len; x->ss[search_site_count].offset = Len * stride; search_site_count++; - // Compute offsets for search sites. + /* Compute offsets for search sites. */ x->ss[search_site_count].mv.col = -Len; x->ss[search_site_count].mv.row = 0; x->ss[search_site_count].offset = -Len; search_site_count++; - // Compute offsets for search sites. + /* Compute offsets for search sites. */ x->ss[search_site_count].mv.col = Len; x->ss[search_site_count].mv.row = 0; x->ss[search_site_count].offset = Len; search_site_count++; - // Contract. + /* Contract. */ Len /= 2; } @@ -100,7 +109,7 @@ void vp8_init3smotion_compensation(MACROBLOCK *x, int stride) int Len; int search_site_count = 0; - // Generate offsets for 8 search sites per step. + /* Generate offsets for 8 search sites per step. */ Len = MAX_FIRST_STEP; x->ss[search_site_count].mv.col = 0; x->ss[search_site_count].mv.row = 0; @@ -110,56 +119,56 @@ void vp8_init3smotion_compensation(MACROBLOCK *x, int stride) while (Len > 0) { - // Compute offsets for search sites. + /* Compute offsets for search sites. */ x->ss[search_site_count].mv.col = 0; x->ss[search_site_count].mv.row = -Len; x->ss[search_site_count].offset = -Len * stride; search_site_count++; - // Compute offsets for search sites. + /* Compute offsets for search sites. */ x->ss[search_site_count].mv.col = 0; x->ss[search_site_count].mv.row = Len; x->ss[search_site_count].offset = Len * stride; search_site_count++; - // Compute offsets for search sites. + /* Compute offsets for search sites. */ x->ss[search_site_count].mv.col = -Len; x->ss[search_site_count].mv.row = 0; x->ss[search_site_count].offset = -Len; search_site_count++; - // Compute offsets for search sites. + /* Compute offsets for search sites. */ x->ss[search_site_count].mv.col = Len; x->ss[search_site_count].mv.row = 0; x->ss[search_site_count].offset = Len; search_site_count++; - // Compute offsets for search sites. 
+ /* Compute offsets for search sites. */ x->ss[search_site_count].mv.col = -Len; x->ss[search_site_count].mv.row = -Len; x->ss[search_site_count].offset = -Len * stride - Len; search_site_count++; - // Compute offsets for search sites. + /* Compute offsets for search sites. */ x->ss[search_site_count].mv.col = Len; x->ss[search_site_count].mv.row = -Len; x->ss[search_site_count].offset = -Len * stride + Len; search_site_count++; - // Compute offsets for search sites. + /* Compute offsets for search sites. */ x->ss[search_site_count].mv.col = -Len; x->ss[search_site_count].mv.row = Len; x->ss[search_site_count].offset = Len * stride - Len; search_site_count++; - // Compute offsets for search sites. + /* Compute offsets for search sites. */ x->ss[search_site_count].mv.col = Len; x->ss[search_site_count].mv.row = Len; x->ss[search_site_count].offset = Len * stride + Len; search_site_count++; - // Contract. + /* Contract. */ Len /= 2; } @@ -176,13 +185,20 @@ void vp8_init3smotion_compensation(MACROBLOCK *x, int stride) * 32 cols area that is enough for 16x16 macroblock. Later, for SPLITMV, we * could reduce the area. */ -#define MVC(r,c) (((mvcost[0][(r)-rr] + mvcost[1][(c) - rc]) * error_per_bit + 128 )>>8 ) // estimated cost of a motion vector (r,c) -#define PRE(r,c) (y + (((r)>>2) * y_stride + ((c)>>2) -(offset))) // pointer to predictor base of a motionvector -#define SP(x) (((x)&3)<<1) // convert motion vector component to offset for svf calc -#define DIST(r,c) vfp->svf( PRE(r,c), y_stride, SP(c),SP(r), z,b->src_stride,&sse) // returns subpixel variance error function. + +/* estimated cost of a motion vector (r,c) */ +#define MVC(r,c) (mvcost ? 
((mvcost[0][(r)-rr] + mvcost[1][(c) - rc]) * error_per_bit + 128 )>>8 : 0) +/* pointer to predictor base of a motionvector */ +#define PRE(r,c) (y + (((r)>>2) * y_stride + ((c)>>2) -(offset))) +/* convert motion vector component to offset for svf calc */ +#define SP(x) (((x)&3)<<1) +/* returns subpixel variance error function. */ +#define DIST(r,c) vfp->svf( PRE(r,c), y_stride, SP(c),SP(r), z,b->src_stride,&sse) #define IFMVCV(r,c,s,e) if ( c >= minc && c <= maxc && r >= minr && r <= maxr) s else e; -#define ERR(r,c) (MVC(r,c)+DIST(r,c)) // returns distortion + motion vector cost -#define CHECK_BETTER(v,r,c) IFMVCV(r,c,{thismse = DIST(r,c); if((v = (MVC(r,c)+thismse)) < besterr) { besterr = v; br=r; bc=c; *distortion = thismse; *sse1 = sse; }}, v=INT_MAX;)// checks if (r,c) has better score than previous best +/* returns distortion + motion vector cost */ +#define ERR(r,c) (MVC(r,c)+DIST(r,c)) +/* checks if (r,c) has better score than previous best */ +#define CHECK_BETTER(v,r,c) IFMVCV(r,c,{thismse = DIST(r,c); if((v = (MVC(r,c)+thismse)) < besterr) { besterr = v; br=r; bc=c; *distortion = thismse; *sse1 = sse; }}, v=UINT_MAX;) int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *bestmv, int_mv *ref_mv, @@ -196,7 +212,7 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int rr = ref_mv->as_mv.row >> 1, rc = ref_mv->as_mv.col >> 1; int br = bestmv->as_mv.row << 2, bc = bestmv->as_mv.col << 2; int tr = br, tc = bc; - unsigned int besterr = INT_MAX; + unsigned int besterr; unsigned int left, right, up, down, diag; unsigned int sse; unsigned int whichdir; @@ -221,7 +237,7 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, unsigned char *y; int buf_r1, buf_r2, buf_c1, buf_c2; - // Clamping to avoid out-of-range data access + /* Clamping to avoid out-of-range data access */ buf_r1 = ((bestmv->as_mv.row - 3) < x->mv_row_min)?(bestmv->as_mv.row - x->mv_row_min):3; buf_r2 
= ((bestmv->as_mv.row + 3) > x->mv_row_max)?(x->mv_row_max - bestmv->as_mv.row):3; buf_c1 = ((bestmv->as_mv.col - 3) < x->mv_col_min)?(bestmv->as_mv.col - x->mv_col_min):3; @@ -238,19 +254,21 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col; - // central mv + /* central mv */ bestmv->as_mv.row <<= 3; bestmv->as_mv.col <<= 3; - // calculate central point error + /* calculate central point error */ besterr = vfp->vf(y, y_stride, z, b->src_stride, sse1); *distortion = besterr; besterr += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit); - // TODO: Each subsequent iteration checks at least one point in common with the last iteration could be 2 ( if diag selected) + /* TODO: Each subsequent iteration checks at least one point in common + * with the last iteration could be 2 ( if diag selected) + */ while (--halfiters) { - // 1/2 pel + /* 1/2 pel */ CHECK_BETTER(left, tr, tc - 2); CHECK_BETTER(right, tr, tc + 2); CHECK_BETTER(up, tr - 2, tc); @@ -274,7 +292,7 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, break; } - // no reason to check the same one again. + /* no reason to check the same one again. */ if (tr == br && tc == bc) break; @@ -282,8 +300,11 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, tc = bc; } - // TODO: Each subsequent iteration checks at least one point in common with the last iteration could be 2 ( if diag selected) - // 1/4 pel + /* TODO: Each subsequent iteration checks at least one point in common + * with the last iteration could be 2 ( if diag selected) + */ + + /* 1/4 pel */ while (--quarteriters) { CHECK_BETTER(left, tr, tc - 1); @@ -309,7 +330,7 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, break; } - // no reason to check the same one again. + /* no reason to check the same one again. 
*/ if (tr == br && tc == bc) break; @@ -367,17 +388,17 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, y_stride = pre_stride; #endif - // central mv + /* central mv */ bestmv->as_mv.row <<= 3; bestmv->as_mv.col <<= 3; startmv = *bestmv; - // calculate central point error + /* calculate central point error */ bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1); *distortion = bestmse; bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit); - // go left then right and check error + /* go left then right and check error */ this_mv.as_mv.row = startmv.as_mv.row; this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4); thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, b->src_stride, &sse); @@ -403,7 +424,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, *sse1 = sse; } - // go up then down and check error + /* go up then down and check error */ this_mv.as_mv.col = startmv.as_mv.col; this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4); thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, b->src_stride, &sse); @@ -430,10 +451,8 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, } - // now check 1 more diagonal + /* now check 1 more diagonal */ whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); - //for(whichdir =0;whichdir<4;whichdir++) - //{ this_mv = startmv; switch (whichdir) @@ -471,10 +490,8 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, *sse1 = sse; } -// } - - // time to check quarter pels. + /* time to check quarter pels. 
*/ if (bestmv->as_mv.row < startmv.as_mv.row) y -= y_stride; @@ -485,7 +502,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, - // go left then right and check error + /* go left then right and check error */ this_mv.as_mv.row = startmv.as_mv.row; if (startmv.as_mv.col & 7) @@ -521,7 +538,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, *sse1 = sse; } - // go up then down and check error + /* go up then down and check error */ this_mv.as_mv.col = startmv.as_mv.col; if (startmv.as_mv.row & 7) @@ -558,11 +575,9 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, } - // now check 1 more diagonal + /* now check 1 more diagonal */ whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); -// for(whichdir=0;whichdir<4;whichdir++) -// { this_mv = startmv; switch (whichdir) @@ -684,17 +699,17 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, y_stride = pre_stride; #endif - // central mv + /* central mv */ bestmv->as_mv.row <<= 3; bestmv->as_mv.col <<= 3; startmv = *bestmv; - // calculate central point error + /* calculate central point error */ bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1); *distortion = bestmse; bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit); - // go left then right and check error + /* go left then right and check error */ this_mv.as_mv.row = startmv.as_mv.row; this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4); thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, b->src_stride, &sse); @@ -720,7 +735,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, *sse1 = sse; } - // go up then down and check error + /* go up then down and check error */ this_mv.as_mv.col = startmv.as_mv.col; this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4); thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, b->src_stride, &sse); @@ -746,7 +761,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, *sse1 = sse; } - // now check 
1 more diagonal - + /* now check 1 more diagonal - */ whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); this_mv = startmv; @@ -855,7 +870,7 @@ int vp8_hex_search int in_what_stride = pre_stride; int br, bc; int_mv this_mv; - unsigned int bestsad = 0x7fffffff; + unsigned int bestsad; unsigned int thissad; unsigned char *base_offset; unsigned char *this_offset; @@ -869,18 +884,17 @@ int vp8_hex_search fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; - // adjust ref_mv to make sure it is within MV range + /* adjust ref_mv to make sure it is within MV range */ vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); br = ref_mv->as_mv.row; bc = ref_mv->as_mv.col; - // Work out the start point for the search + /* Work out the start point for the search */ base_offset = (unsigned char *)(base_pre + d->offset); this_offset = base_offset + (br * (pre_stride)) + bc; this_mv.as_mv.row = br; this_mv.as_mv.col = bc; - bestsad = vfp->sdf( what, what_stride, this_offset, - in_what_stride, 0x7fffffff) + bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride, UINT_MAX) + mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit); #if CONFIG_MULTI_RES_ENCODING @@ -895,8 +909,7 @@ int vp8_hex_search dia_range = 8; #endif - // hex search - //j=0 + /* hex search */ CHECK_BOUNDS(2) if(all_in) @@ -906,7 +919,7 @@ int vp8_hex_search this_mv.as_mv.row = br + hex[i].row; this_mv.as_mv.col = bc + hex[i].col; this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col; - thissad=vfp->sdf( what, what_stride, this_offset, in_what_stride, bestsad); + thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); CHECK_BETTER } }else @@ -917,7 +930,7 @@ int vp8_hex_search this_mv.as_mv.col = bc + hex[i].col; CHECK_POINT this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col; - thissad=vfp->sdf( what, what_stride, this_offset, 
in_what_stride, bestsad); + thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); CHECK_BETTER } } @@ -943,7 +956,7 @@ int vp8_hex_search this_mv.as_mv.row = br + next_chkpts[k][i].row; this_mv.as_mv.col = bc + next_chkpts[k][i].col; this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col; - thissad = vfp->sdf( what, what_stride, this_offset, in_what_stride, bestsad); + thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); CHECK_BETTER } }else @@ -954,7 +967,7 @@ int vp8_hex_search this_mv.as_mv.col = bc + next_chkpts[k][i].col; CHECK_POINT this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col; - thissad = vfp->sdf( what, what_stride, this_offset, in_what_stride, bestsad); + thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); CHECK_BETTER } } @@ -971,7 +984,7 @@ int vp8_hex_search } } - // check 4 1-away neighbors + /* check 4 1-away neighbors */ cal_neighbors: for (j = 0; j < dia_range; j++) { @@ -985,7 +998,7 @@ cal_neighbors: this_mv.as_mv.row = br + neighbors[i].row; this_mv.as_mv.col = bc + neighbors[i].col; this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col; - thissad = vfp->sdf( what, what_stride, this_offset, in_what_stride, bestsad); + thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); CHECK_BETTER } }else @@ -996,7 +1009,7 @@ cal_neighbors: this_mv.as_mv.col = bc + neighbors[i].col; CHECK_POINT this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col; - thissad = vfp->sdf( what, what_stride, this_offset, in_what_stride, bestsad); + thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); CHECK_BETTER } } @@ -1047,7 +1060,8 @@ int vp8_diamond_search_sad_c int tot_steps; int_mv this_mv; - int bestsad = INT_MAX; + unsigned int bestsad; + unsigned int thissad; int best_site = 0; int last_site = 0; @@ -1058,10 
+1072,12 @@ int vp8_diamond_search_sad_c search_site *ss; unsigned char *check_here; - int thissad; - int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]}; + int *mvsadcost[2]; int_mv fcenter_mv; + + mvsadcost[0] = x->mvsadcost[0]; + mvsadcost[1] = x->mvsadcost[1]; fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; @@ -1072,17 +1088,18 @@ int vp8_diamond_search_sad_c best_mv->as_mv.row = ref_row; best_mv->as_mv.col = ref_col; - // Work out the start point for the search + /* Work out the start point for the search */ in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) + ref_col); best_address = in_what; - // Check the starting position - bestsad = fn_ptr->sdf(what, what_stride, in_what, - in_what_stride, 0x7fffffff) - + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); + /* Check the starting position */ + bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, UINT_MAX) + + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); - // search_param determines the length of the initial step and hence the number of iterations - // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc. + /* search_param determines the length of the initial step and hence + * the number of iterations 0 = initial step (MAX_FIRST_STEP) pel : + * 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc. 
+ */ ss = &x->ss[search_param * x->searches_per_step]; tot_steps = (x->ss_count / x->searches_per_step) - search_param; @@ -1092,7 +1109,7 @@ int vp8_diamond_search_sad_c { for (j = 0 ; j < x->searches_per_step ; j++) { - // Trap illegal vectors + /* Trap illegal vectors */ this_row_offset = best_mv->as_mv.row + ss[i].mv.row; this_col_offset = best_mv->as_mv.col + ss[i].mv.col; @@ -1101,14 +1118,14 @@ int vp8_diamond_search_sad_c { check_here = ss[i].offset + best_address; - thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad); + thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad); if (thissad < bestsad) { this_mv.as_mv.row = this_row_offset; this_mv.as_mv.col = this_col_offset; thissad += mvsad_err_cost(&this_mv, &fcenter_mv, - mvsadcost, sad_per_bit); + mvsadcost, sad_per_bit); if (thissad < bestsad) { @@ -1135,11 +1152,8 @@ int vp8_diamond_search_sad_c this_mv.as_mv.row = best_mv->as_mv.row << 3; this_mv.as_mv.col = best_mv->as_mv.col << 3; - if (bestsad == INT_MAX) - return INT_MAX; - - return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad)) - + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); + return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) + + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); } int vp8_diamond_search_sadx4 @@ -1170,7 +1184,8 @@ int vp8_diamond_search_sadx4 int tot_steps; int_mv this_mv; - unsigned int bestsad = UINT_MAX; + unsigned int bestsad; + unsigned int thissad; int best_site = 0; int last_site = 0; @@ -1181,10 +1196,12 @@ int vp8_diamond_search_sadx4 search_site *ss; unsigned char *check_here; - unsigned int thissad; - int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]}; + int *mvsadcost[2]; int_mv fcenter_mv; + + mvsadcost[0] = x->mvsadcost[0]; + mvsadcost[1] = x->mvsadcost[1]; fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; @@ -1195,17 +1212,18 
@@ int vp8_diamond_search_sadx4 best_mv->as_mv.row = ref_row; best_mv->as_mv.col = ref_col; - // Work out the start point for the search + /* Work out the start point for the search */ in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) + ref_col); best_address = in_what; - // Check the starting position - bestsad = fn_ptr->sdf(what, what_stride, - in_what, in_what_stride, 0x7fffffff) - + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); + /* Check the starting position */ + bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, UINT_MAX) + + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); - // search_param determines the length of the initial step and hence the number of iterations - // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc. + /* search_param determines the length of the initial step and hence the + * number of iterations 0 = initial step (MAX_FIRST_STEP) pel : 1 = + * (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc. + */ ss = &x->ss[search_param * x->searches_per_step]; tot_steps = (x->ss_count / x->searches_per_step) - search_param; @@ -1215,8 +1233,10 @@ int vp8_diamond_search_sadx4 { int all_in = 1, t; - // To know if all neighbor points are within the bounds, 4 bounds checking are enough instead of - // checking 4 bounds for each points. + /* To know if all neighbor points are within the bounds, 4 bounds + * checking are enough instead of checking 4 bounds for each + * points. 
+ */ all_in &= ((best_mv->as_mv.row + ss[i].mv.row)> x->mv_row_min); all_in &= ((best_mv->as_mv.row + ss[i+1].mv.row) < x->mv_row_max); all_in &= ((best_mv->as_mv.col + ss[i+2].mv.col) > x->mv_col_min); @@ -1228,7 +1248,7 @@ int vp8_diamond_search_sadx4 for (j = 0 ; j < x->searches_per_step ; j += 4) { - unsigned char *block_offset[4]; + const unsigned char *block_offset[4]; for (t = 0; t < 4; t++) block_offset[t] = ss[i+t].offset + best_address; @@ -1257,7 +1277,7 @@ int vp8_diamond_search_sadx4 { for (j = 0 ; j < x->searches_per_step ; j++) { - // Trap illegal vectors + /* Trap illegal vectors */ this_row_offset = best_mv->as_mv.row + ss[i].mv.row; this_col_offset = best_mv->as_mv.col + ss[i].mv.col; @@ -1265,14 +1285,14 @@ int vp8_diamond_search_sadx4 (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max)) { check_here = ss[i].offset + best_address; - thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad); + thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad); if (thissad < bestsad) { this_mv.as_mv.row = this_row_offset; this_mv.as_mv.col = this_col_offset; thissad += mvsad_err_cost(&this_mv, &fcenter_mv, - mvsadcost, sad_per_bit); + mvsadcost, sad_per_bit); if (thissad < bestsad) { @@ -1299,11 +1319,8 @@ int vp8_diamond_search_sadx4 this_mv.as_mv.row = best_mv->as_mv.row << 3; this_mv.as_mv.col = best_mv->as_mv.col << 3; - if (bestsad == INT_MAX) - return INT_MAX; - - return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad)) - + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); + return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) + + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); } int vp8_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, @@ -1321,11 +1338,11 @@ int vp8_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, unsigned char *bestaddress; int_mv *best_mv = &d->bmi.mv; 
int_mv this_mv; - int bestsad = INT_MAX; + unsigned int bestsad; + unsigned int thissad; int r, c; unsigned char *check_here; - int thissad; int ref_row = ref_mv->as_mv.row; int ref_col = ref_mv->as_mv.col; @@ -1335,24 +1352,29 @@ int vp8_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, int col_min = ref_col - distance; int col_max = ref_col + distance; - int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]}; + int *mvsadcost[2]; int_mv fcenter_mv; + + mvsadcost[0] = x->mvsadcost[0]; + mvsadcost[1] = x->mvsadcost[1]; fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; - // Work out the mid point for the search + /* Work out the mid point for the search */ in_what = base_pre + d->offset; bestaddress = in_what + (ref_row * pre_stride) + ref_col; best_mv->as_mv.row = ref_row; best_mv->as_mv.col = ref_col; - // Baseline value at the centre + /* Baseline value at the centre */ bestsad = fn_ptr->sdf(what, what_stride, bestaddress, - in_what_stride, 0x7fffffff) - + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); + in_what_stride, UINT_MAX) + + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); - // Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border + /* Apply further limits to prevent us looking using vectors that + * stretch beyiond the UMV border + */ if (col_min < x->mv_col_min) col_min = x->mv_col_min; @@ -1372,11 +1394,11 @@ int vp8_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, for (c = col_min; c < col_max; c++) { - thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad); + thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad); this_mv.as_mv.col = c; - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, - mvsadcost, sad_per_bit); + thissad += mvsad_err_cost(&this_mv, &fcenter_mv, + mvsadcost, sad_per_bit); if (thissad < bestsad) { @@ -1393,11 +1415,8 @@ int 
vp8_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, this_mv.as_mv.row = best_mv->as_mv.row << 3; this_mv.as_mv.col = best_mv->as_mv.col << 3; - if (bestsad < INT_MAX) - return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad)) - + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); - else - return INT_MAX; + return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) + + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); } int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, @@ -1415,11 +1434,11 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, unsigned char *bestaddress; int_mv *best_mv = &d->bmi.mv; int_mv this_mv; - unsigned int bestsad = UINT_MAX; + unsigned int bestsad; + unsigned int thissad; int r, c; unsigned char *check_here; - unsigned int thissad; int ref_row = ref_mv->as_mv.row; int ref_col = ref_mv->as_mv.col; @@ -1431,24 +1450,29 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, unsigned int sad_array[3]; - int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]}; + int *mvsadcost[2]; int_mv fcenter_mv; + + mvsadcost[0] = x->mvsadcost[0]; + mvsadcost[1] = x->mvsadcost[1]; fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; - // Work out the mid point for the search + /* Work out the mid point for the search */ in_what = base_pre + d->offset; bestaddress = in_what + (ref_row * pre_stride) + ref_col; best_mv->as_mv.row = ref_row; best_mv->as_mv.col = ref_col; - // Baseline value at the centre - bestsad = fn_ptr->sdf(what, what_stride, - bestaddress, in_what_stride, 0x7fffffff) - + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); + /* Baseline value at the centre */ + bestsad = fn_ptr->sdf(what, what_stride, bestaddress, + in_what_stride, UINT_MAX) + + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); - // Apply 
further limits to prevent us looking using vectors that stretch beyiond the UMV border + /* Apply further limits to prevent us looking using vectors that stretch + * beyond the UMV border + */ if (col_min < x->mv_col_min) col_min = x->mv_col_min; @@ -1471,7 +1495,7 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, { int i; - fn_ptr->sdx3f(what, what_stride, check_here , in_what_stride, sad_array); + fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array); for (i = 0; i < 3; i++) { @@ -1480,8 +1504,8 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, if (thissad < bestsad) { this_mv.as_mv.col = c; - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, - mvsadcost, sad_per_bit); + thissad += mvsad_err_cost(&this_mv, &fcenter_mv, + mvsadcost, sad_per_bit); if (thissad < bestsad) { @@ -1499,13 +1523,13 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, while (c < col_max) { - thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad); + thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad); if (thissad < bestsad) { this_mv.as_mv.col = c; - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, - mvsadcost, sad_per_bit); + thissad += mvsad_err_cost(&this_mv, &fcenter_mv, + mvsadcost, sad_per_bit); if (thissad < bestsad) { @@ -1525,11 +1549,8 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, this_mv.as_mv.row = best_mv->as_mv.row << 3; this_mv.as_mv.col = best_mv->as_mv.col << 3; - if (bestsad < INT_MAX) - return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad)) - + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); - else - return INT_MAX; + return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) + + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); } int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, @@ 
-1547,11 +1568,11 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, unsigned char *bestaddress; int_mv *best_mv = &d->bmi.mv; int_mv this_mv; - unsigned int bestsad = UINT_MAX; + unsigned int bestsad; + unsigned int thissad; int r, c; unsigned char *check_here; - unsigned int thissad; int ref_row = ref_mv->as_mv.row; int ref_col = ref_mv->as_mv.col; @@ -1564,24 +1585,29 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, DECLARE_ALIGNED_ARRAY(16, unsigned short, sad_array8, 8); unsigned int sad_array[3]; - int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]}; + int *mvsadcost[2]; int_mv fcenter_mv; + + mvsadcost[0] = x->mvsadcost[0]; + mvsadcost[1] = x->mvsadcost[1]; fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; - // Work out the mid point for the search + /* Work out the mid point for the search */ in_what = base_pre + d->offset; bestaddress = in_what + (ref_row * pre_stride) + ref_col; best_mv->as_mv.row = ref_row; best_mv->as_mv.col = ref_col; - // Baseline value at the centre + /* Baseline value at the centre */ bestsad = fn_ptr->sdf(what, what_stride, - bestaddress, in_what_stride, 0x7fffffff) - + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); + bestaddress, in_what_stride, UINT_MAX) + + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); - // Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border + /* Apply further limits to prevent us looking using vectors that stretch + * beyond the UMV border + */ if (col_min < x->mv_col_min) col_min = x->mv_col_min; @@ -1604,17 +1630,17 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, { int i; - fn_ptr->sdx8f(what, what_stride, check_here , in_what_stride, sad_array8); + fn_ptr->sdx8f(what, what_stride, check_here, in_what_stride, sad_array8); for (i = 0; i < 8; i++) { - thissad = (unsigned int)sad_array8[i]; + 
thissad = sad_array8[i]; if (thissad < bestsad) { this_mv.as_mv.col = c; - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, - mvsadcost, sad_per_bit); + thissad += mvsad_err_cost(&this_mv, &fcenter_mv, + mvsadcost, sad_per_bit); if (thissad < bestsad) { @@ -1687,11 +1713,8 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, this_mv.as_mv.row = best_mv->as_mv.row << 3; this_mv.as_mv.col = best_mv->as_mv.col << 3; - if (bestsad < INT_MAX) - return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad)) - + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); - else - return INT_MAX; + return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) + + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); } int vp8_refining_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, @@ -1711,17 +1734,21 @@ int vp8_refining_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv unsigned char *best_address = (unsigned char *)(base_pre + d->offset + (ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col); unsigned char *check_here; - unsigned int thissad; int_mv this_mv; - unsigned int bestsad = INT_MAX; + unsigned int bestsad; + unsigned int thissad; - int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]}; + int *mvsadcost[2]; int_mv fcenter_mv; + mvsadcost[0] = x->mvsadcost[0]; + mvsadcost[1] = x->mvsadcost[1]; fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; - bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride, 0x7fffffff) + mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit); + bestsad = fn_ptr->sdf(what, what_stride, best_address, + in_what_stride, UINT_MAX) + + mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit); for (i=0; ias_mv.row << 3; this_mv.as_mv.col = ref_mv->as_mv.col << 3; - if (bestsad < INT_MAX) - return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned 
int *)(&thissad)) - + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); - else - return INT_MAX; + return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) + + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); } int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, @@ -1790,17 +1814,21 @@ int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, unsigned char *best_address = (unsigned char *)(base_pre + d->offset + (ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col); unsigned char *check_here; - unsigned int thissad; int_mv this_mv; - unsigned int bestsad = INT_MAX; + unsigned int bestsad; + unsigned int thissad; - int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]}; + int *mvsadcost[2]; int_mv fcenter_mv; + mvsadcost[0] = x->mvsadcost[0]; + mvsadcost[1] = x->mvsadcost[1]; fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; - bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride, 0x7fffffff) + mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit); + bestsad = fn_ptr->sdf(what, what_stride, best_address, + in_what_stride, UINT_MAX) + + mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit); for (i=0; ias_mv.row << 3; this_mv.as_mv.col = ref_mv->as_mv.col << 3; - if (bestsad < INT_MAX) - return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad)) - + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); - else - return INT_MAX; + return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) + + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); } #ifdef ENTROPY_STATS @@ -1900,16 +1925,16 @@ void print_mode_context(void) for (j = 0; j < 6; j++) { - fprintf(f, " { // %d \n", j); + fprintf(f, " { /* %d */\n", j); fprintf(f, " "); for (i = 0; i < 4; i++) { int overal_prob; int this_prob; - int count; // = mv_ref_ct[j][i][0]+mv_ref_ct[j][i][1]; + int count; - // Overall 
probs + /* Overall probs */ count = mv_mode_cts[i][0] + mv_mode_cts[i][1]; if (count) @@ -1920,7 +1945,7 @@ void print_mode_context(void) if (overal_prob == 0) overal_prob = 1; - // context probs + /* context probs */ count = mv_ref_ct[j][i][0] + mv_ref_ct[j][i][1]; if (count) @@ -1932,8 +1957,6 @@ void print_mode_context(void) this_prob = 1; fprintf(f, "%5d, ", this_prob); - //fprintf(f,"%5d, %5d, %8d,", this_prob, overal_prob, (this_prob << 10)/overal_prob); - //fprintf(f,"%8d, ", (this_prob << 10)/overal_prob); } fprintf(f, " },\n"); diff --git a/vp8/encoder/mcomp.h b/vp8/encoder/mcomp.h index cdb0cb6..890113f 100644 --- a/vp8/encoder/mcomp.h +++ b/vp8/encoder/mcomp.h @@ -21,9 +21,16 @@ extern void accum_mv_refs(MB_PREDICTION_MODE, const int near_mv_ref_cts[4]); #endif -#define MAX_MVSEARCH_STEPS 8 // The maximum number of steps in a step search given the largest allowed initial step -#define MAX_FULL_PEL_VAL ((1 << (MAX_MVSEARCH_STEPS)) - 1) // Max full pel mv specified in 1 pel units -#define MAX_FIRST_STEP (1 << (MAX_MVSEARCH_STEPS-1)) // Maximum size of the first step in full pel units +/* The maximum number of steps in a step search given the largest allowed + * initial step + */ +#define MAX_MVSEARCH_STEPS 8 + +/* Max full pel mv specified in 1 pel units */ +#define MAX_FULL_PEL_VAL ((1 << (MAX_MVSEARCH_STEPS)) - 1) + +/* Maximum size of the first step in full pel units */ +#define MAX_FIRST_STEP (1 << (MAX_MVSEARCH_STEPS-1)) extern void print_mode_context(void); extern int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int Weight); diff --git a/vp8/encoder/modecosts.c b/vp8/encoder/modecosts.c index c636c48..c61563c 100644 --- a/vp8/encoder/modecosts.c +++ b/vp8/encoder/modecosts.c @@ -18,6 +18,8 @@ void vp8_init_mode_costs(VP8_COMP *c) { VP8_COMMON *x = &c->common; + struct rd_costs_struct *rd_costs = &c->rd_costs; + { const vp8_tree_p T = vp8_bmode_tree; @@ -29,19 +31,24 @@ void vp8_init_mode_costs(VP8_COMP *c) do { - vp8_cost_tokens((int 
*)c->mb.bmode_costs[i][j], x->kf_bmode_prob[i][j], T); + vp8_cost_tokens(rd_costs->bmode_costs[i][j], + vp8_kf_bmode_prob[i][j], T); } while (++j < VP8_BINTRAMODES); } while (++i < VP8_BINTRAMODES); - vp8_cost_tokens((int *)c->mb.inter_bmode_costs, x->fc.bmode_prob, T); + vp8_cost_tokens(rd_costs->inter_bmode_costs, x->fc.bmode_prob, T); } - vp8_cost_tokens((int *)c->mb.inter_bmode_costs, x->fc.sub_mv_ref_prob, vp8_sub_mv_ref_tree); + vp8_cost_tokens(rd_costs->inter_bmode_costs, x->fc.sub_mv_ref_prob, + vp8_sub_mv_ref_tree); - vp8_cost_tokens(c->mb.mbmode_cost[1], x->fc.ymode_prob, vp8_ymode_tree); - vp8_cost_tokens(c->mb.mbmode_cost[0], x->kf_ymode_prob, vp8_kf_ymode_tree); + vp8_cost_tokens(rd_costs->mbmode_cost[1], x->fc.ymode_prob, vp8_ymode_tree); + vp8_cost_tokens(rd_costs->mbmode_cost[0], vp8_kf_ymode_prob, + vp8_kf_ymode_tree); - vp8_cost_tokens(c->mb.intra_uv_mode_cost[1], x->fc.uv_mode_prob, vp8_uv_mode_tree); - vp8_cost_tokens(c->mb.intra_uv_mode_cost[0], x->kf_uv_mode_prob, vp8_uv_mode_tree); + vp8_cost_tokens(rd_costs->intra_uv_mode_cost[1], x->fc.uv_mode_prob, + vp8_uv_mode_tree); + vp8_cost_tokens(rd_costs->intra_uv_mode_cost[0], vp8_kf_uv_mode_prob, + vp8_uv_mode_tree); } diff --git a/vp8/encoder/mr_dissim.c b/vp8/encoder/mr_dissim.c index 7a62a06..71218cc 100644 --- a/vp8/encoder/mr_dissim.c +++ b/vp8/encoder/mr_dissim.c @@ -53,6 +53,7 @@ if(x->mbmi.ref_frame !=INTRA_FRAME) \ void vp8_cal_dissimilarity(VP8_COMP *cpi) { VP8_COMMON *cm = &cpi->common; + int i; /* Note: The first row & first column in mip are outside the frame, which * were initialized to all 0.(ref_frame, mode, mv...) @@ -65,14 +66,25 @@ void vp8_cal_dissimilarity(VP8_COMP *cpi) /* Store info for show/no-show frames for supporting alt_ref. * If parent frame is alt_ref, child has one too. 
*/ + LOWER_RES_FRAME_INFO* store_info + = (LOWER_RES_FRAME_INFO*)cpi->oxcf.mr_low_res_mode_info; + + store_info->frame_type = cm->frame_type; + + if(cm->frame_type != KEY_FRAME) + { + store_info->is_frame_dropped = 0; + for (i = 1; i < MAX_REF_FRAMES; i++) + store_info->low_res_ref_frames[i] = cpi->current_ref_frames[i]; + } + if(cm->frame_type != KEY_FRAME) { int mb_row; int mb_col; /* Point to beginning of allocated MODE_INFO arrays. */ MODE_INFO *tmp = cm->mip + cm->mode_info_stride; - LOWER_RES_INFO* store_mode_info - = (LOWER_RES_INFO*)cpi->oxcf.mr_low_res_mode_info; + LOWER_RES_MB_INFO* store_mode_info = store_info->mb_info; for (mb_row = 0; mb_row < cm->mb_rows; mb_row ++) { @@ -199,3 +211,26 @@ void vp8_cal_dissimilarity(VP8_COMP *cpi) } } } + +/* This function is called only when this frame is dropped at current + resolution level. */ +void vp8_store_drop_frame_info(VP8_COMP *cpi) +{ + /* If the frame is dropped in lower-resolution encoding, this information + is passed to higher resolution level so that the encoder knows there + is no mode & motion info available. + */ + if (cpi->oxcf.mr_total_resolutions >1 + && cpi->oxcf.mr_encoder_id < (cpi->oxcf.mr_total_resolutions - 1)) + { + /* Store info for show/no-show frames for supporting alt_ref. + * If parent frame is alt_ref, child has one too. + */ + LOWER_RES_FRAME_INFO* store_info + = (LOWER_RES_FRAME_INFO*)cpi->oxcf.mr_low_res_mode_info; + + /* Set frame_type to be INTER_FRAME since we won't drop key frame. 
*/ + store_info->frame_type = INTER_FRAME; + store_info->is_frame_dropped = 1; + } +} diff --git a/vp8/encoder/mr_dissim.h b/vp8/encoder/mr_dissim.h index 3d2c203..f8cb135 100644 --- a/vp8/encoder/mr_dissim.h +++ b/vp8/encoder/mr_dissim.h @@ -15,5 +15,6 @@ extern void vp8_cal_low_res_mb_cols(VP8_COMP *cpi); extern void vp8_cal_dissimilarity(VP8_COMP *cpi); +extern void vp8_store_drop_frame_info(VP8_COMP *cpi); #endif diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c index cee62fa..4680f39 100644 --- a/vp8/encoder/onyx_if.c +++ b/vp8/encoder/onyx_if.c @@ -11,6 +11,7 @@ #include "vpx_config.h" #include "vp8/common/onyxc_int.h" +#include "vp8/common/blockd.h" #include "onyx_int.h" #include "vp8/common/systemdependent.h" #include "quantize.h" @@ -55,12 +56,8 @@ extern void vp8_deblock_frame(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *po extern void print_parms(VP8_CONFIG *ocf, char *filenam); extern unsigned int vp8_get_processor_freq(); extern void print_tree_update_probs(); -extern void vp8cx_create_encoder_threads(VP8_COMP *cpi); +extern int vp8cx_create_encoder_threads(VP8_COMP *cpi); extern void vp8cx_remove_encoder_threads(VP8_COMP *cpi); -#if HAVE_NEON -extern void vp8_yv12_copy_frame_func_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc); -extern void vp8_yv12_copy_src_frame_func_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc); -#endif int vp8_estimate_entropy_savings(VP8_COMP *cpi); @@ -143,7 +140,7 @@ extern const int qzbin_factors[129]; extern void vp8cx_init_quantizer(VP8_COMP *cpi); extern const int vp8cx_base_skip_false_prob[128]; -// Tables relating active max Q to active min Q +/* Tables relating active max Q to active min Q */ static const unsigned char kf_low_motion_minq[QINDEX_RANGE] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, @@ -219,9 +216,8 @@ static void save_layer_context(VP8_COMP *cpi) { LAYER_CONTEXT *lc = &cpi->layer_context[cpi->current_layer]; - // Save layer dependent coding state + /* Save layer dependent 
coding state */ lc->target_bandwidth = cpi->target_bandwidth; - //lc->target_bandwidth = cpi->oxcf.target_bandwidth; lc->starting_buffer_level = cpi->oxcf.starting_buffer_level; lc->optimal_buffer_level = cpi->oxcf.optimal_buffer_level; lc->maximum_buffer_size = cpi->oxcf.maximum_buffer_size; @@ -242,7 +238,7 @@ static void save_layer_context(VP8_COMP *cpi) lc->rate_correction_factor = cpi->rate_correction_factor; lc->key_frame_rate_correction_factor = cpi->key_frame_rate_correction_factor; lc->gf_rate_correction_factor = cpi->gf_rate_correction_factor; - lc->zbin_over_quant = cpi->zbin_over_quant; + lc->zbin_over_quant = cpi->mb.zbin_over_quant; lc->inter_frame_target = cpi->inter_frame_target; lc->total_byte_count = cpi->total_byte_count; lc->filter_level = cpi->common.filter_level; @@ -250,15 +246,15 @@ static void save_layer_context(VP8_COMP *cpi) lc->last_frame_percent_intra = cpi->last_frame_percent_intra; memcpy (lc->count_mb_ref_frame_usage, - cpi->count_mb_ref_frame_usage, - sizeof(cpi->count_mb_ref_frame_usage)); + cpi->mb.count_mb_ref_frame_usage, + sizeof(cpi->mb.count_mb_ref_frame_usage)); } static void restore_layer_context(VP8_COMP *cpi, const int layer) { LAYER_CONTEXT *lc = &cpi->layer_context[layer]; - // Restore layer dependent coding state + /* Restore layer dependent coding state */ cpi->current_layer = layer; cpi->target_bandwidth = lc->target_bandwidth; cpi->oxcf.target_bandwidth = lc->target_bandwidth; @@ -271,9 +267,7 @@ static void restore_layer_context(VP8_COMP *cpi, const int layer) cpi->buffer_level = lc->buffer_level; cpi->bits_off_target = lc->bits_off_target; cpi->total_actual_bits = lc->total_actual_bits; - //cpi->worst_quality = lc->worst_quality; cpi->active_worst_quality = lc->active_worst_quality; - //cpi->best_quality = lc->best_quality; cpi->active_best_quality = lc->active_best_quality; cpi->ni_av_qi = lc->ni_av_qi; cpi->ni_tot_qi = lc->ni_tot_qi; @@ -282,26 +276,31 @@ static void restore_layer_context(VP8_COMP *cpi, const 
int layer) cpi->rate_correction_factor = lc->rate_correction_factor; cpi->key_frame_rate_correction_factor = lc->key_frame_rate_correction_factor; cpi->gf_rate_correction_factor = lc->gf_rate_correction_factor; - cpi->zbin_over_quant = lc->zbin_over_quant; + cpi->mb.zbin_over_quant = lc->zbin_over_quant; cpi->inter_frame_target = lc->inter_frame_target; cpi->total_byte_count = lc->total_byte_count; cpi->common.filter_level = lc->filter_level; cpi->last_frame_percent_intra = lc->last_frame_percent_intra; - memcpy (cpi->count_mb_ref_frame_usage, + memcpy (cpi->mb.count_mb_ref_frame_usage, lc->count_mb_ref_frame_usage, - sizeof(cpi->count_mb_ref_frame_usage)); + sizeof(cpi->mb.count_mb_ref_frame_usage)); } static void setup_features(VP8_COMP *cpi) { - // Set up default state for MB feature flags - cpi->mb.e_mbd.segmentation_enabled = 0; - cpi->mb.e_mbd.update_mb_segmentation_map = 0; - cpi->mb.e_mbd.update_mb_segmentation_data = 0; - vpx_memset(cpi->mb.e_mbd.mb_segment_tree_probs, 255, sizeof(cpi->mb.e_mbd.mb_segment_tree_probs)); - vpx_memset(cpi->mb.e_mbd.segment_feature_data, 0, sizeof(cpi->mb.e_mbd.segment_feature_data)); + // If segmentation enabled set the update flags + if ( cpi->mb.e_mbd.segmentation_enabled ) + { + cpi->mb.e_mbd.update_mb_segmentation_map = 1; + cpi->mb.e_mbd.update_mb_segmentation_data = 1; + } + else + { + cpi->mb.e_mbd.update_mb_segmentation_map = 0; + cpi->mb.e_mbd.update_mb_segmentation_data = 0; + } cpi->mb.e_mbd.mode_ref_lf_delta_enabled = 0; cpi->mb.e_mbd.mode_ref_lf_delta_update = 0; @@ -323,7 +322,7 @@ static void dealloc_compressor_data(VP8_COMP *cpi) vpx_free(cpi->tplist); cpi->tplist = NULL; - // Delete last frame MV storage buffers + /* Delete last frame MV storage buffers */ vpx_free(cpi->lfmv); cpi->lfmv = 0; @@ -333,7 +332,7 @@ static void dealloc_compressor_data(VP8_COMP *cpi) vpx_free(cpi->lf_ref_frame); cpi->lf_ref_frame = 0; - // Delete sementation map + /* Delete sementation map */ vpx_free(cpi->segmentation_map); 
cpi->segmentation_map = 0; @@ -349,53 +348,61 @@ static void dealloc_compressor_data(VP8_COMP *cpi) vpx_free(cpi->tok); cpi->tok = 0; - // Structure used to monitor GF usage + /* Structure used to monitor GF usage */ vpx_free(cpi->gf_active_flags); cpi->gf_active_flags = 0; - // Activity mask based per mb zbin adjustments + /* Activity mask based per mb zbin adjustments */ vpx_free(cpi->mb_activity_map); cpi->mb_activity_map = 0; - vpx_free(cpi->mb_norm_activity_map); - cpi->mb_norm_activity_map = 0; vpx_free(cpi->mb.pip); cpi->mb.pip = 0; + +#if CONFIG_MULTITHREAD + vpx_free(cpi->mt_current_mb_col); + cpi->mt_current_mb_col = NULL; +#endif } static void enable_segmentation(VP8_COMP *cpi) { - // Set the appropriate feature bit + /* Set the appropriate feature bit */ cpi->mb.e_mbd.segmentation_enabled = 1; cpi->mb.e_mbd.update_mb_segmentation_map = 1; cpi->mb.e_mbd.update_mb_segmentation_data = 1; } static void disable_segmentation(VP8_COMP *cpi) { - // Clear the appropriate feature bit + /* Clear the appropriate feature bit */ cpi->mb.e_mbd.segmentation_enabled = 0; } -// Valid values for a segment are 0 to 3 -// Segmentation map is arrange as [Rows][Columns] +/* Valid values for a segment are 0 to 3 + * Segmentation map is arrange as [Rows][Columns] + */ static void set_segmentation_map(VP8_COMP *cpi, unsigned char *segmentation_map) { - // Copy in the new segmentation map + /* Copy in the new segmentation map */ vpx_memcpy(cpi->segmentation_map, segmentation_map, (cpi->common.mb_rows * cpi->common.mb_cols)); - // Signal that the map should be updated. + /* Signal that the map should be updated. */ cpi->mb.e_mbd.update_mb_segmentation_map = 1; cpi->mb.e_mbd.update_mb_segmentation_data = 1; } -// The values given for each segment can be either deltas (from the default value chosen for the frame) or absolute values. 
-// -// Valid range for abs values is (0-127 for MB_LVL_ALT_Q) , (0-63 for SEGMENT_ALT_LF) -// Valid range for delta values are (+/-127 for MB_LVL_ALT_Q) , (+/-63 for SEGMENT_ALT_LF) -// -// abs_delta = SEGMENT_DELTADATA (deltas) abs_delta = SEGMENT_ABSDATA (use the absolute values given). -// -// +/* The values given for each segment can be either deltas (from the default + * value chosen for the frame) or absolute values. + * + * Valid range for abs values is: + * (0-127 for MB_LVL_ALT_Q), (0-63 for SEGMENT_ALT_LF) + * Valid range for delta values are: + * (+/-127 for MB_LVL_ALT_Q), (+/-63 for SEGMENT_ALT_LF) + * + * abs_delta = SEGMENT_DELTADATA (deltas) + * abs_delta = SEGMENT_ABSDATA (use the absolute values given). + * + */ static void set_segment_data(VP8_COMP *cpi, signed char *feature_data, unsigned char abs_delta) { cpi->mb.e_mbd.mb_segement_abs_delta = abs_delta; @@ -411,26 +418,6 @@ static void segmentation_test_function(VP8_COMP *cpi) // Create a temporary map for segmentation data. 
CHECK_MEM_ERROR(seg_map, vpx_calloc(cpi->common.mb_rows * cpi->common.mb_cols, 1)); - // MB loop to set local segmentation map - /*for ( i = 0; i < cpi->common.mb_rows; i++ ) - { - for ( j = 0; j < cpi->common.mb_cols; j++ ) - { - //seg_map[(i*cpi->common.mb_cols) + j] = (j % 2) + ((i%2)* 2); - //if ( j < cpi->common.mb_cols/2 ) - - // Segment 1 around the edge else 0 - if ( (i == 0) || (j == 0) || (i == (cpi->common.mb_rows-1)) || (j == (cpi->common.mb_cols-1)) ) - seg_map[(i*cpi->common.mb_cols) + j] = 1; - //else if ( (i < 2) || (j < 2) || (i > (cpi->common.mb_rows-3)) || (j > (cpi->common.mb_cols-3)) ) - // seg_map[(i*cpi->common.mb_cols) + j] = 2; - //else if ( (i < 5) || (j < 5) || (i > (cpi->common.mb_rows-6)) || (j > (cpi->common.mb_cols-6)) ) - // seg_map[(i*cpi->common.mb_cols) + j] = 3; - else - seg_map[(i*cpi->common.mb_cols) + j] = 0; - } - }*/ - // Set the segmentation Map set_segmentation_map(cpi, seg_map); @@ -453,103 +440,78 @@ static void segmentation_test_function(VP8_COMP *cpi) set_segment_data(cpi, &feature_data[0][0], SEGMENT_DELTADATA); // Delete sementation map - vpx_free(seg_map); + vpx_free(seg_map); seg_map = 0; - } -// A simple function to cyclically refresh the background at a lower Q +/* A simple function to cyclically refresh the background at a lower Q */ static void cyclic_background_refresh(VP8_COMP *cpi, int Q, int lf_adjustment) { - unsigned char *seg_map; + unsigned char *seg_map = cpi->segmentation_map; signed char feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS]; int i; int block_count = cpi->cyclic_refresh_mode_max_mbs_perframe; int mbs_in_frame = cpi->common.mb_rows * cpi->common.mb_cols; - // Create a temporary map for segmentation data. - CHECK_MEM_ERROR(seg_map, vpx_calloc(cpi->common.mb_rows * cpi->common.mb_cols, 1)); + cpi->cyclic_refresh_q = Q / 2; - cpi->cyclic_refresh_q = Q; + // Set every macroblock to be eligible for update. + // For key frame this will reset seg map to 0. 
+ vpx_memset(cpi->segmentation_map, 0, mbs_in_frame); - for (i = Q; i > 0; i--) - { - if (vp8_bits_per_mb[cpi->common.frame_type][i] >= ((vp8_bits_per_mb[cpi->common.frame_type][Q]*(Q + 128)) / 64)) - //if ( vp8_bits_per_mb[cpi->common.frame_type][i] >= ((vp8_bits_per_mb[cpi->common.frame_type][Q]*((2*Q)+96))/64) ) - { - break; - } - } - - cpi->cyclic_refresh_q = i; - - // Only update for inter frames if (cpi->common.frame_type != KEY_FRAME) { - // Cycle through the macro_block rows - // MB loop to set local segmentation map - for (i = cpi->cyclic_refresh_mode_index; i < mbs_in_frame; i++) + /* Cycle through the macro_block rows */ + /* MB loop to set local segmentation map */ + i = cpi->cyclic_refresh_mode_index; + assert(i < mbs_in_frame); + do { - // If the MB is as a candidate for clean up then mark it for possible boost/refresh (segment 1) - // The segment id may get reset to 0 later if the MB gets coded anything other than last frame 0,0 - // as only (last frame 0,0) MBs are eligable for refresh : that is to say Mbs likely to be background blocks. - if (cpi->cyclic_refresh_map[i] == 0) - { - seg_map[i] = 1; - } - else - { - seg_map[i] = 0; - - // Skip blocks that have been refreshed recently anyway. - if (cpi->cyclic_refresh_map[i] < 0) - //cpi->cyclic_refresh_map[i] = cpi->cyclic_refresh_map[i] / 16; - cpi->cyclic_refresh_map[i]++; - } - - - if (block_count > 0) - block_count--; - else - break; + /* If the MB is as a candidate for clean up then mark it for + * possible boost/refresh (segment 1) The segment id may get + * reset to 0 later if the MB gets coded anything other than + * last frame 0,0 as only (last frame 0,0) MBs are eligable for + * refresh : that is to say Mbs likely to be background blocks. 
+ */ + if (cpi->cyclic_refresh_map[i] == 0) + { + seg_map[i] = 1; + block_count --; + } + else if (cpi->cyclic_refresh_map[i] < 0) + cpi->cyclic_refresh_map[i]++; + + i++; + if (i == mbs_in_frame) + i = 0; } + while(block_count && i != cpi->cyclic_refresh_mode_index); - // If we have gone through the frame reset to the start cpi->cyclic_refresh_mode_index = i; - - if (cpi->cyclic_refresh_mode_index >= mbs_in_frame) - cpi->cyclic_refresh_mode_index = 0; } - // Set the segmentation Map - set_segmentation_map(cpi, seg_map); - - // Activate segmentation. + /* Activate segmentation. */ + cpi->mb.e_mbd.update_mb_segmentation_map = 1; + cpi->mb.e_mbd.update_mb_segmentation_data = 1; enable_segmentation(cpi); - // Set up the quant segment data + /* Set up the quant segment data */ feature_data[MB_LVL_ALT_Q][0] = 0; feature_data[MB_LVL_ALT_Q][1] = (cpi->cyclic_refresh_q - Q); feature_data[MB_LVL_ALT_Q][2] = 0; feature_data[MB_LVL_ALT_Q][3] = 0; - // Set up the loop segment data + /* Set up the loop segment data */ feature_data[MB_LVL_ALT_LF][0] = 0; feature_data[MB_LVL_ALT_LF][1] = lf_adjustment; feature_data[MB_LVL_ALT_LF][2] = 0; feature_data[MB_LVL_ALT_LF][3] = 0; - // Initialise the feature data structure - // SEGMENT_DELTADATA 0, SEGMENT_ABSDATA 1 + /* Initialise the feature data structure */ set_segment_data(cpi, &feature_data[0][0], SEGMENT_DELTADATA); - // Delete sementation map - vpx_free(seg_map); - - seg_map = 0; - } static void set_default_lf_deltas(VP8_COMP *cpi) @@ -560,16 +522,21 @@ static void set_default_lf_deltas(VP8_COMP *cpi) vpx_memset(cpi->mb.e_mbd.ref_lf_deltas, 0, sizeof(cpi->mb.e_mbd.ref_lf_deltas)); vpx_memset(cpi->mb.e_mbd.mode_lf_deltas, 0, sizeof(cpi->mb.e_mbd.mode_lf_deltas)); - // Test of ref frame deltas + /* Test of ref frame deltas */ cpi->mb.e_mbd.ref_lf_deltas[INTRA_FRAME] = 2; cpi->mb.e_mbd.ref_lf_deltas[LAST_FRAME] = 0; cpi->mb.e_mbd.ref_lf_deltas[GOLDEN_FRAME] = -2; cpi->mb.e_mbd.ref_lf_deltas[ALTREF_FRAME] = -2; - 
cpi->mb.e_mbd.mode_lf_deltas[0] = 4; // BPRED - cpi->mb.e_mbd.mode_lf_deltas[1] = -2; // Zero - cpi->mb.e_mbd.mode_lf_deltas[2] = 2; // New mv - cpi->mb.e_mbd.mode_lf_deltas[3] = 4; // Split mv + cpi->mb.e_mbd.mode_lf_deltas[0] = 4; /* BPRED */ + + if(cpi->oxcf.Mode == MODE_REALTIME) + cpi->mb.e_mbd.mode_lf_deltas[1] = -12; /* Zero */ + else + cpi->mb.e_mbd.mode_lf_deltas[1] = -2; /* Zero */ + + cpi->mb.e_mbd.mode_lf_deltas[2] = 2; /* New mv */ + cpi->mb.e_mbd.mode_lf_deltas[3] = 4; /* Split mv */ } /* Convenience macros for mapping speed and mode into a continuous @@ -669,17 +636,16 @@ void vp8_set_speed_features(VP8_COMP *cpi) int last_improved_quant = sf->improved_quant; int ref_frames; - // Initialise default mode frequency sampling variables + /* Initialise default mode frequency sampling variables */ for (i = 0; i < MAX_MODES; i ++) { cpi->mode_check_freq[i] = 0; - cpi->mode_test_hit_counts[i] = 0; cpi->mode_chosen_counts[i] = 0; } - cpi->mbs_tested_so_far = 0; + cpi->mb.mbs_tested_so_far = 0; - // best quality defaults + /* best quality defaults */ sf->RD = 1; sf->search_method = NSTEP; sf->improved_quant = 1; @@ -697,17 +663,17 @@ void vp8_set_speed_features(VP8_COMP *cpi) sf->max_step_search_steps = MAX_MVSEARCH_STEPS; sf->improved_mv_pred = 1; - // default thresholds to 0 + /* default thresholds to 0 */ for (i = 0; i < MAX_MODES; i++) sf->thresh_mult[i] = 0; /* Count enabled references */ ref_frames = 1; - if (cpi->ref_frame_flags & VP8_LAST_FLAG) + if (cpi->ref_frame_flags & VP8_LAST_FRAME) ref_frames++; - if (cpi->ref_frame_flags & VP8_GOLD_FLAG) + if (cpi->ref_frame_flags & VP8_GOLD_FRAME) ref_frames++; - if (cpi->ref_frame_flags & VP8_ALT_FLAG) + if (cpi->ref_frame_flags & VP8_ALTR_FRAME) ref_frames++; /* Convert speed to continuous range, with clamping */ @@ -779,7 +745,7 @@ void vp8_set_speed_features(VP8_COMP *cpi) switch (Mode) { #if !(CONFIG_REALTIME_ONLY) - case 0: // best quality mode + case 0: /* best quality mode */ sf->first_step = 0; 
sf->max_step_search_steps = MAX_MVSEARCH_STEPS; break; @@ -800,8 +766,9 @@ void vp8_set_speed_features(VP8_COMP *cpi) sf->improved_quant = 0; sf->improved_dct = 0; - // Only do recode loop on key frames, golden frames and - // alt ref frames + /* Only do recode loop on key frames, golden frames and + * alt ref frames + */ sf->recode_loop = 2; } @@ -809,14 +776,14 @@ void vp8_set_speed_features(VP8_COMP *cpi) if (Speed > 3) { sf->auto_filter = 1; - sf->recode_loop = 0; // recode loop off - sf->RD = 0; // Turn rd off + sf->recode_loop = 0; /* recode loop off */ + sf->RD = 0; /* Turn rd off */ } if (Speed > 4) { - sf->auto_filter = 0; // Faster selection of loop filter + sf->auto_filter = 0; /* Faster selection of loop filter */ } break; @@ -839,7 +806,7 @@ void vp8_set_speed_features(VP8_COMP *cpi) } if (Speed > 2) - sf->auto_filter = 0; // Faster selection of loop filter + sf->auto_filter = 0; /* Faster selection of loop filter */ if (Speed > 3) { @@ -849,7 +816,7 @@ void vp8_set_speed_features(VP8_COMP *cpi) if (Speed > 4) { - sf->auto_filter = 0; // Faster selection of loop filter + sf->auto_filter = 0; /* Faster selection of loop filter */ sf->search_method = HEX; sf->iterative_sub_pixel = 0; } @@ -870,16 +837,16 @@ void vp8_set_speed_features(VP8_COMP *cpi) for (i = 0; i < min; i++) { - sum += cpi->error_bins[i]; + sum += cpi->mb.error_bins[i]; } total_skip = sum; sum = 0; - // i starts from 2 to make sure thresh started from 2048 + /* i starts from 2 to make sure thresh started from 2048 */ for (; i < 1024; i++) { - sum += cpi->error_bins[i]; + sum += cpi->mb.error_bins[i]; if (10 * sum >= (unsigned int)(cpi->Speed - 6)*(total_mbs - total_skip)) break; @@ -930,16 +897,17 @@ void vp8_set_speed_features(VP8_COMP *cpi) cm->filter_type = SIMPLE_LOOPFILTER; } - // This has a big hit on quality. Last resort + /* This has a big hit on quality. 
Last resort */ if (Speed >= 15) sf->half_pixel_search = 0; - vpx_memset(cpi->error_bins, 0, sizeof(cpi->error_bins)); + vpx_memset(cpi->mb.error_bins, 0, sizeof(cpi->mb.error_bins)); }; /* switch */ - // Slow quant, dct and trellis not worthwhile for first pass - // so make sure they are always turned off. + /* Slow quant, dct and trellis not worthwhile for first pass + * so make sure they are always turned off. + */ if ( cpi->pass == 1 ) { sf->improved_quant = 0; @@ -1107,27 +1075,46 @@ void vp8_alloc_compressor_data(VP8_COMP *cpi) CHECK_MEM_ERROR(cpi->tok, vpx_calloc(tokens, sizeof(*cpi->tok))); } - // Data used for real time vc mode to see if gf needs refreshing - cpi->inter_zz_count = 0; - cpi->gf_bad_count = 0; - cpi->gf_update_recommended = 0; + /* Data used for real time vc mode to see if gf needs refreshing */ + cpi->zeromv_count = 0; - // Structures used to minitor GF usage + /* Structures used to monitor GF usage */ vpx_free(cpi->gf_active_flags); CHECK_MEM_ERROR(cpi->gf_active_flags, - vpx_calloc(1, cm->mb_rows * cm->mb_cols)); + vpx_calloc(sizeof(*cpi->gf_active_flags), + cm->mb_rows * cm->mb_cols)); cpi->gf_active_count = cm->mb_rows * cm->mb_cols; vpx_free(cpi->mb_activity_map); CHECK_MEM_ERROR(cpi->mb_activity_map, - vpx_calloc(sizeof(unsigned int), + vpx_calloc(sizeof(*cpi->mb_activity_map), cm->mb_rows * cm->mb_cols)); - vpx_free(cpi->mb_norm_activity_map); - CHECK_MEM_ERROR(cpi->mb_norm_activity_map, - vpx_calloc(sizeof(unsigned int), - cm->mb_rows * cm->mb_cols)); + /* allocate memory for storing last frame's MVs for MV prediction. 
*/ + vpx_free(cpi->lfmv); + CHECK_MEM_ERROR(cpi->lfmv, vpx_calloc((cm->mb_rows+2) * (cm->mb_cols+2), + sizeof(*cpi->lfmv))); + vpx_free(cpi->lf_ref_frame_sign_bias); + CHECK_MEM_ERROR(cpi->lf_ref_frame_sign_bias, + vpx_calloc((cm->mb_rows+2) * (cm->mb_cols+2), + sizeof(*cpi->lf_ref_frame_sign_bias))); + vpx_free(cpi->lf_ref_frame); + CHECK_MEM_ERROR(cpi->lf_ref_frame, + vpx_calloc((cm->mb_rows+2) * (cm->mb_cols+2), + sizeof(*cpi->lf_ref_frame))); + + /* Create the encoder segmentation map and set all entries to 0 */ + vpx_free(cpi->segmentation_map); + CHECK_MEM_ERROR(cpi->segmentation_map, + vpx_calloc(cm->mb_rows * cm->mb_cols, + sizeof(*cpi->segmentation_map))); + cpi->cyclic_refresh_mode_index = 0; + vpx_free(cpi->active_map); + CHECK_MEM_ERROR(cpi->active_map, + vpx_calloc(cm->mb_rows * cm->mb_cols, + sizeof(*cpi->active_map))); + vpx_memset(cpi->active_map , 1, (cm->mb_rows * cm->mb_cols)); #if CONFIG_MULTITHREAD if (width < 640) @@ -1138,15 +1125,22 @@ void vp8_alloc_compressor_data(VP8_COMP *cpi) cpi->mt_sync_range = 8; else cpi->mt_sync_range = 16; + + if (cpi->oxcf.multi_threaded > 1) + { + vpx_free(cpi->mt_current_mb_col); + CHECK_MEM_ERROR(cpi->mt_current_mb_col, + vpx_malloc(sizeof(*cpi->mt_current_mb_col) * cm->mb_rows)); + } + #endif vpx_free(cpi->tplist); - - CHECK_MEM_ERROR(cpi->tplist, vpx_malloc(sizeof(TOKENLIST) * cpi->common.mb_rows)); + CHECK_MEM_ERROR(cpi->tplist, vpx_malloc(sizeof(TOKENLIST) * cm->mb_rows)); } -// Quant MOD +/* Quant MOD */ static const int q_trans[] = { 0, 1, 2, 3, 4, 5, 7, 8, @@ -1168,7 +1162,7 @@ int vp8_reverse_trans(int x) return i; return 63; -}; +} void vp8_new_frame_rate(VP8_COMP *cpi, double framerate) { if(framerate < .1) @@ -1182,16 +1176,16 @@ void vp8_new_frame_rate(VP8_COMP *cpi, double framerate) cpi->min_frame_bandwidth = (int)(cpi->av_per_frame_bandwidth * cpi->oxcf.two_pass_vbrmin_section / 100); - // Set Maximum gf/arf interval + /* Set Maximum gf/arf interval */ cpi->max_gf_interval = 
((int)(cpi->output_frame_rate / 2.0) + 2); if(cpi->max_gf_interval < 12) cpi->max_gf_interval = 12; - // Extended interval for genuinely static scenes + /* Extended interval for genuinely static scenes */ cpi->twopass.static_scene_max_gf_interval = cpi->key_frame_frequency >> 1; - // Special conditions when altr ref frame enabled in lagged compress mode + /* Special conditions when altr ref frame enabled in lagged compress mode */ if (cpi->oxcf.play_alternate && cpi->oxcf.lag_in_frames) { if (cpi->max_gf_interval > cpi->oxcf.lag_in_frames - 1) @@ -1213,7 +1207,7 @@ rescale(int val, int num, int denom) int64_t llden = denom; int64_t llval = val; - return llval * llnum / llden; + return (int)(llval * llnum / llden); } @@ -1225,7 +1219,6 @@ static void init_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) cpi->auto_gold = 1; cpi->auto_adjust_gold_quantizer = 1; - cpi->goldfreq = 7; cm->version = oxcf->Version; vp8_setup_version(cm); @@ -1244,15 +1237,15 @@ static void init_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) cpi->ref_frame_rate = cpi->frame_rate; - // change includes all joint functionality + /* change includes all joint functionality */ vp8_change_config(cpi, oxcf); - // Initialize active best and worst q and average q values. + /* Initialize active best and worst q and average q values. 
*/ cpi->active_worst_quality = cpi->oxcf.worst_allowed_q; cpi->active_best_quality = cpi->oxcf.best_allowed_q; cpi->avg_frame_qindex = cpi->oxcf.worst_allowed_q; - // Initialise the starting buffer levels + /* Initialise the starting buffer levels */ cpi->buffer_level = cpi->oxcf.starting_buffer_level; cpi->bits_off_target = cpi->oxcf.starting_buffer_level; @@ -1264,7 +1257,7 @@ static void init_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) cpi->total_actual_bits = 0; cpi->total_target_vs_actual = 0; - // Temporal scalabilty + /* Temporal scalabilty */ if (cpi->oxcf.number_of_layers > 1) { unsigned int i; @@ -1274,7 +1267,7 @@ static void init_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) { LAYER_CONTEXT *lc = &cpi->layer_context[i]; - // Layer configuration + /* Layer configuration */ lc->frame_rate = cpi->output_frame_rate / cpi->oxcf.rate_decimator[i]; lc->target_bandwidth = cpi->oxcf.target_bitrate[i] * 1000; @@ -1284,28 +1277,29 @@ static void init_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) lc->maximum_buffer_size_in_ms = oxcf->maximum_buffer_size; lc->starting_buffer_level = - rescale(oxcf->starting_buffer_level, + rescale((int)(oxcf->starting_buffer_level), lc->target_bandwidth, 1000); if (oxcf->optimal_buffer_level == 0) lc->optimal_buffer_level = lc->target_bandwidth / 8; else lc->optimal_buffer_level = - rescale(oxcf->optimal_buffer_level, + rescale((int)(oxcf->optimal_buffer_level), lc->target_bandwidth, 1000); if (oxcf->maximum_buffer_size == 0) lc->maximum_buffer_size = lc->target_bandwidth / 8; else lc->maximum_buffer_size = - rescale(oxcf->maximum_buffer_size, + rescale((int)oxcf->maximum_buffer_size, lc->target_bandwidth, 1000); - // Work out the average size of a frame within this layer + /* Work out the average size of a frame within this layer */ if (i > 0) - lc->avg_frame_size_for_layer = (cpi->oxcf.target_bitrate[i] - - cpi->oxcf.target_bitrate[i-1]) * 1000 / - (lc->frame_rate - prev_layer_frame_rate); + lc->avg_frame_size_for_layer = + 
(int)((cpi->oxcf.target_bitrate[i] - + cpi->oxcf.target_bitrate[i-1]) * 1000 / + (lc->frame_rate - prev_layer_frame_rate)); lc->active_worst_quality = cpi->oxcf.worst_allowed_q; lc->active_best_quality = cpi->oxcf.best_allowed_q; @@ -1321,7 +1315,7 @@ static void init_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) lc->rate_correction_factor = 1.0; lc->key_frame_rate_correction_factor = 1.0; lc->gf_rate_correction_factor = 1.0; - lc->inter_frame_target = 0.0; + lc->inter_frame_target = 0; prev_layer_frame_rate = lc->frame_rate; } @@ -1358,32 +1352,29 @@ static void update_layer_contexts (VP8_COMP *cpi) lc->target_bandwidth = oxcf->target_bitrate[i] * 1000; lc->starting_buffer_level = rescale( - oxcf->starting_buffer_level_in_ms, + (int)oxcf->starting_buffer_level_in_ms, lc->target_bandwidth, 1000); if (oxcf->optimal_buffer_level == 0) lc->optimal_buffer_level = lc->target_bandwidth / 8; else lc->optimal_buffer_level = rescale( - oxcf->optimal_buffer_level_in_ms, + (int)oxcf->optimal_buffer_level_in_ms, lc->target_bandwidth, 1000); if (oxcf->maximum_buffer_size == 0) lc->maximum_buffer_size = lc->target_bandwidth / 8; else lc->maximum_buffer_size = rescale( - oxcf->maximum_buffer_size_in_ms, + (int)oxcf->maximum_buffer_size_in_ms, lc->target_bandwidth, 1000); - // Work out the average size of a frame within this layer + /* Work out the average size of a frame within this layer */ if (i > 0) - lc->avg_frame_size_for_layer = (oxcf->target_bitrate[i] - - oxcf->target_bitrate[i-1]) * 1000 / - (lc->frame_rate - prev_layer_frame_rate); - - lc->active_worst_quality = oxcf->worst_allowed_q; - lc->active_best_quality = oxcf->best_allowed_q; - lc->avg_frame_qindex = oxcf->worst_allowed_q; + lc->avg_frame_size_for_layer = + (int)((oxcf->target_bitrate[i] - + oxcf->target_bitrate[i-1]) * 1000 / + (lc->frame_rate - prev_layer_frame_rate)); prev_layer_frame_rate = lc->frame_rate; } @@ -1514,10 +1505,8 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) cpi->baseline_gf_interval = 
cpi->oxcf.alt_freq ? cpi->oxcf.alt_freq : DEFAULT_GF_INTERVAL; - cpi->ref_frame_flags = VP8_ALT_FLAG | VP8_GOLD_FLAG | VP8_LAST_FLAG; + cpi->ref_frame_flags = VP8_ALTR_FRAME | VP8_GOLD_FRAME | VP8_LAST_FRAME; - //cpi->use_golden_frame_only = 0; - //cpi->use_last_frame_only = 0; cm->refresh_golden_frame = 0; cm->refresh_last_frame = 1; cm->refresh_entropy_probs = 1; @@ -1539,11 +1528,11 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) cpi->segment_encode_breakout[i] = cpi->oxcf.encode_breakout; } - // At the moment the first order values may not be > MAXQ + /* At the moment the first order values may not be > MAXQ */ if (cpi->oxcf.fixed_q > MAXQ) cpi->oxcf.fixed_q = MAXQ; - // local file playback mode == really big buffer + /* local file playback mode == really big buffer */ if (cpi->oxcf.end_usage == USAGE_LOCAL_FILE_PLAYBACK) { cpi->oxcf.starting_buffer_level = 60000; @@ -1554,41 +1543,41 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) cpi->oxcf.maximum_buffer_size_in_ms = 240000; } - // Convert target bandwidth from Kbit/s to Bit/s + /* Convert target bandwidth from Kbit/s to Bit/s */ cpi->oxcf.target_bandwidth *= 1000; cpi->oxcf.starting_buffer_level = - rescale(cpi->oxcf.starting_buffer_level, + rescale((int)cpi->oxcf.starting_buffer_level, cpi->oxcf.target_bandwidth, 1000); - // Set or reset optimal and maximum buffer levels. + /* Set or reset optimal and maximum buffer levels. 
*/ if (cpi->oxcf.optimal_buffer_level == 0) cpi->oxcf.optimal_buffer_level = cpi->oxcf.target_bandwidth / 8; else cpi->oxcf.optimal_buffer_level = - rescale(cpi->oxcf.optimal_buffer_level, + rescale((int)cpi->oxcf.optimal_buffer_level, cpi->oxcf.target_bandwidth, 1000); if (cpi->oxcf.maximum_buffer_size == 0) cpi->oxcf.maximum_buffer_size = cpi->oxcf.target_bandwidth / 8; else cpi->oxcf.maximum_buffer_size = - rescale(cpi->oxcf.maximum_buffer_size, + rescale((int)cpi->oxcf.maximum_buffer_size, cpi->oxcf.target_bandwidth, 1000); - // Set up frame rate and related parameters rate control values. + /* Set up frame rate and related parameters rate control values. */ vp8_new_frame_rate(cpi, cpi->frame_rate); - // Set absolute upper and lower quality limits + /* Set absolute upper and lower quality limits */ cpi->worst_quality = cpi->oxcf.worst_allowed_q; cpi->best_quality = cpi->oxcf.best_allowed_q; - // active values should only be modified if out of new range + /* active values should only be modified if out of new range */ if (cpi->active_worst_quality > cpi->oxcf.worst_allowed_q) { cpi->active_worst_quality = cpi->oxcf.worst_allowed_q; } - // less likely + /* less likely */ else if (cpi->active_worst_quality < cpi->oxcf.best_allowed_q) { cpi->active_worst_quality = cpi->oxcf.best_allowed_q; @@ -1597,7 +1586,7 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) { cpi->active_best_quality = cpi->oxcf.best_allowed_q; } - // less likely + /* less likely */ else if (cpi->active_best_quality > cpi->oxcf.worst_allowed_q) { cpi->active_best_quality = cpi->oxcf.worst_allowed_q; @@ -1607,14 +1596,9 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) cpi->cq_target_quality = cpi->oxcf.cq_level; - // Only allow dropped frames in buffered mode + /* Only allow dropped frames in buffered mode */ cpi->drop_frames_allowed = cpi->oxcf.allow_df && cpi->buffered_mode; - if (!cm->use_bilinear_mc_filter) - cm->mcomp_filter_type = SIXTAP; - else - cm->mcomp_filter_type = 
BILINEAR; - cpi->target_bandwidth = cpi->oxcf.target_bandwidth; @@ -1627,7 +1611,7 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) * correct. */ - // VP8 sharpness level mapping 0-7 (vs 0-10 in general VPx dialogs) + /* VP8 sharpness level mapping 0-7 (vs 0-10 in general VPx dialogs) */ if (cpi->oxcf.Sharpness > 7) cpi->oxcf.Sharpness = 7; @@ -1641,7 +1625,7 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) Scale2Ratio(cm->horiz_scale, &hr, &hs); Scale2Ratio(cm->vert_scale, &vr, &vs); - // always go to the next whole number + /* always go to the next whole number */ cm->Width = (hs - 1 + cpi->oxcf.Width * hr) / hs; cm->Height = (vs - 1 + cpi->oxcf.Height * vr) / vs; } @@ -1655,6 +1639,7 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) cm->yv12_fb[cm->lst_fb_idx].y_height || cm->yv12_fb[cm->lst_fb_idx].y_width == 0) { + dealloc_raw_frame_buffers(cpi); alloc_raw_frame_buffers(cpi); vp8_alloc_compressor_data(cpi); } @@ -1667,16 +1652,16 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) cpi->Speed = cpi->oxcf.cpu_used; - // force to allowlag to 0 if lag_in_frames is 0; + /* force to allowlag to 0 if lag_in_frames is 0; */ if (cpi->oxcf.lag_in_frames == 0) { cpi->oxcf.allow_lag = 0; } - // Limit on lag buffers as these are not currently dynamically allocated + /* Limit on lag buffers as these are not currently dynamically allocated */ else if (cpi->oxcf.lag_in_frames > MAX_LAG_BUFFERS) cpi->oxcf.lag_in_frames = MAX_LAG_BUFFERS; - // YX Temp + /* YX Temp */ cpi->alt_ref_source = NULL; cpi->is_src_frame_alt_ref = 0; @@ -1693,7 +1678,7 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) #endif #if 0 - // Experimental RD Code + /* Experimental RD Code */ cpi->frame_distortion = 0; cpi->last_frame_distortion = 0; #endif @@ -1728,7 +1713,7 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) VP8_COMMON *cm; cpi = vpx_memalign(32, sizeof(VP8_COMP)); - // Check that the CPI instance is valid + /* Check that the CPI instance 
is valid */ if (!cpi) return 0; @@ -1762,14 +1747,15 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) cpi->prob_gf_coded = 128; cpi->prob_intra_coded = 63; - // Prime the recent reference frame usage counters. - // Hereafter they will be maintained as a sort of moving average + /* Prime the recent reference frame usage counters. + * Hereafter they will be maintained as a sort of moving average + */ cpi->recent_ref_frame_usage[INTRA_FRAME] = 1; cpi->recent_ref_frame_usage[LAST_FRAME] = 1; cpi->recent_ref_frame_usage[GOLDEN_FRAME] = 1; cpi->recent_ref_frame_usage[ALTREF_FRAME] = 1; - // Set reference frame sign bias for ALTREF frame to 1 (for now) + /* Set reference frame sign bias for ALTREF frame to 1 (for now) */ cpi->common.ref_frame_sign_bias[ALTREF_FRAME] = 1; cpi->twopass.gf_decay_rate = 0; @@ -1779,21 +1765,12 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) cpi->alt_is_last = 0 ; cpi->gold_is_alt = 0 ; - // allocate memory for storing last frame's MVs for MV prediction. 
- CHECK_MEM_ERROR(cpi->lfmv, vpx_calloc((cpi->common.mb_rows+2) * (cpi->common.mb_cols+2), sizeof(int_mv))); - CHECK_MEM_ERROR(cpi->lf_ref_frame_sign_bias, vpx_calloc((cpi->common.mb_rows+2) * (cpi->common.mb_cols+2), sizeof(int))); - CHECK_MEM_ERROR(cpi->lf_ref_frame, vpx_calloc((cpi->common.mb_rows+2) * (cpi->common.mb_cols+2), sizeof(int))); - - // Create the encoder segmentation map and set all entries to 0 - CHECK_MEM_ERROR(cpi->segmentation_map, vpx_calloc(cpi->common.mb_rows * cpi->common.mb_cols, 1)); - CHECK_MEM_ERROR(cpi->active_map, vpx_calloc(cpi->common.mb_rows * cpi->common.mb_cols, 1)); - vpx_memset(cpi->active_map , 1, (cpi->common.mb_rows * cpi->common.mb_cols)); cpi->active_map_enabled = 0; #if 0 - // Experimental code for lagged and one pass - // Initialise one_pass GF frames stats - // Update stats used for GF selection + /* Experimental code for lagged and one pass */ + /* Initialise one_pass GF frames stats */ + /* Update stats used for GF selection */ if (cpi->pass == 0) { cpi->one_pass_frame_index = 0; @@ -1813,10 +1790,11 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) } #endif - // Should we use the cyclic refresh method. - // Currently this is tied to error resilliant mode + /* Should we use the cyclic refresh method. 
+ * Currently this is tied to error resilliant mode + */ cpi->cyclic_refresh_mode_enabled = cpi->oxcf.error_resilient_mode; - cpi->cyclic_refresh_mode_max_mbs_perframe = (cpi->common.mb_rows * cpi->common.mb_cols) / 40; + cpi->cyclic_refresh_mode_max_mbs_perframe = (cpi->common.mb_rows * cpi->common.mb_cols) / 5; cpi->cyclic_refresh_mode_index = 0; cpi->cyclic_refresh_q = 32; @@ -1827,9 +1805,6 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) else cpi->cyclic_refresh_map = (signed char *) NULL; - // Test function for segmentation - //segmentation_test_function( cpi); - #ifdef ENTROPY_STATS init_context_counters(); #endif @@ -1837,7 +1812,8 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) /*Initialize the feed-forward activity masking.*/ cpi->activity_avg = 90<<12; - cpi->frames_since_key = 8; // Give a sensible default for the first frame. + /* Give a sensible default for the first frame. */ + cpi->frames_since_key = 8; cpi->key_frame_frequency = cpi->oxcf.key_freq; cpi->this_key_frame_forced = 0; cpi->next_key_frame_forced = 0; @@ -1880,10 +1856,7 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) #endif -#ifndef LLONG_MAX -#define LLONG_MAX 9223372036854775807LL -#endif - cpi->first_time_stamp_ever = LLONG_MAX; + cpi->first_time_stamp_ever = 0x7FFFFFFF; cpi->frames_till_gf_update_due = 0; cpi->key_frame_count = 1; @@ -1894,22 +1867,12 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) cpi->total_byte_count = 0; cpi->drop_frame = 0; - cpi->drop_count = 0; - cpi->max_drop_count = 0; - cpi->max_consec_dropped_frames = 4; cpi->rate_correction_factor = 1.0; cpi->key_frame_rate_correction_factor = 1.0; cpi->gf_rate_correction_factor = 1.0; cpi->twopass.est_max_qcorrection_factor = 1.0; - cpi->mb.mvcost[0] = &cpi->mb.mvcosts[0][mv_max+1]; - cpi->mb.mvcost[1] = &cpi->mb.mvcosts[1][mv_max+1]; - cpi->mb.mvsadcost[0] = &cpi->mb.mvsadcosts[0][mvfp_max+1]; - cpi->mb.mvsadcost[1] = &cpi->mb.mvsadcosts[1][mvfp_max+1]; - - 
cal_mvsadcosts(cpi->mb.mvsadcost); - for (i = 0; i < KEY_FRAME_CONTEXT; i++) { cpi->prior_key_frame_distance[i] = (int)cpi->output_frame_rate; @@ -1935,7 +1898,7 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) else if (cpi->pass == 2) { size_t packet_sz = sizeof(FIRSTPASS_STATS); - int packets = oxcf->two_pass_stats_in.sz / packet_sz; + int packets = (int)(oxcf->two_pass_stats_in.sz / packet_sz); cpi->twopass.stats_in_start = oxcf->two_pass_stats_in.buf; cpi->twopass.stats_in = cpi->twopass.stats_in_start; @@ -1948,17 +1911,16 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) if (cpi->compressor_speed == 2) { - cpi->cpu_freq = 0; //vp8_get_processor_freq(); cpi->avg_encode_time = 0; cpi->avg_pick_mode_time = 0; } vp8_set_speed_features(cpi); - // Set starting values of RD threshold multipliers (128 = *1) + /* Set starting values of RD threshold multipliers (128 = *1) */ for (i = 0; i < MAX_MODES; i++) { - cpi->rd_thresh_mult[i] = 128; + cpi->mb.rd_thresh_mult[i] = 128; } #ifdef ENTROPY_STATS @@ -1966,7 +1928,11 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) #endif #if CONFIG_MULTITHREAD - vp8cx_create_encoder_threads(cpi); + if(vp8cx_create_encoder_threads(cpi)) + { + vp8_remove_compressor(&cpi); + return 0; + } #endif cpi->fn_ptr[BLOCK_16X16].sdf = vp8_sad16x16; @@ -2031,11 +1997,14 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) cpi->diamond_search_sad = vp8_diamond_search_sad; cpi->refining_search_sad = vp8_refining_search_sad; - // make sure frame 1 is okay - cpi->error_bins[0] = cpi->common.MBs; + /* make sure frame 1 is okay */ + cpi->mb.error_bins[0] = cpi->common.MBs; - //vp8cx_init_quantizer() is first called here. Add check in vp8cx_frame_init_quantizer() so that vp8cx_init_quantizer is only called later - //when needed. This will avoid unnecessary calls of vp8cx_init_quantizer() for every frame. + /* vp8cx_init_quantizer() is first called here. 
Add check in + * vp8cx_frame_init_quantizer() so that vp8cx_init_quantizer is only + * called later when needed. This will avoid unnecessary calls of + * vp8cx_init_quantizer() for every frame. + */ vp8cx_init_quantizer(cpi); vp8_loop_filter_init(cm); @@ -2043,13 +2012,33 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) cpi->common.error.setjmp = 0; #if CONFIG_MULTI_RES_ENCODING + /* Calculate # of MBs in a row in lower-resolution level image. */ if (cpi->oxcf.mr_encoder_id > 0) vp8_cal_low_res_mb_cols(cpi); + #endif - return cpi; + /* setup RD costs to MACROBLOCK struct */ + + cpi->mb.mvcost[0] = &cpi->rd_costs.mvcosts[0][mv_max+1]; + cpi->mb.mvcost[1] = &cpi->rd_costs.mvcosts[1][mv_max+1]; + cpi->mb.mvsadcost[0] = &cpi->rd_costs.mvsadcosts[0][mvfp_max+1]; + cpi->mb.mvsadcost[1] = &cpi->rd_costs.mvsadcosts[1][mvfp_max+1]; + cal_mvsadcosts(cpi->mb.mvsadcost); + + cpi->mb.mbmode_cost = cpi->rd_costs.mbmode_cost; + cpi->mb.intra_uv_mode_cost = cpi->rd_costs.intra_uv_mode_cost; + cpi->mb.bmode_costs = cpi->rd_costs.bmode_costs; + cpi->mb.inter_bmode_costs = cpi->rd_costs.inter_bmode_costs; + cpi->mb.token_costs = cpi->rd_costs.token_costs; + + /* setup block ptrs & offsets */ + vp8_setup_block_ptrs(&cpi->mb); + vp8_setup_block_dptrs(&cpi->mb.e_mbd); + + return cpi; } @@ -2099,7 +2088,7 @@ void vp8_remove_compressor(VP8_COMP **ptr) fprintf(f, "Layer\tBitrate\tAVGPsnr\tGLBPsnr\tAVPsnrP\t" "GLPsnrP\tVPXSSIM\t\n"); - for (i=0; ioxcf.number_of_layers; i++) + for (i=0; i<(int)cpi->oxcf.number_of_layers; i++) { double dr = (double)cpi->bytes_in_layer[i] * 8.0 / 1000.0 / time_encoded; @@ -2150,7 +2139,7 @@ void vp8_remove_compressor(VP8_COMP **ptr) fprintf(f, "Layer\tBitRate\tSSIM_Y\tSSIM_U\tSSIM_V\tSSIM_A\t" "Time(us)\n"); - for (i=0; ioxcf.number_of_layers; i++) + for (i=0; i<(int)cpi->oxcf.number_of_layers; i++) { double dr = (double)cpi->bytes_in_layer[i] * 8.0 / 1000.0 / time_encoded; @@ -2204,7 +2193,6 @@ void vp8_remove_compressor(VP8_COMP **ptr) fprintf(f, 
"%5d", frames_at_speed[i]); fprintf(f, "\n"); - //fprintf(f, "%10d PM %10d %10d %10d EF %10d %10d %10d\n", cpi->Speed, cpi->avg_pick_mode_time, (tot_pm/cnt_pm), cnt_pm, cpi->avg_encode_time, 0, 0); fclose(f); } @@ -2266,7 +2254,7 @@ void vp8_remove_compressor(VP8_COMP **ptr) for (i = 0; i < 10; i++) { - fprintf(fmode, " { //Above Mode : %d\n", i); + fprintf(fmode, " { /* Above Mode : %d */\n", i); for (j = 0; j < 10; j++) { @@ -2281,7 +2269,7 @@ void vp8_remove_compressor(VP8_COMP **ptr) fprintf(fmode, " %5d, ", intra_mode_stats[i][j][k]); } - fprintf(fmode, "}, // left_mode %d\n", j); + fprintf(fmode, "}, /* left_mode %d */\n", j); } @@ -2459,7 +2447,7 @@ static void generate_psnr_packet(VP8_COMP *cpi) for (i = 0; i < 4; i++) pkt.data.psnr.psnr[i] = vp8_mse2psnr(pkt.data.psnr.samples[i], 255.0, - pkt.data.psnr.sse[i]); + (double)(pkt.data.psnr.sse[i])); vpx_codec_pkt_list_add(cpi->output_pkt_list, &pkt); } @@ -2482,28 +2470,28 @@ int vp8_update_reference(VP8_COMP *cpi, int ref_frame_flags) cpi->common.refresh_alt_ref_frame = 0; cpi->common.refresh_last_frame = 0; - if (ref_frame_flags & VP8_LAST_FLAG) + if (ref_frame_flags & VP8_LAST_FRAME) cpi->common.refresh_last_frame = 1; - if (ref_frame_flags & VP8_GOLD_FLAG) + if (ref_frame_flags & VP8_GOLD_FRAME) cpi->common.refresh_golden_frame = 1; - if (ref_frame_flags & VP8_ALT_FLAG) + if (ref_frame_flags & VP8_ALTR_FRAME) cpi->common.refresh_alt_ref_frame = 1; return 0; } -int vp8_get_reference(VP8_COMP *cpi, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd) +int vp8_get_reference(VP8_COMP *cpi, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd) { VP8_COMMON *cm = &cpi->common; int ref_fb_idx; - if (ref_frame_flag == VP8_LAST_FLAG) + if (ref_frame_flag == VP8_LAST_FRAME) ref_fb_idx = cm->lst_fb_idx; - else if (ref_frame_flag == VP8_GOLD_FLAG) + else if (ref_frame_flag == VP8_GOLD_FRAME) ref_fb_idx = cm->gld_fb_idx; - else if (ref_frame_flag == VP8_ALT_FLAG) + else if (ref_frame_flag == VP8_ALTR_FRAME) 
ref_fb_idx = cm->alt_fb_idx; else return -1; @@ -2512,17 +2500,17 @@ int vp8_get_reference(VP8_COMP *cpi, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CO return 0; } -int vp8_set_reference(VP8_COMP *cpi, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd) +int vp8_set_reference(VP8_COMP *cpi, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd) { VP8_COMMON *cm = &cpi->common; int ref_fb_idx; - if (ref_frame_flag == VP8_LAST_FLAG) + if (ref_frame_flag == VP8_LAST_FRAME) ref_fb_idx = cm->lst_fb_idx; - else if (ref_frame_flag == VP8_GOLD_FLAG) + else if (ref_frame_flag == VP8_GOLD_FRAME) ref_fb_idx = cm->gld_fb_idx; - else if (ref_frame_flag == VP8_ALT_FLAG) + else if (ref_frame_flag == VP8_ALTR_FRAME) ref_fb_idx = cm->alt_fb_idx; else return -1; @@ -2583,7 +2571,7 @@ static void scale_and_extend_source(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) { VP8_COMMON *cm = &cpi->common; - // are we resizing the image + /* are we resizing the image */ if (cm->horiz_scale != 0 || cm->vert_scale != 0) { #if CONFIG_SPATIAL_RESAMPLING @@ -2611,51 +2599,57 @@ static void scale_and_extend_source(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) } -static void resize_key_frame(VP8_COMP *cpi) +static int resize_key_frame(VP8_COMP *cpi) { #if CONFIG_SPATIAL_RESAMPLING VP8_COMMON *cm = &cpi->common; - // Do we need to apply resampling for one pass cbr. - // In one pass this is more limited than in two pass cbr - // The test and any change is only made one per key frame sequence + /* Do we need to apply resampling for one pass cbr. + * In one pass this is more limited than in two pass cbr + * The test and any change is only made one per key frame sequence + */ if (cpi->oxcf.allow_spatial_resampling && (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)) { int UNINITIALIZED_IS_SAFE(hr), UNINITIALIZED_IS_SAFE(hs); int UNINITIALIZED_IS_SAFE(vr), UNINITIALIZED_IS_SAFE(vs); int new_width, new_height; - // If we are below the resample DOWN watermark then scale down a notch. 
+ /* If we are below the resample DOWN watermark then scale down a + * notch. + */ if (cpi->buffer_level < (cpi->oxcf.resample_down_water_mark * cpi->oxcf.optimal_buffer_level / 100)) { cm->horiz_scale = (cm->horiz_scale < ONETWO) ? cm->horiz_scale + 1 : ONETWO; cm->vert_scale = (cm->vert_scale < ONETWO) ? cm->vert_scale + 1 : ONETWO; } - // Should we now start scaling back up + /* Should we now start scaling back up */ else if (cpi->buffer_level > (cpi->oxcf.resample_up_water_mark * cpi->oxcf.optimal_buffer_level / 100)) { cm->horiz_scale = (cm->horiz_scale > NORMAL) ? cm->horiz_scale - 1 : NORMAL; cm->vert_scale = (cm->vert_scale > NORMAL) ? cm->vert_scale - 1 : NORMAL; } - // Get the new hieght and width + /* Get the new hieght and width */ Scale2Ratio(cm->horiz_scale, &hr, &hs); Scale2Ratio(cm->vert_scale, &vr, &vs); new_width = ((hs - 1) + (cpi->oxcf.Width * hr)) / hs; new_height = ((vs - 1) + (cpi->oxcf.Height * vr)) / vs; - // If the image size has changed we need to reallocate the buffers - // and resample the source image + /* If the image size has changed we need to reallocate the buffers + * and resample the source image + */ if ((cm->Width != new_width) || (cm->Height != new_height)) { cm->Width = new_width; cm->Height = new_height; vp8_alloc_compressor_data(cpi); scale_and_extend_source(cpi->un_scaled_source, cpi); + return 1; } } #endif + return 0; } @@ -2663,34 +2657,35 @@ static void update_alt_ref_frame_stats(VP8_COMP *cpi) { VP8_COMMON *cm = &cpi->common; - // Select an interval before next GF or altref + /* Select an interval before next GF or altref */ if (!cpi->auto_gold) - cpi->frames_till_gf_update_due = cpi->goldfreq; + cpi->frames_till_gf_update_due = DEFAULT_GF_INTERVAL; if ((cpi->pass != 2) && cpi->frames_till_gf_update_due) { cpi->current_gf_interval = cpi->frames_till_gf_update_due; - // Set the bits per frame that we should try and recover in subsequent inter frames - // to account for the extra GF spend... 
note that his does not apply for GF updates - // that occur coincident with a key frame as the extra cost of key frames is dealt - // with elsewhere. - + /* Set the bits per frame that we should try and recover in + * subsequent inter frames to account for the extra GF spend... + * note that his does not apply for GF updates that occur + * coincident with a key frame as the extra cost of key frames is + * dealt with elsewhere. + */ cpi->gf_overspend_bits += cpi->projected_frame_size; cpi->non_gf_bitrate_adjustment = cpi->gf_overspend_bits / cpi->frames_till_gf_update_due; } - // Update data structure that monitors level of reference to last GF + /* Update data structure that monitors level of reference to last GF */ vpx_memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols)); cpi->gf_active_count = cm->mb_rows * cm->mb_cols; - // this frame refreshes means next frames don't unless specified by user + /* this frame refreshes means next frames don't unless specified by user */ cpi->common.frames_since_golden = 0; - // Clear the alternate reference update pending flag. + /* Clear the alternate reference update pending flag. */ cpi->source_alt_ref_pending = 0; - // Set the alternate refernce frame active flag + /* Set the alternate refernce frame active flag */ cpi->source_alt_ref_active = 1; @@ -2699,25 +2694,29 @@ static void update_golden_frame_stats(VP8_COMP *cpi) { VP8_COMMON *cm = &cpi->common; - // Update the Golden frame usage counts. + /* Update the Golden frame usage counts. 
*/ if (cm->refresh_golden_frame) { - // Select an interval before next GF + /* Select an interval before next GF */ if (!cpi->auto_gold) - cpi->frames_till_gf_update_due = cpi->goldfreq; + cpi->frames_till_gf_update_due = DEFAULT_GF_INTERVAL; if ((cpi->pass != 2) && (cpi->frames_till_gf_update_due > 0)) { cpi->current_gf_interval = cpi->frames_till_gf_update_due; - // Set the bits per frame that we should try and recover in subsequent inter frames - // to account for the extra GF spend... note that his does not apply for GF updates - // that occur coincident with a key frame as the extra cost of key frames is dealt - // with elsewhere. + /* Set the bits per frame that we should try and recover in + * subsequent inter frames to account for the extra GF spend... + * note that his does not apply for GF updates that occur + * coincident with a key frame as the extra cost of key frames + * is dealt with elsewhere. + */ if ((cm->frame_type != KEY_FRAME) && !cpi->source_alt_ref_active) { - // Calcluate GF bits to be recovered - // Projected size - av frame bits available for inter frames for clip as a whole + /* Calcluate GF bits to be recovered + * Projected size - av frame bits available for inter + * frames for clip as a whole + */ cpi->gf_overspend_bits += (cpi->projected_frame_size - cpi->inter_frame_target); } @@ -2725,32 +2724,25 @@ static void update_golden_frame_stats(VP8_COMP *cpi) } - // Update data structure that monitors level of reference to last GF + /* Update data structure that monitors level of reference to last GF */ vpx_memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols)); cpi->gf_active_count = cm->mb_rows * cm->mb_cols; - // this frame refreshes means next frames don't unless specified by user + /* this frame refreshes means next frames don't unless specified by + * user + */ cm->refresh_golden_frame = 0; cpi->common.frames_since_golden = 0; - //if ( cm->frame_type == KEY_FRAME ) - //{ cpi->recent_ref_frame_usage[INTRA_FRAME] = 1; 
cpi->recent_ref_frame_usage[LAST_FRAME] = 1; cpi->recent_ref_frame_usage[GOLDEN_FRAME] = 1; cpi->recent_ref_frame_usage[ALTREF_FRAME] = 1; - //} - //else - //{ - // // Carry a potrtion of count over to begining of next gf sequence - // cpi->recent_ref_frame_usage[INTRA_FRAME] >>= 5; - // cpi->recent_ref_frame_usage[LAST_FRAME] >>= 5; - // cpi->recent_ref_frame_usage[GOLDEN_FRAME] >>= 5; - // cpi->recent_ref_frame_usage[ALTREF_FRAME] >>= 5; - //} - - // ******** Fixed Q test code only ************ - // If we are going to use the ALT reference for the next group of frames set a flag to say so. + + /* ******** Fixed Q test code only ************ */ + /* If we are going to use the ALT reference for the next group of + * frames set a flag to say so. + */ if (cpi->oxcf.fixed_q >= 0 && cpi->oxcf.play_alternate && !cpi->common.refresh_alt_ref_frame) { @@ -2761,14 +2753,14 @@ static void update_golden_frame_stats(VP8_COMP *cpi) if (!cpi->source_alt_ref_pending) cpi->source_alt_ref_active = 0; - // Decrement count down till next gf + /* Decrement count down till next gf */ if (cpi->frames_till_gf_update_due > 0) cpi->frames_till_gf_update_due--; } else if (!cpi->common.refresh_alt_ref_frame) { - // Decrement count down till next gf + /* Decrement count down till next gf */ if (cpi->frames_till_gf_update_due > 0) cpi->frames_till_gf_update_due--; @@ -2779,21 +2771,26 @@ static void update_golden_frame_stats(VP8_COMP *cpi) if (cpi->common.frames_since_golden > 1) { - cpi->recent_ref_frame_usage[INTRA_FRAME] += cpi->count_mb_ref_frame_usage[INTRA_FRAME]; - cpi->recent_ref_frame_usage[LAST_FRAME] += cpi->count_mb_ref_frame_usage[LAST_FRAME]; - cpi->recent_ref_frame_usage[GOLDEN_FRAME] += cpi->count_mb_ref_frame_usage[GOLDEN_FRAME]; - cpi->recent_ref_frame_usage[ALTREF_FRAME] += cpi->count_mb_ref_frame_usage[ALTREF_FRAME]; + cpi->recent_ref_frame_usage[INTRA_FRAME] += + cpi->mb.count_mb_ref_frame_usage[INTRA_FRAME]; + cpi->recent_ref_frame_usage[LAST_FRAME] += + 
cpi->mb.count_mb_ref_frame_usage[LAST_FRAME]; + cpi->recent_ref_frame_usage[GOLDEN_FRAME] += + cpi->mb.count_mb_ref_frame_usage[GOLDEN_FRAME]; + cpi->recent_ref_frame_usage[ALTREF_FRAME] += + cpi->mb.count_mb_ref_frame_usage[ALTREF_FRAME]; } } } -// This function updates the reference frame probability estimates that -// will be used during mode selection +/* This function updates the reference frame probability estimates that + * will be used during mode selection + */ static void update_rd_ref_frame_probs(VP8_COMP *cpi) { VP8_COMMON *cm = &cpi->common; - const int *const rfct = cpi->count_mb_ref_frame_usage; + const int *const rfct = cpi->mb.count_mb_ref_frame_usage; const int rf_intra = rfct[INTRA_FRAME]; const int rf_inter = rfct[LAST_FRAME] + rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME]; @@ -2810,7 +2807,9 @@ static void update_rd_ref_frame_probs(VP8_COMP *cpi) cpi->prob_gf_coded = 128; } - // update reference frame costs since we can do better than what we got last frame. + /* update reference frame costs since we can do better than what we got + * last frame. 
+ */ if (cpi->oxcf.number_of_layers == 1) { if (cpi->common.refresh_alt_ref_frame) @@ -2841,7 +2840,7 @@ static void update_rd_ref_frame_probs(VP8_COMP *cpi) } -// 1 = key, 0 = inter +/* 1 = key, 0 = inter */ static int decide_key_frame(VP8_COMP *cpi) { VP8_COMMON *cm = &cpi->common; @@ -2853,43 +2852,22 @@ static int decide_key_frame(VP8_COMP *cpi) if (cpi->Speed > 11) return 0; - // Clear down mmx registers - vp8_clear_system_state(); //__asm emms; + /* Clear down mmx registers */ + vp8_clear_system_state(); if ((cpi->compressor_speed == 2) && (cpi->Speed >= 5) && (cpi->sf.RD == 0)) { - double change = 1.0 * abs((int)(cpi->intra_error - cpi->last_intra_error)) / (1 + cpi->last_intra_error); - double change2 = 1.0 * abs((int)(cpi->prediction_error - cpi->last_prediction_error)) / (1 + cpi->last_prediction_error); + double change = 1.0 * abs((int)(cpi->mb.intra_error - + cpi->last_intra_error)) / (1 + cpi->last_intra_error); + double change2 = 1.0 * abs((int)(cpi->mb.prediction_error - + cpi->last_prediction_error)) / (1 + cpi->last_prediction_error); double minerror = cm->MBs * 256; -#if 0 - - if (10 * cpi->intra_error / (1 + cpi->prediction_error) < 15 - && cpi->prediction_error > minerror - && (change > .25 || change2 > .25)) - { - FILE *f = fopen("intra_inter.stt", "a"); - - if (cpi->prediction_error <= 0) - cpi->prediction_error = 1; - - fprintf(f, "%d %d %d %d %14.4f\n", - cm->current_video_frame, - (int) cpi->prediction_error, - (int) cpi->intra_error, - (int)((10 * cpi->intra_error) / cpi->prediction_error), - change); - - fclose(f); - } - -#endif - - cpi->last_intra_error = cpi->intra_error; - cpi->last_prediction_error = cpi->prediction_error; + cpi->last_intra_error = cpi->mb.intra_error; + cpi->last_prediction_error = cpi->mb.prediction_error; - if (10 * cpi->intra_error / (1 + cpi->prediction_error) < 15 - && cpi->prediction_error > minerror + if (10 * cpi->mb.intra_error / (1 + cpi->mb.prediction_error) < 15 + && cpi->mb.prediction_error > minerror && 
(change > .25 || change2 > .25)) { /*(change > 1.4 || change < .75)&& cpi->this_frame_percent_intra > cpi->last_frame_percent_intra + 3*/ @@ -2900,7 +2878,7 @@ static int decide_key_frame(VP8_COMP *cpi) } - // If the following are true we might as well code a key frame + /* If the following are true we might as well code a key frame */ if (((cpi->this_frame_percent_intra == 100) && (cpi->this_frame_percent_intra > (cpi->last_frame_percent_intra + 2))) || ((cpi->this_frame_percent_intra > 95) && @@ -2908,9 +2886,12 @@ static int decide_key_frame(VP8_COMP *cpi) { code_key_frame = 1; } - // in addition if the following are true and this is not a golden frame then code a key frame - // Note that on golden frames there often seems to be a pop in intra useage anyway hence this - // restriction is designed to prevent spurious key frames. The Intra pop needs to be investigated. + /* in addition if the following are true and this is not a golden frame + * then code a key frame Note that on golden frames there often seems + * to be a pop in intra useage anyway hence this restriction is + * designed to prevent spurious key frames. The Intra pop needs to be + * investigated. + */ else if (((cpi->this_frame_percent_intra > 60) && (cpi->this_frame_percent_intra > (cpi->last_frame_percent_intra * 2))) || ((cpi->this_frame_percent_intra > 75) && @@ -2942,7 +2923,7 @@ static void Pass1Encode(VP8_COMP *cpi, unsigned long *size, unsigned char *dest, void write_cx_frame_to_file(YV12_BUFFER_CONFIG *frame, int this_frame) { - // write the frame + /* write the frame */ FILE *yframe; int i; char filename[255]; @@ -2970,10 +2951,11 @@ void write_cx_frame_to_file(YV12_BUFFER_CONFIG *frame, int this_frame) fclose(yframe); } #endif -// return of 0 means drop frame +/* return of 0 means drop frame */ -// Function to test for conditions that indeicate we should loop -// back and recode a frame. +/* Function to test for conditions that indeicate we should loop + * back and recode a frame. 
+ */ static int recode_loop_test( VP8_COMP *cpi, int high_limit, int low_limit, int q, int maxq, int minq ) @@ -2981,32 +2963,33 @@ static int recode_loop_test( VP8_COMP *cpi, int force_recode = 0; VP8_COMMON *cm = &cpi->common; - // Is frame recode allowed at all - // Yes if either recode mode 1 is selected or mode two is selcted - // and the frame is a key frame. golden frame or alt_ref_frame + /* Is frame recode allowed at all + * Yes if either recode mode 1 is selected or mode two is selcted + * and the frame is a key frame. golden frame or alt_ref_frame + */ if ( (cpi->sf.recode_loop == 1) || ( (cpi->sf.recode_loop == 2) && ( (cm->frame_type == KEY_FRAME) || cm->refresh_golden_frame || cm->refresh_alt_ref_frame ) ) ) { - // General over and under shoot tests + /* General over and under shoot tests */ if ( ((cpi->projected_frame_size > high_limit) && (q < maxq)) || ((cpi->projected_frame_size < low_limit) && (q > minq)) ) { force_recode = 1; } - // Special Constrained quality tests + /* Special Constrained quality tests */ else if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) { - // Undershoot and below auto cq level + /* Undershoot and below auto cq level */ if ( (q > cpi->cq_target_quality) && (cpi->projected_frame_size < ((cpi->this_frame_target * 7) >> 3))) { force_recode = 1; } - // Severe undershoot and between auto and user cq level + /* Severe undershoot and between auto and user cq level */ else if ( (q > cpi->oxcf.cq_level) && (cpi->projected_frame_size < cpi->min_frame_bandwidth) && (cpi->active_best_quality > cpi->oxcf.cq_level)) @@ -3020,21 +3003,28 @@ static int recode_loop_test( VP8_COMP *cpi, return force_recode; } -static void update_reference_frames(VP8_COMMON *cm) +static void update_reference_frames(VP8_COMP *cpi) { + VP8_COMMON *cm = &cpi->common; YV12_BUFFER_CONFIG *yv12_fb = cm->yv12_fb; - // At this point the new frame has been encoded. - // If any buffer copy / swapping is signaled it should be done here. 
+ /* At this point the new frame has been encoded. + * If any buffer copy / swapping is signaled it should be done here. + */ if (cm->frame_type == KEY_FRAME) { - yv12_fb[cm->new_fb_idx].flags |= VP8_GOLD_FLAG | VP8_ALT_FLAG ; + yv12_fb[cm->new_fb_idx].flags |= VP8_GOLD_FRAME | VP8_ALTR_FRAME ; - yv12_fb[cm->gld_fb_idx].flags &= ~VP8_GOLD_FLAG; - yv12_fb[cm->alt_fb_idx].flags &= ~VP8_ALT_FLAG; + yv12_fb[cm->gld_fb_idx].flags &= ~VP8_GOLD_FRAME; + yv12_fb[cm->alt_fb_idx].flags &= ~VP8_ALTR_FRAME; cm->alt_fb_idx = cm->gld_fb_idx = cm->new_fb_idx; + +#if CONFIG_MULTI_RES_ENCODING + cpi->current_ref_frames[GOLDEN_FRAME] = cm->current_video_frame; + cpi->current_ref_frames[ALTREF_FRAME] = cm->current_video_frame; +#endif } else /* For non key frames */ { @@ -3042,9 +3032,13 @@ static void update_reference_frames(VP8_COMMON *cm) { assert(!cm->copy_buffer_to_arf); - cm->yv12_fb[cm->new_fb_idx].flags |= VP8_ALT_FLAG; - cm->yv12_fb[cm->alt_fb_idx].flags &= ~VP8_ALT_FLAG; + cm->yv12_fb[cm->new_fb_idx].flags |= VP8_ALTR_FRAME; + cm->yv12_fb[cm->alt_fb_idx].flags &= ~VP8_ALTR_FRAME; cm->alt_fb_idx = cm->new_fb_idx; + +#if CONFIG_MULTI_RES_ENCODING + cpi->current_ref_frames[ALTREF_FRAME] = cm->current_video_frame; +#endif } else if (cm->copy_buffer_to_arf) { @@ -3054,18 +3048,28 @@ static void update_reference_frames(VP8_COMMON *cm) { if(cm->alt_fb_idx != cm->lst_fb_idx) { - yv12_fb[cm->lst_fb_idx].flags |= VP8_ALT_FLAG; - yv12_fb[cm->alt_fb_idx].flags &= ~VP8_ALT_FLAG; + yv12_fb[cm->lst_fb_idx].flags |= VP8_ALTR_FRAME; + yv12_fb[cm->alt_fb_idx].flags &= ~VP8_ALTR_FRAME; cm->alt_fb_idx = cm->lst_fb_idx; + +#if CONFIG_MULTI_RES_ENCODING + cpi->current_ref_frames[ALTREF_FRAME] = + cpi->current_ref_frames[LAST_FRAME]; +#endif } } else /* if (cm->copy_buffer_to_arf == 2) */ { if(cm->alt_fb_idx != cm->gld_fb_idx) { - yv12_fb[cm->gld_fb_idx].flags |= VP8_ALT_FLAG; - yv12_fb[cm->alt_fb_idx].flags &= ~VP8_ALT_FLAG; + yv12_fb[cm->gld_fb_idx].flags |= VP8_ALTR_FRAME; + 
yv12_fb[cm->alt_fb_idx].flags &= ~VP8_ALTR_FRAME; cm->alt_fb_idx = cm->gld_fb_idx; + +#if CONFIG_MULTI_RES_ENCODING + cpi->current_ref_frames[ALTREF_FRAME] = + cpi->current_ref_frames[GOLDEN_FRAME]; +#endif } } } @@ -3074,9 +3078,13 @@ static void update_reference_frames(VP8_COMMON *cm) { assert(!cm->copy_buffer_to_gf); - cm->yv12_fb[cm->new_fb_idx].flags |= VP8_GOLD_FLAG; - cm->yv12_fb[cm->gld_fb_idx].flags &= ~VP8_GOLD_FLAG; + cm->yv12_fb[cm->new_fb_idx].flags |= VP8_GOLD_FRAME; + cm->yv12_fb[cm->gld_fb_idx].flags &= ~VP8_GOLD_FRAME; cm->gld_fb_idx = cm->new_fb_idx; + +#if CONFIG_MULTI_RES_ENCODING + cpi->current_ref_frames[GOLDEN_FRAME] = cm->current_video_frame; +#endif } else if (cm->copy_buffer_to_gf) { @@ -3086,18 +3094,28 @@ static void update_reference_frames(VP8_COMMON *cm) { if(cm->gld_fb_idx != cm->lst_fb_idx) { - yv12_fb[cm->lst_fb_idx].flags |= VP8_GOLD_FLAG; - yv12_fb[cm->gld_fb_idx].flags &= ~VP8_GOLD_FLAG; + yv12_fb[cm->lst_fb_idx].flags |= VP8_GOLD_FRAME; + yv12_fb[cm->gld_fb_idx].flags &= ~VP8_GOLD_FRAME; cm->gld_fb_idx = cm->lst_fb_idx; + +#if CONFIG_MULTI_RES_ENCODING + cpi->current_ref_frames[GOLDEN_FRAME] = + cpi->current_ref_frames[LAST_FRAME]; +#endif } } else /* if (cm->copy_buffer_to_gf == 2) */ { if(cm->alt_fb_idx != cm->gld_fb_idx) { - yv12_fb[cm->alt_fb_idx].flags |= VP8_GOLD_FLAG; - yv12_fb[cm->gld_fb_idx].flags &= ~VP8_GOLD_FLAG; + yv12_fb[cm->alt_fb_idx].flags |= VP8_GOLD_FRAME; + yv12_fb[cm->gld_fb_idx].flags &= ~VP8_GOLD_FRAME; cm->gld_fb_idx = cm->alt_fb_idx; + +#if CONFIG_MULTI_RES_ENCODING + cpi->current_ref_frames[GOLDEN_FRAME] = + cpi->current_ref_frames[ALTREF_FRAME]; +#endif } } } @@ -3105,14 +3123,71 @@ static void update_reference_frames(VP8_COMMON *cm) if (cm->refresh_last_frame) { - cm->yv12_fb[cm->new_fb_idx].flags |= VP8_LAST_FLAG; - cm->yv12_fb[cm->lst_fb_idx].flags &= ~VP8_LAST_FLAG; + cm->yv12_fb[cm->new_fb_idx].flags |= VP8_LAST_FRAME; + cm->yv12_fb[cm->lst_fb_idx].flags &= ~VP8_LAST_FRAME; cm->lst_fb_idx = 
cm->new_fb_idx; + +#if CONFIG_MULTI_RES_ENCODING + cpi->current_ref_frames[LAST_FRAME] = cm->current_video_frame; +#endif } + +#if CONFIG_TEMPORAL_DENOISING + if (cpi->oxcf.noise_sensitivity) + { + /* we shouldn't have to keep multiple copies as we know in advance which + * buffer we should start - for now to get something up and running + * I've chosen to copy the buffers + */ + if (cm->frame_type == KEY_FRAME) + { + int i; + vp8_yv12_copy_frame( + cpi->Source, + &cpi->denoiser.yv12_running_avg[LAST_FRAME]); + + vp8_yv12_extend_frame_borders( + &cpi->denoiser.yv12_running_avg[LAST_FRAME]); + + for (i = 2; i < MAX_REF_FRAMES - 1; i++) + vp8_yv12_copy_frame( + &cpi->denoiser.yv12_running_avg[LAST_FRAME], + &cpi->denoiser.yv12_running_avg[i]); + } + else /* For non key frames */ + { + vp8_yv12_extend_frame_borders( + &cpi->denoiser.yv12_running_avg[INTRA_FRAME]); + + if (cm->refresh_alt_ref_frame || cm->copy_buffer_to_arf) + { + vp8_yv12_copy_frame( + &cpi->denoiser.yv12_running_avg[INTRA_FRAME], + &cpi->denoiser.yv12_running_avg[ALTREF_FRAME]); + } + if (cm->refresh_golden_frame || cm->copy_buffer_to_gf) + { + vp8_yv12_copy_frame( + &cpi->denoiser.yv12_running_avg[INTRA_FRAME], + &cpi->denoiser.yv12_running_avg[GOLDEN_FRAME]); + } + if(cm->refresh_last_frame) + { + vp8_yv12_copy_frame( + &cpi->denoiser.yv12_running_avg[INTRA_FRAME], + &cpi->denoiser.yv12_running_avg[LAST_FRAME]); + } + } + + } +#endif + } void vp8_loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm) { + const FRAME_TYPE frame_type = cm->frame_type; + if (cm->no_lpf) { cm->filter_level = 0; @@ -3130,6 +3205,11 @@ void vp8_loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm) else vp8cx_pick_filter_level(cpi->Source, cpi); + if (cm->filter_level > 0) + { + vp8cx_set_alt_lf_level(cpi, cm->filter_level); + } + vpx_usec_timer_mark(&timer); cpi->time_pick_lpf += vpx_usec_timer_elapsed(&timer); } @@ -3141,17 +3221,11 @@ void vp8_loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm) if (cm->filter_level > 0) { - 
vp8cx_set_alt_lf_level(cpi, cm->filter_level); - vp8_loop_filter_frame(cm, &cpi->mb.e_mbd); + vp8_loop_filter_frame(cm, &cpi->mb.e_mbd, frame_type); } vp8_yv12_extend_frame_borders(cm->frame_to_show); -#if CONFIG_TEMPORAL_DENOISING - if (cpi->oxcf.noise_sensitivity) - { - vp8_yv12_extend_frame_borders(&cpi->denoiser.yv12_running_avg); - } -#endif + } static void encode_frame_to_data_rate @@ -3184,13 +3258,14 @@ static void encode_frame_to_data_rate int undershoot_seen = 0; #endif - int drop_mark = cpi->oxcf.drop_frames_water_mark * cpi->oxcf.optimal_buffer_level / 100; + int drop_mark = (int)(cpi->oxcf.drop_frames_water_mark * + cpi->oxcf.optimal_buffer_level / 100); int drop_mark75 = drop_mark * 2 / 3; int drop_mark50 = drop_mark / 4; int drop_mark25 = drop_mark / 8; - // Clear down mmx registers to allow floating point in what follows + /* Clear down mmx registers to allow floating point in what follows */ vp8_clear_system_state(); #if CONFIG_MULTITHREAD @@ -3202,108 +3277,125 @@ static void encode_frame_to_data_rate } #endif - // Test code for segmentation of gf/arf (0,0) - //segmentation_test_function( cpi); - if(cpi->force_next_frame_intra) { cm->frame_type = KEY_FRAME; /* delayed intra frame */ cpi->force_next_frame_intra = 0; } - // For an alt ref frame in 2 pass we skip the call to the second pass function that sets the target bandwidth + /* For an alt ref frame in 2 pass we skip the call to the second pass + * function that sets the target bandwidth + */ #if !(CONFIG_REALTIME_ONLY) if (cpi->pass == 2) { if (cpi->common.refresh_alt_ref_frame) { - cpi->per_frame_bandwidth = cpi->twopass.gf_bits; // Per frame bit target for the alt ref frame - cpi->target_bandwidth = cpi->twopass.gf_bits * cpi->output_frame_rate; // per second target bitrate + /* Per frame bit target for the alt ref frame */ + cpi->per_frame_bandwidth = cpi->twopass.gf_bits; + /* per second target bitrate */ + cpi->target_bandwidth = (int)(cpi->twopass.gf_bits * + cpi->output_frame_rate); } } 
else #endif cpi->per_frame_bandwidth = (int)(cpi->target_bandwidth / cpi->output_frame_rate); - // Default turn off buffer to buffer copying + /* Default turn off buffer to buffer copying */ cm->copy_buffer_to_gf = 0; cm->copy_buffer_to_arf = 0; - // Clear zbin over-quant value and mode boost values. - cpi->zbin_over_quant = 0; - cpi->zbin_mode_boost = 0; + /* Clear zbin over-quant value and mode boost values. */ + cpi->mb.zbin_over_quant = 0; + cpi->mb.zbin_mode_boost = 0; - // Enable or disable mode based tweaking of the zbin - // For 2 Pass Only used where GF/ARF prediction quality - // is above a threshold - cpi->zbin_mode_boost_enabled = 1; + /* Enable or disable mode based tweaking of the zbin + * For 2 Pass Only used where GF/ARF prediction quality + * is above a threshold + */ + cpi->mb.zbin_mode_boost_enabled = 1; if (cpi->pass == 2) { if ( cpi->gfu_boost <= 400 ) { - cpi->zbin_mode_boost_enabled = 0; + cpi->mb.zbin_mode_boost_enabled = 0; } } - // Current default encoder behaviour for the altref sign bias + /* Current default encoder behaviour for the altref sign bias */ if (cpi->source_alt_ref_active) cpi->common.ref_frame_sign_bias[ALTREF_FRAME] = 1; else cpi->common.ref_frame_sign_bias[ALTREF_FRAME] = 0; - // Check to see if a key frame is signalled - // For two pass with auto key frame enabled cm->frame_type may already be set, but not for one pass. + /* Check to see if a key frame is signalled + * For two pass with auto key frame enabled cm->frame_type may already + * be set, but not for one pass. 
+ */ if ((cm->current_video_frame == 0) || (cm->frame_flags & FRAMEFLAGS_KEY) || (cpi->oxcf.auto_key && (cpi->frames_since_key % cpi->key_frame_frequency == 0))) { - // Key frame from VFW/auto-keyframe/first frame + /* Key frame from VFW/auto-keyframe/first frame */ cm->frame_type = KEY_FRAME; } - // Set default state for segment and mode based loop filter update flags - cpi->mb.e_mbd.update_mb_segmentation_map = 0; - cpi->mb.e_mbd.update_mb_segmentation_data = 0; - cpi->mb.e_mbd.mode_ref_lf_delta_update = 0; +#if CONFIG_MULTI_RES_ENCODING + /* In multi-resolution encoding, frame_type is decided by lowest-resolution + * encoder. Same frame_type is adopted while encoding at other resolution. + */ + if (cpi->oxcf.mr_encoder_id) + { + LOWER_RES_FRAME_INFO* low_res_frame_info + = (LOWER_RES_FRAME_INFO*)cpi->oxcf.mr_low_res_mode_info; + + cm->frame_type = low_res_frame_info->frame_type; - // Set various flags etc to special state if it is a key frame + if(cm->frame_type != KEY_FRAME) + { + cpi->mr_low_res_mv_avail = 1; + cpi->mr_low_res_mv_avail &= !(low_res_frame_info->is_frame_dropped); + + if (cpi->ref_frame_flags & VP8_LAST_FRAME) + cpi->mr_low_res_mv_avail &= (cpi->current_ref_frames[LAST_FRAME] + == low_res_frame_info->low_res_ref_frames[LAST_FRAME]); + + if (cpi->ref_frame_flags & VP8_GOLD_FRAME) + cpi->mr_low_res_mv_avail &= (cpi->current_ref_frames[GOLDEN_FRAME] + == low_res_frame_info->low_res_ref_frames[GOLDEN_FRAME]); + + if (cpi->ref_frame_flags & VP8_ALTR_FRAME) + cpi->mr_low_res_mv_avail &= (cpi->current_ref_frames[ALTREF_FRAME] + == low_res_frame_info->low_res_ref_frames[ALTREF_FRAME]); + } + } +#endif + + /* Set various flags etc to special state if it is a key frame */ if (cm->frame_type == KEY_FRAME) { int i; - // Reset the loop filter deltas and segmentation map + // Set the loop filter deltas and segmentation map update setup_features(cpi); - // If segmentation is enabled force a map update for key frames - if (cpi->mb.e_mbd.segmentation_enabled) - 
{ - cpi->mb.e_mbd.update_mb_segmentation_map = 1; - cpi->mb.e_mbd.update_mb_segmentation_data = 1; - } - - // The alternate reference frame cannot be active for a key frame + /* The alternate reference frame cannot be active for a key frame */ cpi->source_alt_ref_active = 0; - // Reset the RD threshold multipliers to default of * 1 (128) + /* Reset the RD threshold multipliers to default of * 1 (128) */ for (i = 0; i < MAX_MODES; i++) { - cpi->rd_thresh_mult[i] = 128; + cpi->mb.rd_thresh_mult[i] = 128; } } - // Test code for segmentation - //if ( (cm->frame_type == KEY_FRAME) || ((cm->current_video_frame % 2) == 0)) - //if ( (cm->current_video_frame % 2) == 0 ) - // enable_segmentation(cpi); - //else - // disable_segmentation(cpi); - #if 0 - // Experimental code for lagged compress and one pass - // Initialise one_pass GF frames stats - // Update stats used for GF selection - //if ( cpi->pass == 0 ) + /* Experimental code for lagged compress and one pass + * Initialise one_pass GF frames stats + * Update stats used for GF selection + */ { cpi->one_pass_frame_index = cm->current_video_frame % MAX_LAG_BUFFERS; @@ -3323,8 +3415,9 @@ static void encode_frame_to_data_rate if (cpi->drop_frames_allowed) { - // The reset to decimation 0 is only done here for one pass. - // Once it is set two pass leaves decimation on till the next kf. + /* The reset to decimation 0 is only done here for one pass. + * Once it is set two pass leaves decimation on till the next kf. + */ if ((cpi->buffer_level > drop_mark) && (cpi->decimation_factor > 0)) cpi->decimation_factor --; @@ -3343,14 +3436,17 @@ static void encode_frame_to_data_rate { cpi->decimation_factor = 1; } - //vpx_log("Encoder: Decimation Factor: %d \n",cpi->decimation_factor); } - // The following decimates the frame rate according to a regular pattern (i.e. to 1/2 or 2/3 frame rate) - // This can be used to help prevent buffer under-run in CBR mode. 
Alternatively it might be desirable in - // some situations to drop frame rate but throw more bits at each frame. - // - // Note that dropping a key frame can be problematic if spatial resampling is also active + /* The following decimates the frame rate according to a regular + * pattern (i.e. to 1/2 or 2/3 frame rate) This can be used to help + * prevent buffer under-run in CBR mode. Alternatively it might be + * desirable in some situations to drop frame rate but throw more bits + * at each frame. + * + * Note that dropping a key frame can be problematic if spatial + * resampling is also active + */ if (cpi->decimation_factor > 0) { switch (cpi->decimation_factor) @@ -3366,8 +3462,10 @@ static void encode_frame_to_data_rate break; } - // Note that we should not throw out a key frame (especially when spatial resampling is enabled). - if ((cm->frame_type == KEY_FRAME)) // && cpi->oxcf.allow_spatial_resampling ) + /* Note that we should not throw out a key frame (especially when + * spatial resampling is enabled). 
+ */ + if ((cm->frame_type == KEY_FRAME)) { cpi->decimation_count = cpi->decimation_factor; } @@ -3379,6 +3477,10 @@ static void encode_frame_to_data_rate if (cpi->bits_off_target > cpi->oxcf.maximum_buffer_size) cpi->bits_off_target = cpi->oxcf.maximum_buffer_size; +#if CONFIG_MULTI_RES_ENCODING + vp8_store_drop_frame_info(cpi); +#endif + cm->current_video_frame++; cpi->frames_since_key++; @@ -3392,7 +3494,9 @@ static void encode_frame_to_data_rate { unsigned int i; - // Propagate bits saved by dropping the frame to higher layers + /* Propagate bits saved by dropping the frame to higher + * layers + */ for (i=cpi->current_layer+1; ioxcf.number_of_layers; i++) { LAYER_CONTEXT *lc = &cpi->layer_context[i]; @@ -3408,24 +3512,32 @@ static void encode_frame_to_data_rate else cpi->decimation_count = cpi->decimation_factor; } + else + cpi->decimation_count = 0; - // Decide how big to make the frame + /* Decide how big to make the frame */ if (!vp8_pick_frame_size(cpi)) { + /*TODO: 2 drop_frame and return code could be put together. */ +#if CONFIG_MULTI_RES_ENCODING + vp8_store_drop_frame_info(cpi); +#endif cm->current_video_frame++; cpi->frames_since_key++; return; } - // Reduce active_worst_allowed_q for CBR if our buffer is getting too full. - // This has a knock on effect on active best quality as well. - // For CBR if the buffer reaches its maximum level then we can no longer - // save up bits for later frames so we might as well use them up - // on the current frame. + /* Reduce active_worst_allowed_q for CBR if our buffer is getting too full. + * This has a knock on effect on active best quality as well. + * For CBR if the buffer reaches its maximum level then we can no longer + * save up bits for later frames so we might as well use them up + * on the current frame. 
+ */ if ((cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) && (cpi->buffer_level >= cpi->oxcf.optimal_buffer_level) && cpi->buffered_mode) { - int Adjustment = cpi->active_worst_quality / 4; // Max adjustment is 1/4 + /* Max adjustment is 1/4 */ + int Adjustment = cpi->active_worst_quality / 4; if (Adjustment) { @@ -3433,10 +3545,16 @@ static void encode_frame_to_data_rate if (cpi->buffer_level < cpi->oxcf.maximum_buffer_size) { - buff_lvl_step = (cpi->oxcf.maximum_buffer_size - cpi->oxcf.optimal_buffer_level) / Adjustment; + buff_lvl_step = (int) + ((cpi->oxcf.maximum_buffer_size - + cpi->oxcf.optimal_buffer_level) / + Adjustment); if (buff_lvl_step) - Adjustment = (cpi->buffer_level - cpi->oxcf.optimal_buffer_level) / buff_lvl_step; + Adjustment = (int) + ((cpi->buffer_level - + cpi->oxcf.optimal_buffer_level) / + buff_lvl_step); else Adjustment = 0; } @@ -3448,8 +3566,9 @@ static void encode_frame_to_data_rate } } - // Set an active best quality and if necessary active worst quality - // There is some odd behavior for one pass here that needs attention. + /* Set an active best quality and if necessary active worst quality + * There is some odd behavior for one pass here that needs attention. + */ if ( (cpi->pass == 2) || (cpi->ni_frames > 150)) { vp8_clear_system_state(); @@ -3465,9 +3584,10 @@ static void encode_frame_to_data_rate else cpi->active_best_quality = kf_high_motion_minq[Q]; - // Special case for key frames forced because we have reached - // the maximum key frame interval. Here force the Q to a range - // based on the ambient Q to reduce the risk of popping + /* Special case for key frames forced because we have reached + * the maximum key frame interval. 
Here force the Q to a range + * based on the ambient Q to reduce the risk of popping + */ if ( cpi->this_key_frame_forced ) { if ( cpi->active_best_quality > cpi->avg_frame_qindex * 7/8) @@ -3476,7 +3596,7 @@ static void encode_frame_to_data_rate cpi->active_best_quality = cpi->avg_frame_qindex >> 2; } } - // One pass more conservative + /* One pass more conservative */ else cpi->active_best_quality = kf_high_motion_minq[Q]; } @@ -3484,16 +3604,17 @@ static void encode_frame_to_data_rate else if (cpi->oxcf.number_of_layers==1 && (cm->refresh_golden_frame || cpi->common.refresh_alt_ref_frame)) { - // Use the lower of cpi->active_worst_quality and recent - // average Q as basis for GF/ARF Q limit unless last frame was - // a key frame. + /* Use the lower of cpi->active_worst_quality and recent + * average Q as basis for GF/ARF Q limit unless last frame was + * a key frame. + */ if ( (cpi->frames_since_key > 1) && (cpi->avg_frame_qindex < cpi->active_worst_quality) ) { Q = cpi->avg_frame_qindex; } - // For constrained quality dont allow Q less than the cq level + /* For constrained quality dont allow Q less than the cq level */ if ( (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) && (Q < cpi->cq_target_quality) ) { @@ -3509,14 +3630,14 @@ static void encode_frame_to_data_rate else cpi->active_best_quality = gf_mid_motion_minq[Q]; - // Constrained quality use slightly lower active best. + /* Constrained quality use slightly lower active best. */ if ( cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY ) { cpi->active_best_quality = cpi->active_best_quality * 15/16; } } - // One pass more conservative + /* One pass more conservative */ else cpi->active_best_quality = gf_high_motion_minq[Q]; } @@ -3524,14 +3645,16 @@ static void encode_frame_to_data_rate { cpi->active_best_quality = inter_minq[Q]; - // For the constant/constrained quality mode we dont want - // q to fall below the cq level. 
+ /* For the constant/constrained quality mode we dont want + * q to fall below the cq level. + */ if ((cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) && (cpi->active_best_quality < cpi->cq_target_quality) ) { - // If we are strongly undershooting the target rate in the last - // frames then use the user passed in cq value not the auto - // cq value. + /* If we are strongly undershooting the target rate in the last + * frames then use the user passed in cq value not the auto + * cq value. + */ if ( cpi->rolling_actual_bits < cpi->min_frame_bandwidth ) cpi->active_best_quality = cpi->oxcf.cq_level; else @@ -3539,26 +3662,33 @@ static void encode_frame_to_data_rate } } - // If CBR and the buffer is as full then it is reasonable to allow - // higher quality on the frames to prevent bits just going to waste. + /* If CBR and the buffer is as full then it is reasonable to allow + * higher quality on the frames to prevent bits just going to waste. + */ if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { - // Note that the use of >= here elliminates the risk of a devide - // by 0 error in the else if clause + /* Note that the use of >= here elliminates the risk of a devide + * by 0 error in the else if clause + */ if (cpi->buffer_level >= cpi->oxcf.maximum_buffer_size) cpi->active_best_quality = cpi->best_quality; else if (cpi->buffer_level > cpi->oxcf.optimal_buffer_level) { - int Fraction = ((cpi->buffer_level - cpi->oxcf.optimal_buffer_level) * 128) / (cpi->oxcf.maximum_buffer_size - cpi->oxcf.optimal_buffer_level); - int min_qadjustment = ((cpi->active_best_quality - cpi->best_quality) * Fraction) / 128; + int Fraction = (int) + (((cpi->buffer_level - cpi->oxcf.optimal_buffer_level) * 128) + / (cpi->oxcf.maximum_buffer_size - + cpi->oxcf.optimal_buffer_level)); + int min_qadjustment = ((cpi->active_best_quality - + cpi->best_quality) * Fraction) / 128; cpi->active_best_quality -= min_qadjustment; } } } - // Make sure constrained quality mode limits are adhered to 
for the first - // few frames of one pass encodes + /* Make sure constrained quality mode limits are adhered to for the first + * few frames of one pass encodes + */ else if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) { if ( (cm->frame_type == KEY_FRAME) || @@ -3572,7 +3702,7 @@ static void encode_frame_to_data_rate } } - // Clip the active best and worst quality values to limits + /* Clip the active best and worst quality values to limits */ if (cpi->active_worst_quality > cpi->worst_quality) cpi->active_worst_quality = cpi->worst_quality; @@ -3582,14 +3712,14 @@ static void encode_frame_to_data_rate if ( cpi->active_worst_quality < cpi->active_best_quality ) cpi->active_worst_quality = cpi->active_best_quality; - // Determine initial Q to try + /* Determine initial Q to try */ Q = vp8_regulate_q(cpi, cpi->this_frame_target); #if !(CONFIG_REALTIME_ONLY) - // Set highest allowed value for Zbin over quant + /* Set highest allowed value for Zbin over quant */ if (cm->frame_type == KEY_FRAME) - zbin_oq_high = 0; //ZBIN_OQ_MAX/16 + zbin_oq_high = 0; else if ((cpi->oxcf.number_of_layers == 1) && ((cm->refresh_alt_ref_frame || (cm->refresh_golden_frame && !cpi->source_alt_ref_active)))) { @@ -3599,15 +3729,21 @@ static void encode_frame_to_data_rate zbin_oq_high = ZBIN_OQ_MAX; #endif - // Setup background Q adjustment for error resilient mode. - // For multi-layer encodes only enable this for the base layer. - if (cpi->cyclic_refresh_mode_enabled && (cpi->current_layer==0)) + /* Setup background Q adjustment for error resilient mode. + * For multi-layer encodes only enable this for the base layer. + */ + if (cpi->cyclic_refresh_mode_enabled) + { + if (cpi->current_layer==0) cyclic_background_refresh(cpi, Q, 0); + else + disable_segmentation(cpi); + } vp8_compute_frame_size_bounds(cpi, &frame_under_shoot_limit, &frame_over_shoot_limit); #if !(CONFIG_REALTIME_ONLY) - // Limit Q range for the adaptive loop. + /* Limit Q range for the adaptive loop. 
*/ bottom_index = cpi->active_best_quality; top_index = cpi->active_worst_quality; q_low = cpi->active_best_quality; @@ -3652,11 +3788,11 @@ static void encode_frame_to_data_rate if (cm->frame_type == KEY_FRAME) { - vp8_de_noise(cpi->Source, cpi->Source, l , 1, 0); + vp8_de_noise(cm, cpi->Source, cpi->Source, l , 1, 0); } else { - vp8_de_noise(cpi->Source, cpi->Source, l , 1, 0); + vp8_de_noise(cm, cpi->Source, cpi->Source, l , 1, 0); src = cpi->Source->y_buffer; @@ -3675,16 +3811,11 @@ static void encode_frame_to_data_rate do { - vp8_clear_system_state(); //__asm emms; - - /* - if(cpi->is_src_frame_alt_ref) - Q = 127; - */ + vp8_clear_system_state(); vp8_set_quantizer(cpi, Q); - // setup skip prob for costing in mode/mv decision + /* setup skip prob for costing in mode/mv decision */ if (cpi->common.mb_no_coeff_skip) { cpi->prob_skip_false = cpi->base_skip_false_prob[Q]; @@ -3728,7 +3859,9 @@ static void encode_frame_to_data_rate */ } - //as this is for cost estimate, let's make sure it does not go extreme eitehr way + /* as this is for cost estimate, let's make sure it does not + * go extreme eitehr way + */ if (cpi->prob_skip_false < 5) cpi->prob_skip_false = 5; @@ -3754,7 +3887,22 @@ static void encode_frame_to_data_rate if (cm->frame_type == KEY_FRAME) { - resize_key_frame(cpi); + if(resize_key_frame(cpi)) + { + /* If the frame size has changed, need to reset Q, quantizer, + * and background refresh. 
+ */ + Q = vp8_regulate_q(cpi, cpi->this_frame_target); + if (cpi->cyclic_refresh_mode_enabled) + { + if (cpi->current_layer==0) + cyclic_background_refresh(cpi, Q, 0); + else + disable_segmentation(cpi); + } + vp8_set_quantizer(cpi, Q); + } + vp8_setup_key_frame(cpi); } @@ -3773,7 +3921,7 @@ static void encode_frame_to_data_rate if (cm->refresh_entropy_probs == 0) { - // save a copy for later refresh + /* save a copy for later refresh */ vpx_memcpy(&cm->lfc, &cm->fc, sizeof(cm->fc)); } @@ -3781,61 +3929,52 @@ static void encode_frame_to_data_rate vp8_update_coef_probs(cpi); - // transform / motion compensation build reconstruction frame - // +pack coef partitions + /* transform / motion compensation build reconstruction frame + * +pack coef partitions + */ vp8_encode_frame(cpi); /* cpi->projected_frame_size is not needed for RT mode */ } #else - // transform / motion compensation build reconstruction frame + /* transform / motion compensation build reconstruction frame */ vp8_encode_frame(cpi); cpi->projected_frame_size -= vp8_estimate_entropy_savings(cpi); cpi->projected_frame_size = (cpi->projected_frame_size > 0) ? cpi->projected_frame_size : 0; #endif - vp8_clear_system_state(); //__asm emms; + vp8_clear_system_state(); - // Test to see if the stats generated for this frame indicate that we should have coded a key frame - // (assuming that we didn't)! - if (cpi->pass != 2 && cpi->oxcf.auto_key && cm->frame_type != KEY_FRAME) - { - int key_frame_decision = decide_key_frame(cpi); + /* Test to see if the stats generated for this frame indicate that + * we should have coded a key frame (assuming that we didn't)! 
+ */ - if (cpi->compressor_speed == 2) - { - /* we don't do re-encoding in realtime mode - * if key frame is decided then we force it on next frame */ - cpi->force_next_frame_intra = key_frame_decision; - } + if (cpi->pass != 2 && cpi->oxcf.auto_key && cm->frame_type != KEY_FRAME + && cpi->compressor_speed != 2) + { #if !(CONFIG_REALTIME_ONLY) - else if (key_frame_decision) + if (decide_key_frame(cpi)) { - // Reset all our sizing numbers and recode + /* Reset all our sizing numbers and recode */ cm->frame_type = KEY_FRAME; vp8_pick_frame_size(cpi); - // Clear the Alt reference frame active flag when we have a key frame + /* Clear the Alt reference frame active flag when we have + * a key frame + */ cpi->source_alt_ref_active = 0; - // Reset the loop filter deltas and segmentation map + // Set the loop filter deltas and segmentation map update setup_features(cpi); - // If segmentation is enabled force a map update for key frames - if (cpi->mb.e_mbd.segmentation_enabled) - { - cpi->mb.e_mbd.update_mb_segmentation_map = 1; - cpi->mb.e_mbd.update_mb_segmentation_data = 1; - } - vp8_restore_coding_context(cpi); Q = vp8_regulate_q(cpi, cpi->this_frame_target); vp8_compute_frame_size_bounds(cpi, &frame_under_shoot_limit, &frame_over_shoot_limit); - // Limit Q range for the adaptive loop. + /* Limit Q range for the adaptive loop. */ bottom_index = cpi->active_best_quality; top_index = cpi->active_worst_quality; q_low = cpi->active_best_quality; @@ -3854,7 +3993,7 @@ static void encode_frame_to_data_rate if (frame_over_shoot_limit == 0) frame_over_shoot_limit = 1; - // Are we are overshooting and up against the limit of active max Q. + /* Are we are overshooting and up against the limit of active max Q. 
*/ if (((cpi->pass != 2) || (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)) && (Q == cpi->active_worst_quality) && (cpi->active_worst_quality < cpi->worst_quality) && @@ -3862,50 +4001,52 @@ static void encode_frame_to_data_rate { int over_size_percent = ((cpi->projected_frame_size - frame_over_shoot_limit) * 100) / frame_over_shoot_limit; - // If so is there any scope for relaxing it + /* If so is there any scope for relaxing it */ while ((cpi->active_worst_quality < cpi->worst_quality) && (over_size_percent > 0)) { cpi->active_worst_quality++; - - over_size_percent = (int)(over_size_percent * 0.96); // Assume 1 qstep = about 4% on frame size. + /* Assume 1 qstep = about 4% on frame size. */ + over_size_percent = (int)(over_size_percent * 0.96); } #if !(CONFIG_REALTIME_ONLY) top_index = cpi->active_worst_quality; #endif - // If we have updated the active max Q do not call vp8_update_rate_correction_factors() this loop. + /* If we have updated the active max Q do not call + * vp8_update_rate_correction_factors() this loop. + */ active_worst_qchanged = 1; } else active_worst_qchanged = 0; #if !(CONFIG_REALTIME_ONLY) - // Special case handling for forced key frames + /* Special case handling for forced key frames */ if ( (cm->frame_type == KEY_FRAME) && cpi->this_key_frame_forced ) { int last_q = Q; int kf_err = vp8_calc_ss_err(cpi->Source, &cm->yv12_fb[cm->new_fb_idx]); - // The key frame is not good enough + /* The key frame is not good enough */ if ( kf_err > ((cpi->ambient_err * 7) >> 3) ) { - // Lower q_high + /* Lower q_high */ q_high = (Q > q_low) ? (Q - 1) : q_low; - // Adjust Q + /* Adjust Q */ Q = (q_high + q_low) >> 1; } - // The key frame is much better than the previous frame + /* The key frame is much better than the previous frame */ else if ( kf_err < (cpi->ambient_err >> 1) ) { - // Raise q_low + /* Raise q_low */ q_low = (Q < q_high) ? 
(Q + 1) : q_high; - // Adjust Q + /* Adjust Q */ Q = (q_high + q_low + 1) >> 1; } - // Clamp Q to upper and lower limits: + /* Clamp Q to upper and lower limits: */ if (Q > q_high) Q = q_high; else if (Q < q_low) @@ -3914,7 +4055,9 @@ static void encode_frame_to_data_rate Loop = Q != last_q; } - // Is the projected frame size out of range and are we allowed to attempt to recode. + /* Is the projected frame size out of range and are we allowed + * to attempt to recode. + */ else if ( recode_loop_test( cpi, frame_over_shoot_limit, frame_under_shoot_limit, Q, top_index, bottom_index ) ) @@ -3922,45 +4065,57 @@ static void encode_frame_to_data_rate int last_q = Q; int Retries = 0; - // Frame size out of permitted range: - // Update correction factor & compute new Q to try... + /* Frame size out of permitted range. Update correction factor + * & compute new Q to try... + */ - // Frame is too large + /* Frame is too large */ if (cpi->projected_frame_size > cpi->this_frame_target) { - //if ( cpi->zbin_over_quant == 0 ) - q_low = (Q < q_high) ? (Q + 1) : q_high; // Raise Qlow as to at least the current value + /* Raise Qlow as to at least the current value */ + q_low = (Q < q_high) ? (Q + 1) : q_high; - if (cpi->zbin_over_quant > 0) // If we are using over quant do the same for zbin_oq_low - zbin_oq_low = (cpi->zbin_over_quant < zbin_oq_high) ? (cpi->zbin_over_quant + 1) : zbin_oq_high; + /* If we are using over quant do the same for zbin_oq_low */ + if (cpi->mb.zbin_over_quant > 0) + zbin_oq_low = (cpi->mb.zbin_over_quant < zbin_oq_high) ? + (cpi->mb.zbin_over_quant + 1) : zbin_oq_high; - //if ( undershoot_seen || (Q == MAXQ) ) if (undershoot_seen) { - // Update rate_correction_factor unless cpi->active_worst_quality has changed. + /* Update rate_correction_factor unless + * cpi->active_worst_quality has changed. 
+ */ if (!active_worst_qchanged) vp8_update_rate_correction_factors(cpi, 1); Q = (q_high + q_low + 1) / 2; - // Adjust cpi->zbin_over_quant (only allowed when Q is max) + /* Adjust cpi->zbin_over_quant (only allowed when Q + * is max) + */ if (Q < MAXQ) - cpi->zbin_over_quant = 0; + cpi->mb.zbin_over_quant = 0; else { - zbin_oq_low = (cpi->zbin_over_quant < zbin_oq_high) ? (cpi->zbin_over_quant + 1) : zbin_oq_high; - cpi->zbin_over_quant = (zbin_oq_high + zbin_oq_low) / 2; + zbin_oq_low = (cpi->mb.zbin_over_quant < zbin_oq_high) ? + (cpi->mb.zbin_over_quant + 1) : zbin_oq_high; + cpi->mb.zbin_over_quant = + (zbin_oq_high + zbin_oq_low) / 2; } } else { - // Update rate_correction_factor unless cpi->active_worst_quality has changed. + /* Update rate_correction_factor unless + * cpi->active_worst_quality has changed. + */ if (!active_worst_qchanged) vp8_update_rate_correction_factors(cpi, 0); Q = vp8_regulate_q(cpi, cpi->this_frame_target); - while (((Q < q_low) || (cpi->zbin_over_quant < zbin_oq_low)) && (Retries < 10)) + while (((Q < q_low) || + (cpi->mb.zbin_over_quant < zbin_oq_low)) && + (Retries < 10)) { vp8_update_rate_correction_factors(cpi, 0); Q = vp8_regulate_q(cpi, cpi->this_frame_target); @@ -3970,47 +4125,60 @@ static void encode_frame_to_data_rate overshoot_seen = 1; } - // Frame is too small + /* Frame is too small */ else { - if (cpi->zbin_over_quant == 0) - q_high = (Q > q_low) ? (Q - 1) : q_low; // Lower q_high if not using over quant - else // else lower zbin_oq_high - zbin_oq_high = (cpi->zbin_over_quant > zbin_oq_low) ? (cpi->zbin_over_quant - 1) : zbin_oq_low; + if (cpi->mb.zbin_over_quant == 0) + /* Lower q_high if not using over quant */ + q_high = (Q > q_low) ? (Q - 1) : q_low; + else + /* else lower zbin_oq_high */ + zbin_oq_high = (cpi->mb.zbin_over_quant > zbin_oq_low) ? + (cpi->mb.zbin_over_quant - 1) : zbin_oq_low; if (overshoot_seen) { - // Update rate_correction_factor unless cpi->active_worst_quality has changed. 
+ /* Update rate_correction_factor unless + * cpi->active_worst_quality has changed. + */ if (!active_worst_qchanged) vp8_update_rate_correction_factors(cpi, 1); Q = (q_high + q_low) / 2; - // Adjust cpi->zbin_over_quant (only allowed when Q is max) + /* Adjust cpi->zbin_over_quant (only allowed when Q + * is max) + */ if (Q < MAXQ) - cpi->zbin_over_quant = 0; + cpi->mb.zbin_over_quant = 0; else - cpi->zbin_over_quant = (zbin_oq_high + zbin_oq_low) / 2; + cpi->mb.zbin_over_quant = + (zbin_oq_high + zbin_oq_low) / 2; } else { - // Update rate_correction_factor unless cpi->active_worst_quality has changed. + /* Update rate_correction_factor unless + * cpi->active_worst_quality has changed. + */ if (!active_worst_qchanged) vp8_update_rate_correction_factors(cpi, 0); Q = vp8_regulate_q(cpi, cpi->this_frame_target); - // Special case reset for qlow for constrained quality. - // This should only trigger where there is very substantial - // undershoot on a frame and the auto cq level is above - // the user passsed in value. + /* Special case reset for qlow for constrained quality. + * This should only trigger where there is very substantial + * undershoot on a frame and the auto cq level is above + * the user passsed in value. + */ if ( (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) && (Q < q_low) ) { q_low = Q; } - while (((Q > q_high) || (cpi->zbin_over_quant > zbin_oq_high)) && (Retries < 10)) + while (((Q > q_high) || + (cpi->mb.zbin_over_quant > zbin_oq_high)) && + (Retries < 10)) { vp8_update_rate_correction_factors(cpi, 0); Q = vp8_regulate_q(cpi, cpi->this_frame_target); @@ -4021,14 +4189,16 @@ static void encode_frame_to_data_rate undershoot_seen = 1; } - // Clamp Q to upper and lower limits: + /* Clamp Q to upper and lower limits: */ if (Q > q_high) Q = q_high; else if (Q < q_low) Q = q_low; - // Clamp cpi->zbin_over_quant - cpi->zbin_over_quant = (cpi->zbin_over_quant < zbin_oq_low) ? zbin_oq_low : (cpi->zbin_over_quant > zbin_oq_high) ? 
zbin_oq_high : cpi->zbin_over_quant; + /* Clamp cpi->zbin_over_quant */ + cpi->mb.zbin_over_quant = (cpi->mb.zbin_over_quant < zbin_oq_low) ? + zbin_oq_low : (cpi->mb.zbin_over_quant > zbin_oq_high) ? + zbin_oq_high : cpi->mb.zbin_over_quant; Loop = Q != last_q; } @@ -4051,30 +4221,20 @@ static void encode_frame_to_data_rate while (Loop == 1); #if 0 - // Experimental code for lagged and one pass - // Update stats used for one pass GF selection - { - /* - int frames_so_far; - double frame_intra_error; - double frame_coded_error; - double frame_pcnt_inter; - double frame_pcnt_motion; - double frame_mvr; - double frame_mvr_abs; - double frame_mvc; - double frame_mvc_abs; - */ - + /* Experimental code for lagged and one pass + * Update stats used for one pass GF selection + */ + { cpi->one_pass_frame_stats[cpi->one_pass_frame_index].frame_coded_error = (double)cpi->prediction_error; cpi->one_pass_frame_stats[cpi->one_pass_frame_index].frame_intra_error = (double)cpi->intra_error; cpi->one_pass_frame_stats[cpi->one_pass_frame_index].frame_pcnt_inter = (double)(100 - cpi->this_frame_percent_intra) / 100.0; } #endif - // Special case code to reduce pulsing when key frames are forced at a - // fixed interval. Note the reconstruction error if it is the frame before - // the force key frame + /* Special case code to reduce pulsing when key frames are forced at a + * fixed interval. Note the reconstruction error if it is the frame before + * the force key frame + */ if ( cpi->next_key_frame_forced && (cpi->twopass.frames_to_key == 0) ) { cpi->ambient_err = vp8_calc_ss_err(cpi->Source, @@ -4113,13 +4273,38 @@ static void encode_frame_to_data_rate } } + /* Count last ref frame 0,0 usage on current encoded frame. */ + { + int mb_row; + int mb_col; + /* Point to beginning of MODE_INFO arrays. 
*/ + MODE_INFO *tmp = cm->mi; + + cpi->zeromv_count = 0; + + if(cm->frame_type != KEY_FRAME) + { + for (mb_row = 0; mb_row < cm->mb_rows; mb_row ++) + { + for (mb_col = 0; mb_col < cm->mb_cols; mb_col ++) + { + if(tmp->mbmi.mode == ZEROMV) + cpi->zeromv_count++; + tmp++; + } + tmp++; + } + } + } + #if CONFIG_MULTI_RES_ENCODING vp8_cal_dissimilarity(cpi); #endif - // Update the GF useage maps. - // This is done after completing the compression of a frame when all - // modes etc. are finalized but before loop filter + /* Update the GF useage maps. + * This is done after completing the compression of a frame when all + * modes etc. are finalized but before loop filter + */ if (cpi->oxcf.number_of_layers == 1) vp8_update_gf_useage_maps(cpi, cm, &cpi->mb); @@ -4134,9 +4319,10 @@ static void encode_frame_to_data_rate } #endif - // For inter frames the current default behavior is that when - // cm->refresh_golden_frame is set we copy the old GF over to the ARF buffer - // This is purely an encoder decision at present. + /* For inter frames the current default behavior is that when + * cm->refresh_golden_frame is set we copy the old GF over to the ARF buffer + * This is purely an encoder decision at present. 
+ */ if (!cpi->oxcf.error_resilient_mode && cm->refresh_golden_frame) cm->copy_buffer_to_arf = 2; else @@ -4147,7 +4333,8 @@ static void encode_frame_to_data_rate #if CONFIG_MULTITHREAD if (cpi->b_multi_threaded) { - sem_post(&cpi->h_event_start_lpf); /* start loopfilter in separate thread */ + /* start loopfilter in separate thread */ + sem_post(&cpi->h_event_start_lpf); cpi->b_lpf_running = 1; } else @@ -4156,7 +4343,7 @@ static void encode_frame_to_data_rate vp8_loopfilter_frame(cpi, cm); } - update_reference_frames(cm); + update_reference_frames(cpi); #if !(CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING) if (cpi->oxcf.error_resilient_mode) @@ -4171,7 +4358,7 @@ static void encode_frame_to_data_rate sem_wait(&cpi->h_event_end_lpf); #endif - // build the bitstream + /* build the bitstream */ vp8_pack_bitstream(cpi, dest, dest_end, size); #if CONFIG_MULTITHREAD @@ -4187,7 +4374,7 @@ static void encode_frame_to_data_rate * needed in motion search besides loopfilter */ cm->last_frame_type = cm->frame_type; - // Update rate control heuristics + /* Update rate control heuristics */ cpi->total_byte_count += (*size); cpi->projected_frame_size = (*size) << 3; @@ -4208,18 +4395,21 @@ static void encode_frame_to_data_rate vp8_adjust_key_frame_context(cpi); } - // Keep a record of ambient average Q. + /* Keep a record of ambient average Q. */ if (cm->frame_type != KEY_FRAME) cpi->avg_frame_qindex = (2 + 3 * cpi->avg_frame_qindex + cm->base_qindex) >> 2; - // Keep a record from which we can calculate the average Q excluding GF updates and key frames + /* Keep a record from which we can calculate the average Q excluding + * GF updates and key frames + */ if ((cm->frame_type != KEY_FRAME) && ((cpi->oxcf.number_of_layers > 1) || (!cm->refresh_golden_frame && !cm->refresh_alt_ref_frame))) { cpi->ni_frames++; - // Calculate the average Q for normal inter frames (not key or GFU - // frames). + /* Calculate the average Q for normal inter frames (not key or GFU + * frames). 
+ */ if ( cpi->pass == 2 ) { cpi->ni_tot_qi += Q; @@ -4227,81 +4417,62 @@ static void encode_frame_to_data_rate } else { - // Damp value for first few frames + /* Damp value for first few frames */ if (cpi->ni_frames > 150 ) { cpi->ni_tot_qi += Q; cpi->ni_av_qi = (cpi->ni_tot_qi / cpi->ni_frames); } - // For one pass, early in the clip ... average the current frame Q - // value with the worstq entered by the user as a dampening measure + /* For one pass, early in the clip ... average the current frame Q + * value with the worstq entered by the user as a dampening measure + */ else { cpi->ni_tot_qi += Q; cpi->ni_av_qi = ((cpi->ni_tot_qi / cpi->ni_frames) + cpi->worst_quality + 1) / 2; } - // If the average Q is higher than what was used in the last frame - // (after going through the recode loop to keep the frame size within range) - // then use the last frame value - 1. - // The -1 is designed to stop Q and hence the data rate, from progressively - // falling away during difficult sections, but at the same time reduce the number of - // itterations around the recode loop. + /* If the average Q is higher than what was used in the last + * frame (after going through the recode loop to keep the frame + * size within range) then use the last frame value - 1. The -1 + * is designed to stop Q and hence the data rate, from + * progressively falling away during difficult sections, but at + * the same time reduce the number of itterations around the + * recode loop. 
+ */ if (Q > cpi->ni_av_qi) cpi->ni_av_qi = Q - 1; } } -#if 0 - - // If the frame was massively oversize and we are below optimal buffer level drop next frame - if ((cpi->drop_frames_allowed) && - (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) && - (cpi->buffer_level < cpi->oxcf.drop_frames_water_mark * cpi->oxcf.optimal_buffer_level / 100) && - (cpi->projected_frame_size > (4 * cpi->this_frame_target))) - { - cpi->drop_frame = 1; - } - -#endif - - // Set the count for maximum consecutive dropped frames based upon the ratio of - // this frame size to the target average per frame bandwidth. - // (cpi->av_per_frame_bandwidth > 0) is just a sanity check to prevent / 0. - if (cpi->drop_frames_allowed && (cpi->av_per_frame_bandwidth > 0)) - { - cpi->max_drop_count = cpi->projected_frame_size / cpi->av_per_frame_bandwidth; - - if (cpi->max_drop_count > cpi->max_consec_dropped_frames) - cpi->max_drop_count = cpi->max_consec_dropped_frames; - } - - // Update the buffer level variable. - // Non-viewable frames are a special case and are treated as pure overhead. + /* Update the buffer level variable. */ + /* Non-viewable frames are a special case and are treated as pure overhead. */ if ( !cm->show_frame ) cpi->bits_off_target -= cpi->projected_frame_size; else cpi->bits_off_target += cpi->av_per_frame_bandwidth - cpi->projected_frame_size; - // Clip the buffer level to the maximum specified buffer size + /* Clip the buffer level to the maximum specified buffer size */ if (cpi->bits_off_target > cpi->oxcf.maximum_buffer_size) cpi->bits_off_target = cpi->oxcf.maximum_buffer_size; - // Rolling monitors of whether we are over or underspending used to help regulate min and Max Q in two pass. + /* Rolling monitors of whether we are over or underspending used to + * help regulate min and Max Q in two pass. 
+ */ cpi->rolling_target_bits = ((cpi->rolling_target_bits * 3) + cpi->this_frame_target + 2) / 4; cpi->rolling_actual_bits = ((cpi->rolling_actual_bits * 3) + cpi->projected_frame_size + 2) / 4; cpi->long_rolling_target_bits = ((cpi->long_rolling_target_bits * 31) + cpi->this_frame_target + 16) / 32; cpi->long_rolling_actual_bits = ((cpi->long_rolling_actual_bits * 31) + cpi->projected_frame_size + 16) / 32; - // Actual bits spent + /* Actual bits spent */ cpi->total_actual_bits += cpi->projected_frame_size; - // Debug stats + /* Debug stats */ cpi->total_target_vs_actual += (cpi->this_frame_target - cpi->projected_frame_size); cpi->buffer_level = cpi->bits_off_target; - // Propagate values to higher temporal layers + /* Propagate values to higher temporal layers */ if (cpi->oxcf.number_of_layers > 1) { unsigned int i; @@ -4309,12 +4480,13 @@ static void encode_frame_to_data_rate for (i=cpi->current_layer+1; ioxcf.number_of_layers; i++) { LAYER_CONTEXT *lc = &cpi->layer_context[i]; - int bits_off_for_this_layer = lc->target_bandwidth / lc->frame_rate - - cpi->projected_frame_size; + int bits_off_for_this_layer = + (int)(lc->target_bandwidth / lc->frame_rate - + cpi->projected_frame_size); lc->bits_off_target += bits_off_for_this_layer; - // Clip buffer level to maximum buffer size for the layer + /* Clip buffer level to maximum buffer size for the layer */ if (lc->bits_off_target > lc->maximum_buffer_size) lc->bits_off_target = lc->maximum_buffer_size; @@ -4324,7 +4496,9 @@ static void encode_frame_to_data_rate } } - // Update bits left to the kf and gf groups to account for overshoot or undershoot on these frames + /* Update bits left to the kf and gf groups to account for overshoot + * or undershoot on these frames + */ if (cm->frame_type == KEY_FRAME) { cpi->twopass.kf_group_bits += cpi->this_frame_target - cpi->projected_frame_size; @@ -4357,7 +4531,7 @@ static void encode_frame_to_data_rate cpi->last_skip_false_probs[0] = cpi->prob_skip_false; 
cpi->last_skip_probs_q[0] = cm->base_qindex; - //update the baseline + /* update the baseline */ cpi->base_skip_false_prob[cm->base_qindex] = cpi->prob_skip_false; } @@ -4367,7 +4541,7 @@ static void encode_frame_to_data_rate { FILE *f = fopen("tmp.stt", "a"); - vp8_clear_system_state(); //__asm emms; + vp8_clear_system_state(); if (cpi->twopass.total_left_stats.coded_error != 0.0) fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d %10d %6d %6d" @@ -4383,7 +4557,6 @@ static void encode_frame_to_data_rate cpi->active_best_quality, cpi->active_worst_quality, cpi->ni_av_qi, cpi->cq_target_quality, cpi->zbin_over_quant, - //cpi->avg_frame_qindex, cpi->zbin_over_quant, cm->refresh_golden_frame, cm->refresh_alt_ref_frame, cm->frame_type, cpi->gfu_boost, cpi->twopass.est_max_qcorrection_factor, @@ -4406,7 +4579,6 @@ static void encode_frame_to_data_rate cpi->active_best_quality, cpi->active_worst_quality, cpi->ni_av_qi, cpi->cq_target_quality, cpi->zbin_over_quant, - //cpi->avg_frame_qindex, cpi->zbin_over_quant, cm->refresh_golden_frame, cm->refresh_alt_ref_frame, cm->frame_type, cpi->gfu_boost, cpi->twopass.est_max_qcorrection_factor, @@ -4436,10 +4608,6 @@ static void encode_frame_to_data_rate #endif - // If this was a kf or Gf note the Q - if ((cm->frame_type == KEY_FRAME) || cm->refresh_golden_frame || cm->refresh_alt_ref_frame) - cm->last_kf_gf_q = cm->base_qindex; - if (cm->refresh_golden_frame == 1) cm->frame_flags = cm->frame_flags | FRAMEFLAGS_GOLDEN; else @@ -4451,49 +4619,55 @@ static void encode_frame_to_data_rate cm->frame_flags = cm->frame_flags&~FRAMEFLAGS_ALTREF; - if (cm->refresh_last_frame & cm->refresh_golden_frame) // both refreshed + if (cm->refresh_last_frame & cm->refresh_golden_frame) + /* both refreshed */ cpi->gold_is_last = 1; - else if (cm->refresh_last_frame ^ cm->refresh_golden_frame) // 1 refreshed but not the other + else if (cm->refresh_last_frame ^ cm->refresh_golden_frame) + /* 1 refreshed but not the other */ cpi->gold_is_last = 0; - 
if (cm->refresh_last_frame & cm->refresh_alt_ref_frame) // both refreshed + if (cm->refresh_last_frame & cm->refresh_alt_ref_frame) + /* both refreshed */ cpi->alt_is_last = 1; - else if (cm->refresh_last_frame ^ cm->refresh_alt_ref_frame) // 1 refreshed but not the other + else if (cm->refresh_last_frame ^ cm->refresh_alt_ref_frame) + /* 1 refreshed but not the other */ cpi->alt_is_last = 0; - if (cm->refresh_alt_ref_frame & cm->refresh_golden_frame) // both refreshed + if (cm->refresh_alt_ref_frame & cm->refresh_golden_frame) + /* both refreshed */ cpi->gold_is_alt = 1; - else if (cm->refresh_alt_ref_frame ^ cm->refresh_golden_frame) // 1 refreshed but not the other + else if (cm->refresh_alt_ref_frame ^ cm->refresh_golden_frame) + /* 1 refreshed but not the other */ cpi->gold_is_alt = 0; - cpi->ref_frame_flags = VP8_ALT_FLAG | VP8_GOLD_FLAG | VP8_LAST_FLAG; + cpi->ref_frame_flags = VP8_ALTR_FRAME | VP8_GOLD_FRAME | VP8_LAST_FRAME; if (cpi->gold_is_last) - cpi->ref_frame_flags &= ~VP8_GOLD_FLAG; + cpi->ref_frame_flags &= ~VP8_GOLD_FRAME; if (cpi->alt_is_last) - cpi->ref_frame_flags &= ~VP8_ALT_FLAG; + cpi->ref_frame_flags &= ~VP8_ALTR_FRAME; if (cpi->gold_is_alt) - cpi->ref_frame_flags &= ~VP8_ALT_FLAG; + cpi->ref_frame_flags &= ~VP8_ALTR_FRAME; if (!cpi->oxcf.error_resilient_mode) { if (cpi->oxcf.play_alternate && cm->refresh_alt_ref_frame && (cm->frame_type != KEY_FRAME)) - // Update the alternate reference frame stats as appropriate. + /* Update the alternate reference frame stats as appropriate. */ update_alt_ref_frame_stats(cpi); else - // Update the Golden frame stats as appropriate. + /* Update the Golden frame stats as appropriate. */ update_golden_frame_stats(cpi); } if (cm->frame_type == KEY_FRAME) { - // Tell the caller that the frame was coded as a key frame + /* Tell the caller that the frame was coded as a key frame */ *frame_flags = cm->frame_flags | FRAMEFLAGS_KEY; - // As this frame is a key frame the next defaults to an inter frame. 
+ /* As this frame is a key frame the next defaults to an inter frame. */ cm->frame_type = INTER_FRAME; cpi->last_frame_percent_intra = 100; @@ -4505,20 +4679,24 @@ static void encode_frame_to_data_rate cpi->last_frame_percent_intra = cpi->this_frame_percent_intra; } - // Clear the one shot update flags for segmentation map and mode/ref loop filter deltas. + /* Clear the one shot update flags for segmentation map and mode/ref + * loop filter deltas. + */ cpi->mb.e_mbd.update_mb_segmentation_map = 0; cpi->mb.e_mbd.update_mb_segmentation_data = 0; cpi->mb.e_mbd.mode_ref_lf_delta_update = 0; - // Dont increment frame counters if this was an altref buffer update not a real frame + /* Dont increment frame counters if this was an altref buffer update + * not a real frame + */ if (cm->show_frame) { cm->current_video_frame++; cpi->frames_since_key++; } - // reset to normal state now that we are done. + /* reset to normal state now that we are done. */ @@ -4534,67 +4712,11 @@ static void encode_frame_to_data_rate } #endif - // DEBUG - //vp8_write_yuv_frame("encoder_recon.yuv", cm->frame_to_show); + /* DEBUG */ + /* vp8_write_yuv_frame("encoder_recon.yuv", cm->frame_to_show); */ } - - -static void check_gf_quality(VP8_COMP *cpi) -{ - VP8_COMMON *cm = &cpi->common; - int gf_active_pct = (100 * cpi->gf_active_count) / (cm->mb_rows * cm->mb_cols); - int gf_ref_usage_pct = (cpi->count_mb_ref_frame_usage[GOLDEN_FRAME] * 100) / (cm->mb_rows * cm->mb_cols); - int last_ref_zz_useage = (cpi->inter_zz_count * 100) / (cm->mb_rows * cm->mb_cols); - - // Gf refresh is not currently being signalled - if (cpi->gf_update_recommended == 0) - { - if (cpi->common.frames_since_golden > 7) - { - // Low use of gf - if ((gf_active_pct < 10) || ((gf_active_pct + gf_ref_usage_pct) < 15)) - { - // ...but last frame zero zero usage is reasonbable so a new gf might be appropriate - if (last_ref_zz_useage >= 25) - { - cpi->gf_bad_count ++; - - if (cpi->gf_bad_count >= 8) // Check that the condition is 
stable - { - cpi->gf_update_recommended = 1; - cpi->gf_bad_count = 0; - } - } - else - cpi->gf_bad_count = 0; // Restart count as the background is not stable enough - } - else - cpi->gf_bad_count = 0; // Gf useage has picked up so reset count - } - } - // If the signal is set but has not been read should we cancel it. - else if (last_ref_zz_useage < 15) - { - cpi->gf_update_recommended = 0; - cpi->gf_bad_count = 0; - } - -#if 0 - { - FILE *f = fopen("gfneeded.stt", "a"); - fprintf(f, "%10d %10d %10d %10d %10ld \n", - cm->current_video_frame, - cpi->common.frames_since_golden, - gf_active_pct, gf_ref_usage_pct, - cpi->gf_update_recommended); - fclose(f); - } - -#endif -} - #if !(CONFIG_REALTIME_ONLY) static void Pass2Encode(VP8_COMP *cpi, unsigned long *size, unsigned char *dest, unsigned char * dest_end, unsigned int *frame_flags) { @@ -4614,7 +4736,7 @@ static void Pass2Encode(VP8_COMP *cpi, unsigned long *size, unsigned char *dest, } #endif -//For ARM NEON, d8-d15 are callee-saved registers, and need to be saved by us. +/* For ARM NEON, d8-d15 are callee-saved registers, and need to be saved. */ #if HAVE_NEON extern void vp8_push_neon(int64_t *store); extern void vp8_pop_neon(int64_t *store); @@ -4721,7 +4843,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l cpi->source = NULL; #if !(CONFIG_REALTIME_ONLY) - // Should we code an alternate reference frame + /* Should we code an alternate reference frame */ if (cpi->oxcf.error_resilient_mode == 0 && cpi->oxcf.play_alternate && cpi->source_alt_ref_pending) @@ -4742,7 +4864,8 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l cm->refresh_golden_frame = 0; cm->refresh_last_frame = 0; cm->show_frame = 0; - cpi->source_alt_ref_pending = 0; // Clear Pending alt Ref flag. + /* Clear Pending alt Ref flag. 
*/ + cpi->source_alt_ref_pending = 0; cpi->is_src_frame_alt_ref = 0; } } @@ -4814,7 +4937,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l cpi->last_end_time_stamp_seen = cpi->source->ts_start; } - // adjust frame rates based on timestamps given + /* adjust frame rates based on timestamps given */ if (cm->show_frame) { int64_t this_duration; @@ -4832,9 +4955,10 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l this_duration = cpi->source->ts_end - cpi->last_end_time_stamp_seen; last_duration = cpi->last_end_time_stamp_seen - cpi->last_time_stamp_seen; - // do a step update if the duration changes by 10% + /* do a step update if the duration changes by 10% */ if (last_duration) - step = ((this_duration - last_duration) * 10 / last_duration); + step = (int)(((this_duration - last_duration) * + 10 / last_duration)); } if (this_duration) @@ -4849,7 +4973,8 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l * frame rate. If we haven't seen 1 second yet, then average * over the whole interval seen. 
*/ - interval = cpi->source->ts_end - cpi->first_time_stamp_ever; + interval = (double)(cpi->source->ts_end - + cpi->first_time_stamp_ever); if(interval > 10000000.0) interval = 10000000; @@ -4862,9 +4987,9 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l if (cpi->oxcf.number_of_layers > 1) { - int i; + unsigned int i; - // Update frame rates for each layer + /* Update frame rates for each layer */ for (i=0; ioxcf.number_of_layers; i++) { LAYER_CONTEXT *lc = &cpi->layer_context[i]; @@ -4886,7 +5011,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l update_layer_contexts (cpi); - // Restore layer specific context & set frame rate + /* Restore layer specific context & set frame rate */ layer = cpi->oxcf.layer_id[ cm->current_video_frame % cpi->oxcf.periodicity]; restore_layer_context (cpi, layer); @@ -4895,12 +5020,12 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l if (cpi->compressor_speed == 2) { - if (cpi->oxcf.number_of_layers == 1) - check_gf_quality(cpi); vpx_usec_timer_start(&tsctimer); vpx_usec_timer_start(&ticktimer); } + cpi->lf_zeromv_pct = (cpi->zeromv_count * 100)/cm->MBs; + #if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING { int i; @@ -4924,11 +5049,11 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l } #endif - // start with a 0 size frame + /* start with a 0 size frame */ *size = 0; - // Clear down mmx registers - vp8_clear_system_state(); //__asm emms; + /* Clear down mmx registers */ + vp8_clear_system_state(); cm->frame_type = INTER_FRAME; cm->frame_flags = *frame_flags; @@ -4937,7 +5062,6 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l if (cm->refresh_alt_ref_frame) { - //cm->refresh_golden_frame = 1; cm->refresh_golden_frame = 0; cm->refresh_last_frame = 0; } @@ -4982,7 +5106,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l 
vpx_usec_timer_mark(&tsctimer); vpx_usec_timer_mark(&ticktimer); - duration = vpx_usec_timer_elapsed(&ticktimer); + duration = (int)(vpx_usec_timer_elapsed(&ticktimer)); duration2 = (unsigned int)((double)duration / 2); if (cm->frame_type != KEY_FRAME) @@ -4995,7 +5119,6 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l if (duration2) { - //if(*frame_flags!=1) { if (cpi->avg_pick_mode_time == 0) @@ -5012,8 +5135,8 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l vpx_memcpy(&cm->fc, &cm->lfc, sizeof(cm->fc)); } - // Save the contexts separately for alt ref, gold and last. - // (TODO jbb -> Optimize this with pointers to avoid extra copies. ) + /* Save the contexts separately for alt ref, gold and last. */ + /* (TODO jbb -> Optimize this with pointers to avoid extra copies. ) */ if(cm->refresh_alt_ref_frame) vpx_memcpy(&cpi->lfc_a, &cm->fc, sizeof(cm->fc)); @@ -5023,12 +5146,12 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l if(cm->refresh_last_frame) vpx_memcpy(&cpi->lfc_n, &cm->fc, sizeof(cm->fc)); - // if its a dropped frame honor the requests on subsequent frames + /* if its a dropped frame honor the requests on subsequent frames */ if (*size > 0) { cpi->droppable = !frame_is_reference(cpi); - // return to normal state + /* return to normal state */ cm->refresh_entropy_probs = 1; cm->refresh_alt_ref_frame = 0; cm->refresh_golden_frame = 0; @@ -5037,7 +5160,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l } - // Save layer specific state + /* Save layer specific state */ if (cpi->oxcf.number_of_layers > 1) save_layer_context (cpi); @@ -5062,14 +5185,14 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l if (cpi->b_calculate_psnr) { - double ye,ue,ve; + uint64_t ye,ue,ve; double frame_psnr; YV12_BUFFER_CONFIG *orig = cpi->Source; YV12_BUFFER_CONFIG *recon = cpi->common.frame_to_show; int 
y_samples = orig->y_height * orig->y_width ; int uv_samples = orig->uv_height * orig->uv_width ; int t_samples = y_samples + 2 * uv_samples; - int64_t sq_error, sq_error2; + double sq_error, sq_error2; ye = calc_plane_error(orig->y_buffer, orig->y_stride, recon->y_buffer, recon->y_stride, orig->y_width, orig->y_height); @@ -5080,13 +5203,13 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l ve = calc_plane_error(orig->v_buffer, orig->uv_stride, recon->v_buffer, recon->uv_stride, orig->uv_width, orig->uv_height); - sq_error = ye + ue + ve; + sq_error = (double)(ye + ue + ve); frame_psnr = vp8_mse2psnr(t_samples, 255.0, sq_error); - cpi->total_y += vp8_mse2psnr(y_samples, 255.0, ye); - cpi->total_u += vp8_mse2psnr(uv_samples, 255.0, ue); - cpi->total_v += vp8_mse2psnr(uv_samples, 255.0, ve); + cpi->total_y += vp8_mse2psnr(y_samples, 255.0, (double)ye); + cpi->total_u += vp8_mse2psnr(uv_samples, 255.0, (double)ue); + cpi->total_v += vp8_mse2psnr(uv_samples, 255.0, (double)ve); cpi->total_sq_error += sq_error; cpi->total += frame_psnr; #if CONFIG_POSTPROC @@ -5095,7 +5218,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l double frame_psnr2, frame_ssim2 = 0; double weight = 0; - vp8_deblock(cm->frame_to_show, &cm->post_proc_buffer, cm->filter_level * 10 / 6, 1, 0); + vp8_deblock(cm, cm->frame_to_show, &cm->post_proc_buffer, cm->filter_level * 10 / 6, 1, 0); vp8_clear_system_state(); ye = calc_plane_error(orig->y_buffer, orig->y_stride, @@ -5107,13 +5230,16 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l ve = calc_plane_error(orig->v_buffer, orig->uv_stride, pp->v_buffer, pp->uv_stride, orig->uv_width, orig->uv_height); - sq_error2 = ye + ue + ve; + sq_error2 = (double)(ye + ue + ve); frame_psnr2 = vp8_mse2psnr(t_samples, 255.0, sq_error2); - cpi->totalp_y += vp8_mse2psnr(y_samples, 255.0, ye); - cpi->totalp_u += vp8_mse2psnr(uv_samples, 255.0, ue); - cpi->totalp_v += 
vp8_mse2psnr(uv_samples, 255.0, ve); + cpi->totalp_y += vp8_mse2psnr(y_samples, + 255.0, (double)ye); + cpi->totalp_u += vp8_mse2psnr(uv_samples, + 255.0, (double)ue); + cpi->totalp_v += vp8_mse2psnr(uv_samples, + 255.0, (double)ve); cpi->total_sq_error2 += sq_error2; cpi->totalp += frame_psnr2; @@ -5125,7 +5251,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l if (cpi->oxcf.number_of_layers > 1) { - int i; + unsigned int i; for (i=cpi->current_layer; ioxcf.number_of_layers; i++) @@ -5153,7 +5279,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l if (cpi->oxcf.number_of_layers > 1) { - int i; + unsigned int i; for (i=cpi->current_layer; ioxcf.number_of_layers; i++) @@ -5251,7 +5377,7 @@ int vp8_get_preview_raw_frame(VP8_COMP *cpi, YV12_BUFFER_CONFIG *dest, vp8_ppfla ret = -1; } -#endif //!CONFIG_POSTPROC +#endif vp8_clear_system_state(); return ret; } @@ -5260,29 +5386,53 @@ int vp8_get_preview_raw_frame(VP8_COMP *cpi, YV12_BUFFER_CONFIG *dest, vp8_ppfla int vp8_set_roimap(VP8_COMP *cpi, unsigned char *map, unsigned int rows, unsigned int cols, int delta_q[4], int delta_lf[4], unsigned int threshold[4]) { signed char feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS]; + int internal_delta_q[MAX_MB_SEGMENTS]; + const int range = 63; + int i; + // This method is currently incompatible with the cyclic refresh method + if ( cpi->cyclic_refresh_mode_enabled ) + return -1; + + // Check number of rows and columns match if (cpi->common.mb_rows != rows || cpi->common.mb_cols != cols) return -1; + // Range check the delta Q values and convert the external Q range values + // to internal ones. 
+ if ( (abs(delta_q[0]) > range) || (abs(delta_q[1]) > range) || + (abs(delta_q[2]) > range) || (abs(delta_q[3]) > range) ) + return -1; + + // Range check the delta lf values + if ( (abs(delta_lf[0]) > range) || (abs(delta_lf[1]) > range) || + (abs(delta_lf[2]) > range) || (abs(delta_lf[3]) > range) ) + return -1; + if (!map) { disable_segmentation(cpi); return 0; } - // Set the segmentation Map + // Translate the external delta q values to internal values. + for ( i = 0; i < MAX_MB_SEGMENTS; i++ ) + internal_delta_q[i] = + ( delta_q[i] >= 0 ) ? q_trans[delta_q[i]] : -q_trans[-delta_q[i]]; + + /* Set the segmentation Map */ set_segmentation_map(cpi, map); - // Activate segmentation. + /* Activate segmentation. */ enable_segmentation(cpi); - // Set up the quant segment data - feature_data[MB_LVL_ALT_Q][0] = delta_q[0]; - feature_data[MB_LVL_ALT_Q][1] = delta_q[1]; - feature_data[MB_LVL_ALT_Q][2] = delta_q[2]; - feature_data[MB_LVL_ALT_Q][3] = delta_q[3]; + /* Set up the quant segment data */ + feature_data[MB_LVL_ALT_Q][0] = internal_delta_q[0]; + feature_data[MB_LVL_ALT_Q][1] = internal_delta_q[1]; + feature_data[MB_LVL_ALT_Q][2] = internal_delta_q[2]; + feature_data[MB_LVL_ALT_Q][3] = internal_delta_q[3]; - // Set up the loop segment data s + /* Set up the loop segment data s */ feature_data[MB_LVL_ALT_LF][0] = delta_lf[0]; feature_data[MB_LVL_ALT_LF][1] = delta_lf[1]; feature_data[MB_LVL_ALT_LF][2] = delta_lf[2]; @@ -5293,8 +5443,7 @@ int vp8_set_roimap(VP8_COMP *cpi, unsigned char *map, unsigned int rows, unsigne cpi->segment_encode_breakout[2] = threshold[2]; cpi->segment_encode_breakout[3] = threshold[3]; - // Initialise the feature data structure - // SEGMENT_DELTADATA 0, SEGMENT_ABSDATA 1 + /* Initialise the feature data structure */ set_segment_data(cpi, &feature_data[0][0], SEGMENT_DELTADATA); return 0; @@ -5316,7 +5465,6 @@ int vp8_set_active_map(VP8_COMP *cpi, unsigned char *map, unsigned int rows, uns } else { - //cpi->active_map_enabled = 0; return -1 
; } } @@ -5346,7 +5494,9 @@ int vp8_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest) unsigned char *src = source->y_buffer; unsigned char *dst = dest->y_buffer; - // Loop through the Y plane raw and reconstruction data summing (square differences) + /* Loop through the Y plane raw and reconstruction data summing + * (square differences) + */ for (i = 0; i < source->y_height; i += 16) { for (j = 0; j < source->y_width; j += 16) diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h index 900141b..fb8ad35 100644 --- a/vp8/encoder/onyx_int.h +++ b/vp8/encoder/onyx_int.h @@ -25,6 +25,7 @@ #include "vp8/common/threading.h" #include "vpx_ports/mem.h" #include "vpx/internal/vpx_codec_internal.h" +#include "vpx/vp8.h" #include "mcomp.h" #include "vp8/common/findnearmv.h" #include "lookahead.h" @@ -32,7 +33,6 @@ #include "vp8/encoder/denoising.h" #endif -//#define SPEEDSTATS 1 #define MIN_GF_INTERVAL 4 #define DEFAULT_GF_INTERVAL 7 @@ -43,7 +43,7 @@ #define AF_THRESH 25 #define AF_THRESH2 100 #define ARF_DECAY_THRESH 12 -#define MAX_MODES 20 + #define MIN_THRESHMULT 32 #define MAX_THRESHMULT 512 @@ -73,7 +73,6 @@ typedef struct int mvcosts[2][MVvals+1]; #ifdef MODE_STATS - // Stats int y_modes[5]; int uv_modes[4]; int b_modes[10]; @@ -232,22 +231,22 @@ enum typedef struct { - // Layer configuration + /* Layer configuration */ double frame_rate; int target_bandwidth; - // Layer specific coding parameters - int starting_buffer_level; - int optimal_buffer_level; - int maximum_buffer_size; - int starting_buffer_level_in_ms; - int optimal_buffer_level_in_ms; - int maximum_buffer_size_in_ms; + /* Layer specific coding parameters */ + int64_t starting_buffer_level; + int64_t optimal_buffer_level; + int64_t maximum_buffer_size; + int64_t starting_buffer_level_in_ms; + int64_t optimal_buffer_level_in_ms; + int64_t maximum_buffer_size_in_ms; int avg_frame_size_for_layer; - int buffer_level; - int bits_off_target; + int64_t buffer_level; + int64_t 
bits_off_target; int64_t total_actual_bits; int total_target_vs_actual; @@ -307,7 +306,7 @@ typedef struct VP8_COMP MACROBLOCK mb; VP8_COMMON common; - vp8_writer bc[9]; // one boolcoder for each partition + vp8_writer bc[9]; /* one boolcoder for each partition */ VP8_CONFIG oxcf; @@ -321,16 +320,20 @@ typedef struct VP8_COMP YV12_BUFFER_CONFIG scaled_source; YV12_BUFFER_CONFIG *last_frame_unscaled_source; - int source_alt_ref_pending; // frame in src_buffers has been identified to be encoded as an alt ref - int source_alt_ref_active; // an alt ref frame has been encoded and is usable - - int is_src_frame_alt_ref; // source of frame to encode is an exact copy of an alt ref frame + /* frame in src_buffers has been identified to be encoded as an alt ref */ + int source_alt_ref_pending; + /* an alt ref frame has been encoded and is usable */ + int source_alt_ref_active; + /* source of frame to encode is an exact copy of an alt ref frame */ + int is_src_frame_alt_ref; - int gold_is_last; // golden frame same as last frame ( short circuit gold searches) - int alt_is_last; // Alt reference frame same as last ( short circuit altref search) - int gold_is_alt; // don't do both alt and gold search ( just do gold). + /* golden frame same as last frame ( short circuit gold searches) */ + int gold_is_last; + /* Alt reference frame same as last ( short circuit altref search) */ + int alt_is_last; + /* don't do both alt and gold search ( just do gold). 
*/ + int gold_is_alt; - //int refresh_alt_ref_frame; YV12_BUFFER_CONFIG pick_lf_lvl_frame; TOKENEXTRA *tok; @@ -342,55 +345,62 @@ typedef struct VP8_COMP unsigned int this_key_frame_forced; unsigned int next_key_frame_forced; - // Ambient reconstruction err target for force key frames + /* Ambient reconstruction err target for force key frames */ int ambient_err; unsigned int mode_check_freq[MAX_MODES]; - unsigned int mode_test_hit_counts[MAX_MODES]; unsigned int mode_chosen_counts[MAX_MODES]; - unsigned int mbs_tested_so_far; - int rd_thresh_mult[MAX_MODES]; int rd_baseline_thresh[MAX_MODES]; - int rd_threshes[MAX_MODES]; int RDMULT; int RDDIV ; CODING_CONTEXT coding_context; - // Rate targetting variables - int64_t prediction_error; + /* Rate targetting variables */ int64_t last_prediction_error; - int64_t intra_error; int64_t last_intra_error; int this_frame_target; int projected_frame_size; - int last_q[2]; // Separate values for Intra/Inter + int last_q[2]; /* Separate values for Intra/Inter */ double rate_correction_factor; double key_frame_rate_correction_factor; double gf_rate_correction_factor; - int frames_till_gf_update_due; // Count down till next GF - int current_gf_interval; // GF interval chosen when we coded the last GF + /* Count down till next GF */ + int frames_till_gf_update_due; + + /* GF interval chosen when we coded the last GF */ + int current_gf_interval; - int gf_overspend_bits; // Total bits overspent becasue of GF boost (cumulative) + /* Total bits overspent becasue of GF boost (cumulative) */ + int gf_overspend_bits; - int non_gf_bitrate_adjustment; // Used in the few frames following a GF to recover the extra bits spent in that GF + /* Used in the few frames following a GF to recover the extra bits + * spent in that GF + */ + int non_gf_bitrate_adjustment; - int kf_overspend_bits; // Extra bits spent on key frames that need to be recovered on inter frames - int kf_bitrate_adjustment; // Current number of bit s to try and recover on 
each inter frame. + /* Extra bits spent on key frames that need to be recovered */ + int kf_overspend_bits; + + /* Current number of bit s to try and recover on each inter frame. */ + int kf_bitrate_adjustment; int max_gf_interval; int baseline_gf_interval; - int active_arnr_frames; // <= cpi->oxcf.arnr_max_frames + int active_arnr_frames; int64_t key_frame_count; int prior_key_frame_distance[KEY_FRAME_CONTEXT]; - int per_frame_bandwidth; // Current section per frame bandwidth target - int av_per_frame_bandwidth; // Average frame size target for clip - int min_frame_bandwidth; // Minimum allocation that should be used for any frame + /* Current section per frame bandwidth target */ + int per_frame_bandwidth; + /* Average frame size target for clip */ + int av_per_frame_bandwidth; + /* Minimum allocation that should be used for any frame */ + int min_frame_bandwidth; int inter_frame_target; double output_frame_rate; int64_t last_time_stamp_seen; @@ -402,12 +412,6 @@ typedef struct VP8_COMP int ni_frames; int avg_frame_qindex; - int zbin_over_quant; - int zbin_mode_boost; - int zbin_mode_boost_enabled; - int last_zbin_over_quant; - int last_zbin_mode_boost; - int64_t total_byte_count; int buffered_mode; @@ -415,7 +419,7 @@ typedef struct VP8_COMP double frame_rate; double ref_frame_rate; int64_t buffer_level; - int bits_off_target; + int64_t bits_off_target; int rolling_target_bits; int rolling_actual_bits; @@ -424,7 +428,7 @@ typedef struct VP8_COMP int long_rolling_actual_bits; int64_t total_actual_bits; - int total_target_vs_actual; // debug stats + int total_target_vs_actual; /* debug stats */ int worst_quality; int active_worst_quality; @@ -433,22 +437,9 @@ typedef struct VP8_COMP int cq_target_quality; - int drop_frames_allowed; // Are we permitted to drop frames? - int drop_frame; // Drop this frame? - int drop_count; // How many frames have we dropped? - int max_drop_count; // How many frames should we drop? 
- int max_consec_dropped_frames; // Limit number of consecutive frames that can be dropped. - - - int ymode_count [VP8_YMODES]; /* intra MB type cts this frame */ - int uv_mode_count[VP8_UV_MODES]; /* intra MB type cts this frame */ + int drop_frames_allowed; /* Are we permitted to drop frames? */ + int drop_frame; /* Drop this frame? */ - unsigned int MVcount [2] [MVvals]; /* (row,col) MV cts this frame */ - - unsigned int coef_counts [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; /* for this frame */ - - //DECLARE_ALIGNED(16, int, coef_counts_backup [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]); //not used any more - //save vp8_tree_probs_from_distribution result for each frame to avoid repeat calculation vp8_prob frame_coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; char update_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; @@ -462,7 +453,7 @@ typedef struct VP8_COMP struct vpx_codec_pkt_list *output_pkt_list; #if 0 - // Experimental code for lagged and one pass + /* Experimental code for lagged and one pass */ ONEPASS_FRAMESTATS one_pass_frame_stats[MAX_LAG_BUFFERS]; int one_pass_frame_index; #endif @@ -470,17 +461,14 @@ typedef struct VP8_COMP int decimation_factor; int decimation_count; - // for real time encoding - int avg_encode_time; //microsecond - int avg_pick_mode_time; //microsecond + /* for real time encoding */ + int avg_encode_time; /* microsecond */ + int avg_pick_mode_time; /* microsecond */ int Speed; - unsigned int cpu_freq; //Mhz int compressor_speed; - int interquantizer; int auto_gold; int auto_adjust_gold_quantizer; - int goldfreq; int auto_worst_q; int cpu_used; int pass; @@ -494,29 +482,28 @@ typedef struct VP8_COMP int last_skip_probs_q[3]; int recent_ref_frame_usage[MAX_REF_FRAMES]; - int count_mb_ref_frame_usage[MAX_REF_FRAMES]; int this_frame_percent_intra; int last_frame_percent_intra; int ref_frame_flags; SPEED_FEATURES sf; - int 
error_bins[1024]; - // Data used for real time conferencing mode to help determine if it would be good to update the gf - int inter_zz_count; - int gf_bad_count; - int gf_update_recommended; - int skip_true_count; + /* Count ZEROMV on all reference frames. */ + int zeromv_count; + int lf_zeromv_pct; unsigned char *segmentation_map; - signed char segment_feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS]; // Segment data (can be deltas or absolute values) - int segment_encode_breakout[MAX_MB_SEGMENTS]; // segment threashold for encode breakout + signed char segment_feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS]; + int segment_encode_breakout[MAX_MB_SEGMENTS]; unsigned char *active_map; unsigned int active_map_enabled; - // Video conferencing cyclic refresh mode flags etc - // This is a mode designed to clean up the background over time in live encoding scenarious. It uses segmentation + + /* Video conferencing cyclic refresh mode flags. This is a mode + * designed to clean up the background over time in live encoding + * scenarious. It uses segmentation. 
+ */ int cyclic_refresh_mode_enabled; int cyclic_refresh_mode_max_mbs_perframe; int cyclic_refresh_mode_index; @@ -524,7 +511,7 @@ typedef struct VP8_COMP signed char *cyclic_refresh_map; #if CONFIG_MULTITHREAD - // multithread data + /* multithread data */ int * mt_current_mb_col; int mt_sync_range; int b_multi_threaded; @@ -538,7 +525,7 @@ typedef struct VP8_COMP ENCODETHREAD_DATA *en_thread_data; LPFTHREAD_DATA lpf_thread_data; - //events + /* events */ sem_t *h_event_start_encoding; sem_t h_event_end_encoding; sem_t h_event_start_lpf; @@ -549,7 +536,6 @@ typedef struct VP8_COMP unsigned int partition_sz[MAX_PARTITIONS]; unsigned char *partition_d[MAX_PARTITIONS]; unsigned char *partition_d_end[MAX_PARTITIONS]; - // end of multithread data fractional_mv_step_fp *find_fractional_mv_step; @@ -557,10 +543,10 @@ typedef struct VP8_COMP vp8_refining_search_fn_t refining_search_sad; vp8_diamond_search_fn_t diamond_search_sad; vp8_variance_fn_ptr_t fn_ptr[BLOCK_MAX_SEGMENTS]; - unsigned int time_receive_data; - unsigned int time_compress_data; - unsigned int time_pick_lpf; - unsigned int time_encode_mb_row; + uint64_t time_receive_data; + uint64_t time_compress_data; + uint64_t time_pick_lpf; + uint64_t time_encode_mb_row; int base_skip_false_prob[128]; @@ -594,16 +580,16 @@ typedef struct VP8_COMP int gf_decay_rate; int static_scene_max_gf_interval; int kf_bits; - int gf_group_error_left; // Remaining error from uncoded frames in a gf group. Two pass use only - - // Projected total bits available for a key frame group of frames + /* Remaining error from uncoded frames in a gf group. 
*/ + int gf_group_error_left; + /* Projected total bits available for a key frame group of frames */ int64_t kf_group_bits; - - // Error score of frames still to be coded in kf group + /* Error score of frames still to be coded in kf group */ int64_t kf_group_error_left; - - int gf_group_bits; // Projected Bits available for a group of frames including 1 GF or ARF - int gf_bits; // Bits for the golden frame or ARF - 2 pass only + /* Projected Bits available for a group including 1 GF or ARF */ + int gf_group_bits; + /* Bits for the golden frame or ARF */ + int gf_bits; int alt_extra_bits; double est_max_qcorrection_factor; } twopass; @@ -641,24 +627,25 @@ typedef struct VP8_COMP #endif int b_calculate_psnr; - // Per MB activity measurement + /* Per MB activity measurement */ unsigned int activity_avg; unsigned int * mb_activity_map; - int * mb_norm_activity_map; - // Record of which MBs still refer to last golden frame either - // directly or through 0,0 + /* Record of which MBs still refer to last golden frame either + * directly or through 0,0 + */ unsigned char *gf_active_flags; int gf_active_count; int output_partition; - //Store last frame's MV info for next frame MV prediction + /* Store last frame's MV info for next frame MV prediction */ int_mv *lfmv; int *lf_ref_frame_sign_bias; int *lf_ref_frame; - int force_next_frame_intra; /* force next frame to intra when kf_auto says so */ + /* force next frame to intra when kf_auto says so */ + int force_next_frame_intra; int droppable; @@ -666,7 +653,7 @@ typedef struct VP8_COMP VP8_DENOISER denoiser; #endif - // Coding layer state variables + /* Coding layer state variables */ unsigned int current_layer; LAYER_CONTEXT layer_context[VPX_TS_MAX_LAYERS]; @@ -687,17 +674,29 @@ typedef struct VP8_COMP #if CONFIG_MULTI_RES_ENCODING /* Number of MBs per row at lower-resolution level */ int mr_low_res_mb_cols; + /* Indicate if lower-res mv info is available */ + unsigned char mr_low_res_mv_avail; + /* The frame number of 
each reference frames */ + unsigned int current_ref_frames[MAX_REF_FRAMES]; #endif + struct rd_costs_struct + { + int mvcosts[2][MVvals+1]; + int mvsadcosts[2][MVfpvals+1]; + int mbmode_cost[2][MB_MODE_COUNT]; + int intra_uv_mode_cost[2][MB_MODE_COUNT]; + int bmode_costs[10][10][10]; + int inter_bmode_costs[B_MODE_COUNT]; + int token_costs[BLOCK_TYPES][COEF_BANDS] + [PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS]; + } rd_costs; } VP8_COMP; -void control_data_rate(VP8_COMP *cpi); - -void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char *dest_end, unsigned long *size); - -int rd_cost_intra_mb(MACROBLOCKD *x); +void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, + unsigned char *dest_end, unsigned long *size); -void vp8_tokenize_mb(VP8_COMP *, MACROBLOCKD *, TOKENEXTRA **); +void vp8_tokenize_mb(VP8_COMP *, MACROBLOCK *, TOKENEXTRA **); void vp8_set_speed_features(VP8_COMP *cpi); diff --git a/vp8/encoder/pickinter.c b/vp8/encoder/pickinter.c index dafb645..673de2b 100644 --- a/vp8/encoder/pickinter.c +++ b/vp8/encoder/pickinter.c @@ -61,7 +61,7 @@ int vp8_skip_fractional_mv_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, } -static int get_inter_mbpred_error(MACROBLOCK *mb, +int vp8_get_inter_mbpred_error(MACROBLOCK *mb, const vp8_variance_fn_ptr_t *vfp, unsigned int *sse, int_mv this_mv) @@ -132,7 +132,7 @@ static int pick_intra4x4block( MACROBLOCK *x, int ib, B_PREDICTION_MODE *best_mode, - unsigned int *mode_costs, + const int *mode_costs, int *bestrate, int *bestdistortion) @@ -141,20 +141,24 @@ static int pick_intra4x4block( BLOCKD *b = &x->e_mbd.block[ib]; BLOCK *be = &x->block[ib]; int dst_stride = x->e_mbd.dst.y_stride; - unsigned char *base_dst = x->e_mbd.dst.y_buffer; + unsigned char *dst = x->e_mbd.dst.y_buffer + b->offset; B_PREDICTION_MODE mode; - int best_rd = INT_MAX; // 1<<30 + int best_rd = INT_MAX; int rate; int distortion; - for (mode = B_DC_PRED; mode <= B_HE_PRED /*B_HU_PRED*/; mode++) + unsigned char *Above = dst - dst_stride; + 
unsigned char *yleft = dst - 1; + unsigned char top_left = Above[-1]; + + for (mode = B_DC_PRED; mode <= B_HE_PRED; mode++) { int this_rd; rate = mode_costs[mode]; - vp8_intra4x4_predict - (base_dst + b->offset, dst_stride, - mode, b->predictor, 16); + + vp8_intra4x4_predict(Above, yleft, dst_stride, mode, + b->predictor, 16, top_left); distortion = get_prediction_error(be, b); this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion); @@ -167,7 +171,7 @@ static int pick_intra4x4block( } } - b->bmi.as_mode = (B_PREDICTION_MODE)(*best_mode); + b->bmi.as_mode = *best_mode; vp8_encode_intra4x4block(x, ib); return best_rd; } @@ -185,7 +189,7 @@ static int pick_intra4x4mby_modes int cost = mb->mbmode_cost [xd->frame_type] [B_PRED]; int error; int distortion = 0; - unsigned int *bmode_costs; + const int *bmode_costs; intra_prediction_down_copy(xd, xd->dst.y_buffer - xd->dst.y_stride + 16); @@ -214,8 +218,9 @@ static int pick_intra4x4mby_modes distortion += d; mic->bmi[i].as_mode = best_mode; - // Break out case where we have already exceeded best so far value - // that was passed in + /* Break out case where we have already exceeded best so far value + * that was passed in + */ if (distortion > *best_dist) break; } @@ -384,15 +389,16 @@ static void pick_intra_mbuv_mode(MACROBLOCK *mb) } -static void update_mvcount(VP8_COMP *cpi, MACROBLOCKD *xd, int_mv *best_ref_mv) +static void update_mvcount(VP8_COMP *cpi, MACROBLOCK *x, int_mv *best_ref_mv) { + MACROBLOCKD *xd = &x->e_mbd; /* Split MV modes currently not supported when RD is nopt enabled, * therefore, only need to modify MVcount in NEWMV mode. 
*/ if (xd->mode_info_context->mbmi.mode == NEWMV) { - cpi->MVcount[0][mv_max+((xd->mode_info_context->mbmi.mv.as_mv.row - + x->MVcount[0][mv_max+((xd->mode_info_context->mbmi.mv.as_mv.row - best_ref_mv->as_mv.row) >> 1)]++; - cpi->MVcount[1][mv_max+((xd->mode_info_context->mbmi.mv.as_mv.col - + x->MVcount[1][mv_max+((xd->mode_info_context->mbmi.mv.as_mv.col - best_ref_mv->as_mv.col) >> 1)]++; } } @@ -405,10 +411,9 @@ void get_lower_res_motion_info(VP8_COMP *cpi, MACROBLOCKD *xd, int *dissim, MB_PREDICTION_MODE *parent_mode, int_mv *parent_ref_mv, int mb_row, int mb_col) { - LOWER_RES_INFO* store_mode_info - = (LOWER_RES_INFO*)cpi->oxcf.mr_low_res_mode_info; + LOWER_RES_MB_INFO* store_mode_info + = ((LOWER_RES_FRAME_INFO*)cpi->oxcf.mr_low_res_mode_info)->mb_info; unsigned int parent_mb_index; - //unsigned int parent_mb_index = map_640x480_to_320x240[mb_row][mb_col]; /* Consider different down_sampling_factor. */ { @@ -440,7 +445,6 @@ void get_lower_res_motion_info(VP8_COMP *cpi, MACROBLOCKD *xd, int *dissim, /* Consider different down_sampling_factor. * The result can be rounded to be more precise, but it takes more time. 
*/ - //int round = cpi->oxcf.mr_down_sampling_factor.den/2; (*parent_ref_mv).as_mv.row = store_mode_info[parent_mb_index].mv.as_mv.row *cpi->oxcf.mr_down_sampling_factor.num /cpi->oxcf.mr_down_sampling_factor.den; @@ -455,10 +459,18 @@ void get_lower_res_motion_info(VP8_COMP *cpi, MACROBLOCKD *xd, int *dissim, static void check_for_encode_breakout(unsigned int sse, MACROBLOCK* x) { - if (sse < x->encode_breakout) + MACROBLOCKD *xd = &x->e_mbd; + + unsigned int threshold = (xd->block[0].dequant[1] + * xd->block[0].dequant[1] >>4); + + if(threshold < x->encode_breakout) + threshold = x->encode_breakout; + + if (sse < threshold ) { - // Check u and v to make sure skip is ok - int sse2 = 0; + /* Check u and v to make sure skip is ok */ + unsigned int sse2 = 0; sse2 = VP8_UVSSE(x); @@ -469,7 +481,8 @@ static void check_for_encode_breakout(unsigned int sse, MACROBLOCK* x) } } -static int evaluate_inter_mode(unsigned int* sse, int rate2, int* distortion2, VP8_COMP *cpi, MACROBLOCK *x) +static int evaluate_inter_mode(unsigned int* sse, int rate2, int* distortion2, + VP8_COMP *cpi, MACROBLOCK *x, int rd_adj) { MB_PREDICTION_MODE this_mode = x->e_mbd.mode_info_context->mbmi.mode; int_mv mv = x->e_mbd.mode_info_context->mbmi.mv; @@ -486,16 +499,70 @@ static int evaluate_inter_mode(unsigned int* sse, int rate2, int* distortion2, V if((this_mode != NEWMV) || !(cpi->sf.half_pixel_search) || cpi->common.full_pixel==1) - *distortion2 = get_inter_mbpred_error(x, + *distortion2 = vp8_get_inter_mbpred_error(x, &cpi->fn_ptr[BLOCK_16X16], sse, mv); this_rd = RDCOST(x->rdmult, x->rddiv, rate2, *distortion2); + /* Adjust rd to bias to ZEROMV */ + if(this_mode == ZEROMV) + { + /* Bias to ZEROMV on LAST_FRAME reference when it is available. 
*/ + if ((cpi->ref_frame_flags & VP8_LAST_FRAME & + cpi->common.refresh_last_frame) + && x->e_mbd.mode_info_context->mbmi.ref_frame != LAST_FRAME) + rd_adj = 100; + + // rd_adj <= 100 + this_rd = ((int64_t)this_rd) * rd_adj / 100; + } + check_for_encode_breakout(*sse, x); return this_rd; } +static void calculate_zeromv_rd_adjustment(VP8_COMP *cpi, MACROBLOCK *x, + int *rd_adjustment) +{ + MODE_INFO *mic = x->e_mbd.mode_info_context; + int_mv mv_l, mv_a, mv_al; + int local_motion_check = 0; + + if (cpi->lf_zeromv_pct > 40) + { + /* left mb */ + mic -= 1; + mv_l = mic->mbmi.mv; + + if (mic->mbmi.ref_frame != INTRA_FRAME) + if( abs(mv_l.as_mv.row) < 8 && abs(mv_l.as_mv.col) < 8) + local_motion_check++; + + /* above-left mb */ + mic -= x->e_mbd.mode_info_stride; + mv_al = mic->mbmi.mv; + + if (mic->mbmi.ref_frame != INTRA_FRAME) + if( abs(mv_al.as_mv.row) < 8 && abs(mv_al.as_mv.col) < 8) + local_motion_check++; + + /* above mb */ + mic += 1; + mv_a = mic->mbmi.mv; + + if (mic->mbmi.ref_frame != INTRA_FRAME) + if( abs(mv_a.as_mv.row) < 8 && abs(mv_a.as_mv.col) < 8) + local_motion_check++; + + if (((!x->e_mbd.mb_to_top_edge || !x->e_mbd.mb_to_left_edge) + && local_motion_check >0) || local_motion_check >2 ) + *rd_adjustment = 80; + else if (local_motion_check > 0) + *rd_adjustment = 90; + } +} + void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int recon_uvoffset, int *returnrate, int *returndistortion, int *returnintra, int mb_row, @@ -513,7 +580,8 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, MB_PREDICTION_MODE this_mode; int num00; int mdcounts[4]; - int best_rd = INT_MAX; // 1 << 30; + int best_rd = INT_MAX; + int rd_adjustment = 100; int best_intra_rd = INT_MAX; int mode_index; int rate; @@ -523,14 +591,15 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int best_mode_index = 0; unsigned int sse = INT_MAX, best_rd_sse = INT_MAX; #if CONFIG_TEMPORAL_DENOISING - unsigned int zero_mv_sse 
= 0, best_sse = INT_MAX; + unsigned int zero_mv_sse = INT_MAX, best_sse = INT_MAX; #endif int_mv mvp; int near_sadidx[8] = {0, 1, 2, 3, 4, 5, 6, 7}; int saddone=0; - int sr=0; //search range got from mv_pred(). It uses step_param levels. (0-7) + /* search range got from mv_pred(). It uses step_param levels. (0-7) */ + int sr=0; unsigned char *plane[4][3]; int ref_frame_map[4]; @@ -539,12 +608,39 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, #if CONFIG_MULTI_RES_ENCODING int dissim = INT_MAX; int parent_ref_frame = 0; + int parent_ref_valid = cpi->oxcf.mr_encoder_id && cpi->mr_low_res_mv_avail; int_mv parent_ref_mv; MB_PREDICTION_MODE parent_mode = 0; - if (cpi->oxcf.mr_encoder_id) + if (parent_ref_valid) + { + int parent_ref_flag; + get_lower_res_motion_info(cpi, xd, &dissim, &parent_ref_frame, &parent_mode, &parent_ref_mv, mb_row, mb_col); + + /* TODO(jkoleszar): The references available (ref_frame_flags) to the + * lower res encoder should match those available to this encoder, but + * there seems to be a situation where this mismatch can happen in the + * case of frame dropping and temporal layers. For example, + * GOLD being disallowed in ref_frame_flags, but being returned as + * parent_ref_frame. + * + * In this event, take the conservative approach of disabling the + * lower res info for this MB. 
+ */ + parent_ref_flag = 0; + if (parent_ref_frame == LAST_FRAME) + parent_ref_flag = (cpi->ref_frame_flags & VP8_LAST_FRAME); + else if (parent_ref_frame == GOLDEN_FRAME) + parent_ref_flag = (cpi->ref_frame_flags & VP8_GOLD_FRAME); + else if (parent_ref_frame == ALTREF_FRAME) + parent_ref_flag = (cpi->ref_frame_flags & VP8_ALTR_FRAME); + + //assert(!parent_ref_frame || parent_ref_flag); + if (parent_ref_frame && !parent_ref_flag) + parent_ref_valid = 0; + } #endif mode_mv = mode_mv_sb[sign_bias]; @@ -553,6 +649,15 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, vpx_memset(&best_mbmode, 0, sizeof(best_mbmode)); /* Setup search priorities */ +#if CONFIG_MULTI_RES_ENCODING + if (parent_ref_valid && parent_ref_frame && dissim < 8) + { + ref_frame_map[0] = -1; + ref_frame_map[1] = parent_ref_frame; + ref_frame_map[2] = -1; + ref_frame_map[3] = -1; + } else +#endif get_reference_search_order(cpi, ref_frame_map); /* Check to see if there is at least 1 valid reference frame that we need @@ -574,22 +679,29 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, get_predictor_pointers(cpi, plane, recon_yoffset, recon_uvoffset); - cpi->mbs_tested_so_far++; // Count of the number of MBs tested so far this frame + /* Count of the number of MBs tested so far this frame */ + x->mbs_tested_so_far++; *returnintra = INT_MAX; x->skip = 0; x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME; - // if we encode a new mv this is important - // find the best new motion vector + /* If the frame has big static background and current MB is in low + * motion area, its mode decision is biased to ZEROMV mode. 
+ */ + calculate_zeromv_rd_adjustment(cpi, x, &rd_adjustment); + + /* if we encode a new mv this is important + * find the best new motion vector + */ for (mode_index = 0; mode_index < MAX_MODES; mode_index++) { int frame_cost; int this_rd = INT_MAX; int this_ref_frame = ref_frame_map[vp8_ref_frame_order[mode_index]]; - if (best_rd <= cpi->rd_threshes[mode_index]) + if (best_rd <= x->rd_threshes[mode_index]) continue; if (this_ref_frame < 0) @@ -597,23 +709,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, x->e_mbd.mode_info_context->mbmi.ref_frame = this_ref_frame; -#if CONFIG_MULTI_RES_ENCODING - if (cpi->oxcf.mr_encoder_id) - { - /* If parent MB is intra, child MB is intra. */ - if (!parent_ref_frame && this_ref_frame) - continue; - - /* If parent MB is inter, and it is unlikely there are multiple - * objects in parent MB, we use parent ref frame as child MB's - * ref frame. */ - if (parent_ref_frame && dissim < 8 - && parent_ref_frame != this_ref_frame) - continue; - } -#endif - - // everything but intra + /* everything but intra */ if (x->e_mbd.mode_info_context->mbmi.ref_frame) { x->e_mbd.pre.y_buffer = plane[this_ref_frame][0]; @@ -628,7 +724,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, } #if CONFIG_MULTI_RES_ENCODING - if (cpi->oxcf.mr_encoder_id) + if (parent_ref_valid) { if (vp8_mode_order[mode_index] == NEARESTMV && mode_mv[NEARESTMV].as_int ==0) @@ -638,7 +734,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, continue; if (vp8_mode_order[mode_index] == NEWMV && parent_mode == ZEROMV - && best_ref_mv.as_int==0) //&& dissim==0 + && best_ref_mv.as_int==0) continue; else if(vp8_mode_order[mode_index] == NEWMV && dissim==0 && best_ref_mv.as_int==parent_ref_mv.as_int) @@ -650,22 +746,22 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, /* Check to see if the testing frequency for this mode is at its max * If so then prevent it from being 
tested and increase the threshold * for its testing */ - if (cpi->mode_test_hit_counts[mode_index] && + if (x->mode_test_hit_counts[mode_index] && (cpi->mode_check_freq[mode_index] > 1)) { - if (cpi->mbs_tested_so_far <= (cpi->mode_check_freq[mode_index] * - cpi->mode_test_hit_counts[mode_index])) + if (x->mbs_tested_so_far <= (cpi->mode_check_freq[mode_index] * + x->mode_test_hit_counts[mode_index])) { /* Increase the threshold for coding this mode to make it less * likely to be chosen */ - cpi->rd_thresh_mult[mode_index] += 4; + x->rd_thresh_mult[mode_index] += 4; - if (cpi->rd_thresh_mult[mode_index] > MAX_THRESHMULT) - cpi->rd_thresh_mult[mode_index] = MAX_THRESHMULT; + if (x->rd_thresh_mult[mode_index] > MAX_THRESHMULT) + x->rd_thresh_mult[mode_index] = MAX_THRESHMULT; - cpi->rd_threshes[mode_index] = + x->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) * - cpi->rd_thresh_mult[mode_index]; + x->rd_thresh_mult[mode_index]; continue; } } @@ -673,7 +769,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, /* We have now reached the point where we are going to test the current * mode so increment the counter for the number of times it has been * tested */ - cpi->mode_test_hit_counts[mode_index] ++; + x->mode_test_hit_counts[mode_index] ++; rate2 = 0; distortion2 = 0; @@ -728,7 +824,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, case SPLITMV: - // Split MV modes currently not supported when RD is nopt enabled. + /* Split MV modes currently not supported when RD is not enabled. */ break; case DC_PRED: @@ -777,13 +873,22 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int speed_adjust = (cpi->Speed > 5) ? ((cpi->Speed >= 8)? 
3 : 2) : 1; - // Further step/diamond searches as necessary + /* Further step/diamond searches as necessary */ step_param = cpi->sf.first_step + speed_adjust; #if CONFIG_MULTI_RES_ENCODING - if (cpi->oxcf.mr_encoder_id) + /* If lower-res drops this frame, then higher-res encoder does + motion search without any previous knowledge. Also, since + last frame motion info is not stored, then we can not + use improved_mv_pred. */ + if (cpi->oxcf.mr_encoder_id && !parent_ref_valid) + cpi->sf.improved_mv_pred = 0; + + if (parent_ref_valid && parent_ref_frame) { - // Use parent MV as predictor. Adjust search range accordingly. + /* Use parent MV as predictor. Adjust search range + * accordingly. + */ mvp.as_int = parent_ref_mv.as_int; mvp_full.as_mv.col = parent_ref_mv.as_mv.col>>3; mvp_full.as_mv.row = parent_ref_mv.as_mv.row>>3; @@ -808,7 +913,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, &near_sadidx[0]); sr += speed_adjust; - //adjust search range according to sr from mv prediction + /* adjust search range according to sr from mv prediction */ if(sr > step_param) step_param = sr; @@ -823,7 +928,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, } #if CONFIG_MULTI_RES_ENCODING - if (cpi->oxcf.mr_encoder_id && dissim <= 2 && + if (parent_ref_valid && parent_ref_frame && dissim <= 2 && MAX(abs(best_ref_mv.as_mv.row - parent_ref_mv.as_mv.row), abs(best_ref_mv.as_mv.col - parent_ref_mv.as_mv.col)) <= 4) { @@ -860,7 +965,10 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, * change the behavior in lowest-resolution encoder. * Will improve it later. */ - if (!cpi->oxcf.mr_encoder_id) + /* Set step_param to 0 to ensure large-range motion search + when encoder drops this frame at lower-resolution. 
+ */ + if (!parent_ref_valid) step_param = 0; #endif bestsme = vp8_hex_search(x, b, d, &mvp_full, &d->bmi.mv, @@ -877,10 +985,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, x->mvcost, &best_ref_mv); mode_mv[NEWMV].as_int = d->bmi.mv.as_int; - // Further step/diamond searches as necessary - n = 0; - //further_steps = (cpi->sf.max_step_search_steps - 1) - step_param; - + /* Further step/diamond searches as necessary */ n = num00; num00 = 0; @@ -927,7 +1032,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, mode_mv[NEWMV].as_int = d->bmi.mv.as_int; - // mv cost; + /* mv cost; */ rate2 += vp8_mv_bit_cost(&mode_mv[NEWMV], &best_ref_mv, cpi->mb.mvcost, 128); } @@ -954,7 +1059,8 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, rate2 += vp8_cost_mv_ref(this_mode, mdcounts); x->e_mbd.mode_info_context->mbmi.mv.as_int = mode_mv[this_mode].as_int; - this_rd = evaluate_inter_mode(&sse, rate2, &distortion2, cpi, x); + this_rd = evaluate_inter_mode(&sse, rate2, &distortion2, cpi, x, + rd_adjustment); break; default: @@ -964,31 +1070,33 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, #if CONFIG_TEMPORAL_DENOISING if (cpi->oxcf.noise_sensitivity) { - // Store for later use by denoiser. - if (this_mode == ZEROMV && - x->e_mbd.mode_info_context->mbmi.ref_frame == LAST_FRAME) - { - zero_mv_sse = sse; - } - - // Store the best NEWMV in x for later use in the denoiser. - // We are restricted to the LAST_FRAME since the denoiser only keeps - // one filter state. - if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV && - x->e_mbd.mode_info_context->mbmi.ref_frame == LAST_FRAME) - { - best_sse = sse; - x->e_mbd.best_sse_inter_mode = NEWMV; - x->e_mbd.best_sse_mv = x->e_mbd.mode_info_context->mbmi.mv; - x->e_mbd.need_to_clamp_best_mvs = - x->e_mbd.mode_info_context->mbmi.need_to_clamp_mvs; - } + + /* Store for later use by denoiser. 
*/ + if (this_mode == ZEROMV && sse < zero_mv_sse ) + { + zero_mv_sse = sse; + x->best_zeromv_reference_frame = + x->e_mbd.mode_info_context->mbmi.ref_frame; + } + + /* Store the best NEWMV in x for later use in the denoiser. */ + if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV && + sse < best_sse) + { + best_sse = sse; + x->best_sse_inter_mode = NEWMV; + x->best_sse_mv = x->e_mbd.mode_info_context->mbmi.mv; + x->need_to_clamp_best_mvs = + x->e_mbd.mode_info_context->mbmi.need_to_clamp_mvs; + x->best_reference_frame = + x->e_mbd.mode_info_context->mbmi.ref_frame; + } } #endif if (this_rd < best_rd || x->skip) { - // Note index of best mode + /* Note index of best mode */ best_mode_index = mode_index; *returnrate = rate2; @@ -1001,12 +1109,12 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, /* Testing this mode gave rise to an improvement in best error * score. Lower threshold a bit for next time */ - cpi->rd_thresh_mult[mode_index] = - (cpi->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) ? - cpi->rd_thresh_mult[mode_index] - 2 : MIN_THRESHMULT; - cpi->rd_threshes[mode_index] = + x->rd_thresh_mult[mode_index] = + (x->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) ? 
+ x->rd_thresh_mult[mode_index] - 2 : MIN_THRESHMULT; + x->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) * - cpi->rd_thresh_mult[mode_index]; + x->rd_thresh_mult[mode_index]; } /* If the mode did not help improve the best error case then raise the @@ -1014,33 +1122,33 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, */ else { - cpi->rd_thresh_mult[mode_index] += 4; + x->rd_thresh_mult[mode_index] += 4; - if (cpi->rd_thresh_mult[mode_index] > MAX_THRESHMULT) - cpi->rd_thresh_mult[mode_index] = MAX_THRESHMULT; + if (x->rd_thresh_mult[mode_index] > MAX_THRESHMULT) + x->rd_thresh_mult[mode_index] = MAX_THRESHMULT; - cpi->rd_threshes[mode_index] = + x->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) * - cpi->rd_thresh_mult[mode_index]; + x->rd_thresh_mult[mode_index]; } if (x->skip) break; } - // Reduce the activation RD thresholds for the best choice mode + /* Reduce the activation RD thresholds for the best choice mode */ if ((cpi->rd_baseline_thresh[best_mode_index] > 0) && (cpi->rd_baseline_thresh[best_mode_index] < (INT_MAX >> 2))) { - int best_adjustment = (cpi->rd_thresh_mult[best_mode_index] >> 3); + int best_adjustment = (x->rd_thresh_mult[best_mode_index] >> 3); - cpi->rd_thresh_mult[best_mode_index] = - (cpi->rd_thresh_mult[best_mode_index] + x->rd_thresh_mult[best_mode_index] = + (x->rd_thresh_mult[best_mode_index] >= (MIN_THRESHMULT + best_adjustment)) ? 
- cpi->rd_thresh_mult[best_mode_index] - best_adjustment : + x->rd_thresh_mult[best_mode_index] - best_adjustment : MIN_THRESHMULT; - cpi->rd_threshes[best_mode_index] = + x->rd_threshes[best_mode_index] = (cpi->rd_baseline_thresh[best_mode_index] >> 7) * - cpi->rd_thresh_mult[best_mode_index]; + x->rd_thresh_mult[best_mode_index]; } @@ -1052,43 +1160,54 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, this_rdbin = 1023; } - cpi->error_bins[this_rdbin] ++; + x->error_bins[this_rdbin] ++; } #if CONFIG_TEMPORAL_DENOISING if (cpi->oxcf.noise_sensitivity) { - if (x->e_mbd.best_sse_inter_mode == DC_PRED) { - // No best MV found. - x->e_mbd.best_sse_inter_mode = best_mbmode.mode; - x->e_mbd.best_sse_mv = best_mbmode.mv; - x->e_mbd.need_to_clamp_best_mvs = best_mbmode.need_to_clamp_mvs; - best_sse = best_rd_sse; - } - vp8_denoiser_denoise_mb(&cpi->denoiser, x, best_sse, zero_mv_sse, - recon_yoffset, recon_uvoffset); - - // Reevaluate ZEROMV after denoising. - if (best_mbmode.ref_frame == INTRA_FRAME) - { - int this_rd = 0; - rate2 = 0; - distortion2 = 0; - x->e_mbd.mode_info_context->mbmi.ref_frame = LAST_FRAME; - rate2 += x->ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame]; - this_mode = ZEROMV; - rate2 += vp8_cost_mv_ref(this_mode, mdcounts); - x->e_mbd.mode_info_context->mbmi.mode = this_mode; - x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED; - x->e_mbd.mode_info_context->mbmi.mv.as_int = 0; - this_rd = evaluate_inter_mode(&sse, rate2, &distortion2, cpi, x); + if (x->best_sse_inter_mode == DC_PRED) + { + /* No best MV found. */ + x->best_sse_inter_mode = best_mbmode.mode; + x->best_sse_mv = best_mbmode.mv; + x->need_to_clamp_best_mvs = best_mbmode.need_to_clamp_mvs; + x->best_reference_frame = best_mbmode.ref_frame; + best_sse = best_rd_sse; + } + vp8_denoiser_denoise_mb(&cpi->denoiser, x, best_sse, zero_mv_sse, + recon_yoffset, recon_uvoffset); - if (this_rd < best_rd || x->skip) + + /* Reevaluate ZEROMV after denoising. 
*/ + if (best_mbmode.ref_frame == INTRA_FRAME && + x->best_zeromv_reference_frame != INTRA_FRAME) { - vpx_memcpy(&best_mbmode, &x->e_mbd.mode_info_context->mbmi, - sizeof(MB_MODE_INFO)); + int this_rd = 0; + int this_ref_frame = x->best_zeromv_reference_frame; + rate2 = x->ref_frame_cost[this_ref_frame] + + vp8_cost_mv_ref(ZEROMV, mdcounts); + distortion2 = 0; + + /* set up the proper prediction buffers for the frame */ + x->e_mbd.mode_info_context->mbmi.ref_frame = this_ref_frame; + x->e_mbd.pre.y_buffer = plane[this_ref_frame][0]; + x->e_mbd.pre.u_buffer = plane[this_ref_frame][1]; + x->e_mbd.pre.v_buffer = plane[this_ref_frame][2]; + + x->e_mbd.mode_info_context->mbmi.mode = ZEROMV; + x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED; + x->e_mbd.mode_info_context->mbmi.mv.as_int = 0; + this_rd = evaluate_inter_mode(&sse, rate2, &distortion2, cpi, x, + rd_adjustment); + + if (this_rd < best_rd) + { + vpx_memcpy(&best_mbmode, &x->e_mbd.mode_info_context->mbmi, + sizeof(MB_MODE_INFO)); + } } - } + } #endif @@ -1122,11 +1241,11 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, != cpi->common.ref_frame_sign_bias[xd->mode_info_context->mbmi.ref_frame]) best_ref_mv.as_int = best_ref_mv_sb[!sign_bias].as_int; - update_mvcount(cpi, &x->e_mbd, &best_ref_mv); + update_mvcount(cpi, x, &best_ref_mv); } -void vp8_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate_) +void vp8_pick_intra_mode(MACROBLOCK *x, int *rate_) { int error4x4, error16x16 = INT_MAX; int rate, best_rate = 0, distortion, best_sse; diff --git a/vp8/encoder/pickinter.h b/vp8/encoder/pickinter.h index 3d83782..35011ca 100644 --- a/vp8/encoder/pickinter.h +++ b/vp8/encoder/pickinter.h @@ -18,6 +18,10 @@ extern void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int recon_uvoffset, int *returnrate, int *returndistortion, int *returnintra, int mb_row, int mb_col); -extern void vp8_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate); +extern void 
vp8_pick_intra_mode(MACROBLOCK *x, int *rate); +extern int vp8_get_inter_mbpred_error(MACROBLOCK *mb, + const vp8_variance_fn_ptr_t *vfp, + unsigned int *sse, + int_mv this_mv); #endif diff --git a/vp8/encoder/picklpf.c b/vp8/encoder/picklpf.c index 21af45a..4121349 100644 --- a/vp8/encoder/picklpf.c +++ b/vp8/encoder/picklpf.c @@ -74,7 +74,9 @@ static int calc_partial_ssl_err(YV12_BUFFER_CONFIG *source, src += srcoffset; dst += dstoffset; - // Loop through the Y plane raw and reconstruction data summing (square differences) + /* Loop through the Y plane raw and reconstruction data summing + * (square differences) + */ for (i = 0; i < linestocopy; i += 16) { for (j = 0; j < source->y_width; j += 16) @@ -92,7 +94,7 @@ static int calc_partial_ssl_err(YV12_BUFFER_CONFIG *source, return Total; } -// Enforce a minimum filter level based upon baseline Q +/* Enforce a minimum filter level based upon baseline Q */ static int get_min_filter_level(VP8_COMP *cpi, int base_qindex) { int min_filter_level; @@ -113,14 +115,15 @@ static int get_min_filter_level(VP8_COMP *cpi, int base_qindex) return min_filter_level; } -// Enforce a maximum filter level based upon baseline Q +/* Enforce a maximum filter level based upon baseline Q */ static int get_max_filter_level(VP8_COMP *cpi, int base_qindex) { - // PGW August 2006: Highest filter values almost always a bad idea + /* PGW August 2006: Highest filter values almost always a bad idea */ - // jbb chg: 20100118 - not so any more with this overquant stuff allow high values - // with lots of intra coming in. - int max_filter_level = MAX_LOOP_FILTER ;//* 3 / 4; + /* jbb chg: 20100118 - not so any more with this overquant stuff allow + * high values with lots of intra coming in. 
+ */ + int max_filter_level = MAX_LOOP_FILTER; (void)base_qindex; if (cpi->twopass.section_intra_rating > 8) @@ -155,7 +158,9 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) cm->last_sharpness_level = cm->sharpness_level; } - // Start the search at the previous frame filter level unless it is now out of range. + /* Start the search at the previous frame filter level unless it is + * now out of range. + */ if (cm->filter_level < min_filter_level) cm->filter_level = min_filter_level; else if (cm->filter_level > max_filter_level) @@ -164,7 +169,7 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) filt_val = cm->filter_level; best_filt_val = filt_val; - // Get the err using the previous frame's filter value. + /* Get the err using the previous frame's filter value. */ /* Copy the unfiltered / processed recon buffer to the new buffer */ vp8_yv12_copy_partial_frame(saved_frame, cm->frame_to_show); @@ -174,17 +179,17 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) filt_val -= 1 + (filt_val > 10); - // Search lower filter levels + /* Search lower filter levels */ while (filt_val >= min_filter_level) { - // Apply the loop filter + /* Apply the loop filter */ vp8_yv12_copy_partial_frame(saved_frame, cm->frame_to_show); vp8_loop_filter_partial_frame(cm, &cpi->mb.e_mbd, filt_val); - // Get the err for filtered frame + /* Get the err for filtered frame */ filt_err = calc_partial_ssl_err(sd, cm->frame_to_show); - // Update the best case record or exit loop. + /* Update the best case record or exit loop. 
*/ if (filt_err < best_err) { best_err = filt_err; @@ -193,32 +198,34 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) else break; - // Adjust filter level + /* Adjust filter level */ filt_val -= 1 + (filt_val > 10); } - // Search up (note that we have already done filt_val = cm->filter_level) + /* Search up (note that we have already done filt_val = cm->filter_level) */ filt_val = cm->filter_level + 1 + (filt_val > 10); if (best_filt_val == cm->filter_level) { - // Resist raising filter level for very small gains + /* Resist raising filter level for very small gains */ best_err -= (best_err >> 10); while (filt_val < max_filter_level) { - // Apply the loop filter + /* Apply the loop filter */ vp8_yv12_copy_partial_frame(saved_frame, cm->frame_to_show); vp8_loop_filter_partial_frame(cm, &cpi->mb.e_mbd, filt_val); - // Get the err for filtered frame + /* Get the err for filtered frame */ filt_err = calc_partial_ssl_err(sd, cm->frame_to_show); - // Update the best case record or exit loop. + /* Update the best case record or exit loop. 
*/ if (filt_err < best_err) { - // Do not raise filter level if improvement is < 1 part in 4096 + /* Do not raise filter level if improvement is < 1 part + * in 4096 + */ best_err = filt_err - (filt_err >> 10); best_filt_val = filt_val; @@ -226,7 +233,7 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) else break; - // Adjust filter level + /* Adjust filter level */ filt_val += 1 + (filt_val > 10); } } @@ -243,7 +250,7 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) cm->frame_to_show = saved_frame; } -// Stub function for now Alt LF not used +/* Stub function for now Alt LF not used */ void vp8cx_set_alt_lf_level(VP8_COMP *cpi, int filt_val) { MACROBLOCKD *mbd = &cpi->mb.e_mbd; @@ -266,12 +273,14 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) int filter_step; int filt_high = 0; - int filt_mid = cm->filter_level; // Start search at previous frame filter level + /* Start search at previous frame filter level */ + int filt_mid = cm->filter_level; int filt_low = 0; int filt_best; int filt_direction = 0; - int Bias = 0; // Bias against raising loop filter and in favor of lowering it + /* Bias against raising loop filter and in favor of lowering it */ + int Bias = 0; int ss_err[MAX_LOOP_FILTER + 1]; @@ -287,7 +296,9 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) else cm->sharpness_level = cpi->oxcf.Sharpness; - // Start the search at the previous frame filter level unless it is now out of range. + /* Start the search at the previous frame filter level unless it is + * now out of range. + */ filt_mid = cm->filter_level; if (filt_mid < min_filter_level) @@ -295,10 +306,10 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) else if (filt_mid > max_filter_level) filt_mid = max_filter_level; - // Define the initial step size + /* Define the initial step size */ filter_step = (filt_mid < 16) ? 
4 : filt_mid / 4; - // Get baseline error score + /* Get baseline error score */ /* Copy the unfiltered / processed recon buffer to the new buffer */ vp8_yv12_copy_y(saved_frame, cm->frame_to_show); @@ -314,9 +325,8 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) while (filter_step > 0) { - Bias = (best_err >> (15 - (filt_mid / 8))) * filter_step; //PGW change 12/12/06 for small images + Bias = (best_err >> (15 - (filt_mid / 8))) * filter_step; - // jbb chg: 20100118 - in sections with lots of new material coming in don't bias as much to a low filter value if (cpi->twopass.section_intra_rating < 20) Bias = Bias * cpi->twopass.section_intra_rating / 20; @@ -327,7 +337,7 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) { if(ss_err[filt_low] == 0) { - // Get Low filter error score + /* Get Low filter error score */ vp8_yv12_copy_y(saved_frame, cm->frame_to_show); vp8cx_set_alt_lf_level(cpi, filt_low); vp8_loop_filter_frame_yonly(cm, &cpi->mb.e_mbd, filt_low); @@ -338,10 +348,12 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) else filt_err = ss_err[filt_low]; - // If value is close to the best so far then bias towards a lower loop filter value. + /* If value is close to the best so far then bias towards a + * lower loop filter value. + */ if ((filt_err - Bias) < best_err) { - // Was it actually better than the previous best? + /* Was it actually better than the previous best? */ if (filt_err < best_err) best_err = filt_err; @@ -349,7 +361,7 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) } } - // Now look at filt_high + /* Now look at filt_high */ if ((filt_direction >= 0) && (filt_high != filt_mid)) { if(ss_err[filt_high] == 0) @@ -364,7 +376,7 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) else filt_err = ss_err[filt_high]; - // Was it better than the previous best? + /* Was it better than the previous best? 
*/ if (filt_err < (best_err - Bias)) { best_err = filt_err; @@ -372,7 +384,9 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) } } - // Half the step distance if the best filter value was the same as last time + /* Half the step distance if the best filter value was the same + * as last time + */ if (filt_best == filt_mid) { filter_step = filter_step / 2; diff --git a/vp8/encoder/psnr.c b/vp8/encoder/psnr.c index 5119bb8..5bb49ad 100644 --- a/vp8/encoder/psnr.c +++ b/vp8/encoder/psnr.c @@ -22,7 +22,7 @@ double vp8_mse2psnr(double Samples, double Peak, double Mse) if ((double)Mse > 0.0) psnr = 10.0 * log10(Peak * Peak * Samples / Mse); else - psnr = MAX_PSNR; // Limit to prevent / 0 + psnr = MAX_PSNR; /* Limit to prevent / 0 */ if (psnr > MAX_PSNR) psnr = MAX_PSNR; diff --git a/vp8/encoder/quantize.c b/vp8/encoder/quantize.c index 766d2b2..33c8ef0 100644 --- a/vp8/encoder/quantize.c +++ b/vp8/encoder/quantize.c @@ -44,21 +44,21 @@ void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d) z = coeff_ptr[rc]; zbin = zbin_ptr[rc] ; - sz = (z >> 31); // sign of z - x = (z ^ sz) - sz; // x = abs(z) + sz = (z >> 31); /* sign of z */ + x = (z ^ sz) - sz; /* x = abs(z) */ if (x >= zbin) { x += round_ptr[rc]; y = (((x * quant_ptr[rc]) >> 16) + x) - >> quant_shift_ptr[rc]; // quantize (x) - x = (y ^ sz) - sz; // get the sign back - qcoeff_ptr[rc] = x; // write to destination - dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value + >> quant_shift_ptr[rc]; /* quantize (x) */ + x = (y ^ sz) - sz; /* get the sign back */ + qcoeff_ptr[rc] = x; /* write to destination */ + dqcoeff_ptr[rc] = x * dequant_ptr[rc]; /* dequantized value */ if (y) { - eob = i; // last nonzero coeffs + eob = i; /* last nonzero coeffs */ } } } @@ -84,17 +84,17 @@ void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d) rc = vp8_default_zig_zag1d[i]; z = coeff_ptr[rc]; - sz = (z >> 31); // sign of z - x = (z ^ sz) - sz; // x = abs(z) + sz = (z >> 31); /* sign of z */ + x = (z ^ sz) - sz; /* x = 
abs(z) */ - y = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; // quantize (x) - x = (y ^ sz) - sz; // get the sign back - qcoeff_ptr[rc] = x; // write to destination - dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value + y = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; /* quantize (x) */ + x = (y ^ sz) - sz; /* get the sign back */ + qcoeff_ptr[rc] = x; /* write to destination */ + dqcoeff_ptr[rc] = x * dequant_ptr[rc]; /* dequantized value */ if (y) { - eob = i; // last nonzero coeffs + eob = i; /* last nonzero coeffs */ } } *d->eob = (char)(eob + 1); @@ -132,22 +132,22 @@ void vp8_regular_quantize_b_c(BLOCK *b, BLOCKD *d) zbin = zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value; zbin_boost_ptr ++; - sz = (z >> 31); // sign of z - x = (z ^ sz) - sz; // x = abs(z) + sz = (z >> 31); /* sign of z */ + x = (z ^ sz) - sz; /* x = abs(z) */ if (x >= zbin) { x += round_ptr[rc]; y = (((x * quant_ptr[rc]) >> 16) + x) - >> quant_shift_ptr[rc]; // quantize (x) - x = (y ^ sz) - sz; // get the sign back - qcoeff_ptr[rc] = x; // write to destination - dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value + >> quant_shift_ptr[rc]; /* quantize (x) */ + x = (y ^ sz) - sz; /* get the sign back */ + qcoeff_ptr[rc] = x; /* write to destination */ + dqcoeff_ptr[rc] = x * dequant_ptr[rc]; /* dequantized value */ if (y) { - eob = i; // last nonzero coeffs - zbin_boost_ptr = b->zrun_zbin_boost; // reset zero runlength + eob = i; /* last nonzero coeffs */ + zbin_boost_ptr = b->zrun_zbin_boost; /* reset zero runlength */ } } } @@ -240,26 +240,23 @@ void vp8_regular_quantize_b_c(BLOCK *b, BLOCKD *d) rc = vp8_default_zig_zag1d[i]; z = coeff_ptr[rc]; - //if ( i == 0 ) - // zbin = zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value/2; - //else zbin = zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value; zbin_boost_ptr ++; - sz = (z >> 31); // sign of z - x = (z ^ sz) - sz; // x = abs(z) + sz = (z >> 31); /* sign of z */ + x = (z ^ sz) - sz; /* x = abs(z) */ if (x >= zbin) { - y = ((x + 
round_ptr[rc]) * quant_ptr[rc]) >> 16; // quantize (x) - x = (y ^ sz) - sz; // get the sign back - qcoeff_ptr[rc] = x; // write to destination - dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value + y = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; /* quantize (x) */ + x = (y ^ sz) - sz; /* get the sign back */ + qcoeff_ptr[rc] = x; /* write to destination */ + dqcoeff_ptr[rc] = x * dequant_ptr[rc]; /* dequantized value */ if (y) { - eob = i; // last nonzero coeffs - zbin_boost_ptr = &b->zrun_zbin_boost[0]; // reset zero runlength + eob = i; /* last nonzero coeffs */ + zbin_boost_ptr = &b->zrun_zbin_boost[0]; /* reset zrl */ } } } @@ -441,7 +438,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi) for (Q = 0; Q < QINDEX_RANGE; Q++) { - // dc values + /* dc values */ quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q); cpi->Y1quant_fast[Q][0] = (1 << 16) / quant_val; invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + 0, @@ -469,7 +466,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi) cpi->common.UVdequant[Q][0] = quant_val; cpi->zrun_zbin_boost_uv[Q][0] = (quant_val * zbin_boost[0]) >> 7; - // all the ac values = ; + /* all the ac values = ; */ quant_val = vp8_ac_yquant(Q); cpi->Y1quant_fast[Q][1] = (1 << 16) / quant_val; invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + 1, @@ -536,7 +533,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi) for (Q = 0; Q < QINDEX_RANGE; Q++) { - // dc values + /* dc values */ quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q); cpi->Y1quant[Q][0] = (1 << 16) / quant_val; cpi->Y1zbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7; @@ -558,7 +555,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi) cpi->common.UVdequant[Q][0] = quant_val; cpi->zrun_zbin_boost_uv[Q][0] = (quant_val * zbin_boost[0]) >> 7; - // all the ac values = ; + /* all the ac values = ; */ for (i = 1; i < 16; i++) { int rc = vp8_default_zig_zag1d[i]; @@ -590,20 +587,20 @@ void vp8cx_init_quantizer(VP8_COMP *cpi) #define ZBIN_EXTRA_Y \ (( 
cpi->common.Y1dequant[QIndex][1] * \ - ( cpi->zbin_over_quant + \ - cpi->zbin_mode_boost + \ + ( x->zbin_over_quant + \ + x->zbin_mode_boost + \ x->act_zbin_adj ) ) >> 7) #define ZBIN_EXTRA_UV \ (( cpi->common.UVdequant[QIndex][1] * \ - ( cpi->zbin_over_quant + \ - cpi->zbin_mode_boost + \ + ( x->zbin_over_quant + \ + x->zbin_mode_boost + \ x->act_zbin_adj ) ) >> 7) #define ZBIN_EXTRA_Y2 \ (( cpi->common.Y2dequant[QIndex][1] * \ - ( (cpi->zbin_over_quant / 2) + \ - cpi->zbin_mode_boost + \ + ( (x->zbin_over_quant / 2) + \ + x->zbin_mode_boost + \ x->act_zbin_adj ) ) >> 7) void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip) @@ -613,18 +610,18 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip) MACROBLOCKD *xd = &x->e_mbd; int zbin_extra; - // Select the baseline MB Q index. + /* Select the baseline MB Q index. */ if (xd->segmentation_enabled) { - // Abs Value + /* Abs Value */ if (xd->mb_segement_abs_delta == SEGMENT_ABSDATA) - QIndex = xd->segment_feature_data[MB_LVL_ALT_Q][xd->mode_info_context->mbmi.segment_id]; - // Delta Value + /* Delta Value */ else { QIndex = cpi->common.base_qindex + xd->segment_feature_data[MB_LVL_ALT_Q][xd->mode_info_context->mbmi.segment_id]; - QIndex = (QIndex >= 0) ? ((QIndex <= MAXQ) ? QIndex : MAXQ) : 0; // Clamp to valid range + /* Clamp to valid range */ + QIndex = (QIndex >= 0) ? ((QIndex <= MAXQ) ? QIndex : MAXQ) : 0; } } else @@ -657,13 +654,13 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip) * This will also require modifications to the x86 and neon assembly. 
* */ for (i = 0; i < 16; i++) - x->e_mbd.block[i].dequant = xd->dequant_y1; //cpi->common.Y1dequant[QIndex]; + x->e_mbd.block[i].dequant = xd->dequant_y1; for (i = 16; i < 24; i++) - x->e_mbd.block[i].dequant = xd->dequant_uv; //cpi->common.UVdequant[QIndex]; - x->e_mbd.block[24].dequant = xd->dequant_y2; //cpi->common.Y2dequant[QIndex]; + x->e_mbd.block[i].dequant = xd->dequant_uv; + x->e_mbd.block[24].dequant = xd->dequant_y2; #endif - // Y + /* Y */ zbin_extra = ZBIN_EXTRA_Y; for (i = 0; i < 16; i++) @@ -677,7 +674,7 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip) x->block[i].zbin_extra = (short)zbin_extra; } - // UV + /* UV */ zbin_extra = ZBIN_EXTRA_UV; for (i = 16; i < 24; i++) @@ -691,7 +688,7 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip) x->block[i].zbin_extra = (short)zbin_extra; } - // Y2 + /* Y2 */ zbin_extra = ZBIN_EXTRA_Y2; x->block[24].quant_fast = cpi->Y2quant_fast[QIndex]; @@ -705,35 +702,35 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip) /* save this macroblock QIndex for vp8_update_zbin_extra() */ x->q_index = QIndex; - cpi->last_zbin_over_quant = cpi->zbin_over_quant; - cpi->last_zbin_mode_boost = cpi->zbin_mode_boost; + x->last_zbin_over_quant = x->zbin_over_quant; + x->last_zbin_mode_boost = x->zbin_mode_boost; x->last_act_zbin_adj = x->act_zbin_adj; } - else if(cpi->last_zbin_over_quant != cpi->zbin_over_quant - || cpi->last_zbin_mode_boost != cpi->zbin_mode_boost + else if(x->last_zbin_over_quant != x->zbin_over_quant + || x->last_zbin_mode_boost != x->zbin_mode_boost || x->last_act_zbin_adj != x->act_zbin_adj) { - // Y + /* Y */ zbin_extra = ZBIN_EXTRA_Y; for (i = 0; i < 16; i++) x->block[i].zbin_extra = (short)zbin_extra; - // UV + /* UV */ zbin_extra = ZBIN_EXTRA_UV; for (i = 16; i < 24; i++) x->block[i].zbin_extra = (short)zbin_extra; - // Y2 + /* Y2 */ zbin_extra = ZBIN_EXTRA_Y2; x->block[24].zbin_extra = (short)zbin_extra; - 
cpi->last_zbin_over_quant = cpi->zbin_over_quant; - cpi->last_zbin_mode_boost = cpi->zbin_mode_boost; + x->last_zbin_over_quant = x->zbin_over_quant; + x->last_zbin_mode_boost = x->zbin_mode_boost; x->last_act_zbin_adj = x->act_zbin_adj; } } @@ -744,19 +741,19 @@ void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x) int QIndex = x->q_index; int zbin_extra; - // Y + /* Y */ zbin_extra = ZBIN_EXTRA_Y; for (i = 0; i < 16; i++) x->block[i].zbin_extra = (short)zbin_extra; - // UV + /* UV */ zbin_extra = ZBIN_EXTRA_UV; for (i = 16; i < 24; i++) x->block[i].zbin_extra = (short)zbin_extra; - // Y2 + /* Y2 */ zbin_extra = ZBIN_EXTRA_Y2; x->block[24].zbin_extra = (short)zbin_extra; } @@ -766,10 +763,10 @@ void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x) void vp8cx_frame_init_quantizer(VP8_COMP *cpi) { - // Clear Zbin mode boost for default case - cpi->zbin_mode_boost = 0; + /* Clear Zbin mode boost for default case */ + cpi->mb.zbin_mode_boost = 0; - // MB level quantizer setup + /* MB level quantizer setup */ vp8cx_mb_init_quantizer(cpi, &cpi->mb, 0); } @@ -801,7 +798,7 @@ void vp8_set_quantizer(struct VP8_COMP *cpi, int Q) cm->y2dc_delta_q = new_delta_q; - // Set Segment specific quatizers + /* Set Segment specific quatizers */ mbd->segment_feature_data[MB_LVL_ALT_Q][0] = cpi->segment_feature_data[MB_LVL_ALT_Q][0]; mbd->segment_feature_data[MB_LVL_ALT_Q][1] = cpi->segment_feature_data[MB_LVL_ALT_Q][1]; mbd->segment_feature_data[MB_LVL_ALT_Q][2] = cpi->segment_feature_data[MB_LVL_ALT_Q][2]; diff --git a/vp8/encoder/ratectrl.c b/vp8/encoder/ratectrl.c index 472e85f..a399a38 100644 --- a/vp8/encoder/ratectrl.c +++ b/vp8/encoder/ratectrl.c @@ -41,15 +41,16 @@ extern int inter_uv_modes[4]; extern int inter_b_modes[10]; #endif -// Bits Per MB at different Q (Multiplied by 512) +/* Bits Per MB at different Q (Multiplied by 512) */ #define BPER_MB_NORMBITS 9 -// Work in progress recalibration of baseline rate tables based on -// the assumption that bits per mb is 
inversely proportional to the -// quantizer value. +/* Work in progress recalibration of baseline rate tables based on + * the assumption that bits per mb is inversely proportional to the + * quantizer value. + */ const int vp8_bits_per_mb[2][QINDEX_RANGE] = { - // Intra case 450000/Qintra + /* Intra case 450000/Qintra */ { 1125000,900000, 750000, 642857, 562500, 500000, 450000, 450000, 409090, 375000, 346153, 321428, 300000, 281250, 264705, 264705, @@ -68,7 +69,7 @@ const int vp8_bits_per_mb[2][QINDEX_RANGE] = 36885, 36290, 35714, 35156, 34615, 34090, 33582, 33088, 32608, 32142, 31468, 31034, 30405, 29801, 29220, 28662, }, - // Inter case 285000/Qinter + /* Inter case 285000/Qinter */ { 712500, 570000, 475000, 407142, 356250, 316666, 285000, 259090, 237500, 219230, 203571, 190000, 178125, 167647, 158333, 150000, @@ -109,7 +110,7 @@ static const int kf_boost_qadjustment[QINDEX_RANGE] = 220, 220, 220, 220, 220, 220, 220, 220, }; -//#define GFQ_ADJUSTMENT (Q+100) +/* #define GFQ_ADJUSTMENT (Q+100) */ #define GFQ_ADJUSTMENT vp8_gf_boost_qadjustment[Q] const int vp8_gf_boost_qadjustment[QINDEX_RANGE] = { @@ -173,7 +174,7 @@ static const int kf_gf_boost_qlimits[QINDEX_RANGE] = 600, 600, 600, 600, 600, 600, 600, 600, }; -// % adjustment to target kf size based on seperation from previous frame +/* % adjustment to target kf size based on seperation from previous frame */ static const int kf_boost_seperation_adjustment[16] = { 30, 40, 50, 55, 60, 65, 70, 75, @@ -224,10 +225,11 @@ void vp8_save_coding_context(VP8_COMP *cpi) { CODING_CONTEXT *const cc = & cpi->coding_context; - // Stores a snapshot of key state variables which can subsequently be - // restored with a call to vp8_restore_coding_context. These functions are - // intended for use in a re-code loop in vp8_compress_frame where the - // quantizer value is adjusted between loop iterations. + /* Stores a snapshot of key state variables which can subsequently be + * restored with a call to vp8_restore_coding_context. 
These functions are + * intended for use in a re-code loop in vp8_compress_frame where the + * quantizer value is adjusted between loop iterations. + */ cc->frames_since_key = cpi->frames_since_key; cc->filter_level = cpi->common.filter_level; @@ -235,18 +237,16 @@ void vp8_save_coding_context(VP8_COMP *cpi) cc->frames_since_golden = cpi->common.frames_since_golden; vp8_copy(cc->mvc, cpi->common.fc.mvc); - vp8_copy(cc->mvcosts, cpi->mb.mvcosts); + vp8_copy(cc->mvcosts, cpi->rd_costs.mvcosts); - vp8_copy(cc->kf_ymode_prob, cpi->common.kf_ymode_prob); vp8_copy(cc->ymode_prob, cpi->common.fc.ymode_prob); - vp8_copy(cc->kf_uv_mode_prob, cpi->common.kf_uv_mode_prob); vp8_copy(cc->uv_mode_prob, cpi->common.fc.uv_mode_prob); - vp8_copy(cc->ymode_count, cpi->ymode_count); - vp8_copy(cc->uv_mode_count, cpi->uv_mode_count); + vp8_copy(cc->ymode_count, cpi->mb.ymode_count); + vp8_copy(cc->uv_mode_count, cpi->mb.uv_mode_count); - // Stats + /* Stats */ #ifdef MODE_STATS vp8_copy(cc->y_modes, y_modes); vp8_copy(cc->uv_modes, uv_modes); @@ -264,8 +264,9 @@ void vp8_restore_coding_context(VP8_COMP *cpi) { CODING_CONTEXT *const cc = & cpi->coding_context; - // Restore key state variables to the snapshot state stored in the - // previous call to vp8_save_coding_context. + /* Restore key state variables to the snapshot state stored in the + * previous call to vp8_save_coding_context. 
+ */ cpi->frames_since_key = cc->frames_since_key; cpi->common.filter_level = cc->filter_level; @@ -274,17 +275,15 @@ void vp8_restore_coding_context(VP8_COMP *cpi) vp8_copy(cpi->common.fc.mvc, cc->mvc); - vp8_copy(cpi->mb.mvcosts, cc->mvcosts); + vp8_copy(cpi->rd_costs.mvcosts, cc->mvcosts); - vp8_copy(cpi->common.kf_ymode_prob, cc->kf_ymode_prob); vp8_copy(cpi->common.fc.ymode_prob, cc->ymode_prob); - vp8_copy(cpi->common.kf_uv_mode_prob, cc->kf_uv_mode_prob); vp8_copy(cpi->common.fc.uv_mode_prob, cc->uv_mode_prob); - vp8_copy(cpi->ymode_count, cc->ymode_count); - vp8_copy(cpi->uv_mode_count, cc->uv_mode_count); + vp8_copy(cpi->mb.ymode_count, cc->ymode_count); + vp8_copy(cpi->mb.uv_mode_count, cc->uv_mode_count); - // Stats + /* Stats */ #ifdef MODE_STATS vp8_copy(y_modes, cc->y_modes); vp8_copy(uv_modes, cc->uv_modes); @@ -301,36 +300,30 @@ void vp8_restore_coding_context(VP8_COMP *cpi) void vp8_setup_key_frame(VP8_COMP *cpi) { - // Setup for Key frame: + /* Setup for Key frame: */ vp8_default_coef_probs(& cpi->common); - - vp8_kf_default_bmode_probs(cpi->common.kf_bmode_prob); - vpx_memcpy(cpi->common.fc.mvc, vp8_default_mv_context, sizeof(vp8_default_mv_context)); { int flag[2] = {1, 1}; vp8_build_component_cost_table(cpi->mb.mvcost, (const MV_CONTEXT *) cpi->common.fc.mvc, flag); } - vpx_memset(cpi->common.fc.pre_mvc, 0, sizeof(cpi->common.fc.pre_mvc)); //initialize pre_mvc to all zero. - - // Make sure we initialize separate contexts for altref,gold, and normal. - // TODO shouldn't need 3 different copies of structure to do this! + /* Make sure we initialize separate contexts for altref,gold, and normal. + * TODO shouldn't need 3 different copies of structure to do this! + */ vpx_memcpy(&cpi->lfc_a, &cpi->common.fc, sizeof(cpi->common.fc)); vpx_memcpy(&cpi->lfc_g, &cpi->common.fc, sizeof(cpi->common.fc)); vpx_memcpy(&cpi->lfc_n, &cpi->common.fc, sizeof(cpi->common.fc)); - //cpi->common.filter_level = 0; // Reset every key frame. 
cpi->common.filter_level = cpi->common.base_qindex * 3 / 8 ; - // Provisional interval before next GF + /* Provisional interval before next GF */ if (cpi->auto_gold) - //cpi->frames_till_gf_update_due = DEFAULT_GF_INTERVAL; cpi->frames_till_gf_update_due = cpi->baseline_gf_interval; else - cpi->frames_till_gf_update_due = cpi->goldfreq; + cpi->frames_till_gf_update_due = DEFAULT_GF_INTERVAL; cpi->common.refresh_golden_frame = 1; cpi->common.refresh_alt_ref_frame = 1; @@ -355,12 +348,12 @@ static int estimate_bits_at_q(int frame_kind, int Q, int MBs, static void calc_iframe_target_size(VP8_COMP *cpi) { - // boost defaults to half second + /* boost defaults to half second */ int kf_boost; - int target; + uint64_t target; - // Clear down mmx registers to allow floating point in what follows - vp8_clear_system_state(); //__asm emms; + /* Clear down mmx registers to allow floating point in what follows */ + vp8_clear_system_state(); if (cpi->oxcf.fixed_q >= 0) { @@ -371,10 +364,10 @@ static void calc_iframe_target_size(VP8_COMP *cpi) } else if (cpi->pass == 2) { - // New Two pass RC + /* New Two pass RC */ target = cpi->per_frame_bandwidth; } - // First Frame is a special case + /* First Frame is a special case */ else if (cpi->common.current_video_frame == 0) { /* 1 Pass there is no information on which to base size so use @@ -388,29 +381,29 @@ static void calc_iframe_target_size(VP8_COMP *cpi) } else { - // if this keyframe was forced, use a more recent Q estimate + /* if this keyframe was forced, use a more recent Q estimate */ int Q = (cpi->common.frame_flags & FRAMEFLAGS_KEY) ? cpi->avg_frame_qindex : cpi->ni_av_qi; - int initial_boost = 24; // Corresponds to: |2.5 * per_frame_bandwidth| - // Boost depends somewhat on frame rate: only used for 1 layer case. + int initial_boost = 32; /* |3.0 * per_frame_bandwidth| */ + /* Boost depends somewhat on frame rate: only used for 1 layer case. 
*/ if (cpi->oxcf.number_of_layers == 1) { kf_boost = MAX(initial_boost, (int)(2 * cpi->output_frame_rate - 16)); } else { - // Initial factor: set target size to: |2.5 * per_frame_bandwidth|. + /* Initial factor: set target size to: |3.0 * per_frame_bandwidth|. */ kf_boost = initial_boost; } - // adjustment up based on q: this factor ranges from ~1.2 to 2.2. + /* adjustment up based on q: this factor ranges from ~1.2 to 2.2. */ kf_boost = kf_boost * kf_boost_qadjustment[Q] / 100; - // frame separation adjustment ( down) + /* frame separation adjustment ( down) */ if (cpi->frames_since_key < cpi->output_frame_rate / 2) kf_boost = (int)(kf_boost * cpi->frames_since_key / (cpi->output_frame_rate / 2)); - // Minimal target size is |2* per_frame_bandwidth|. + /* Minimal target size is |2* per_frame_bandwidth|. */ if (kf_boost < 16) kf_boost = 16; @@ -427,10 +420,11 @@ static void calc_iframe_target_size(VP8_COMP *cpi) target = max_rate; } - cpi->this_frame_target = target; + cpi->this_frame_target = (int)target; - // TODO: if we separate rate targeting from Q targetting, move this. - // Reset the active worst quality to the baseline value for key frames. + /* TODO: if we separate rate targeting from Q targetting, move this. + * Reset the active worst quality to the baseline value for key frames. 
+ */ if (cpi->pass != 2) cpi->active_worst_quality = cpi->worst_quality; @@ -439,9 +433,6 @@ static void calc_iframe_target_size(VP8_COMP *cpi) FILE *f; f = fopen("kf_boost.stt", "a"); - //fprintf(f, " %8d %10d %10d %10d %10d %10d %10d\n", - // cpi->common.current_video_frame, cpi->target_bandwidth, cpi->frames_to_key, kf_boost_qadjustment[cpi->ni_av_qi], cpi->kf_boost, (cpi->this_frame_target *100 / cpi->per_frame_bandwidth), cpi->this_frame_target ); - fprintf(f, " %8u %10d %10d %10d\n", cpi->common.current_video_frame, cpi->gfu_boost, cpi->baseline_gf_interval, cpi->source_alt_ref_pending); @@ -451,14 +442,15 @@ static void calc_iframe_target_size(VP8_COMP *cpi) } -// Do the best we can to define the parameters for the next GF based on what -// information we have available. +/* Do the best we can to define the parameters for the next GF based on what + * information we have available. + */ static void calc_gf_params(VP8_COMP *cpi) { int Q = (cpi->oxcf.fixed_q < 0) ? cpi->last_q[INTER_FRAME] : cpi->oxcf.fixed_q; int Boost = 0; - int gf_frame_useage = 0; // Golden frame useage since last GF + int gf_frame_useage = 0; /* Golden frame useage since last GF */ int tot_mbs = cpi->recent_ref_frame_usage[INTRA_FRAME] + cpi->recent_ref_frame_usage[LAST_FRAME] + cpi->recent_ref_frame_usage[GOLDEN_FRAME] + @@ -466,33 +458,30 @@ static void calc_gf_params(VP8_COMP *cpi) int pct_gf_active = (100 * cpi->gf_active_count) / (cpi->common.mb_rows * cpi->common.mb_cols); - // Reset the last boost indicator - //cpi->last_boost = 100; - if (tot_mbs) gf_frame_useage = (cpi->recent_ref_frame_usage[GOLDEN_FRAME] + cpi->recent_ref_frame_usage[ALTREF_FRAME]) * 100 / tot_mbs; if (pct_gf_active > gf_frame_useage) gf_frame_useage = pct_gf_active; - // Not two pass + /* Not two pass */ if (cpi->pass != 2) { - // Single Pass lagged mode: TBD + /* Single Pass lagged mode: TBD */ if (0) { } - // Single Pass compression: Has to use current and historical data + /* Single Pass compression: Has to 
use current and historical data */ else { #if 0 - // Experimental code + /* Experimental code */ int index = cpi->one_pass_frame_index; int frames_to_scan = (cpi->max_gf_interval <= MAX_LAG_BUFFERS) ? cpi->max_gf_interval : MAX_LAG_BUFFERS; + /* ************** Experimental code - incomplete */ /* - // *************** Experimental code - incomplete double decay_val = 1.0; double IIAccumulator = 0.0; double last_iiaccumulator = 0.0; @@ -535,48 +524,51 @@ static void calc_gf_params(VP8_COMP *cpi) #else /*************************************************************/ - // OLD code + /* OLD code */ - // Adjust boost based upon ambient Q + /* Adjust boost based upon ambient Q */ Boost = GFQ_ADJUSTMENT; - // Adjust based upon most recently measure intra useage + /* Adjust based upon most recently measure intra useage */ Boost = Boost * gf_intra_usage_adjustment[(cpi->this_frame_percent_intra < 15) ? cpi->this_frame_percent_intra : 14] / 100; - // Adjust gf boost based upon GF usage since last GF + /* Adjust gf boost based upon GF usage since last GF */ Boost = Boost * gf_adjust_table[gf_frame_useage] / 100; #endif } - // golden frame boost without recode loop often goes awry. be safe by keeping numbers down. + /* golden frame boost without recode loop often goes awry. be + * safe by keeping numbers down. + */ if (!cpi->sf.recode_loop) { if (cpi->compressor_speed == 2) Boost = Boost / 2; } - // Apply an upper limit based on Q for 1 pass encodes + /* Apply an upper limit based on Q for 1 pass encodes */ if (Boost > kf_gf_boost_qlimits[Q] && (cpi->pass == 0)) Boost = kf_gf_boost_qlimits[Q]; - // Apply lower limits to boost. + /* Apply lower limits to boost. */ else if (Boost < 110) Boost = 110; - // Note the boost used + /* Note the boost used */ cpi->last_boost = Boost; } - // Estimate next interval - // This is updated once the real frame size/boost is known. + /* Estimate next interval + * This is updated once the real frame size/boost is known. 
+ */ if (cpi->oxcf.fixed_q == -1) { - if (cpi->pass == 2) // 2 Pass + if (cpi->pass == 2) /* 2 Pass */ { cpi->frames_till_gf_update_due = cpi->baseline_gf_interval; } - else // 1 Pass + else /* 1 Pass */ { cpi->frames_till_gf_update_due = cpi->baseline_gf_interval; @@ -602,10 +594,10 @@ static void calc_gf_params(VP8_COMP *cpi) else cpi->frames_till_gf_update_due = cpi->baseline_gf_interval; - // ARF on or off + /* ARF on or off */ if (cpi->pass != 2) { - // For now Alt ref is not allowed except in 2 pass modes. + /* For now Alt ref is not allowed except in 2 pass modes. */ cpi->source_alt_ref_pending = 0; /*if ( cpi->oxcf.fixed_q == -1) @@ -642,89 +634,34 @@ static void calc_pframe_target_size(VP8_COMP *cpi) min_frame_target = cpi->per_frame_bandwidth / 4; - // Special alt reference frame case + /* Special alt reference frame case */ if((cpi->common.refresh_alt_ref_frame) && (cpi->oxcf.number_of_layers == 1)) { if (cpi->pass == 2) { - cpi->per_frame_bandwidth = cpi->twopass.gf_bits; // Per frame bit target for the alt ref frame + /* Per frame bit target for the alt ref frame */ + cpi->per_frame_bandwidth = cpi->twopass.gf_bits; cpi->this_frame_target = cpi->per_frame_bandwidth; } /* One Pass ??? TBD */ - /*else - { - int frames_in_section; - int allocation_chunks; - int Q = (cpi->oxcf.fixed_q < 0) ? cpi->last_q[INTER_FRAME] : cpi->oxcf.fixed_q; - int alt_boost; - int max_arf_rate; - - alt_boost = (cpi->gfu_boost * 3 * GFQ_ADJUSTMENT) / (2 * 100); - alt_boost += (cpi->frames_till_gf_update_due * 50); - - // If alt ref is not currently active then we have a pottential double hit with GF and ARF so reduce the boost a bit. - // A similar thing is done on GFs that preceed a arf update. 
- if ( !cpi->source_alt_ref_active ) - alt_boost = alt_boost * 3 / 4; - - frames_in_section = cpi->frames_till_gf_update_due+1; // Standard frames + GF - allocation_chunks = (frames_in_section * 100) + alt_boost; - - // Normalize Altboost and allocations chunck down to prevent overflow - while ( alt_boost > 1000 ) - { - alt_boost /= 2; - allocation_chunks /= 2; - } - - else - { - int bits_in_section; - - if ( cpi->kf_overspend_bits > 0 ) - { - Adjustment = (cpi->kf_bitrate_adjustment <= cpi->kf_overspend_bits) ? cpi->kf_bitrate_adjustment : cpi->kf_overspend_bits; - - if ( Adjustment > (cpi->per_frame_bandwidth - min_frame_target) ) - Adjustment = (cpi->per_frame_bandwidth - min_frame_target); - - cpi->kf_overspend_bits -= Adjustment; - - // Calculate an inter frame bandwidth target for the next few frames designed to recover - // any extra bits spent on the key frame. - cpi->inter_frame_target = cpi->per_frame_bandwidth - Adjustment; - if ( cpi->inter_frame_target < min_frame_target ) - cpi->inter_frame_target = min_frame_target; - } - else - cpi->inter_frame_target = cpi->per_frame_bandwidth; - - bits_in_section = cpi->inter_frame_target * frames_in_section; - - // Avoid loss of precision but avoid overflow - if ( (bits_in_section>>7) > allocation_chunks ) - cpi->this_frame_target = alt_boost * (bits_in_section / allocation_chunks); - else - cpi->this_frame_target = (alt_boost * bits_in_section) / allocation_chunks; - } - } - */ } - // Normal frames (gf,and inter) + /* Normal frames (gf,and inter) */ else { - // 2 pass + /* 2 pass */ if (cpi->pass == 2) { cpi->this_frame_target = cpi->per_frame_bandwidth; } - // 1 pass + /* 1 pass */ else { - // Make rate adjustment to recover bits spent in key frame - // Test to see if the key frame inter data rate correction should still be in force + /* Make rate adjustment to recover bits spent in key frame + * Test to see if the key frame inter data rate correction + * should still be in force + */ if (cpi->kf_overspend_bits 
> 0) { Adjustment = (cpi->kf_bitrate_adjustment <= cpi->kf_overspend_bits) ? cpi->kf_bitrate_adjustment : cpi->kf_overspend_bits; @@ -734,8 +671,10 @@ static void calc_pframe_target_size(VP8_COMP *cpi) cpi->kf_overspend_bits -= Adjustment; - // Calculate an inter frame bandwidth target for the next few frames designed to recover - // any extra bits spent on the key frame. + /* Calculate an inter frame bandwidth target for the next + * few frames designed to recover any extra bits spent on + * the key frame. + */ cpi->this_frame_target = cpi->per_frame_bandwidth - Adjustment; if (cpi->this_frame_target < min_frame_target) @@ -744,7 +683,9 @@ static void calc_pframe_target_size(VP8_COMP *cpi) else cpi->this_frame_target = cpi->per_frame_bandwidth; - // If appropriate make an adjustment to recover bits spent on a recent GF + /* If appropriate make an adjustment to recover bits spent on a + * recent GF + */ if ((cpi->gf_overspend_bits > 0) && (cpi->this_frame_target > min_frame_target)) { int Adjustment = (cpi->non_gf_bitrate_adjustment <= cpi->gf_overspend_bits) ? 
cpi->non_gf_bitrate_adjustment : cpi->gf_overspend_bits; @@ -756,11 +697,11 @@ static void calc_pframe_target_size(VP8_COMP *cpi) cpi->this_frame_target -= Adjustment; } - // Apply small + and - boosts for non gf frames + /* Apply small + and - boosts for non gf frames */ if ((cpi->last_boost > 150) && (cpi->frames_till_gf_update_due > 0) && (cpi->current_gf_interval >= (MIN_GF_INTERVAL << 1))) { - // % Adjustment limited to the range 1% to 10% + /* % Adjustment limited to the range 1% to 10% */ Adjustment = (cpi->last_boost - 100) >> 5; if (Adjustment < 1) @@ -768,7 +709,7 @@ static void calc_pframe_target_size(VP8_COMP *cpi) else if (Adjustment > 10) Adjustment = 10; - // Convert to bits + /* Convert to bits */ Adjustment = (cpi->this_frame_target * Adjustment) / 100; if (Adjustment > (cpi->this_frame_target - min_frame_target)) @@ -782,47 +723,53 @@ static void calc_pframe_target_size(VP8_COMP *cpi) } } - // Sanity check that the total sum of adjustments is not above the maximum allowed - // That is that having allowed for KF and GF penalties we have not pushed the - // current interframe target to low. If the adjustment we apply here is not capable of recovering - // all the extra bits we have spent in the KF or GF then the remainder will have to be recovered over - // a longer time span via other buffer / rate control mechanisms. + /* Sanity check that the total sum of adjustments is not above the + * maximum allowed That is that having allowed for KF and GF penalties + * we have not pushed the current interframe target to low. If the + * adjustment we apply here is not capable of recovering all the extra + * bits we have spent in the KF or GF then the remainder will have to + * be recovered over a longer time span via other buffer / rate control + * mechanisms. + */ if (cpi->this_frame_target < min_frame_target) cpi->this_frame_target = min_frame_target; if (!cpi->common.refresh_alt_ref_frame) - // Note the baseline target data rate for this inter frame. 
+ /* Note the baseline target data rate for this inter frame. */ cpi->inter_frame_target = cpi->this_frame_target; - // One Pass specific code + /* One Pass specific code */ if (cpi->pass == 0) { - // Adapt target frame size with respect to any buffering constraints: + /* Adapt target frame size with respect to any buffering constraints: */ if (cpi->buffered_mode) { - int one_percent_bits = 1 + cpi->oxcf.optimal_buffer_level / 100; + int one_percent_bits = (int) + (1 + cpi->oxcf.optimal_buffer_level / 100); if ((cpi->buffer_level < cpi->oxcf.optimal_buffer_level) || (cpi->bits_off_target < cpi->oxcf.optimal_buffer_level)) { int percent_low = 0; - // Decide whether or not we need to adjust the frame data rate target. - // - // If we are are below the optimal buffer fullness level and adherence - // to buffering constraints is important to the end usage then adjust - // the per frame target. + /* Decide whether or not we need to adjust the frame data + * rate target. + * + * If we are are below the optimal buffer fullness level + * and adherence to buffering constraints is important to + * the end usage then adjust the per frame target. + */ if ((cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) && (cpi->buffer_level < cpi->oxcf.optimal_buffer_level)) { - percent_low = - (cpi->oxcf.optimal_buffer_level - cpi->buffer_level) / - one_percent_bits; + percent_low = (int) + ((cpi->oxcf.optimal_buffer_level - cpi->buffer_level) / + one_percent_bits); } - // Are we overshooting the long term clip data rate... + /* Are we overshooting the long term clip data rate... */ else if (cpi->bits_off_target < 0) { - // Adjust per frame data target downwards to compensate. + /* Adjust per frame data target downwards to compensate. */ percent_low = (int)(100 * -cpi->bits_off_target / (cpi->total_byte_count * 8)); } @@ -832,40 +779,46 @@ static void calc_pframe_target_size(VP8_COMP *cpi) else if (percent_low < 0) percent_low = 0; - // lower the target bandwidth for this frame. 
+ /* lower the target bandwidth for this frame. */ cpi->this_frame_target -= (cpi->this_frame_target * percent_low) / 200; - // Are we using allowing control of active_worst_allowed_q - // according to buffer level. + /* Are we using allowing control of active_worst_allowed_q + * according to buffer level. + */ if (cpi->auto_worst_q && cpi->ni_frames > 150) { - int critical_buffer_level; - - // For streaming applications the most important factor is - // cpi->buffer_level as this takes into account the - // specified short term buffering constraints. However, - // hitting the long term clip data rate target is also - // important. + int64_t critical_buffer_level; + + /* For streaming applications the most important factor is + * cpi->buffer_level as this takes into account the + * specified short term buffering constraints. However, + * hitting the long term clip data rate target is also + * important. + */ if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { - // Take the smaller of cpi->buffer_level and - // cpi->bits_off_target + /* Take the smaller of cpi->buffer_level and + * cpi->bits_off_target + */ critical_buffer_level = (cpi->buffer_level < cpi->bits_off_target) ? cpi->buffer_level : cpi->bits_off_target; } - // For local file playback short term buffering constraints - // are less of an issue + /* For local file playback short term buffering constraints + * are less of an issue + */ else { - // Consider only how we are doing for the clip as a - // whole + /* Consider only how we are doing for the clip as a + * whole + */ critical_buffer_level = cpi->bits_off_target; } - // Set the active worst quality based upon the selected - // buffer fullness number. + /* Set the active worst quality based upon the selected + * buffer fullness number. 
+ */ if (critical_buffer_level < cpi->oxcf.optimal_buffer_level) { if ( critical_buffer_level > @@ -877,15 +830,16 @@ static void calc_pframe_target_size(VP8_COMP *cpi) (critical_buffer_level - (cpi->oxcf.optimal_buffer_level >> 2)); - // Step active worst quality down from - // cpi->ni_av_qi when (critical_buffer_level == - // cpi->optimal_buffer_level) to - // cpi->worst_quality when - // (critical_buffer_level == - // cpi->optimal_buffer_level >> 2) + /* Step active worst quality down from + * cpi->ni_av_qi when (critical_buffer_level == + * cpi->optimal_buffer_level) to + * cpi->worst_quality when + * (critical_buffer_level == + * cpi->optimal_buffer_level >> 2) + */ cpi->active_worst_quality = cpi->worst_quality - - ((qadjustment_range * above_base) / + (int)((qadjustment_range * above_base) / (cpi->oxcf.optimal_buffer_level*3>>2)); } else @@ -910,9 +864,9 @@ static void calc_pframe_target_size(VP8_COMP *cpi) if ((cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) && (cpi->buffer_level > cpi->oxcf.optimal_buffer_level)) { - percent_high = (cpi->buffer_level + percent_high = (int)((cpi->buffer_level - cpi->oxcf.optimal_buffer_level) - / one_percent_bits; + / one_percent_bits); } else if (cpi->bits_off_target > cpi->oxcf.optimal_buffer_level) { @@ -928,11 +882,14 @@ static void calc_pframe_target_size(VP8_COMP *cpi) cpi->this_frame_target += (cpi->this_frame_target * percent_high) / 200; - // Are we allowing control of active_worst_allowed_q according - // to buffer level. + /* Are we allowing control of active_worst_allowed_q according + * to buffer level. 
+ */ if (cpi->auto_worst_q && cpi->ni_frames > 150) { - // When using the relaxed buffer model stick to the user specified value + /* When using the relaxed buffer model stick to the + * user specified value + */ cpi->active_worst_quality = cpi->ni_av_qi; } else @@ -941,26 +898,27 @@ static void calc_pframe_target_size(VP8_COMP *cpi) } } - // Set active_best_quality to prevent quality rising too high + /* Set active_best_quality to prevent quality rising too high */ cpi->active_best_quality = cpi->best_quality; - // Worst quality obviously must not be better than best quality + /* Worst quality obviously must not be better than best quality */ if (cpi->active_worst_quality <= cpi->active_best_quality) cpi->active_worst_quality = cpi->active_best_quality + 1; if(cpi->active_worst_quality > 127) cpi->active_worst_quality = 127; } - // Unbuffered mode (eg. video conferencing) + /* Unbuffered mode (eg. video conferencing) */ else { - // Set the active worst quality + /* Set the active worst quality */ cpi->active_worst_quality = cpi->worst_quality; } - // Special trap for constrained quality mode - // "active_worst_quality" may never drop below cq level - // for any frame type. + /* Special trap for constrained quality mode + * "active_worst_quality" may never drop below cq level + * for any frame type. + */ if ( cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY && cpi->active_worst_quality < cpi->cq_target_quality) { @@ -968,16 +926,19 @@ static void calc_pframe_target_size(VP8_COMP *cpi) } } - // Test to see if we have to drop a frame - // The auto-drop frame code is only used in buffered mode. - // In unbufferd mode (eg vide conferencing) the descision to - // code or drop a frame is made outside the codec in response to real - // world comms or buffer considerations. - if (cpi->drop_frames_allowed && cpi->buffered_mode && + /* Test to see if we have to drop a frame + * The auto-drop frame code is only used in buffered mode. 
+ * In unbufferd mode (eg vide conferencing) the descision to + * code or drop a frame is made outside the codec in response to real + * world comms or buffer considerations. + */ + if (cpi->drop_frames_allowed && (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) && - ((cpi->common.frame_type != KEY_FRAME))) //|| !cpi->oxcf.allow_spatial_resampling) ) + ((cpi->common.frame_type != KEY_FRAME))) { - // Check for a buffer underun-crisis in which case we have to drop a frame + /* Check for a buffer underun-crisis in which case we have to drop + * a frame + */ if ((cpi->buffer_level < 0)) { #if 0 @@ -988,41 +949,23 @@ static void calc_pframe_target_size(VP8_COMP *cpi) (cpi->buffer_level * 100) / cpi->oxcf.optimal_buffer_level); fclose(f); #endif - //vpx_log("Decoder: Drop frame due to bandwidth: %d \n",cpi->buffer_level, cpi->av_per_frame_bandwidth); - - cpi->drop_frame = 1; - } - -#if 0 - // Check for other drop frame crtieria (Note 2 pass cbr uses decimation on whole KF sections) - else if ((cpi->buffer_level < cpi->oxcf.drop_frames_water_mark * cpi->oxcf.optimal_buffer_level / 100) && - (cpi->drop_count < cpi->max_drop_count) && (cpi->pass == 0)) - { cpi->drop_frame = 1; - } - -#endif - if (cpi->drop_frame) - { - // Update the buffer level variable. + /* Update the buffer level variable. */ cpi->bits_off_target += cpi->av_per_frame_bandwidth; if (cpi->bits_off_target > cpi->oxcf.maximum_buffer_size) - cpi->bits_off_target = cpi->oxcf.maximum_buffer_size; + cpi->bits_off_target = (int)cpi->oxcf.maximum_buffer_size; cpi->buffer_level = cpi->bits_off_target; } - else - cpi->drop_count = 0; } - // Adjust target frame size for Golden Frames: + /* Adjust target frame size for Golden Frames: */ if (cpi->oxcf.error_resilient_mode == 0 && (cpi->frames_till_gf_update_due == 0) && !cpi->drop_frame) { - //int Boost = 0; int Q = (cpi->oxcf.fixed_q < 0) ? 
cpi->last_q[INTER_FRAME] : cpi->oxcf.fixed_q; - int gf_frame_useage = 0; // Golden frame useage since last GF + int gf_frame_useage = 0; /* Golden frame useage since last GF */ int tot_mbs = cpi->recent_ref_frame_usage[INTRA_FRAME] + cpi->recent_ref_frame_usage[LAST_FRAME] + cpi->recent_ref_frame_usage[GOLDEN_FRAME] + @@ -1030,30 +973,29 @@ static void calc_pframe_target_size(VP8_COMP *cpi) int pct_gf_active = (100 * cpi->gf_active_count) / (cpi->common.mb_rows * cpi->common.mb_cols); - // Reset the last boost indicator - //cpi->last_boost = 100; - if (tot_mbs) gf_frame_useage = (cpi->recent_ref_frame_usage[GOLDEN_FRAME] + cpi->recent_ref_frame_usage[ALTREF_FRAME]) * 100 / tot_mbs; if (pct_gf_active > gf_frame_useage) gf_frame_useage = pct_gf_active; - // Is a fixed manual GF frequency being used + /* Is a fixed manual GF frequency being used */ if (cpi->auto_gold) { - // For one pass throw a GF if recent frame intra useage is low or the GF useage is high + /* For one pass throw a GF if recent frame intra useage is + * low or the GF useage is high + */ if ((cpi->pass == 0) && (cpi->this_frame_percent_intra < 15 || gf_frame_useage >= 5)) cpi->common.refresh_golden_frame = 1; - // Two pass GF descision + /* Two pass GF descision */ else if (cpi->pass == 2) cpi->common.refresh_golden_frame = 1; } #if 0 - // Debug stats + /* Debug stats */ if (0) { FILE *f; @@ -1070,7 +1012,7 @@ static void calc_pframe_target_size(VP8_COMP *cpi) { #if 0 - if (0) // p_gw + if (0) { FILE *f; @@ -1086,16 +1028,20 @@ static void calc_pframe_target_size(VP8_COMP *cpi) calc_gf_params(cpi); } - // If we are using alternate ref instead of gf then do not apply the boost - // It will instead be applied to the altref update - // Jims modified boost + /* If we are using alternate ref instead of gf then do not apply the + * boost It will instead be applied to the altref update Jims + * modified boost + */ if (!cpi->source_alt_ref_active) { if (cpi->oxcf.fixed_q < 0) { if (cpi->pass == 2) { - 
cpi->this_frame_target = cpi->per_frame_bandwidth; // The spend on the GF is defined in the two pass code for two pass encodes + /* The spend on the GF is defined in the two pass + * code for two pass encodes + */ + cpi->this_frame_target = cpi->per_frame_bandwidth; } else { @@ -1104,14 +1050,16 @@ static void calc_pframe_target_size(VP8_COMP *cpi) int allocation_chunks = (frames_in_section * 100) + (Boost - 100); int bits_in_section = cpi->inter_frame_target * frames_in_section; - // Normalize Altboost and allocations chunck down to prevent overflow + /* Normalize Altboost and allocations chunck down to + * prevent overflow + */ while (Boost > 1000) { Boost /= 2; allocation_chunks /= 2; } - // Avoid loss of precision but avoid overflow + /* Avoid loss of precision but avoid overflow */ if ((bits_in_section >> 7) > allocation_chunks) cpi->this_frame_target = Boost * (bits_in_section / allocation_chunks); else @@ -1124,10 +1072,11 @@ static void calc_pframe_target_size(VP8_COMP *cpi) * cpi->last_boost) / 100; } - // If there is an active ARF at this location use the minimum - // bits on this frame even if it is a contructed arf. - // The active maximum quantizer insures that an appropriate - // number of bits will be spent if needed for contstructed ARFs. + /* If there is an active ARF at this location use the minimum + * bits on this frame even if it is a contructed arf. + * The active maximum quantizer insures that an appropriate + * number of bits will be spent if needed for contstructed ARFs. 
+ */ else { cpi->this_frame_target = 0; @@ -1151,8 +1100,8 @@ void vp8_update_rate_correction_factors(VP8_COMP *cpi, int damp_var) int projected_size_based_on_q = 0; - // Clear down mmx registers to allow floating point in what follows - vp8_clear_system_state(); //__asm emms; + /* Clear down mmx registers to allow floating point in what follows */ + vp8_clear_system_state(); if (cpi->common.frame_type == KEY_FRAME) { @@ -1160,23 +1109,26 @@ void vp8_update_rate_correction_factors(VP8_COMP *cpi, int damp_var) } else { - if (cpi->common.refresh_alt_ref_frame || cpi->common.refresh_golden_frame) + if (cpi->oxcf.number_of_layers == 1 && + (cpi->common.refresh_alt_ref_frame || + cpi->common.refresh_golden_frame)) rate_correction_factor = cpi->gf_rate_correction_factor; else rate_correction_factor = cpi->rate_correction_factor; } - // Work out how big we would have expected the frame to be at this Q given the current correction factor. - // Stay in double to avoid int overflow when values are large - //projected_size_based_on_q = ((int)(.5 + rate_correction_factor * vp8_bits_per_mb[cpi->common.frame_type][Q]) * cpi->common.MBs) >> BPER_MB_NORMBITS; + /* Work out how big we would have expected the frame to be at this Q + * given the current correction factor. 
Stay in double to avoid int + * overflow when values are large + */ projected_size_based_on_q = (int)(((.5 + rate_correction_factor * vp8_bits_per_mb[cpi->common.frame_type][Q]) * cpi->common.MBs) / (1 << BPER_MB_NORMBITS)); - // Make some allowance for cpi->zbin_over_quant - if (cpi->zbin_over_quant > 0) + /* Make some allowance for cpi->zbin_over_quant */ + if (cpi->mb.zbin_over_quant > 0) { - int Z = cpi->zbin_over_quant; + int Z = cpi->mb.zbin_over_quant; double Factor = 0.99; - double factor_adjustment = 0.01 / 256.0; //(double)ZBIN_OQ_MAX; + double factor_adjustment = 0.01 / 256.0; while (Z > 0) { @@ -1190,13 +1142,13 @@ void vp8_update_rate_correction_factors(VP8_COMP *cpi, int damp_var) } } - // Work out a size correction factor. - //if ( cpi->this_frame_target > 0 ) - // correction_factor = (100 * cpi->projected_frame_size) / cpi->this_frame_target; + /* Work out a size correction factor. */ if (projected_size_based_on_q > 0) correction_factor = (100 * cpi->projected_frame_size) / projected_size_based_on_q; - // More heavily damped adjustment used if we have been oscillating either side of target + /* More heavily damped adjustment used if we have been oscillating + * either side of target + */ switch (damp_var) { case 0: @@ -1211,25 +1163,23 @@ void vp8_update_rate_correction_factors(VP8_COMP *cpi, int damp_var) break; } - //if ( (correction_factor > 102) && (Q < cpi->active_worst_quality) ) if (correction_factor > 102) { - // We are not already at the worst allowable quality + /* We are not already at the worst allowable quality */ correction_factor = (int)(100.5 + ((correction_factor - 100) * adjustment_limit)); rate_correction_factor = ((rate_correction_factor * correction_factor) / 100); - // Keep rate_correction_factor within limits + /* Keep rate_correction_factor within limits */ if (rate_correction_factor > MAX_BPB_FACTOR) rate_correction_factor = MAX_BPB_FACTOR; } - //else if ( (correction_factor < 99) && (Q > cpi->active_best_quality) ) else if 
(correction_factor < 99) { - // We are not already at the best allowable quality + /* We are not already at the best allowable quality */ correction_factor = (int)(100.5 - ((100 - correction_factor) * adjustment_limit)); rate_correction_factor = ((rate_correction_factor * correction_factor) / 100); - // Keep rate_correction_factor within limits + /* Keep rate_correction_factor within limits */ if (rate_correction_factor < MIN_BPB_FACTOR) rate_correction_factor = MIN_BPB_FACTOR; } @@ -1238,7 +1188,9 @@ void vp8_update_rate_correction_factors(VP8_COMP *cpi, int damp_var) cpi->key_frame_rate_correction_factor = rate_correction_factor; else { - if (cpi->common.refresh_alt_ref_frame || cpi->common.refresh_golden_frame) + if (cpi->oxcf.number_of_layers == 1 && + (cpi->common.refresh_alt_ref_frame || + cpi->common.refresh_golden_frame)) cpi->gf_rate_correction_factor = rate_correction_factor; else cpi->rate_correction_factor = rate_correction_factor; @@ -1250,8 +1202,8 @@ int vp8_regulate_q(VP8_COMP *cpi, int target_bits_per_frame) { int Q = cpi->active_worst_quality; - // Reset Zbin OQ value - cpi->zbin_over_quant = 0; + /* Reset Zbin OQ value */ + cpi->mb.zbin_over_quant = 0; if (cpi->oxcf.fixed_q >= 0) { @@ -1261,11 +1213,13 @@ int vp8_regulate_q(VP8_COMP *cpi, int target_bits_per_frame) { Q = cpi->oxcf.key_q; } - else if (cpi->common.refresh_alt_ref_frame) + else if (cpi->oxcf.number_of_layers == 1 && + cpi->common.refresh_alt_ref_frame) { Q = cpi->oxcf.alt_q; } - else if (cpi->common.refresh_golden_frame) + else if (cpi->oxcf.number_of_layers == 1 && + cpi->common.refresh_golden_frame) { Q = cpi->oxcf.gold_q; } @@ -1279,20 +1233,25 @@ int vp8_regulate_q(VP8_COMP *cpi, int target_bits_per_frame) int bits_per_mb_at_this_q; double correction_factor; - // Select the appropriate correction factor based upon type of frame. + /* Select the appropriate correction factor based upon type of frame. 
*/ if (cpi->common.frame_type == KEY_FRAME) correction_factor = cpi->key_frame_rate_correction_factor; else { - if (cpi->common.refresh_alt_ref_frame || cpi->common.refresh_golden_frame) + if (cpi->oxcf.number_of_layers == 1 && + (cpi->common.refresh_alt_ref_frame || + cpi->common.refresh_golden_frame)) correction_factor = cpi->gf_rate_correction_factor; else correction_factor = cpi->rate_correction_factor; } - // Calculate required scaling factor based on target frame size and size of frame produced using previous Q + /* Calculate required scaling factor based on target frame size and + * size of frame produced using previous Q + */ if (target_bits_per_frame >= (INT_MAX >> BPER_MB_NORMBITS)) - target_bits_per_mb = (target_bits_per_frame / cpi->common.MBs) << BPER_MB_NORMBITS; // Case where we would overflow int + /* Case where we would overflow int */ + target_bits_per_mb = (target_bits_per_frame / cpi->common.MBs) << BPER_MB_NORMBITS; else target_bits_per_mb = (target_bits_per_frame << BPER_MB_NORMBITS) / cpi->common.MBs; @@ -1317,18 +1276,23 @@ int vp8_regulate_q(VP8_COMP *cpi, int target_bits_per_frame) while (++i <= cpi->active_worst_quality); - // If we are at MAXQ then enable Q over-run which seeks to claw back additional bits through things like - // the RD multiplier and zero bin size. + /* If we are at MAXQ then enable Q over-run which seeks to claw + * back additional bits through things like the RD multiplier + * and zero bin size. 
+ */ if (Q >= MAXQ) { int zbin_oqmax; double Factor = 0.99; - double factor_adjustment = 0.01 / 256.0; //(double)ZBIN_OQ_MAX; + double factor_adjustment = 0.01 / 256.0; if (cpi->common.frame_type == KEY_FRAME) - zbin_oqmax = 0; //ZBIN_OQ_MAX/16 - else if (cpi->common.refresh_alt_ref_frame || (cpi->common.refresh_golden_frame && !cpi->source_alt_ref_active)) + zbin_oqmax = 0; + else if (cpi->oxcf.number_of_layers == 1 && + (cpi->common.refresh_alt_ref_frame || + (cpi->common.refresh_golden_frame && + !cpi->source_alt_ref_active))) zbin_oqmax = 16; else zbin_oqmax = ZBIN_OQ_MAX; @@ -1347,25 +1311,29 @@ int vp8_regulate_q(VP8_COMP *cpi, int target_bits_per_frame) cpi->zbin_over_quant = (int)Oq; }*/ - // Each incrment in the zbin is assumed to have a fixed effect on bitrate. This is not of course true. - // The effect will be highly clip dependent and may well have sudden steps. - // The idea here is to acheive higher effective quantizers than the normal maximum by expanding the zero - // bin and hence decreasing the number of low magnitude non zero coefficients. - while (cpi->zbin_over_quant < zbin_oqmax) + /* Each incrment in the zbin is assumed to have a fixed effect + * on bitrate. This is not of course true. The effect will be + * highly clip dependent and may well have sudden steps. The + * idea here is to acheive higher effective quantizers than the + * normal maximum by expanding the zero bin and hence + * decreasing the number of low magnitude non zero coefficients. 
+ */ + while (cpi->mb.zbin_over_quant < zbin_oqmax) { - cpi->zbin_over_quant ++; + cpi->mb.zbin_over_quant ++; - if (cpi->zbin_over_quant > zbin_oqmax) - cpi->zbin_over_quant = zbin_oqmax; + if (cpi->mb.zbin_over_quant > zbin_oqmax) + cpi->mb.zbin_over_quant = zbin_oqmax; - // Adjust bits_per_mb_at_this_q estimate + /* Adjust bits_per_mb_at_this_q estimate */ bits_per_mb_at_this_q = (int)(Factor * bits_per_mb_at_this_q); Factor += factor_adjustment; if (Factor >= 0.999) Factor = 0.999; - if (bits_per_mb_at_this_q <= target_bits_per_mb) // Break out if we get down to the target rate + /* Break out if we get down to the target rate */ + if (bits_per_mb_at_this_q <= target_bits_per_mb) break; } @@ -1380,7 +1348,7 @@ static int estimate_keyframe_frequency(VP8_COMP *cpi) { int i; - // Average key frame frequency + /* Average key frame frequency */ int av_key_frame_frequency = 0; /* First key frame at start of sequence is a special case. We have no @@ -1431,11 +1399,11 @@ static int estimate_keyframe_frequency(VP8_COMP *cpi) void vp8_adjust_key_frame_context(VP8_COMP *cpi) { - // Clear down mmx registers to allow floating point in what follows + /* Clear down mmx registers to allow floating point in what follows */ vp8_clear_system_state(); - // Do we have any key frame overspend to recover? - // Two-pass overspend handled elsewhere. + /* Do we have any key frame overspend to recover? */ + /* Two-pass overspend handled elsewhere. */ if ((cpi->pass != 2) && (cpi->projected_frame_size > cpi->per_frame_bandwidth)) { @@ -1469,10 +1437,12 @@ void vp8_adjust_key_frame_context(VP8_COMP *cpi) void vp8_compute_frame_size_bounds(VP8_COMP *cpi, int *frame_under_shoot_limit, int *frame_over_shoot_limit) { - // Set-up bounds on acceptable frame size: + /* Set-up bounds on acceptable frame size: */ if (cpi->oxcf.fixed_q >= 0) { - // Fixed Q scenario: frame size never outranges target (there is no target!) 
+ /* Fixed Q scenario: frame size never outranges target + * (there is no target!) + */ *frame_under_shoot_limit = 0; *frame_over_shoot_limit = INT_MAX; } @@ -1494,18 +1464,22 @@ void vp8_compute_frame_size_bounds(VP8_COMP *cpi, int *frame_under_shoot_limit, } else { - // For CBR take buffer fullness into account + /* For CBR take buffer fullness into account */ if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { if (cpi->buffer_level >= ((cpi->oxcf.optimal_buffer_level + cpi->oxcf.maximum_buffer_size) >> 1)) { - // Buffer is too full so relax overshoot and tighten undershoot + /* Buffer is too full so relax overshoot and tighten + * undershoot + */ *frame_over_shoot_limit = cpi->this_frame_target * 12 / 8; *frame_under_shoot_limit = cpi->this_frame_target * 6 / 8; } else if (cpi->buffer_level <= (cpi->oxcf.optimal_buffer_level >> 1)) { - // Buffer is too low so relax undershoot and tighten overshoot + /* Buffer is too low so relax undershoot and tighten + * overshoot + */ *frame_over_shoot_limit = cpi->this_frame_target * 10 / 8; *frame_under_shoot_limit = cpi->this_frame_target * 4 / 8; } @@ -1515,11 +1489,13 @@ void vp8_compute_frame_size_bounds(VP8_COMP *cpi, int *frame_under_shoot_limit, *frame_under_shoot_limit = cpi->this_frame_target * 5 / 8; } } - // VBR and CQ mode - // Note that tighter restrictions here can help quality but hurt encode speed + /* VBR and CQ mode */ + /* Note that tighter restrictions here can help quality + * but hurt encode speed + */ else { - // Stron overshoot limit for constrained quality + /* Stron overshoot limit for constrained quality */ if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) { *frame_over_shoot_limit = cpi->this_frame_target * 11 / 8; @@ -1534,9 +1510,10 @@ void vp8_compute_frame_size_bounds(VP8_COMP *cpi, int *frame_under_shoot_limit, } } - // For very small rate targets where the fractional adjustment - // (eg * 7/8) may be tiny make sure there is at least a minimum - // range. 
+ /* For very small rate targets where the fractional adjustment + * (eg * 7/8) may be tiny make sure there is at least a minimum + * range. + */ *frame_over_shoot_limit += 200; *frame_under_shoot_limit -= 200; if ( *frame_under_shoot_limit < 0 ) @@ -1546,7 +1523,7 @@ void vp8_compute_frame_size_bounds(VP8_COMP *cpi, int *frame_under_shoot_limit, } -// return of 0 means drop frame +/* return of 0 means drop frame */ int vp8_pick_frame_size(VP8_COMP *cpi) { VP8_COMMON *cm = &cpi->common; @@ -1557,11 +1534,10 @@ int vp8_pick_frame_size(VP8_COMP *cpi) { calc_pframe_target_size(cpi); - // Check if we're dropping the frame: + /* Check if we're dropping the frame: */ if (cpi->drop_frame) { cpi->drop_frame = 0; - cpi->drop_count++; return 0; } } diff --git a/vp8/encoder/ratectrl.h b/vp8/encoder/ratectrl.h index d4f7796..c43f08d 100644 --- a/vp8/encoder/ratectrl.h +++ b/vp8/encoder/ratectrl.h @@ -22,7 +22,7 @@ extern int vp8_regulate_q(VP8_COMP *cpi, int target_bits_per_frame); extern void vp8_adjust_key_frame_context(VP8_COMP *cpi); extern void vp8_compute_frame_size_bounds(VP8_COMP *cpi, int *frame_under_shoot_limit, int *frame_over_shoot_limit); -// return of 0 means drop frame +/* return of 0 means drop frame */ extern int vp8_pick_frame_size(VP8_COMP *cpi); #endif diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c index 2b706ba..ceb817c 100644 --- a/vp8/encoder/rdopt.c +++ b/vp8/encoder/rdopt.c @@ -21,6 +21,7 @@ #include "onyx_int.h" #include "modecosts.h" #include "encodeintra.h" +#include "pickinter.h" #include "vp8/common/entropymode.h" #include "vp8/common/reconinter.h" #include "vp8/common/reconintra4x4.h" @@ -36,7 +37,6 @@ #if CONFIG_TEMPORAL_DENOISING #include "denoising.h" #endif - extern void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x); #define MAXF(a,b) (((a) > (b)) ? 
(a) : (b)) @@ -149,8 +149,8 @@ const int vp8_ref_frame_order[MAX_MODES] = }; static void fill_token_costs( - unsigned int c [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS], - const vp8_prob p [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES] + int c[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS], + const vp8_prob p[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES] ) { int i, j, k; @@ -159,21 +159,26 @@ static void fill_token_costs( for (i = 0; i < BLOCK_TYPES; i++) for (j = 0; j < COEF_BANDS; j++) for (k = 0; k < PREV_COEF_CONTEXTS; k++) - // check for pt=0 and band > 1 if block type 0 and 0 if blocktype 1 - if(k==0 && j>(i==0) ) - vp8_cost_tokens2((int *)(c [i][j][k]), p [i][j][k], vp8_coef_tree,2); + + /* check for pt=0 and band > 1 if block type 0 + * and 0 if blocktype 1 + */ + if (k == 0 && j > (i == 0)) + vp8_cost_tokens2(c[i][j][k], p [i][j][k], vp8_coef_tree, 2); else - vp8_cost_tokens((int *)(c [i][j][k]), p [i][j][k], vp8_coef_tree); + vp8_cost_tokens(c[i][j][k], p [i][j][k], vp8_coef_tree); } -static int rd_iifactor [ 32 ] = { 4, 4, 3, 2, 1, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - }; +static const int rd_iifactor[32] = +{ + 4, 4, 3, 2, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 +}; /* values are now correlated to quantizer */ -static int sad_per_bit16lut[QINDEX_RANGE] = +static const int sad_per_bit16lut[QINDEX_RANGE] = { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, @@ -192,7 +197,7 @@ static int sad_per_bit16lut[QINDEX_RANGE] = 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 14, 14 }; -static int sad_per_bit4lut[QINDEX_RANGE] = +static const int sad_per_bit4lut[QINDEX_RANGE] = { 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, @@ -218,30 +223,30 @@ void vp8cx_initialize_me_consts(VP8_COMP *cpi, int QIndex) cpi->mb.sadperbit4 = sad_per_bit4lut[QIndex]; } -void 
vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue) +void vp8_initialize_rd_consts(VP8_COMP *cpi, MACROBLOCK *x, int Qvalue) { int q; int i; double capped_q = (Qvalue < 160) ? (double)Qvalue : 160.0; double rdconst = 2.80; - vp8_clear_system_state(); //__asm emms; + vp8_clear_system_state(); - // Further tests required to see if optimum is different - // for key frames, golden frames and arf frames. - // if (cpi->common.refresh_golden_frame || - // cpi->common.refresh_alt_ref_frame) + /* Further tests required to see if optimum is different + * for key frames, golden frames and arf frames. + */ cpi->RDMULT = (int)(rdconst * (capped_q * capped_q)); - // Extend rate multiplier along side quantizer zbin increases - if (cpi->zbin_over_quant > 0) + /* Extend rate multiplier along side quantizer zbin increases */ + if (cpi->mb.zbin_over_quant > 0) { double oq_factor; double modq; - // Experimental code using the same basic equation as used for Q above - // The units of cpi->zbin_over_quant are 1/128 of Q bin size - oq_factor = 1.0 + ((double)0.0015625 * cpi->zbin_over_quant); + /* Experimental code using the same basic equation as used for Q above + * The units of cpi->mb.zbin_over_quant are 1/128 of Q bin size + */ + oq_factor = 1.0 + ((double)0.0015625 * cpi->mb.zbin_over_quant); modq = (int)((double)capped_q * oq_factor); cpi->RDMULT = (int)(rdconst * (modq * modq)); } @@ -260,6 +265,11 @@ void vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue) vp8_set_speed_features(cpi); + for (i = 0; i < MAX_MODES; i++) + { + x->mode_test_hit_counts[i] = 0; + } + q = (int)pow(Qvalue, 1.25); if (q < 8) @@ -274,14 +284,14 @@ void vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue) { if (cpi->sf.thresh_mult[i] < INT_MAX) { - cpi->rd_threshes[i] = cpi->sf.thresh_mult[i] * q / 100; + x->rd_threshes[i] = cpi->sf.thresh_mult[i] * q / 100; } else { - cpi->rd_threshes[i] = INT_MAX; + x->rd_threshes[i] = INT_MAX; } - cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i]; + 
cpi->rd_baseline_thresh[i] = x->rd_threshes[i]; } } else @@ -292,19 +302,19 @@ void vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue) { if (cpi->sf.thresh_mult[i] < (INT_MAX / q)) { - cpi->rd_threshes[i] = cpi->sf.thresh_mult[i] * q; + x->rd_threshes[i] = cpi->sf.thresh_mult[i] * q; } else { - cpi->rd_threshes[i] = INT_MAX; + x->rd_threshes[i] = INT_MAX; } - cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i]; + cpi->rd_baseline_thresh[i] = x->rd_threshes[i]; } } { - // build token cost array for the type of frame we have now + /* build token cost array for the type of frame we have now */ FRAME_CONTEXT *l = &cpi->lfc_n; if(cpi->common.refresh_alt_ref_frame) @@ -323,12 +333,8 @@ void vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue) */ - // TODO make these mode costs depend on last,alt or gold too. (jbb) + /* TODO make these mode costs depend on last,alt or gold too. (jbb) */ vp8_init_mode_costs(cpi); - - // TODO figure onnnnuut why making mv cost frame type dependent didn't help (jbb) - //vp8_build_component_cost_table(cpi->mb.mvcost, (const MV_CONTEXT *) l->mvc, flags); - } } @@ -353,14 +359,6 @@ void vp8_auto_select_speed(VP8_COMP *cpi) #endif - /* - // this is done during parameter valid check - if( cpi->oxcf.cpu_used > 16) - cpi->oxcf.cpu_used = 16; - if( cpi->oxcf.cpu_used < -16) - cpi->oxcf.cpu_used = -16; - */ - if (cpi->avg_pick_mode_time < milliseconds_for_compress && (cpi->avg_encode_time - cpi->avg_pick_mode_time) < milliseconds_for_compress) { if (cpi->avg_pick_mode_time == 0) @@ -387,10 +385,10 @@ void vp8_auto_select_speed(VP8_COMP *cpi) cpi->avg_pick_mode_time = 0; cpi->avg_encode_time = 0; - // In real-time mode, cpi->speed is in [4, 16]. - if (cpi->Speed < 4) //if ( cpi->Speed < 0 ) + /* In real-time mode, cpi->speed is in [4, 16]. 
*/ + if (cpi->Speed < 4) { - cpi->Speed = 4; //cpi->Speed = 0; + cpi->Speed = 4; } } } @@ -546,7 +544,7 @@ static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, int type, ENTROPY_CONTEXT *a, if (c < 16) cost += mb->token_costs [type] [vp8_coef_bands[c]] [pt] [DCT_EOB_TOKEN]; - pt = (c != !type); // is eob first coefficient; + pt = (c != !type); /* is eob first coefficient; */ *a = *l = pt; return cost; @@ -592,7 +590,7 @@ static void macro_block_yrd( MACROBLOCK *mb, vp8_subtract_mby( mb->src_diff, *(mb->block[0].base_src), mb->block[0].src_stride, mb->e_mbd.predictor, 16); - // Fdct and building the 2nd order block + /* Fdct and building the 2nd order block */ for (beptr = mb->block; beptr < mb->block + 16; beptr += 2) { mb->short_fdct8x4(beptr->src_diff, beptr->coeff, 32); @@ -600,25 +598,25 @@ static void macro_block_yrd( MACROBLOCK *mb, *Y2DCPtr++ = beptr->coeff[16]; } - // 2nd order fdct + /* 2nd order fdct */ mb->short_walsh4x4(mb_y2->src_diff, mb_y2->coeff, 8); - // Quantization + /* Quantization */ for (b = 0; b < 16; b++) { mb->quantize_b(&mb->block[b], &mb->e_mbd.block[b]); } - // DC predication and Quantization of 2nd Order block + /* DC predication and Quantization of 2nd Order block */ mb->quantize_b(mb_y2, x_y2); - // Distortion + /* Distortion */ d = vp8_mbblock_error(mb, 1) << 2; d += vp8_block_error(mb_y2->coeff, x_y2->dqcoeff); *Distortion = (d >> 4); - // rate + /* rate */ *Rate = vp8_rdcost_mby(mb); } @@ -632,12 +630,11 @@ static void copy_predictor(unsigned char *dst, const unsigned char *predictor) d[12] = p[12]; } static int rd_pick_intra4x4block( - VP8_COMP *cpi, MACROBLOCK *x, BLOCK *be, BLOCKD *b, B_PREDICTION_MODE *best_mode, - unsigned int *bmode_costs, + const int *bmode_costs, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, @@ -660,7 +657,11 @@ static int rd_pick_intra4x4block( DECLARE_ALIGNED_ARRAY(16, unsigned char, best_predictor, 16*4); DECLARE_ALIGNED_ARRAY(16, short, best_dqcoeff, 16); int dst_stride = x->e_mbd.dst.y_stride; - unsigned char 
*base_dst = x->e_mbd.dst.y_buffer; + unsigned char *dst = x->e_mbd.dst.y_buffer + b->offset; + + unsigned char *Above = dst - dst_stride; + unsigned char *yleft = dst - 1; + unsigned char top_left = Above[-1]; for (mode = B_DC_PRED; mode <= B_HU_PRED; mode++) { @@ -669,8 +670,8 @@ static int rd_pick_intra4x4block( rate = bmode_costs[mode]; - vp8_intra4x4_predict(base_dst + b->offset, dst_stride, mode, - b->predictor, 16); + vp8_intra4x4_predict(Above, yleft, dst_stride, mode, + b->predictor, 16, top_left); vp8_subtract_b(be, b, 16); x->short_fdct4x4(be->src_diff, be->coeff, 32); x->quantize_b(be, b); @@ -697,15 +698,14 @@ static int rd_pick_intra4x4block( vpx_memcpy(best_dqcoeff, b->dqcoeff, 32); } } - b->bmi.as_mode = (B_PREDICTION_MODE)(*best_mode); + b->bmi.as_mode = *best_mode; - vp8_short_idct4x4llm(best_dqcoeff, best_predictor, 16, base_dst + b->offset, - dst_stride); + vp8_short_idct4x4llm(best_dqcoeff, best_predictor, 16, dst, dst_stride); return best_rd; } -static int rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate, +static int rd_pick_intra4x4mby_modes(MACROBLOCK *mb, int *Rate, int *rate_y, int *Distortion, int best_rd) { MACROBLOCKD *const xd = &mb->e_mbd; @@ -717,7 +717,7 @@ static int rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate, ENTROPY_CONTEXT_PLANES t_above, t_left; ENTROPY_CONTEXT *ta; ENTROPY_CONTEXT *tl; - unsigned int *bmode_costs; + const int *bmode_costs; vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); @@ -745,7 +745,7 @@ static int rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate, } total_rd += rd_pick_intra4x4block( - cpi, mb, mb->block + i, xd->block + i, &best_mode, bmode_costs, + mb, mb->block + i, xd->block + i, &best_mode, bmode_costs, ta + vp8_block2above[i], tl + vp8_block2left[i], &r, &ry, &d); @@ -770,8 +770,7 @@ static int rd_pick_intra4x4mby_modes(VP8_COMP *cpi, 
MACROBLOCK *mb, int *Rate, } -static int rd_pick_intra16x16mby_mode(VP8_COMP *cpi, - MACROBLOCK *x, +static int rd_pick_intra16x16mby_mode(MACROBLOCK *x, int *Rate, int *rate_y, int *Distortion) @@ -784,7 +783,7 @@ static int rd_pick_intra16x16mby_mode(VP8_COMP *cpi, int this_rd; MACROBLOCKD *xd = &x->e_mbd; - //Y Search for 16x16 intra prediction mode + /* Y Search for 16x16 intra prediction mode */ for (mode = DC_PRED; mode <= TM_PRED; mode++) { xd->mode_info_context->mbmi.mode = mode; @@ -873,7 +872,8 @@ static int rd_inter4x4_uv(VP8_COMP *cpi, MACROBLOCK *x, int *rate, return RDCOST(x->rdmult, x->rddiv, *rate, *distortion); } -static void rd_pick_intra_mbuv_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate, int *rate_tokenonly, int *distortion) +static void rd_pick_intra_mbuv_mode(MACROBLOCK *x, int *rate, + int *rate_tokenonly, int *distortion) { MB_PREDICTION_MODE mode; MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected); @@ -981,8 +981,9 @@ static int labels2mode( m = ABOVE4X4; else { - // the only time we should do costing for new motion vector or mode - // is when we are on a new label (jbb May 08, 2007) + /* the only time we should do costing for new motion vector + * or mode is when we are on a new label (jbb May 08, 2007) + */ switch (m = this_mode) { case NEW4X4 : @@ -1001,7 +1002,7 @@ static int labels2mode( break; } - if (m == ABOVE4X4) // replace above with left if same + if (m == ABOVE4X4) /* replace above with left if same */ { int_mv left_mv; @@ -1062,9 +1063,6 @@ static unsigned int vp8_encode_inter_mb_segment(MACROBLOCK *x, int const *labels vp8_build_inter_predictors_b(bd, 16, base_pre, pre_stride, x->e_mbd.subpixel_predict); vp8_subtract_b(be, bd, 16); x->short_fdct4x4(be->src_diff, be->coeff, 32); - - // set to 0 no way to account for 2nd order DC so discount - //be->coeff[0] = 0; x->quantize_b(be, bd); distortion += vp8_block_error(be->coeff, bd->dqcoeff); @@ -1095,8 +1093,8 @@ typedef struct int mvthresh; int *mdcounts; - int_mv sv_mvp[4]; 
// save 4 mvp from 8x8 - int sv_istep[2]; // save 2 initial step_param for 16x8/8x16 + int_mv sv_mvp[4]; /* save 4 mvp from 8x8 */ + int sv_istep[2]; /* save 2 initial step_param for 16x8/8x16 */ } BEST_SEG_INFO; @@ -1143,13 +1141,13 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, labels = vp8_mbsplits[segmentation]; label_count = vp8_mbsplit_count[segmentation]; - // 64 makes this threshold really big effectively - // making it so that we very rarely check mvs on - // segments. setting this to 1 would make mv thresh - // roughly equal to what it is for macroblocks + /* 64 makes this threshold really big effectively making it so that we + * very rarely check mvs on segments. setting this to 1 would make mv + * thresh roughly equal to what it is for macroblocks + */ label_mv_thresh = 1 * bsi->mvthresh / label_count ; - // Segmentation method overheads + /* Segmentation method overheads */ rate = vp8_cost_token(vp8_mbsplit_tree, vp8_mbsplit_probs, vp8_mbsplit_encodings + segmentation); rate += vp8_cost_mv_ref(SPLITMV, bsi->mdcounts); this_segment_rd += RDCOST(x->rdmult, x->rddiv, rate, 0); @@ -1162,7 +1160,7 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, B_PREDICTION_MODE mode_selected = ZERO4X4; int bestlabelyrate = 0; - // search for the best motion vector on this segment + /* search for the best motion vector on this segment */ for (this_mode = LEFT4X4; this_mode <= NEW4X4 ; this_mode ++) { int this_rd; @@ -1191,7 +1189,9 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, BLOCK *c; BLOCKD *e; - // Is the best so far sufficiently good that we cant justify doing and new motion search. + /* Is the best so far sufficiently good that we cant justify + * doing a new motion search. + */ if (best_label_rd < label_mv_thresh) break; @@ -1206,7 +1206,9 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, step_param = bsi->sv_istep[i]; } - // use previous block's result as next block's MV predictor. 
+ /* use previous block's result as next block's MV + * predictor. + */ if (segmentation == BLOCK_4X4 && i>0) { bsi->mvp.as_int = x->e_mbd.block[i-1].bmi.mv.as_int; @@ -1225,7 +1227,7 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, mvp_full.as_mv.row = bsi->mvp.as_mv.row >>3; mvp_full.as_mv.col = bsi->mvp.as_mv.col >>3; - // find first label + /* find first label */ n = vp8_mbsplit_offset[segmentation][i]; c = &x->block[n]; @@ -1265,7 +1267,7 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, sseshift = segmentation_to_sseshift[segmentation]; - // Should we do a full search (best quality only) + /* Should we do a full search (best quality only) */ if ((cpi->compressor_speed == 0) && (bestsme >> sseshift) > 4000) { /* Check if mvp_full is within the range. */ @@ -1282,7 +1284,9 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, } else { - // The full search result is actually worse so re-instate the previous best vector + /* The full search result is actually worse so + * re-instate the previous best vector + */ e->bmi.mv.as_int = mode_mv[NEW4X4].as_int; } } @@ -1302,7 +1306,7 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, rate = labels2mode(x, labels, i, this_mode, &mode_mv[this_mode], bsi->ref_mv, x->mvcost); - // Trap vectors that reach beyond the UMV borders + /* Trap vectors that reach beyond the UMV borders */ if (((mode_mv[this_mode].as_mv.row >> 3) < x->mv_row_min) || ((mode_mv[this_mode].as_mv.row >> 3) > x->mv_row_max) || ((mode_mv[this_mode].as_mv.col >> 3) < x->mv_col_min) || ((mode_mv[this_mode].as_mv.col >> 3) > x->mv_col_max)) { @@ -1354,7 +1358,7 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, bsi->segment_rd = this_segment_rd; bsi->segment_num = segmentation; - // store everything needed to come back to this!! + /* store everything needed to come back to this!! 
*/ for (i = 0; i < 16; i++) { bsi->mvs[i].as_mv = x->partition_info->bmi[i].mv.as_mv; @@ -1516,7 +1520,7 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, return bsi.segment_rd; } -//The improved MV prediction +/* The improved MV prediction */ void vp8_mv_pred ( VP8_COMP *cpi, @@ -1550,7 +1554,9 @@ void vp8_mv_pred near_mvs[0].as_int = near_mvs[1].as_int = near_mvs[2].as_int = near_mvs[3].as_int = near_mvs[4].as_int = near_mvs[5].as_int = near_mvs[6].as_int = near_mvs[7].as_int = 0; near_ref[0] = near_ref[1] = near_ref[2] = near_ref[3] = near_ref[4] = near_ref[5] = near_ref[6] = near_ref[7] = 0; - // read in 3 nearby block's MVs from current frame as prediction candidates. + /* read in 3 nearby block's MVs from current frame as prediction + * candidates. + */ if (above->mbmi.ref_frame != INTRA_FRAME) { near_mvs[vcnt].as_int = above->mbmi.mv.as_int; @@ -1573,12 +1579,12 @@ void vp8_mv_pred } vcnt++; - // read in 5 nearby block's MVs from last frame. + /* read in 5 nearby block's MVs from last frame. 
*/ if(cpi->common.last_frame_type != KEY_FRAME) { mb_offset = (-xd->mb_to_top_edge/128 + 1) * (xd->mode_info_stride +1) + (-xd->mb_to_left_edge/128 +1) ; - // current in last frame + /* current in last frame */ if (cpi->lf_ref_frame[mb_offset] != INTRA_FRAME) { near_mvs[vcnt].as_int = cpi->lfmv[mb_offset].as_int; @@ -1587,7 +1593,7 @@ void vp8_mv_pred } vcnt++; - // above in last frame + /* above in last frame */ if (cpi->lf_ref_frame[mb_offset - xd->mode_info_stride-1] != INTRA_FRAME) { near_mvs[vcnt].as_int = cpi->lfmv[mb_offset - xd->mode_info_stride-1].as_int; @@ -1596,7 +1602,7 @@ void vp8_mv_pred } vcnt++; - // left in last frame + /* left in last frame */ if (cpi->lf_ref_frame[mb_offset-1] != INTRA_FRAME) { near_mvs[vcnt].as_int = cpi->lfmv[mb_offset -1].as_int; @@ -1605,7 +1611,7 @@ void vp8_mv_pred } vcnt++; - // right in last frame + /* right in last frame */ if (cpi->lf_ref_frame[mb_offset +1] != INTRA_FRAME) { near_mvs[vcnt].as_int = cpi->lfmv[mb_offset +1].as_int; @@ -1614,7 +1620,7 @@ void vp8_mv_pred } vcnt++; - // below in last frame + /* below in last frame */ if (cpi->lf_ref_frame[mb_offset + xd->mode_info_stride +1] != INTRA_FRAME) { near_mvs[vcnt].as_int = cpi->lfmv[mb_offset + xd->mode_info_stride +1].as_int; @@ -1655,7 +1661,9 @@ void vp8_mv_pred mv.as_mv.col = mvy[vcnt/2]; find = 1; - //sr is set to 0 to allow calling function to decide the search range. + /* sr is set to 0 to allow calling function to decide the search + * range. 
+ */ *sr = 0; } } @@ -1667,33 +1675,36 @@ void vp8_mv_pred void vp8_cal_sad(VP8_COMP *cpi, MACROBLOCKD *xd, MACROBLOCK *x, int recon_yoffset, int near_sadidx[]) { - - int near_sad[8] = {0}; // 0-cf above, 1-cf left, 2-cf aboveleft, 3-lf current, 4-lf above, 5-lf left, 6-lf right, 7-lf below + /* near_sad indexes: + * 0-cf above, 1-cf left, 2-cf aboveleft, + * 3-lf current, 4-lf above, 5-lf left, 6-lf right, 7-lf below + */ + int near_sad[8] = {0}; BLOCK *b = &x->block[0]; unsigned char *src_y_ptr = *(b->base_src); - //calculate sad for current frame 3 nearby MBs. + /* calculate sad for current frame 3 nearby MBs. */ if( xd->mb_to_top_edge==0 && xd->mb_to_left_edge ==0) { near_sad[0] = near_sad[1] = near_sad[2] = INT_MAX; }else if(xd->mb_to_top_edge==0) - { //only has left MB for sad calculation. + { /* only has left MB for sad calculation. */ near_sad[0] = near_sad[2] = INT_MAX; - near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - 16,xd->dst.y_stride, 0x7fffffff); + near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - 16,xd->dst.y_stride, UINT_MAX); }else if(xd->mb_to_left_edge ==0) - { //only has left MB for sad calculation. + { /* only has left MB for sad calculation. 
*/ near_sad[1] = near_sad[2] = INT_MAX; - near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16,xd->dst.y_stride, 0x7fffffff); + near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16,xd->dst.y_stride, UINT_MAX); }else { - near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16,xd->dst.y_stride, 0x7fffffff); - near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - 16,xd->dst.y_stride, 0x7fffffff); - near_sad[2] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16 -16,xd->dst.y_stride, 0x7fffffff); + near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16,xd->dst.y_stride, UINT_MAX); + near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - 16,xd->dst.y_stride, UINT_MAX); + near_sad[2] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16 -16,xd->dst.y_stride, UINT_MAX); } if(cpi->common.last_frame_type != KEY_FRAME) { - //calculate sad for last frame 5 nearby MBs. + /* calculate sad for last frame 5 nearby MBs. 
*/ unsigned char *pre_y_buffer = cpi->common.yv12_fb[cpi->common.lst_fb_idx].y_buffer + recon_yoffset; int pre_y_stride = cpi->common.yv12_fb[cpi->common.lst_fb_idx].y_stride; @@ -1703,14 +1714,14 @@ void vp8_cal_sad(VP8_COMP *cpi, MACROBLOCKD *xd, MACROBLOCK *x, int recon_yoffse if(xd->mb_to_bottom_edge==0) near_sad[7] = INT_MAX; if(near_sad[4] != INT_MAX) - near_sad[4] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer - pre_y_stride *16, pre_y_stride, 0x7fffffff); + near_sad[4] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer - pre_y_stride *16, pre_y_stride, UINT_MAX); if(near_sad[5] != INT_MAX) - near_sad[5] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer - 16, pre_y_stride, 0x7fffffff); - near_sad[3] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer, pre_y_stride, 0x7fffffff); + near_sad[5] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer - 16, pre_y_stride, UINT_MAX); + near_sad[3] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer, pre_y_stride, UINT_MAX); if(near_sad[6] != INT_MAX) - near_sad[6] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer + 16, pre_y_stride, 0x7fffffff); + near_sad[6] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer + 16, pre_y_stride, UINT_MAX); if(near_sad[7] != INT_MAX) - near_sad[7] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer + pre_y_stride *16, pre_y_stride, 0x7fffffff); + near_sad[7] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer + pre_y_stride *16, pre_y_stride, UINT_MAX); } if(cpi->common.last_frame_type != KEY_FRAME) @@ -1732,18 +1743,18 @@ static void rd_update_mvcount(VP8_COMP *cpi, MACROBLOCK *x, int_mv *best_ref_mv) { if (x->partition_info->bmi[i].mode == NEW4X4) { - cpi->MVcount[0][mv_max+((x->partition_info->bmi[i].mv.as_mv.row + x->MVcount[0][mv_max+((x->partition_info->bmi[i].mv.as_mv.row - 
best_ref_mv->as_mv.row) >> 1)]++; - cpi->MVcount[1][mv_max+((x->partition_info->bmi[i].mv.as_mv.col + x->MVcount[1][mv_max+((x->partition_info->bmi[i].mv.as_mv.col - best_ref_mv->as_mv.col) >> 1)]++; } } } else if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV) { - cpi->MVcount[0][mv_max+((x->e_mbd.mode_info_context->mbmi.mv.as_mv.row + x->MVcount[0][mv_max+((x->e_mbd.mode_info_context->mbmi.mv.as_mv.row - best_ref_mv->as_mv.row) >> 1)]++; - cpi->MVcount[1][mv_max+((x->e_mbd.mode_info_context->mbmi.mv.as_mv.col + x->MVcount[1][mv_max+((x->e_mbd.mode_info_context->mbmi.mv.as_mv.col - best_ref_mv->as_mv.col) >> 1)]++; } } @@ -1766,7 +1777,7 @@ static int evaluate_inter_mode_rd(int mdcounts[4], { unsigned int sse; unsigned int var; - int threshold = (xd->block[0].dequant[1] + unsigned int threshold = (xd->block[0].dequant[1] * xd->block[0].dequant[1] >>4); if(threshold < x->encode_breakout) @@ -1784,8 +1795,8 @@ static int evaluate_inter_mode_rd(int mdcounts[4], if ((sse - var < q2dc * q2dc >>4) || (sse /2 > var && sse-var < 64)) { - // Check u and v to make sure skip is ok - int sse2= VP8_UVSSE(x); + /* Check u and v to make sure skip is ok */ + unsigned int sse2 = VP8_UVSSE(x); if (sse2 * 2 < threshold) { x->skip = 1; @@ -1805,17 +1816,15 @@ static int evaluate_inter_mode_rd(int mdcounts[4], } - //intermodecost[mode_index] = vp8_cost_mv_ref(this_mode, mdcounts); // Experimental debug code - - // Add in the Mv/mode cost + /* Add in the Mv/mode cost */ rd->rate2 += vp8_cost_mv_ref(this_mode, mdcounts); - // Y cost and distortion + /* Y cost and distortion */ macro_block_yrd(x, &rd->rate_y, &distortion); rd->rate2 += rd->rate_y; rd->distortion2 += distortion; - // UV cost and distortion + /* UV cost and distortion */ rd_inter16x16_uv(cpi, x, &rd->rate_uv, &rd->distortion_uv, cpi->common.full_pixel); rd->rate2 += rd->rate_uv; @@ -1832,9 +1841,11 @@ static int calculate_final_rd_costs(int this_rd, VP8_COMP *cpi, MACROBLOCK *x) { MB_PREDICTION_MODE this_mode = 
x->e_mbd.mode_info_context->mbmi.mode; - // Where skip is allowable add in the default per mb cost for the no skip case. - // where we then decide to skip we have to delete this and replace it with the - // cost of signallying a skip + + /* Where skip is allowable add in the default per mb cost for the no + * skip case. where we then decide to skip we have to delete this and + * replace it with the cost of signalling a skip + */ if (cpi->common.mb_no_coeff_skip) { *other_cost += vp8_cost_bit(cpi->prob_skip_false, 0); @@ -1849,7 +1860,10 @@ static int calculate_final_rd_costs(int this_rd, if (!disable_skip) { - // Test for the condition where skip block will be activated because there are no non zero coefficients and make any necessary adjustment for rate + /* Test for the condition where skip block will be activated + * because there are no non zero coefficients and make any + * necessary adjustment for rate + */ if (cpi->common.mb_no_coeff_skip) { int i; @@ -1874,10 +1888,10 @@ static int calculate_final_rd_costs(int this_rd, if (tteob == 0) { rd->rate2 -= (rd->rate_y + rd->rate_uv); - //for best_yrd calculation + /* for best_yrd calculation */ rd->rate_uv = 0; - // Back out no skip flag costing and add in skip flag costing + /* Back out no skip flag costing and add in skip flag costing */ if (cpi->prob_skip_false) { int prob_skip_cost; @@ -1889,7 +1903,7 @@ static int calculate_final_rd_costs(int this_rd, } } } - // Calculate the final RD estimate for this mode + /* Calculate the final RD estimate for this mode */ this_rd = RDCOST(x->rdmult, x->rddiv, rd->rate2, rd->distortion2); if (this_rd < INT_MAX && x->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME) @@ -1953,7 +1967,8 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int_mv mvp; int near_sadidx[8] = {0, 1, 2, 3, 4, 5, 6, 7}; int saddone=0; - int sr=0; //search range got from mv_pred(). It uses step_param levels. (0-7) + /* search range got from mv_pred(). 
It uses step_param levels. (0-7) */ + int sr=0; unsigned char *plane[4][3]; int ref_frame_map[4]; @@ -1962,6 +1977,11 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int intra_rd_penalty = 10* vp8_dc_quant(cpi->common.base_qindex, cpi->common.y1dc_delta_q); +#if CONFIG_TEMPORAL_DENOISING + unsigned int zero_mv_sse = INT_MAX, best_sse = INT_MAX, + best_rd_sse = INT_MAX; +#endif + mode_mv = mode_mv_sb[sign_bias]; best_ref_mv.as_int = 0; best_mode.rd = INT_MAX; @@ -1994,7 +2014,8 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, get_predictor_pointers(cpi, plane, recon_yoffset, recon_uvoffset); *returnintra = INT_MAX; - cpi->mbs_tested_so_far++; // Count of the number of MBs tested so far this frame + /* Count of the number of MBs tested so far this frame */ + x->mbs_tested_so_far++; x->skip = 0; @@ -2005,14 +2026,16 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int other_cost = 0; int this_ref_frame = ref_frame_map[vp8_ref_frame_order[mode_index]]; - // Test best rd so far against threshold for trying this mode. - if (best_mode.rd <= cpi->rd_threshes[mode_index]) + /* Test best rd so far against threshold for trying this mode. 
*/ + if (best_mode.rd <= x->rd_threshes[mode_index]) continue; if (this_ref_frame < 0) continue; - // These variables hold are rolling total cost and distortion for this mode + /* These variables hold are rolling total cost and distortion for + * this mode + */ rd.rate2 = 0; rd.distortion2 = 0; @@ -2021,9 +2044,10 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, x->e_mbd.mode_info_context->mbmi.mode = this_mode; x->e_mbd.mode_info_context->mbmi.ref_frame = this_ref_frame; - // Only consider ZEROMV/ALTREF_FRAME for alt ref frame, - // unless ARNR filtering is enabled in which case we want - // an unfiltered alternative + /* Only consider ZEROMV/ALTREF_FRAME for alt ref frame, + * unless ARNR filtering is enabled in which case we want + * an unfiltered alternative + */ if (cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) { if (this_mode != ZEROMV || x->e_mbd.mode_info_context->mbmi.ref_frame != ALTREF_FRAME) @@ -2045,45 +2069,56 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, } } - // Check to see if the testing frequency for this mode is at its max - // If so then prevent it from being tested and increase the threshold for its testing - if (cpi->mode_test_hit_counts[mode_index] && (cpi->mode_check_freq[mode_index] > 1)) + /* Check to see if the testing frequency for this mode is at its + * max If so then prevent it from being tested and increase the + * threshold for its testing + */ + if (x->mode_test_hit_counts[mode_index] && (cpi->mode_check_freq[mode_index] > 1)) { - if (cpi->mbs_tested_so_far <= cpi->mode_check_freq[mode_index] * cpi->mode_test_hit_counts[mode_index]) + if (x->mbs_tested_so_far <= cpi->mode_check_freq[mode_index] * x->mode_test_hit_counts[mode_index]) { - // Increase the threshold for coding this mode to make it less likely to be chosen - cpi->rd_thresh_mult[mode_index] += 4; + /* Increase the threshold for coding this mode to make it + * less likely to be chosen + */ 
+ x->rd_thresh_mult[mode_index] += 4; - if (cpi->rd_thresh_mult[mode_index] > MAX_THRESHMULT) - cpi->rd_thresh_mult[mode_index] = MAX_THRESHMULT; + if (x->rd_thresh_mult[mode_index] > MAX_THRESHMULT) + x->rd_thresh_mult[mode_index] = MAX_THRESHMULT; - cpi->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) * cpi->rd_thresh_mult[mode_index]; + x->rd_threshes[mode_index] = + (cpi->rd_baseline_thresh[mode_index] >> 7) * + x->rd_thresh_mult[mode_index]; continue; } } - // We have now reached the point where we are going to test the current mode so increment the counter for the number of times it has been tested - cpi->mode_test_hit_counts[mode_index] ++; + /* We have now reached the point where we are going to test the + * current mode so increment the counter for the number of times + * it has been tested + */ + x->mode_test_hit_counts[mode_index] ++; - // Experimental code. Special case for gf and arf zeromv modes. Increase zbin size to supress noise - if (cpi->zbin_mode_boost_enabled) + /* Experimental code. Special case for gf and arf zeromv modes. 
+ * Increase zbin size to supress noise + */ + if (x->zbin_mode_boost_enabled) { if ( this_ref_frame == INTRA_FRAME ) - cpi->zbin_mode_boost = 0; + x->zbin_mode_boost = 0; else { if (vp8_mode_order[mode_index] == ZEROMV) { if (this_ref_frame != LAST_FRAME) - cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST; + x->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST; else - cpi->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST; + x->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST; } else if (vp8_mode_order[mode_index] == SPLITMV) - cpi->zbin_mode_boost = 0; + x->zbin_mode_boost = 0; else - cpi->zbin_mode_boost = MV_ZBIN_BOOST; + x->zbin_mode_boost = MV_ZBIN_BOOST; } vp8_update_zbin_extra(cpi, x); @@ -2091,7 +2126,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, if(!uv_intra_done && this_ref_frame == INTRA_FRAME) { - rd_pick_intra_mbuv_mode(cpi, x, &uv_intra_rate, + rd_pick_intra_mbuv_mode(x, &uv_intra_rate, &uv_intra_rate_tokenonly, &uv_intra_distortion); uv_intra_mode = x->e_mbd.mode_info_context->mbmi.uv_mode; @@ -2113,9 +2148,11 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, { int tmp_rd; - // Note the rate value returned here includes the cost of coding the BPRED mode : x->mbmode_cost[x->e_mbd.frame_type][BPRED]; + /* Note the rate value returned here includes the cost of + * coding the BPRED mode: x->mbmode_cost[x->e_mbd.frame_type][BPRED] + */ int distortion; - tmp_rd = rd_pick_intra4x4mby_modes(cpi, x, &rate, &rd.rate_y, &distortion, best_mode.yrd); + tmp_rd = rd_pick_intra4x4mby_modes(x, &rate, &rd.rate_y, &distortion, best_mode.yrd); rd.rate2 += rate; rd.distortion2 += distortion; @@ -2140,8 +2177,10 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int this_rd_thresh; int distortion; - this_rd_thresh = (vp8_ref_frame_order[mode_index] == 1) ? cpi->rd_threshes[THR_NEW1] : cpi->rd_threshes[THR_NEW3]; - this_rd_thresh = (vp8_ref_frame_order[mode_index] == 2) ? 
cpi->rd_threshes[THR_NEW2] : this_rd_thresh; + this_rd_thresh = (vp8_ref_frame_order[mode_index] == 1) ? + x->rd_threshes[THR_NEW1] : x->rd_threshes[THR_NEW3]; + this_rd_thresh = (vp8_ref_frame_order[mode_index] == 2) ? + x->rd_threshes[THR_NEW2] : this_rd_thresh; tmp_rd = vp8_rd_pick_best_mbsegmentation(cpi, x, &best_ref_mv, best_mode.yrd, mdcounts, @@ -2150,10 +2189,12 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, rd.rate2 += rate; rd.distortion2 += distortion; - // If even the 'Y' rd value of split is higher than best so far then dont bother looking at UV + /* If even the 'Y' rd value of split is higher than best so far + * then dont bother looking at UV + */ if (tmp_rd < best_mode.yrd) { - // Now work out UV cost and add it in + /* Now work out UV cost and add it in */ rd_inter4x4_uv(cpi, x, &rd.rate_uv, &rd.distortion_uv, cpi->common.full_pixel); rd.rate2 += rd.rate_uv; rd.distortion2 += rd.distortion_uv; @@ -2225,7 +2266,9 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, mvp_full.as_mv.col = mvp.as_mv.col>>3; mvp_full.as_mv.row = mvp.as_mv.row>>3; - // Get intersection of UMV window and valid MV window to reduce # of checks in diamond search. + /* Get intersection of UMV window and valid MV window to + * reduce # of checks in diamond search. 
+ */ if (x->mv_col_min < col_min ) x->mv_col_min = col_min; if (x->mv_col_max > col_max ) @@ -2235,11 +2278,11 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, if (x->mv_row_max > row_max ) x->mv_row_max = row_max; - //adjust search range according to sr from mv prediction + /* adjust search range according to sr from mv prediction */ if(sr > step_param) step_param = sr; - // Initial step/diamond search + /* Initial step/diamond search */ { bestsme = cpi->diamond_search_sad(x, b, d, &mvp_full, &d->bmi.mv, step_param, sadpb, &num00, @@ -2247,7 +2290,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, x->mvcost, &best_ref_mv); mode_mv[NEWMV].as_int = d->bmi.mv.as_int; - // Further step/diamond searches as necessary + /* Further step/diamond searches as necessary */ n = 0; further_steps = (cpi->sf.max_step_search_steps - 1) - step_param; @@ -2293,11 +2336,8 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, { int search_range; - //It seems not a good way to set search_range. Need further investigation. 
- //search_range = MAXF(abs((mvp.row>>3) - d->bmi.mv.as_mv.row), abs((mvp.col>>3) - d->bmi.mv.as_mv.col)); search_range = 8; - //thissme = cpi->full_search_sad(x, b, d, &d->bmi.mv.as_mv, sadpb, search_range, &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &best_ref_mv); thissme = cpi->refining_search_sad(x, b, d, &d->bmi.mv, sadpb, search_range, &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &best_ref_mv); @@ -2330,24 +2370,31 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, mode_mv[NEWMV].as_int = d->bmi.mv.as_int; - // Add the new motion vector cost to our rolling cost variable + /* Add the new motion vector cost to our rolling cost variable */ rd.rate2 += vp8_mv_bit_cost(&mode_mv[NEWMV], &best_ref_mv, x->mvcost, 96); } case NEARESTMV: case NEARMV: - // Clip "next_nearest" so that it does not extend to far out of image + /* Clip "next_nearest" so that it does not extend to far out + * of image + */ vp8_clamp_mv2(&mode_mv[this_mode], xd); - // Do not bother proceeding if the vector (from newmv,nearest or near) is 0,0 as this should then be coded using the zeromv mode. + /* Do not bother proceeding if the vector (from newmv, nearest + * or near) is 0,0 as this should then be coded using the zeromv + * mode. + */ if (((this_mode == NEARMV) || (this_mode == NEARESTMV)) && (mode_mv[this_mode].as_int == 0)) continue; case ZEROMV: - // Trap vectors that reach beyond the UMV borders - // Note that ALL New MV, Nearest MV Near MV and Zero MV code drops through to this point - // because of the lack of break statements in the previous two cases. + /* Trap vectors that reach beyond the UMV borders + * Note that ALL New MV, Nearest MV Near MV and Zero MV code + * drops through to this point because of the lack of break + * statements in the previous two cases. 
+ */ if (((mode_mv[this_mode].as_mv.row >> 3) < x->mv_row_min) || ((mode_mv[this_mode].as_mv.row >> 3) > x->mv_row_max) || ((mode_mv[this_mode].as_mv.col >> 3) < x->mv_col_min) || ((mode_mv[this_mode].as_mv.col >> 3) > x->mv_col_max)) continue; @@ -2365,35 +2412,52 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, disable_skip, uv_intra_tteob, intra_rd_penalty, cpi, x); - // Keep record of best intra distortion + /* Keep record of best intra distortion */ if ((x->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME) && (this_rd < best_mode.intra_rd) ) { best_mode.intra_rd = this_rd; *returnintra = rd.distortion2 ; } - #if CONFIG_TEMPORAL_DENOISING if (cpi->oxcf.noise_sensitivity) { - // Store the best NEWMV in x for later use in the denoiser. - // We are restricted to the LAST_FRAME since the denoiser only keeps - // one filter state. - if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV && - x->e_mbd.mode_info_context->mbmi.ref_frame == LAST_FRAME) - { - x->e_mbd.best_sse_inter_mode = NEWMV; - x->e_mbd.best_sse_mv = x->e_mbd.mode_info_context->mbmi.mv; - x->e_mbd.need_to_clamp_best_mvs = - x->e_mbd.mode_info_context->mbmi.need_to_clamp_mvs; - } + unsigned int sse; + vp8_get_inter_mbpred_error(x,&cpi->fn_ptr[BLOCK_16X16],&sse, + mode_mv[this_mode]); + + if (sse < best_rd_sse) + best_rd_sse = sse; + + /* Store for later use by denoiser. */ + if (this_mode == ZEROMV && sse < zero_mv_sse ) + { + zero_mv_sse = sse; + x->best_zeromv_reference_frame = + x->e_mbd.mode_info_context->mbmi.ref_frame; + } + + /* Store the best NEWMV in x for later use in the denoiser. 
*/ + if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV && + sse < best_sse) + { + best_sse = sse; + vp8_get_inter_mbpred_error(x,&cpi->fn_ptr[BLOCK_16X16],&best_sse, + mode_mv[this_mode]); + x->best_sse_inter_mode = NEWMV; + x->best_sse_mv = x->e_mbd.mode_info_context->mbmi.mv; + x->need_to_clamp_best_mvs = + x->e_mbd.mode_info_context->mbmi.need_to_clamp_mvs; + x->best_reference_frame = + x->e_mbd.mode_info_context->mbmi.ref_frame; + } } #endif - // Did this mode help.. i.i is it the new best mode + /* Did this mode help.. i.i is it the new best mode */ if (this_rd < best_mode.rd || x->skip) { - // Note index of best mode so far + /* Note index of best mode so far */ best_mode_index = mode_index; *returnrate = rd.rate2; *returndistortion = rd.distortion2; @@ -2406,95 +2470,103 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, update_best_mode(&best_mode, this_rd, &rd, other_cost, x); - // Testing this mode gave rise to an improvement in best error score. Lower threshold a bit for next time - cpi->rd_thresh_mult[mode_index] = (cpi->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) ? cpi->rd_thresh_mult[mode_index] - 2 : MIN_THRESHMULT; - cpi->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) * cpi->rd_thresh_mult[mode_index]; + /* Testing this mode gave rise to an improvement in best error + * score. Lower threshold a bit for next time + */ + x->rd_thresh_mult[mode_index] = + (x->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) ? + x->rd_thresh_mult[mode_index] - 2 : MIN_THRESHMULT; } - // If the mode did not help improve the best error case then raise the threshold for testing that mode next time around. + /* If the mode did not help improve the best error case then raise + * the threshold for testing that mode next time around. 
+ */ else { - cpi->rd_thresh_mult[mode_index] += 4; - - if (cpi->rd_thresh_mult[mode_index] > MAX_THRESHMULT) - cpi->rd_thresh_mult[mode_index] = MAX_THRESHMULT; + x->rd_thresh_mult[mode_index] += 4; - cpi->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) * cpi->rd_thresh_mult[mode_index]; + if (x->rd_thresh_mult[mode_index] > MAX_THRESHMULT) + x->rd_thresh_mult[mode_index] = MAX_THRESHMULT; } + x->rd_threshes[mode_index] = + (cpi->rd_baseline_thresh[mode_index] >> 7) * + x->rd_thresh_mult[mode_index]; if (x->skip) break; } - // Reduce the activation RD thresholds for the best choice mode + /* Reduce the activation RD thresholds for the best choice mode */ if ((cpi->rd_baseline_thresh[best_mode_index] > 0) && (cpi->rd_baseline_thresh[best_mode_index] < (INT_MAX >> 2))) { - int best_adjustment = (cpi->rd_thresh_mult[best_mode_index] >> 2); - - cpi->rd_thresh_mult[best_mode_index] = (cpi->rd_thresh_mult[best_mode_index] >= (MIN_THRESHMULT + best_adjustment)) ? cpi->rd_thresh_mult[best_mode_index] - best_adjustment : MIN_THRESHMULT; - cpi->rd_threshes[best_mode_index] = (cpi->rd_baseline_thresh[best_mode_index] >> 7) * cpi->rd_thresh_mult[best_mode_index]; - - // If we chose a split mode then reset the new MV thresholds as well - /*if ( vp8_mode_order[best_mode_index] == SPLITMV ) - { - best_adjustment = 4; //(cpi->rd_thresh_mult[THR_NEWMV] >> 4); - cpi->rd_thresh_mult[THR_NEWMV] = (cpi->rd_thresh_mult[THR_NEWMV] >= (MIN_THRESHMULT+best_adjustment)) ? cpi->rd_thresh_mult[THR_NEWMV]-best_adjustment: MIN_THRESHMULT; - cpi->rd_threshes[THR_NEWMV] = (cpi->rd_baseline_thresh[THR_NEWMV] >> 7) * cpi->rd_thresh_mult[THR_NEWMV]; - - best_adjustment = 4; //(cpi->rd_thresh_mult[THR_NEWG] >> 4); - cpi->rd_thresh_mult[THR_NEWG] = (cpi->rd_thresh_mult[THR_NEWG] >= (MIN_THRESHMULT+best_adjustment)) ? 
cpi->rd_thresh_mult[THR_NEWG]-best_adjustment: MIN_THRESHMULT; - cpi->rd_threshes[THR_NEWG] = (cpi->rd_baseline_thresh[THR_NEWG] >> 7) * cpi->rd_thresh_mult[THR_NEWG]; - - best_adjustment = 4; //(cpi->rd_thresh_mult[THR_NEWA] >> 4); - cpi->rd_thresh_mult[THR_NEWA] = (cpi->rd_thresh_mult[THR_NEWA] >= (MIN_THRESHMULT+best_adjustment)) ? cpi->rd_thresh_mult[THR_NEWA]-best_adjustment: MIN_THRESHMULT; - cpi->rd_threshes[THR_NEWA] = (cpi->rd_baseline_thresh[THR_NEWA] >> 7) * cpi->rd_thresh_mult[THR_NEWA]; - }*/ - + int best_adjustment = (x->rd_thresh_mult[best_mode_index] >> 2); + + x->rd_thresh_mult[best_mode_index] = + (x->rd_thresh_mult[best_mode_index] >= + (MIN_THRESHMULT + best_adjustment)) ? + x->rd_thresh_mult[best_mode_index] - best_adjustment : + MIN_THRESHMULT; + x->rd_threshes[best_mode_index] = + (cpi->rd_baseline_thresh[best_mode_index] >> 7) * + x->rd_thresh_mult[best_mode_index]; } - // Note how often each mode chosen as best + /* Note how often each mode chosen as best */ cpi->mode_chosen_counts[best_mode_index] ++; #if CONFIG_TEMPORAL_DENOISING if (cpi->oxcf.noise_sensitivity) { - if (x->e_mbd.best_sse_inter_mode == DC_PRED) { - // No best MV found. - x->e_mbd.best_sse_inter_mode = best_mode.mbmode.mode; - x->e_mbd.best_sse_mv = best_mode.mbmode.mv; - x->e_mbd.need_to_clamp_best_mvs = best_mode.mbmode.need_to_clamp_mvs; - } - - // TODO(holmer): No SSEs are calculated in rdopt.c. What else can be used? - vp8_denoiser_denoise_mb(&cpi->denoiser, x, 0, 0, - recon_yoffset, recon_uvoffset); - // Reevalute ZEROMV if the current mode is INTRA. 
- if (best_mode.mbmode.ref_frame == INTRA_FRAME) - { - int this_rd = INT_MAX; - int disable_skip = 0; - int other_cost = 0; - vpx_memset(&rd, 0, sizeof(rd)); - x->e_mbd.mode_info_context->mbmi.ref_frame = LAST_FRAME; - rd.rate2 += x->ref_frame_cost[LAST_FRAME]; - rd.rate2 += vp8_cost_mv_ref(ZEROMV, mdcounts); - x->e_mbd.mode_info_context->mbmi.mode = ZEROMV; - x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED; - x->e_mbd.mode_info_context->mbmi.mv.as_int = 0; - this_rd = evaluate_inter_mode_rd(mdcounts, &rd, &disable_skip, cpi, x); - this_rd = calculate_final_rd_costs(this_rd, &rd, &other_cost, - disable_skip, uv_intra_tteob, - intra_rd_penalty, cpi, x); - if (this_rd < best_mode.rd || x->skip) + if (x->best_sse_inter_mode == DC_PRED) { - // Note index of best mode so far - best_mode_index = mode_index; - *returnrate = rd.rate2; - *returndistortion = rd.distortion2; - update_best_mode(&best_mode, this_rd, &rd, other_cost, x); + /* No best MV found. */ + x->best_sse_inter_mode = best_mode.mbmode.mode; + x->best_sse_mv = best_mode.mbmode.mv; + x->need_to_clamp_best_mvs = best_mode.mbmode.need_to_clamp_mvs; + x->best_reference_frame = best_mode.mbmode.ref_frame; + best_sse = best_rd_sse; + } + vp8_denoiser_denoise_mb(&cpi->denoiser, x, best_sse, zero_mv_sse, + recon_yoffset, recon_uvoffset); + + + /* Reevaluate ZEROMV after denoising. 
*/ + if (best_mode.mbmode.ref_frame == INTRA_FRAME && + x->best_zeromv_reference_frame != INTRA_FRAME) + { + int this_rd = INT_MAX; + int disable_skip = 0; + int other_cost = 0; + int this_ref_frame = x->best_zeromv_reference_frame; + rd.rate2 = x->ref_frame_cost[this_ref_frame] + + vp8_cost_mv_ref(ZEROMV, mdcounts); + rd.distortion2 = 0; + + /* set up the proper prediction buffers for the frame */ + x->e_mbd.mode_info_context->mbmi.ref_frame = this_ref_frame; + x->e_mbd.pre.y_buffer = plane[this_ref_frame][0]; + x->e_mbd.pre.u_buffer = plane[this_ref_frame][1]; + x->e_mbd.pre.v_buffer = plane[this_ref_frame][2]; + + x->e_mbd.mode_info_context->mbmi.mode = ZEROMV; + x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED; + x->e_mbd.mode_info_context->mbmi.mv.as_int = 0; + + this_rd = evaluate_inter_mode_rd(mdcounts, &rd, &disable_skip, cpi, x); + this_rd = calculate_final_rd_costs(this_rd, &rd, &other_cost, + disable_skip, uv_intra_tteob, + intra_rd_penalty, cpi, x); + if (this_rd < best_mode.rd || x->skip) + { + /* Note index of best mode so far */ + best_mode_index = mode_index; + *returnrate = rd.rate2; + *returndistortion = rd.distortion2; + update_best_mode(&best_mode, this_rd, &rd, other_cost, x); + } } - } + } #endif @@ -2512,7 +2584,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, } - // macroblock modes + /* macroblock modes */ vpx_memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mode.mbmode, sizeof(MB_MODE_INFO)); if (best_mode.mbmode.mode == B_PRED) @@ -2539,7 +2611,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, rd_update_mvcount(cpi, x, &best_ref_mv); } -void vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate_) +void vp8_rd_pick_intra_mode(MACROBLOCK *x, int *rate_) { int error4x4, error16x16; int rate4x4, rate16x16 = 0, rateuv; @@ -2551,15 +2623,13 @@ void vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate_) x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME; - 
rd_pick_intra_mbuv_mode(cpi, x, &rateuv, &rateuv_tokenonly, &distuv); + rd_pick_intra_mbuv_mode(x, &rateuv, &rateuv_tokenonly, &distuv); rate = rateuv; - error16x16 = rd_pick_intra16x16mby_mode(cpi, x, - &rate16x16, &rate16x16_tokenonly, + error16x16 = rd_pick_intra16x16mby_mode(x, &rate16x16, &rate16x16_tokenonly, &dist16x16); - error4x4 = rd_pick_intra4x4mby_modes(cpi, x, - &rate4x4, &rate4x4_tokenonly, + error4x4 = rd_pick_intra4x4mby_modes(x, &rate4x4, &rate4x4_tokenonly, &dist4x4, error16x16); if (error4x4 < error16x16) diff --git a/vp8/encoder/rdopt.h b/vp8/encoder/rdopt.h index db939f9..1e11fa7 100644 --- a/vp8/encoder/rdopt.h +++ b/vp8/encoder/rdopt.h @@ -65,9 +65,9 @@ static void insertsortsad(int arr[],int idx[], int len) } } -extern void vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue); +extern void vp8_initialize_rd_consts(VP8_COMP *cpi, MACROBLOCK *x, int Qvalue); extern void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int recon_uvoffset, int *returnrate, int *returndistortion, int *returnintra); -extern void vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate); +extern void vp8_rd_pick_intra_mode(MACROBLOCK *x, int *rate); static void get_plane_pointers(const YV12_BUFFER_CONFIG *fb, @@ -86,15 +86,15 @@ static void get_predictor_pointers(const VP8_COMP *cpi, unsigned int recon_yoffset, unsigned int recon_uvoffset) { - if (cpi->ref_frame_flags & VP8_LAST_FLAG) + if (cpi->ref_frame_flags & VP8_LAST_FRAME) get_plane_pointers(&cpi->common.yv12_fb[cpi->common.lst_fb_idx], plane[LAST_FRAME], recon_yoffset, recon_uvoffset); - if (cpi->ref_frame_flags & VP8_GOLD_FLAG) + if (cpi->ref_frame_flags & VP8_GOLD_FRAME) get_plane_pointers(&cpi->common.yv12_fb[cpi->common.gld_fb_idx], plane[GOLDEN_FRAME], recon_yoffset, recon_uvoffset); - if (cpi->ref_frame_flags & VP8_ALT_FLAG) + if (cpi->ref_frame_flags & VP8_ALTR_FRAME) get_plane_pointers(&cpi->common.yv12_fb[cpi->common.alt_fb_idx], plane[ALTREF_FRAME], recon_yoffset, 
recon_uvoffset); } @@ -106,11 +106,11 @@ static void get_reference_search_order(const VP8_COMP *cpi, int i=0; ref_frame_map[i++] = INTRA_FRAME; - if (cpi->ref_frame_flags & VP8_LAST_FLAG) + if (cpi->ref_frame_flags & VP8_LAST_FRAME) ref_frame_map[i++] = LAST_FRAME; - if (cpi->ref_frame_flags & VP8_GOLD_FLAG) + if (cpi->ref_frame_flags & VP8_GOLD_FRAME) ref_frame_map[i++] = GOLDEN_FRAME; - if (cpi->ref_frame_flags & VP8_ALT_FLAG) + if (cpi->ref_frame_flags & VP8_ALTR_FRAME) ref_frame_map[i++] = ALTREF_FRAME; for(; i<4; i++) ref_frame_map[i] = -1; diff --git a/vp8/encoder/segmentation.c b/vp8/encoder/segmentation.c index fc0967d..37972e2 100644 --- a/vp8/encoder/segmentation.c +++ b/vp8/encoder/segmentation.c @@ -22,22 +22,24 @@ void vp8_update_gf_useage_maps(VP8_COMP *cpi, VP8_COMMON *cm, MACROBLOCK *x) if ((cm->frame_type == KEY_FRAME) || (cm->refresh_golden_frame)) { - // Reset Gf useage monitors + /* Reset Gf useage monitors */ vpx_memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols)); cpi->gf_active_count = cm->mb_rows * cm->mb_cols; } else { - // for each macroblock row in image + /* for each macroblock row in image */ for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) { - // for each macroblock col in image + /* for each macroblock col in image */ for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) { - // If using golden then set GF active flag if not already set. - // If using last frame 0,0 mode then leave flag as it is - // else if using non 0,0 motion or intra modes then clear flag if it is currently set + /* If using golden then set GF active flag if not already set. 
+ * If using last frame 0,0 mode then leave flag as it is + * else if using non 0,0 motion or intra modes then clear + * flag if it is currently set + */ if ((this_mb_mode_info->mbmi.ref_frame == GOLDEN_FRAME) || (this_mb_mode_info->mbmi.ref_frame == ALTREF_FRAME)) { if (*(x->gf_active_ptr) == 0) @@ -52,12 +54,12 @@ void vp8_update_gf_useage_maps(VP8_COMP *cpi, VP8_COMMON *cm, MACROBLOCK *x) cpi->gf_active_count--; } - x->gf_active_ptr++; // Step onto next entry - this_mb_mode_info++; // skip to next mb + x->gf_active_ptr++; /* Step onto next entry */ + this_mb_mode_info++; /* skip to next mb */ } - // this is to account for the border + /* this is to account for the border */ this_mb_mode_info++; } } diff --git a/vp8/encoder/temporal_filter.c b/vp8/encoder/temporal_filter.c index 6c61b36..b83ae89 100644 --- a/vp8/encoder/temporal_filter.c +++ b/vp8/encoder/temporal_filter.c @@ -30,8 +30,8 @@ #include #include -#define ALT_REF_MC_ENABLED 1 // dis/enable MC in AltRef filtering -#define ALT_REF_SUBPEL_ENABLED 1 // dis/enable subpel in MC AltRef filtering +#define ALT_REF_MC_ENABLED 1 /* dis/enable MC in AltRef filtering */ +#define ALT_REF_SUBPEL_ENABLED 1 /* dis/enable subpel in MC AltRef filtering */ #if VP8_TEMPORAL_ALT_REF @@ -50,7 +50,7 @@ static void vp8_temporal_filter_predictors_mb_c int offset; unsigned char *yptr, *uptr, *vptr; - // Y + /* Y */ yptr = y_mb_ptr + (mv_row >> 3) * stride + (mv_col >> 3); if ((mv_row | mv_col) & 7) @@ -63,7 +63,7 @@ static void vp8_temporal_filter_predictors_mb_c vp8_copy_mem16x16(yptr, stride, &pred[0], 16); } - // U & V + /* U & V */ mv_row >>= 1; mv_col >>= 1; stride = (stride + 1) >> 1; @@ -109,9 +109,10 @@ void vp8_temporal_filter_apply_c int pixel_value = *frame2++; modifier = src_byte - pixel_value; - // This is an integer approximation of: - // float coeff = (3.0 * modifer * modifier) / pow(2, strength); - // modifier = (int)roundf(coeff > 16 ? 
0 : 16-coeff); + /* This is an integer approximation of: + * float coeff = (3.0 * modifer * modifier) / pow(2, strength); + * modifier = (int)roundf(coeff > 16 ? 0 : 16-coeff); + */ modifier *= modifier; modifier *= 3; modifier += 1 << (strength - 1); @@ -134,7 +135,6 @@ void vp8_temporal_filter_apply_c } #if ALT_REF_MC_ENABLED -static int dummy_cost[2*mv_max+1]; static int vp8_temporal_filter_find_matching_mb_c ( @@ -155,10 +155,7 @@ static int vp8_temporal_filter_find_matching_mb_c int_mv best_ref_mv1; int_mv best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */ - int *mvcost[2] = { &dummy_cost[mv_max+1], &dummy_cost[mv_max+1] }; - int *mvsadcost[2] = { &dummy_cost[mv_max+1], &dummy_cost[mv_max+1] }; - - // Save input state + /* Save input state */ unsigned char **base_src = b->base_src; int src = b->src; int src_stride = b->src_stride; @@ -170,7 +167,7 @@ static int vp8_temporal_filter_find_matching_mb_c best_ref_mv1_full.as_mv.col = best_ref_mv1.as_mv.col >>3; best_ref_mv1_full.as_mv.row = best_ref_mv1.as_mv.row >>3; - // Setup frame pointers + /* Setup frame pointers */ b->base_src = &arf_frame->y_buffer; b->src_stride = arf_frame->y_stride; b->src = mb_offset; @@ -179,7 +176,7 @@ static int vp8_temporal_filter_find_matching_mb_c x->e_mbd.pre.y_stride = frame_ptr->y_stride; d->offset = mb_offset; - // Further step/diamond searches as necessary + /* Further step/diamond searches as necessary */ if (cpi->Speed < 8) { step_param = cpi->sf.first_step + (cpi->Speed > 5); @@ -189,29 +186,29 @@ static int vp8_temporal_filter_find_matching_mb_c step_param = cpi->sf.first_step + 2; } - /*cpi->sf.search_method == HEX*/ - // TODO Check that the 16x16 vf & sdf are selected here - bestsme = vp8_hex_search(x, b, d, - &best_ref_mv1_full, &d->bmi.mv, - step_param, - sadpb, - &cpi->fn_ptr[BLOCK_16X16], - mvsadcost, mvcost, &best_ref_mv1); + /* TODO Check that the 16x16 vf & sdf are selected here */ + /* Ignore mv costing by sending NULL cost arrays */ + bestsme = 
vp8_hex_search(x, b, d, &best_ref_mv1_full, &d->bmi.mv, + step_param, sadpb, + &cpi->fn_ptr[BLOCK_16X16], + NULL, NULL, &best_ref_mv1); #if ALT_REF_SUBPEL_ENABLED - // Try sub-pixel MC? - //if (bestsme > error_thresh && bestsme < INT_MAX) + /* Try sub-pixel MC? */ { int distortion; unsigned int sse; + /* Ignore mv costing by sending NULL cost array */ bestsme = cpi->find_fractional_mv_step(x, b, d, - &d->bmi.mv, &best_ref_mv1, - x->errorperbit, &cpi->fn_ptr[BLOCK_16X16], - mvcost, &distortion, &sse); + &d->bmi.mv, + &best_ref_mv1, + x->errorperbit, + &cpi->fn_ptr[BLOCK_16X16], + NULL, &distortion, &sse); } #endif - // Save input state + /* Save input state */ b->base_src = base_src; b->src = src; b->src_stride = src_stride; @@ -246,7 +243,7 @@ static void vp8_temporal_filter_iterate_c unsigned char *dst1, *dst2; DECLARE_ALIGNED_ARRAY(16, unsigned char, predictor, 16*16 + 8*8 + 8*8); - // Save input state + /* Save input state */ unsigned char *y_buffer = mbd->pre.y_buffer; unsigned char *u_buffer = mbd->pre.u_buffer; unsigned char *v_buffer = mbd->pre.v_buffer; @@ -254,16 +251,17 @@ static void vp8_temporal_filter_iterate_c for (mb_row = 0; mb_row < mb_rows; mb_row++) { #if ALT_REF_MC_ENABLED - // Source frames are extended to 16 pixels. This is different than - // L/A/G reference frames that have a border of 32 (VP8BORDERINPIXELS) - // A 6 tap filter is used for motion search. This requires 2 pixels - // before and 3 pixels after. So the largest Y mv on a border would - // then be 16 - 3. The UV blocks are half the size of the Y and - // therefore only extended by 8. The largest mv that a UV block - // can support is 8 - 3. A UV mv is half of a Y mv. - // (16 - 3) >> 1 == 6 which is greater than 8 - 3. - // To keep the mv in play for both Y and UV planes the max that it - // can be on a border is therefore 16 - 5. + /* Source frames are extended to 16 pixels. 
This is different than + * L/A/G reference frames that have a border of 32 (VP8BORDERINPIXELS) + * A 6 tap filter is used for motion search. This requires 2 pixels + * before and 3 pixels after. So the largest Y mv on a border would + * then be 16 - 3. The UV blocks are half the size of the Y and + * therefore only extended by 8. The largest mv that a UV block + * can support is 8 - 3. A UV mv is half of a Y mv. + * (16 - 3) >> 1 == 6 which is greater than 8 - 3. + * To keep the mv in play for both Y and UV planes the max that it + * can be on a border is therefore 16 - 5. + */ cpi->mb.mv_row_min = -((mb_row * 16) + (16 - 5)); cpi->mb.mv_row_max = ((cpi->common.mb_rows - 1 - mb_row) * 16) + (16 - 5); @@ -285,36 +283,41 @@ static void vp8_temporal_filter_iterate_c for (frame = 0; frame < frame_count; frame++) { - int err = 0; - if (cpi->frames[frame] == NULL) continue; mbd->block[0].bmi.mv.as_mv.row = 0; mbd->block[0].bmi.mv.as_mv.col = 0; + if (frame == alt_ref_index) + { + filter_weight = 2; + } + else + { + int err = 0; #if ALT_REF_MC_ENABLED #define THRESH_LOW 10000 #define THRESH_HIGH 20000 - - // Find best match in this frame by MC - err = vp8_temporal_filter_find_matching_mb_c - (cpi, - cpi->frames[alt_ref_index], - cpi->frames[frame], - mb_y_offset, - THRESH_LOW); - + /* Find best match in this frame by MC */ + err = vp8_temporal_filter_find_matching_mb_c + (cpi, + cpi->frames[alt_ref_index], + cpi->frames[frame], + mb_y_offset, + THRESH_LOW); #endif - // Assign higher weight to matching MB if it's error - // score is lower. If not applying MC default behavior - // is to weight all MBs equal. 
- filter_weight = errframes[frame]->y_buffer + mb_y_offset, @@ -325,7 +328,7 @@ static void vp8_temporal_filter_iterate_c mbd->block[0].bmi.mv.as_mv.col, predictor); - // Apply the filter (YUV) + /* Apply the filter (YUV) */ vp8_temporal_filter_apply (f->y_buffer + mb_y_offset, f->y_stride, @@ -358,7 +361,7 @@ static void vp8_temporal_filter_iterate_c } } - // Normalize filter output to produce AltRef frame + /* Normalize filter output to produce AltRef frame */ dst1 = cpi->alt_ref_buffer.y_buffer; stride = cpi->alt_ref_buffer.y_stride; byte = mb_y_offset; @@ -372,7 +375,7 @@ static void vp8_temporal_filter_iterate_c dst1[byte] = (unsigned char)pval; - // move to next pixel + /* move to next pixel */ byte++; } @@ -389,19 +392,19 @@ static void vp8_temporal_filter_iterate_c { int m=k+64; - // U + /* U */ unsigned int pval = accumulator[k] + (count[k] >> 1); pval *= cpi->fixed_divide[count[k]]; pval >>= 19; dst1[byte] = (unsigned char)pval; - // V + /* V */ pval = accumulator[m] + (count[m] >> 1); pval *= cpi->fixed_divide[count[m]]; pval >>= 19; dst2[byte] = (unsigned char)pval; - // move to next pixel + /* move to next pixel */ byte++; } @@ -416,7 +419,7 @@ static void vp8_temporal_filter_iterate_c mb_uv_offset += 8*(f->uv_stride-mb_cols); } - // Restore input state + /* Restore input state */ mbd->pre.y_buffer = y_buffer; mbd->pre.u_buffer = u_buffer; mbd->pre.v_buffer = v_buffer; @@ -450,8 +453,7 @@ void vp8_temporal_filter_prepare_c switch (blur_type) { case 1: - ///////////////////////////////////////// - // Backward Blur + /* Backward Blur */ frames_to_blur_backward = num_frames_backward; @@ -462,8 +464,7 @@ void vp8_temporal_filter_prepare_c break; case 2: - ///////////////////////////////////////// - // Forward Blur + /* Forward Blur */ frames_to_blur_forward = num_frames_forward; @@ -475,8 +476,7 @@ void vp8_temporal_filter_prepare_c case 3: default: - ///////////////////////////////////////// - // Center Blur + /* Center Blur */ frames_to_blur_forward = 
num_frames_forward; frames_to_blur_backward = num_frames_backward; @@ -486,7 +486,7 @@ void vp8_temporal_filter_prepare_c if (frames_to_blur_backward > frames_to_blur_forward) frames_to_blur_backward = frames_to_blur_forward; - // When max_frames is even we have 1 more frame backward than forward + /* When max_frames is even we have 1 more frame backward than forward */ if (frames_to_blur_forward > (max_frames - 1) / 2) frames_to_blur_forward = ((max_frames - 1) / 2); @@ -499,21 +499,7 @@ void vp8_temporal_filter_prepare_c start_frame = distance + frames_to_blur_forward; -#ifdef DEBUGFWG - // DEBUG FWG - printf("max:%d FBCK:%d FFWD:%d ftb:%d ftbbck:%d ftbfwd:%d sei:%d lasei:%d start:%d" - , max_frames - , num_frames_backward - , num_frames_forward - , frames_to_blur - , frames_to_blur_backward - , frames_to_blur_forward - , cpi->source_encode_index - , cpi->last_alt_ref_sei - , start_frame); -#endif - - // Setup frame pointers, NULL indicates frame not included in filter + /* Setup frame pointers, NULL indicates frame not included in filter */ vpx_memset(cpi->frames, 0, max_frames*sizeof(YV12_BUFFER_CONFIG *)); for (frame = 0; frame < frames_to_blur; frame++) { diff --git a/vp8/encoder/tokenize.c b/vp8/encoder/tokenize.c index ef41fa8..3b5268b 100644 --- a/vp8/encoder/tokenize.c +++ b/vp8/encoder/tokenize.c @@ -23,7 +23,7 @@ #ifdef ENTROPY_STATS _int64 context_counters[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; #endif -void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) ; +void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t) ; void vp8_fix_contexts(MACROBLOCKD *x); #include "dct_value_tokens.h" @@ -102,11 +102,12 @@ static void fill_value_tokens() static void tokenize2nd_order_b ( - MACROBLOCKD *x, + MACROBLOCK *x, TOKENEXTRA **tp, VP8_COMP *cpi ) { + MACROBLOCKD *xd = &x->e_mbd; int pt; /* near block/prev token context index */ int c; /* start at DC */ TOKENEXTRA *t = *tp;/* store tokens starting here */ @@ -117,11 
+118,11 @@ static void tokenize2nd_order_b int band, rc, v, token; int eob; - b = x->block + 24; + b = xd->block + 24; qcoeff_ptr = b->qcoeff; - a = (ENTROPY_CONTEXT *)x->above_context + 8; - l = (ENTROPY_CONTEXT *)x->left_context + 8; - eob = x->eobs[24]; + a = (ENTROPY_CONTEXT *)xd->above_context + 8; + l = (ENTROPY_CONTEXT *)xd->left_context + 8; + eob = xd->eobs[24]; VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l); if(!eob) @@ -131,7 +132,7 @@ static void tokenize2nd_order_b t->context_tree = cpi->common.fc.coef_probs [1] [0] [pt]; t->skip_eob_node = 0; - ++cpi->coef_counts [1] [0] [pt] [DCT_EOB_TOKEN]; + ++x->coef_counts [1] [0] [pt] [DCT_EOB_TOKEN]; t++; *tp = t; *a = *l = 0; @@ -145,7 +146,7 @@ static void tokenize2nd_order_b t->context_tree = cpi->common.fc.coef_probs [1] [0] [pt]; t->skip_eob_node = 0; - ++cpi->coef_counts [1] [0] [pt] [token]; + ++x->coef_counts [1] [0] [pt] [token]; pt = vp8_prev_token_class[token]; t++; c = 1; @@ -164,7 +165,7 @@ static void tokenize2nd_order_b t->skip_eob_node = ((pt == 0)); - ++cpi->coef_counts [1] [band] [pt] [token]; + ++x->coef_counts [1] [band] [pt] [token]; pt = vp8_prev_token_class[token]; t++; @@ -177,7 +178,7 @@ static void tokenize2nd_order_b t->skip_eob_node = 0; - ++cpi->coef_counts [1] [band] [pt] [DCT_EOB_TOKEN]; + ++x->coef_counts [1] [band] [pt] [DCT_EOB_TOKEN]; t++; } @@ -189,12 +190,13 @@ static void tokenize2nd_order_b static void tokenize1st_order_b ( - MACROBLOCKD *x, + MACROBLOCK *x, TOKENEXTRA **tp, int type, /* which plane: 0=Y no DC, 1=Y2, 2=UV, 3=Y with DC */ VP8_COMP *cpi ) { + MACROBLOCKD *xd = &x->e_mbd; unsigned int block; const BLOCKD *b; int pt; /* near block/prev token context index */ @@ -207,15 +209,15 @@ static void tokenize1st_order_b int band, rc, v; int tmp1, tmp2; - b = x->block; + b = xd->block; /* Luma */ for (block = 0; block < 16; block++, b++) { tmp1 = vp8_block2above[block]; tmp2 = vp8_block2left[block]; qcoeff_ptr = b->qcoeff; - a = (ENTROPY_CONTEXT *)x->above_context + tmp1; - l = 
(ENTROPY_CONTEXT *)x->left_context + tmp2; + a = (ENTROPY_CONTEXT *)xd->above_context + tmp1; + l = (ENTROPY_CONTEXT *)xd->left_context + tmp2; VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l); @@ -228,7 +230,7 @@ static void tokenize1st_order_b t->context_tree = cpi->common.fc.coef_probs [type] [c] [pt]; t->skip_eob_node = 0; - ++cpi->coef_counts [type] [c] [pt] [DCT_EOB_TOKEN]; + ++x->coef_counts [type] [c] [pt] [DCT_EOB_TOKEN]; t++; *tp = t; *a = *l = 0; @@ -243,7 +245,7 @@ static void tokenize1st_order_b t->context_tree = cpi->common.fc.coef_probs [type] [c] [pt]; t->skip_eob_node = 0; - ++cpi->coef_counts [type] [c] [pt] [token]; + ++x->coef_counts [type] [c] [pt] [token]; pt = vp8_prev_token_class[token]; t++; c++; @@ -261,7 +263,7 @@ static void tokenize1st_order_b t->context_tree = cpi->common.fc.coef_probs [type] [band] [pt]; t->skip_eob_node = (pt == 0); - ++cpi->coef_counts [type] [band] [pt] [token]; + ++x->coef_counts [type] [band] [pt] [token]; pt = vp8_prev_token_class[token]; t++; @@ -273,7 +275,7 @@ static void tokenize1st_order_b t->context_tree = cpi->common.fc.coef_probs [type] [band] [pt]; t->skip_eob_node = 0; - ++cpi->coef_counts [type] [band] [pt] [DCT_EOB_TOKEN]; + ++x->coef_counts [type] [band] [pt] [DCT_EOB_TOKEN]; t++; } @@ -287,8 +289,8 @@ static void tokenize1st_order_b tmp1 = vp8_block2above[block]; tmp2 = vp8_block2left[block]; qcoeff_ptr = b->qcoeff; - a = (ENTROPY_CONTEXT *)x->above_context + tmp1; - l = (ENTROPY_CONTEXT *)x->left_context + tmp2; + a = (ENTROPY_CONTEXT *)xd->above_context + tmp1; + l = (ENTROPY_CONTEXT *)xd->left_context + tmp2; VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l); @@ -299,7 +301,7 @@ static void tokenize1st_order_b t->context_tree = cpi->common.fc.coef_probs [2] [0] [pt]; t->skip_eob_node = 0; - ++cpi->coef_counts [2] [0] [pt] [DCT_EOB_TOKEN]; + ++x->coef_counts [2] [0] [pt] [DCT_EOB_TOKEN]; t++; *tp = t; *a = *l = 0; @@ -314,7 +316,7 @@ static void tokenize1st_order_b t->context_tree = cpi->common.fc.coef_probs [2] [0] 
[pt]; t->skip_eob_node = 0; - ++cpi->coef_counts [2] [0] [pt] [token]; + ++x->coef_counts [2] [0] [pt] [token]; pt = vp8_prev_token_class[token]; t++; c = 1; @@ -333,7 +335,7 @@ static void tokenize1st_order_b t->skip_eob_node = (pt == 0); - ++cpi->coef_counts [2] [band] [pt] [token]; + ++x->coef_counts [2] [band] [pt] [token]; pt = vp8_prev_token_class[token]; t++; @@ -346,7 +348,7 @@ static void tokenize1st_order_b t->skip_eob_node = 0; - ++cpi->coef_counts [2] [band] [pt] [DCT_EOB_TOKEN]; + ++x->coef_counts [2] [band] [pt] [DCT_EOB_TOKEN]; t++; } @@ -374,16 +376,18 @@ static int mb_is_skippable(MACROBLOCKD *x, int has_y2_block) } -void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) +void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t) { + MACROBLOCKD *xd = &x->e_mbd; int plane_type; int has_y2_block; - has_y2_block = (x->mode_info_context->mbmi.mode != B_PRED - && x->mode_info_context->mbmi.mode != SPLITMV); + has_y2_block = (xd->mode_info_context->mbmi.mode != B_PRED + && xd->mode_info_context->mbmi.mode != SPLITMV); - x->mode_info_context->mbmi.mb_skip_coeff = mb_is_skippable(x, has_y2_block); - if (x->mode_info_context->mbmi.mb_skip_coeff) + xd->mode_info_context->mbmi.mb_skip_coeff = + mb_is_skippable(xd, has_y2_block); + if (xd->mode_info_context->mbmi.mb_skip_coeff) { if (!cpi->common.mb_no_coeff_skip) { @@ -391,8 +395,8 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) } else { - vp8_fix_contexts(x); - cpi->skip_true_count++; + vp8_fix_contexts(xd); + x->skip_true_count++; } return; @@ -488,7 +492,8 @@ static void stuff2nd_order_b TOKENEXTRA **tp, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, - VP8_COMP *cpi + VP8_COMP *cpi, + MACROBLOCK *x ) { int pt; /* near block/prev token context index */ @@ -498,13 +503,12 @@ static void stuff2nd_order_b t->Token = DCT_EOB_TOKEN; t->context_tree = cpi->common.fc.coef_probs [1] [0] [pt]; t->skip_eob_node = 0; - ++cpi->coef_counts [1] [0] [pt] [DCT_EOB_TOKEN]; + 
++x->coef_counts [1] [0] [pt] [DCT_EOB_TOKEN]; ++t; *tp = t; pt = 0; *a = *l = pt; - } static void stuff1st_order_b @@ -513,7 +517,8 @@ static void stuff1st_order_b ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, int type, - VP8_COMP *cpi + VP8_COMP *cpi, + MACROBLOCK *x ) { int pt; /* near block/prev token context index */ @@ -524,20 +529,21 @@ static void stuff1st_order_b t->Token = DCT_EOB_TOKEN; t->context_tree = cpi->common.fc.coef_probs [type] [band] [pt]; t->skip_eob_node = 0; - ++cpi->coef_counts [type] [band] [pt] [DCT_EOB_TOKEN]; + ++x->coef_counts [type] [band] [pt] [DCT_EOB_TOKEN]; ++t; *tp = t; pt = 0; /* 0 <-> all coeff data is zero */ *a = *l = pt; - } + static void stuff1st_order_buv ( TOKENEXTRA **tp, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, - VP8_COMP *cpi + VP8_COMP *cpi, + MACROBLOCK *x ) { int pt; /* near block/prev token context index */ @@ -547,38 +553,38 @@ void stuff1st_order_buv t->Token = DCT_EOB_TOKEN; t->context_tree = cpi->common.fc.coef_probs [2] [0] [pt]; t->skip_eob_node = 0; - ++cpi->coef_counts[2] [0] [pt] [DCT_EOB_TOKEN]; + ++x->coef_counts[2] [0] [pt] [DCT_EOB_TOKEN]; ++t; *tp = t; pt = 0; /* 0 <-> all coeff data is zero */ *a = *l = pt; - } -void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) +void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t) { - ENTROPY_CONTEXT * A = (ENTROPY_CONTEXT *)x->above_context; - ENTROPY_CONTEXT * L = (ENTROPY_CONTEXT *)x->left_context; + MACROBLOCKD *xd = &x->e_mbd; + ENTROPY_CONTEXT * A = (ENTROPY_CONTEXT *)xd->above_context; + ENTROPY_CONTEXT * L = (ENTROPY_CONTEXT *)xd->left_context; int plane_type; int b; plane_type = 3; - if((x->mode_info_context->mbmi.mode != B_PRED - && x->mode_info_context->mbmi.mode != SPLITMV)) + if((xd->mode_info_context->mbmi.mode != B_PRED + && xd->mode_info_context->mbmi.mode != SPLITMV)) { stuff2nd_order_b(t, - A + vp8_block2above[24], L + vp8_block2left[24], cpi); + A + vp8_block2above[24], L + vp8_block2left[24], cpi, x); plane_type = 0; } for (b = 
0; b < 16; b++) stuff1st_order_b(t, A + vp8_block2above[b], - L + vp8_block2left[b], plane_type, cpi); + L + vp8_block2left[b], plane_type, cpi, x); for (b = 16; b < 24; b++) stuff1st_order_buv(t, A + vp8_block2above[b], - L + vp8_block2left[b], cpi); + L + vp8_block2left[b], cpi, x); } void vp8_fix_contexts(MACROBLOCKD *x) diff --git a/vp8/encoder/x86/dct_mmx.asm b/vp8/encoder/x86/dct_mmx.asm index f07b030..6f188cb 100644 --- a/vp8/encoder/x86/dct_mmx.asm +++ b/vp8/encoder/x86/dct_mmx.asm @@ -12,7 +12,7 @@ %include "vpx_ports/x86_abi_support.asm" ;void vp8_short_fdct4x4_mmx(short *input, short *output, int pitch) -global sym(vp8_short_fdct4x4_mmx) +global sym(vp8_short_fdct4x4_mmx) PRIVATE sym(vp8_short_fdct4x4_mmx): push rbp mov rbp, rsp diff --git a/vp8/encoder/x86/dct_sse2.asm b/vp8/encoder/x86/dct_sse2.asm index 3d52a5d..d880ce0 100644 --- a/vp8/encoder/x86/dct_sse2.asm +++ b/vp8/encoder/x86/dct_sse2.asm @@ -61,7 +61,7 @@ %endmacro ;void vp8_short_fdct4x4_sse2(short *input, short *output, int pitch) -global sym(vp8_short_fdct4x4_sse2) +global sym(vp8_short_fdct4x4_sse2) PRIVATE sym(vp8_short_fdct4x4_sse2): STACK_FRAME_CREATE @@ -166,7 +166,7 @@ sym(vp8_short_fdct4x4_sse2): STACK_FRAME_DESTROY ;void vp8_short_fdct8x4_sse2(short *input, short *output, int pitch) -global sym(vp8_short_fdct8x4_sse2) +global sym(vp8_short_fdct8x4_sse2) PRIVATE sym(vp8_short_fdct8x4_sse2): STACK_FRAME_CREATE diff --git a/vp8/encoder/x86/denoising_sse2.c b/vp8/encoder/x86/denoising_sse2.c new file mode 100644 index 0000000..c1ac6c1 --- /dev/null +++ b/vp8/encoder/x86/denoising_sse2.c @@ -0,0 +1,119 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "vp8/encoder/denoising.h" +#include "vp8/common/reconinter.h" +#include "vpx/vpx_integer.h" +#include "vpx_mem/vpx_mem.h" +#include "vpx_rtcd.h" + +#include + +union sum_union { + __m128i v; + signed char e[16]; +}; + +int vp8_denoiser_filter_sse2(YV12_BUFFER_CONFIG *mc_running_avg, + YV12_BUFFER_CONFIG *running_avg, + MACROBLOCK *signal, unsigned int motion_magnitude, + int y_offset, int uv_offset) +{ + unsigned char *sig = signal->thismb; + int sig_stride = 16; + unsigned char *mc_running_avg_y = mc_running_avg->y_buffer + y_offset; + int mc_avg_y_stride = mc_running_avg->y_stride; + unsigned char *running_avg_y = running_avg->y_buffer + y_offset; + int avg_y_stride = running_avg->y_stride; + int r; + __m128i acc_diff = _mm_setzero_si128(); + const __m128i k_0 = _mm_setzero_si128(); + const __m128i k_4 = _mm_set1_epi8(4); + const __m128i k_8 = _mm_set1_epi8(8); + const __m128i k_16 = _mm_set1_epi8(16); + /* Modify each level's adjustment according to motion_magnitude. */ + const __m128i l3 = _mm_set1_epi8( + (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ? 7 : 6); + /* Difference between level 3 and level 2 is 2. */ + const __m128i l32 = _mm_set1_epi8(2); + /* Difference between level 2 and level 1 is 1. */ + const __m128i l21 = _mm_set1_epi8(1); + + for (r = 0; r < 16; ++r) + { + /* Calculate differences */ + const __m128i v_sig = _mm_loadu_si128((__m128i *)(&sig[0])); + const __m128i v_mc_running_avg_y = _mm_loadu_si128( + (__m128i *)(&mc_running_avg_y[0])); + __m128i v_running_avg_y; + const __m128i pdiff = _mm_subs_epu8(v_mc_running_avg_y, v_sig); + const __m128i ndiff = _mm_subs_epu8(v_sig, v_mc_running_avg_y); + /* Obtain the sign. FF if diff is negative. */ + const __m128i diff_sign = _mm_cmpeq_epi8(pdiff, k_0); + /* Clamp absolute difference to 16 to be used to get mask. 
Doing this + * allows us to use _mm_cmpgt_epi8, which operates on signed byte. */ + const __m128i clamped_absdiff = _mm_min_epu8( + _mm_or_si128(pdiff, ndiff), k_16); + /* Get masks for l2 l1 and l0 adjustments */ + const __m128i mask2 = _mm_cmpgt_epi8(k_16, clamped_absdiff); + const __m128i mask1 = _mm_cmpgt_epi8(k_8, clamped_absdiff); + const __m128i mask0 = _mm_cmpgt_epi8(k_4, clamped_absdiff); + /* Get adjustments for l2, l1, and l0 */ + __m128i adj2 = _mm_and_si128(mask2, l32); + const __m128i adj1 = _mm_and_si128(mask1, l21); + const __m128i adj0 = _mm_and_si128(mask0, clamped_absdiff); + __m128i adj, padj, nadj; + + /* Combine the adjustments and get absolute adjustments. */ + adj2 = _mm_add_epi8(adj2, adj1); + adj = _mm_sub_epi8(l3, adj2); + adj = _mm_andnot_si128(mask0, adj); + adj = _mm_or_si128(adj, adj0); + + /* Restore the sign and get positive and negative adjustments. */ + padj = _mm_andnot_si128(diff_sign, adj); + nadj = _mm_and_si128(diff_sign, adj); + + /* Calculate filtered value. */ + v_running_avg_y = _mm_adds_epu8(v_sig, padj); + v_running_avg_y = _mm_subs_epu8(v_running_avg_y, nadj); + _mm_storeu_si128((__m128i *)running_avg_y, v_running_avg_y); + + /* Adjustments <=7, and each element in acc_diff can fit in signed + * char. + */ + acc_diff = _mm_adds_epi8(acc_diff, padj); + acc_diff = _mm_subs_epi8(acc_diff, nadj); + + /* Update pointers for next iteration. */ + sig += sig_stride; + mc_running_avg_y += mc_avg_y_stride; + running_avg_y += avg_y_stride; + } + + { + /* Compute the sum of all pixel differences of this MB. 
*/ + union sum_union s; + int sum_diff = 0; + s.v = acc_diff; + sum_diff = s.e[0] + s.e[1] + s.e[2] + s.e[3] + s.e[4] + s.e[5] + + s.e[6] + s.e[7] + s.e[8] + s.e[9] + s.e[10] + s.e[11] + + s.e[12] + s.e[13] + s.e[14] + s.e[15]; + + if (abs(sum_diff) > SUM_DIFF_THRESHOLD) + { + return COPY_BLOCK; + } + } + + vp8_copy_mem16x16(running_avg->y_buffer + y_offset, avg_y_stride, + signal->thismb, sig_stride); + return FILTER_BLOCK; +} diff --git a/vp8/encoder/x86/encodeopt.asm b/vp8/encoder/x86/encodeopt.asm index 7ec7d60..fe26b18 100644 --- a/vp8/encoder/x86/encodeopt.asm +++ b/vp8/encoder/x86/encodeopt.asm @@ -12,7 +12,7 @@ %include "vpx_ports/x86_abi_support.asm" ;int vp8_block_error_xmm(short *coeff_ptr, short *dcoef_ptr) -global sym(vp8_block_error_xmm) +global sym(vp8_block_error_xmm) PRIVATE sym(vp8_block_error_xmm): push rbp mov rbp, rsp @@ -60,7 +60,7 @@ sym(vp8_block_error_xmm): ret ;int vp8_block_error_mmx(short *coeff_ptr, short *dcoef_ptr) -global sym(vp8_block_error_mmx) +global sym(vp8_block_error_mmx) PRIVATE sym(vp8_block_error_mmx): push rbp mov rbp, rsp @@ -126,7 +126,7 @@ sym(vp8_block_error_mmx): ;int vp8_mbblock_error_mmx_impl(short *coeff_ptr, short *dcoef_ptr, int dc); -global sym(vp8_mbblock_error_mmx_impl) +global sym(vp8_mbblock_error_mmx_impl) PRIVATE sym(vp8_mbblock_error_mmx_impl): push rbp mov rbp, rsp @@ -203,7 +203,7 @@ sym(vp8_mbblock_error_mmx_impl): ;int vp8_mbblock_error_xmm_impl(short *coeff_ptr, short *dcoef_ptr, int dc); -global sym(vp8_mbblock_error_xmm_impl) +global sym(vp8_mbblock_error_xmm_impl) PRIVATE sym(vp8_mbblock_error_xmm_impl): push rbp mov rbp, rsp @@ -273,7 +273,7 @@ sym(vp8_mbblock_error_xmm_impl): ;int vp8_mbuverror_mmx_impl(short *s_ptr, short *d_ptr); -global sym(vp8_mbuverror_mmx_impl) +global sym(vp8_mbuverror_mmx_impl) PRIVATE sym(vp8_mbuverror_mmx_impl): push rbp mov rbp, rsp @@ -330,7 +330,7 @@ sym(vp8_mbuverror_mmx_impl): ;int vp8_mbuverror_xmm_impl(short *s_ptr, short *d_ptr); -global 
sym(vp8_mbuverror_xmm_impl) +global sym(vp8_mbuverror_xmm_impl) PRIVATE sym(vp8_mbuverror_xmm_impl): push rbp mov rbp, rsp diff --git a/vp8/encoder/x86/fwalsh_sse2.asm b/vp8/encoder/x86/fwalsh_sse2.asm index 71efd56..f498927 100644 --- a/vp8/encoder/x86/fwalsh_sse2.asm +++ b/vp8/encoder/x86/fwalsh_sse2.asm @@ -12,7 +12,7 @@ %include "vpx_ports/x86_abi_support.asm" ;void vp8_short_walsh4x4_sse2(short *input, short *output, int pitch) -global sym(vp8_short_walsh4x4_sse2) +global sym(vp8_short_walsh4x4_sse2) PRIVATE sym(vp8_short_walsh4x4_sse2): push rbp mov rbp, rsp diff --git a/vp8/encoder/x86/quantize_mmx.asm b/vp8/encoder/x86/quantize_mmx.asm index f29a54e..2864ce1 100644 --- a/vp8/encoder/x86/quantize_mmx.asm +++ b/vp8/encoder/x86/quantize_mmx.asm @@ -15,7 +15,7 @@ ; short *qcoeff_ptr,short *dequant_ptr, ; short *scan_mask, short *round_ptr, ; short *quant_ptr, short *dqcoeff_ptr); -global sym(vp8_fast_quantize_b_impl_mmx) +global sym(vp8_fast_quantize_b_impl_mmx) PRIVATE sym(vp8_fast_quantize_b_impl_mmx): push rbp mov rbp, rsp diff --git a/vp8/encoder/x86/quantize_sse2.asm b/vp8/encoder/x86/quantize_sse2.asm index 7c249ff..724e54c 100644 --- a/vp8/encoder/x86/quantize_sse2.asm +++ b/vp8/encoder/x86/quantize_sse2.asm @@ -16,7 +16,7 @@ ; (BLOCK *b, | 0 ; BLOCKD *d) | 1 -global sym(vp8_regular_quantize_b_sse2) +global sym(vp8_regular_quantize_b_sse2) PRIVATE sym(vp8_regular_quantize_b_sse2): push rbp mov rbp, rsp @@ -240,7 +240,7 @@ ZIGZAG_LOOP 15 ; (BLOCK *b, | 0 ; BLOCKD *d) | 1 -global sym(vp8_fast_quantize_b_sse2) +global sym(vp8_fast_quantize_b_sse2) PRIVATE sym(vp8_fast_quantize_b_sse2): push rbp mov rbp, rsp diff --git a/vp8/encoder/x86/quantize_sse4.asm b/vp8/encoder/x86/quantize_sse4.asm index 70eac0c..f0e5d40 100644 --- a/vp8/encoder/x86/quantize_sse4.asm +++ b/vp8/encoder/x86/quantize_sse4.asm @@ -16,7 +16,7 @@ ; (BLOCK *b, | 0 ; BLOCKD *d) | 1 -global sym(vp8_regular_quantize_b_sse4) +global sym(vp8_regular_quantize_b_sse4) PRIVATE 
sym(vp8_regular_quantize_b_sse4): %if ABI_IS_32BIT diff --git a/vp8/encoder/x86/quantize_ssse3.asm b/vp8/encoder/x86/quantize_ssse3.asm index e698e90..dd526f4 100644 --- a/vp8/encoder/x86/quantize_ssse3.asm +++ b/vp8/encoder/x86/quantize_ssse3.asm @@ -17,7 +17,7 @@ ; BLOCKD *d) | 1 ; -global sym(vp8_fast_quantize_b_ssse3) +global sym(vp8_fast_quantize_b_ssse3) PRIVATE sym(vp8_fast_quantize_b_ssse3): push rbp mov rbp, rsp diff --git a/vp8/encoder/x86/ssim_opt.asm b/vp8/encoder/x86/ssim_opt.asm index c6db3d1..5964a85 100644 --- a/vp8/encoder/x86/ssim_opt.asm +++ b/vp8/encoder/x86/ssim_opt.asm @@ -61,7 +61,7 @@ ; or pavgb At this point this is just meant to be first pass for calculating ; all the parms needed for 16x16 ssim so we can play with dssim as distortion ; in mode selection code. -global sym(vp8_ssim_parms_16x16_sse2) +global sym(vp8_ssim_parms_16x16_sse2) PRIVATE sym(vp8_ssim_parms_16x16_sse2): push rbp mov rbp, rsp @@ -151,7 +151,7 @@ sym(vp8_ssim_parms_16x16_sse2): ; or pavgb At this point this is just meant to be first pass for calculating ; all the parms needed for 16x16 ssim so we can play with dssim as distortion ; in mode selection code. 
-global sym(vp8_ssim_parms_8x8_sse2) +global sym(vp8_ssim_parms_8x8_sse2) PRIVATE sym(vp8_ssim_parms_8x8_sse2): push rbp mov rbp, rsp diff --git a/vp8/encoder/x86/subtract_mmx.asm b/vp8/encoder/x86/subtract_mmx.asm index 75e8aa3..794dd22 100644 --- a/vp8/encoder/x86/subtract_mmx.asm +++ b/vp8/encoder/x86/subtract_mmx.asm @@ -14,7 +14,7 @@ ;void vp8_subtract_b_mmx_impl(unsigned char *z, int src_stride, ; short *diff, unsigned char *Predictor, ; int pitch); -global sym(vp8_subtract_b_mmx_impl) +global sym(vp8_subtract_b_mmx_impl) PRIVATE sym(vp8_subtract_b_mmx_impl): push rbp mov rbp, rsp @@ -75,7 +75,7 @@ sym(vp8_subtract_b_mmx_impl): ;void vp8_subtract_mby_mmx(short *diff, unsigned char *src, int src_stride, ;unsigned char *pred, int pred_stride) -global sym(vp8_subtract_mby_mmx) +global sym(vp8_subtract_mby_mmx) PRIVATE sym(vp8_subtract_mby_mmx): push rbp mov rbp, rsp @@ -150,7 +150,7 @@ sym(vp8_subtract_mby_mmx): ; int src_stride, unsigned char *upred, ; unsigned char *vpred, int pred_stride) -global sym(vp8_subtract_mbuv_mmx) +global sym(vp8_subtract_mbuv_mmx) PRIVATE sym(vp8_subtract_mbuv_mmx): push rbp mov rbp, rsp diff --git a/vp8/encoder/x86/subtract_sse2.asm b/vp8/encoder/x86/subtract_sse2.asm index 008e9c7..a5d17f5 100644 --- a/vp8/encoder/x86/subtract_sse2.asm +++ b/vp8/encoder/x86/subtract_sse2.asm @@ -14,7 +14,7 @@ ;void vp8_subtract_b_sse2_impl(unsigned char *z, int src_stride, ; short *diff, unsigned char *Predictor, ; int pitch); -global sym(vp8_subtract_b_sse2_impl) +global sym(vp8_subtract_b_sse2_impl) PRIVATE sym(vp8_subtract_b_sse2_impl): push rbp mov rbp, rsp @@ -73,7 +73,7 @@ sym(vp8_subtract_b_sse2_impl): ;void vp8_subtract_mby_sse2(short *diff, unsigned char *src, int src_stride, ;unsigned char *pred, int pred_stride) -global sym(vp8_subtract_mby_sse2) +global sym(vp8_subtract_mby_sse2) PRIVATE sym(vp8_subtract_mby_sse2): push rbp mov rbp, rsp @@ -146,7 +146,7 @@ sym(vp8_subtract_mby_sse2): ;vp8_subtract_mbuv_sse2(short *diff, unsigned char 
*usrc, unsigned char *vsrc, ; int src_stride, unsigned char *upred, ; unsigned char *vpred, int pred_stride) -global sym(vp8_subtract_mbuv_sse2) +global sym(vp8_subtract_mbuv_sse2) PRIVATE sym(vp8_subtract_mbuv_sse2): push rbp mov rbp, rsp diff --git a/vp8/encoder/x86/temporal_filter_apply_sse2.asm b/vp8/encoder/x86/temporal_filter_apply_sse2.asm index b97c694..ce9d983 100644 --- a/vp8/encoder/x86/temporal_filter_apply_sse2.asm +++ b/vp8/encoder/x86/temporal_filter_apply_sse2.asm @@ -20,7 +20,7 @@ ; int filter_weight, | 5 ; unsigned int *accumulator, | 6 ; unsigned short *count) | 7 -global sym(vp8_temporal_filter_apply_sse2) +global sym(vp8_temporal_filter_apply_sse2) PRIVATE sym(vp8_temporal_filter_apply_sse2): push rbp diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk index 3a7b146..a328f46 100644 --- a/vp8/vp8_common.mk +++ b/vp8/vp8_common.mk @@ -30,7 +30,6 @@ VP8_COMMON_SRCS-yes += common/findnearmv.c VP8_COMMON_SRCS-yes += common/generic/systemdependent.c VP8_COMMON_SRCS-yes += common/idct_blk.c VP8_COMMON_SRCS-yes += common/idctllm.c -VP8_COMMON_SRCS-yes += common/idctllm_test.cc VP8_COMMON_SRCS-yes += common/alloccommon.h VP8_COMMON_SRCS-yes += common/blockd.h VP8_COMMON_SRCS-yes += common/common.h @@ -85,7 +84,6 @@ VP8_COMMON_SRCS-$(CONFIG_POSTPROC) += common/postproc.c VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/dequantize_mmx.asm VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/idct_blk_mmx.c VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/idctllm_mmx.asm -VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/idctllm_mmx_test.cc VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/iwalsh_mmx.asm VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/loopfilter_mmx.asm VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/recon_mmx.asm @@ -122,6 +120,14 @@ VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/loopfilter_block_sse2.asm endif # common (c) +VP8_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/idctllm_dspr2.c +VP8_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/filter_dspr2.c +VP8_COMMON_SRCS-$(HAVE_DSPR2) += 
common/mips/dspr2/loopfilter_filters_dspr2.c +VP8_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/reconinter_dspr2.c +VP8_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/idct_blk_dspr2.c +VP8_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/dequantize_dspr2.c + +# common (c) VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/filter_arm.c VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/loopfilter_arm.c VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/reconintra_arm.c diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c index 5fb74c4..eeac3a8 100644 --- a/vp8/vp8_cx_iface.c +++ b/vp8/vp8_cx_iface.c @@ -9,6 +9,7 @@ */ +#include "vpx_rtcd.h" #include "vpx/vpx_codec.h" #include "vpx/internal/vpx_codec_internal.h" #include "vpx_version.h" @@ -22,7 +23,6 @@ struct vp8_extracfg { struct vpx_codec_pkt_list *pkt_list; - vp8e_encoding_mode encoding_mode; /** best, good, realtime */ int cpu_used; /** available cpu percentage in 1/16*/ unsigned int enable_auto_alt_ref; /** if encoder decides to uses alternate reference frame */ unsigned int noise_sensitivity; @@ -51,10 +51,8 @@ static const struct extraconfig_map extracfg_map[] = { NULL, #if !(CONFIG_REALTIME_ONLY) - VP8_BEST_QUALITY_ENCODING, /* Encoding Mode */ 0, /* cpu_used */ #else - VP8_REAL_TIME_ENCODING, /* Encoding Mode */ 4, /* cpu_used */ #endif 0, /* enable_auto_alt_ref */ @@ -88,7 +86,8 @@ struct vpx_codec_alg_priv vpx_image_t preview_img; unsigned int next_frame_flag; vp8_postproc_cfg_t preview_ppcfg; - vpx_codec_pkt_list_decl(64) pkt_list; // changed to accomendate the maximum number of lagged frames allowed + /* pkt_list size depends on the maximum number of lagged frames allowed. 
*/ + vpx_codec_pkt_list_decl(64) pkt_list; unsigned int fixed_kf_cntr; }; @@ -146,25 +145,39 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, RANGE_CHECK_HI(cfg, rc_max_quantizer, 63); RANGE_CHECK_HI(cfg, rc_min_quantizer, cfg->rc_max_quantizer); RANGE_CHECK_HI(cfg, g_threads, 64); -#if !(CONFIG_REALTIME_ONLY) - RANGE_CHECK_HI(cfg, g_lag_in_frames, 25); -#else +#if CONFIG_REALTIME_ONLY RANGE_CHECK_HI(cfg, g_lag_in_frames, 0); +#elif CONFIG_MULTI_RES_ENCODING + if (ctx->base.enc.total_encoders > 1) + RANGE_CHECK_HI(cfg, g_lag_in_frames, 0); +#else + RANGE_CHECK_HI(cfg, g_lag_in_frames, 25); #endif RANGE_CHECK(cfg, rc_end_usage, VPX_VBR, VPX_CQ); RANGE_CHECK_HI(cfg, rc_undershoot_pct, 1000); RANGE_CHECK_HI(cfg, rc_overshoot_pct, 1000); RANGE_CHECK_HI(cfg, rc_2pass_vbr_bias_pct, 100); RANGE_CHECK(cfg, kf_mode, VPX_KF_DISABLED, VPX_KF_AUTO); - //RANGE_CHECK_BOOL(cfg, g_delete_firstpassfile); - RANGE_CHECK_BOOL(cfg, rc_resize_allowed); + +/* TODO: add spatial re-sampling support and frame dropping in + * multi-res-encoder.*/ +#if CONFIG_MULTI_RES_ENCODING + if (ctx->base.enc.total_encoders > 1) + RANGE_CHECK_HI(cfg, rc_resize_allowed, 0); +#else + RANGE_CHECK_BOOL(cfg, rc_resize_allowed); +#endif RANGE_CHECK_HI(cfg, rc_dropframe_thresh, 100); RANGE_CHECK_HI(cfg, rc_resize_up_thresh, 100); RANGE_CHECK_HI(cfg, rc_resize_down_thresh, 100); -#if !(CONFIG_REALTIME_ONLY) - RANGE_CHECK(cfg, g_pass, VPX_RC_ONE_PASS, VPX_RC_LAST_PASS); -#else + +#if CONFIG_REALTIME_ONLY RANGE_CHECK(cfg, g_pass, VPX_RC_ONE_PASS, VPX_RC_ONE_PASS); +#elif CONFIG_MULTI_RES_ENCODING + if (ctx->base.enc.total_encoders > 1) + RANGE_CHECK(cfg, g_pass, VPX_RC_ONE_PASS, VPX_RC_ONE_PASS); +#else + RANGE_CHECK(cfg, g_pass, VPX_RC_ONE_PASS, VPX_RC_LAST_PASS); #endif /* VP8 does not support a lower bound on the keyframe interval in @@ -177,11 +190,6 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, RANGE_CHECK_BOOL(vp8_cfg, enable_auto_alt_ref); RANGE_CHECK(vp8_cfg, 
cpu_used, -16, 16); -#if !(CONFIG_REALTIME_ONLY) - RANGE_CHECK(vp8_cfg, encoding_mode, VP8_BEST_QUALITY_ENCODING, VP8_REAL_TIME_ENCODING); -#else - RANGE_CHECK(vp8_cfg, encoding_mode, VP8_REAL_TIME_ENCODING, VP8_REAL_TIME_ENCODING); -#endif #if CONFIG_REALTIME_ONLY && !CONFIG_TEMPORAL_DENOISING RANGE_CHECK(vp8_cfg, noise_sensitivity, 0, 0); @@ -189,7 +197,8 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, RANGE_CHECK_HI(vp8_cfg, noise_sensitivity, 6); #endif - RANGE_CHECK(vp8_cfg, token_partitions, VP8_ONE_TOKENPARTITION, VP8_EIGHT_TOKENPARTITION); + RANGE_CHECK(vp8_cfg, token_partitions, VP8_ONE_TOKENPARTITION, + VP8_EIGHT_TOKENPARTITION); RANGE_CHECK_HI(vp8_cfg, Sharpness, 7); RANGE_CHECK(vp8_cfg, arnr_max_frames, 0, 15); RANGE_CHECK_HI(vp8_cfg, arnr_strength, 6); @@ -203,7 +212,8 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, if (cfg->g_pass == VPX_RC_LAST_PASS) { size_t packet_sz = sizeof(FIRSTPASS_STATS); - int n_packets = cfg->rc_twopass_stats_in.sz / packet_sz; + int n_packets = (int)(cfg->rc_twopass_stats_in.sz / + packet_sz); FIRSTPASS_STATS *stats; if (!cfg->rc_twopass_stats_in.buf) @@ -227,7 +237,7 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, if (cfg->ts_number_layers > 1) { - int i; + unsigned int i; RANGE_CHECK_HI(cfg, ts_periodicity, 16); for (i=1; its_number_layers; i++) @@ -299,7 +309,7 @@ static vpx_codec_err_t set_vp8e_config(VP8_CONFIG *oxcf, break; } - if (cfg.g_pass == VPX_RC_FIRST_PASS) + if (cfg.g_pass == VPX_RC_FIRST_PASS || cfg.g_pass == VPX_RC_ONE_PASS) { oxcf->allow_lag = 0; oxcf->lag_in_frames = 0; @@ -355,7 +365,6 @@ static vpx_codec_err_t set_vp8e_config(VP8_CONFIG *oxcf, oxcf->auto_key = cfg.kf_mode == VPX_KF_AUTO && cfg.kf_min_dist != cfg.kf_max_dist; - //oxcf->kf_min_dist = cfg.kf_min_dis; oxcf->key_freq = cfg.kf_max_dist; oxcf->number_of_layers = cfg.ts_number_layers; @@ -385,9 +394,6 @@ static vpx_codec_err_t set_vp8e_config(VP8_CONFIG *oxcf, } #endif - 
//oxcf->delete_first_pass_file = cfg.g_delete_firstpassfile; - //strcpy(oxcf->first_pass_file, cfg.g_firstpass_file); - oxcf->cpu_used = vp8_cfg.cpu_used; oxcf->encode_breakout = vp8_cfg.static_thresh; oxcf->play_alternate = vp8_cfg.enable_auto_alt_ref; @@ -447,7 +453,7 @@ static vpx_codec_err_t vp8e_set_config(vpx_codec_alg_priv_t *ctx, vpx_codec_err_t res; if (((cfg->g_w != ctx->cfg.g_w) || (cfg->g_h != ctx->cfg.g_h)) - && cfg->g_lag_in_frames > 1) + && (cfg->g_lag_in_frames > 1 || cfg->g_pass != VPX_RC_ONE_PASS)) ERROR("Cannot change width or height after initialization"); /* Prevent increasing lag_in_frames. This check is stricter than it needs @@ -542,19 +548,27 @@ static vpx_codec_err_t vp8e_mr_alloc_mem(const vpx_codec_enc_cfg_t *cfg, vpx_codec_err_t res = 0; #if CONFIG_MULTI_RES_ENCODING + LOWER_RES_FRAME_INFO *shared_mem_loc; int mb_rows = ((cfg->g_w + 15) >>4); int mb_cols = ((cfg->g_h + 15) >>4); - *mem_loc = calloc(mb_rows*mb_cols, sizeof(LOWER_RES_INFO)); - if(!(*mem_loc)) + shared_mem_loc = calloc(1, sizeof(LOWER_RES_FRAME_INFO)); + if(!shared_mem_loc) + { + res = VPX_CODEC_MEM_ERROR; + } + + shared_mem_loc->mb_info = calloc(mb_rows*mb_cols, sizeof(LOWER_RES_MB_INFO)); + if(!(shared_mem_loc->mb_info)) { - free(*mem_loc); res = VPX_CODEC_MEM_ERROR; } else + { + *mem_loc = (void *)shared_mem_loc; res = VPX_CODEC_OK; + } #endif - return res; } @@ -568,6 +582,8 @@ static vpx_codec_err_t vp8e_init(vpx_codec_ctx_t *ctx, struct VP8_COMP *optr; + vpx_rtcd(); + if (!ctx->priv) { priv = calloc(1, sizeof(struct vpx_codec_alg_priv)); @@ -616,15 +632,15 @@ static vpx_codec_err_t vp8e_init(vpx_codec_ctx_t *ctx, return VPX_CODEC_MEM_ERROR; } + if(mr_cfg) + ctx->priv->enc.total_encoders = mr_cfg->mr_total_resolutions; + else + ctx->priv->enc.total_encoders = 1; + res = validate_config(priv, &priv->cfg, &priv->vp8_cfg, 0); if (!res) { - if(mr_cfg) - ctx->priv->enc.total_encoders = mr_cfg->mr_total_resolutions; - else - ctx->priv->enc.total_encoders = 1; - 
set_vp8e_config(&ctx->priv->alg_priv->oxcf, ctx->priv->alg_priv->cfg, ctx->priv->alg_priv->vp8_cfg, @@ -647,7 +663,11 @@ static vpx_codec_err_t vp8e_destroy(vpx_codec_alg_priv_t *ctx) #if CONFIG_MULTI_RES_ENCODING /* Free multi-encoder shared memory */ if (ctx->oxcf.mr_total_resolutions > 0 && (ctx->oxcf.mr_encoder_id == ctx->oxcf.mr_total_resolutions-1)) + { + LOWER_RES_FRAME_INFO *shared_mem_loc = (LOWER_RES_FRAME_INFO *)ctx->oxcf.mr_low_res_mode_info; + free(shared_mem_loc->mb_info); free(ctx->oxcf.mr_low_res_mode_info); + } #endif free(ctx->cx_data); @@ -673,7 +693,7 @@ static vpx_codec_err_t image2yuvconfig(const vpx_image_t *img, yv12->uv_stride = img->stride[VPX_PLANE_U]; yv12->border = (img->stride[VPX_PLANE_Y] - img->w) / 2; - yv12->clrtype = (img->fmt == VPX_IMG_FMT_VPXI420 || img->fmt == VPX_IMG_FMT_VPXYV12); //REG_YUV = 0 + yv12->clrtype = (img->fmt == VPX_IMG_FMT_VPXI420 || img->fmt == VPX_IMG_FMT_VPXYV12); return res; } @@ -733,6 +753,9 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx, if (!ctx->cfg.rc_target_bitrate) return res; + if (!ctx->cfg.rc_target_bitrate) + return res; + if (img) res = validate_img(ctx, img); @@ -756,13 +779,13 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx, int ref = 7; if (flags & VP8_EFLAG_NO_REF_LAST) - ref ^= VP8_LAST_FLAG; + ref ^= VP8_LAST_FRAME; if (flags & VP8_EFLAG_NO_REF_GF) - ref ^= VP8_GOLD_FLAG; + ref ^= VP8_GOLD_FRAME; if (flags & VP8_EFLAG_NO_REF_ARF) - ref ^= VP8_ALT_FLAG; + ref ^= VP8_ALTR_FRAME; vp8_use_as_reference(ctx->cpi, ref); } @@ -774,13 +797,13 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx, int upd = 7; if (flags & VP8_EFLAG_NO_UPD_LAST) - upd ^= VP8_LAST_FLAG; + upd ^= VP8_LAST_FRAME; if (flags & VP8_EFLAG_NO_UPD_GF) - upd ^= VP8_GOLD_FLAG; + upd ^= VP8_GOLD_FRAME; if (flags & VP8_EFLAG_NO_UPD_ARF) - upd ^= VP8_ALT_FLAG; + upd ^= VP8_ALTR_FRAME; vp8_update_reference(ctx->cpi, upd); } @@ -869,15 +892,16 @@ static vpx_codec_err_t 
vp8e_encode(vpx_codec_alg_priv_t *ctx, VP8_COMP *cpi = (VP8_COMP *)ctx->cpi; /* Add the frame packet to the list of returned packets. */ - round = 1000000 * ctx->cfg.g_timebase.num / 2 - 1; + round = (vpx_codec_pts_t)1000000 + * ctx->cfg.g_timebase.num / 2 - 1; delta = (dst_end_time_stamp - dst_time_stamp); pkt.kind = VPX_CODEC_CX_FRAME_PKT; pkt.data.frame.pts = (dst_time_stamp * ctx->cfg.g_timebase.den + round) / ctx->cfg.g_timebase.num / 10000000; - pkt.data.frame.duration = - (delta * ctx->cfg.g_timebase.den + round) - / ctx->cfg.g_timebase.num / 10000000; + pkt.data.frame.duration = (unsigned long) + ((delta * ctx->cfg.g_timebase.den + round) + / ctx->cfg.g_timebase.num / 10000000); pkt.data.frame.flags = lib_flags << 16; if (lib_flags & FRAMEFLAGS_KEY) @@ -887,10 +911,11 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx, { pkt.data.frame.flags |= VPX_FRAME_IS_INVISIBLE; - // This timestamp should be as close as possible to the - // prior PTS so that if a decoder uses pts to schedule when - // to do this, we start right after last frame was decoded. - // Invisible frames have no duration. + /* This timestamp should be as close as possible to the + * prior PTS so that if a decoder uses pts to schedule when + * to do this, we start right after last frame was decoded. + * Invisible frames have no duration. 
+ */ pkt.data.frame.pts = ((cpi->last_time_stamp_seen * ctx->cfg.g_timebase.den + round) / ctx->cfg.g_timebase.num / 10000000) + 1; @@ -942,8 +967,6 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx, cx_data += size; cx_data_sz -= size; } - - //printf("timestamp: %lld, duration: %d\n", pkt->data.frame.pts, pkt->data.frame.duration); } } } diff --git a/vp8/vp8_dx_iface.c b/vp8/vp8_dx_iface.c index 37773db..c13d697 100644 --- a/vp8/vp8_dx_iface.c +++ b/vp8/vp8_dx_iface.c @@ -11,12 +11,19 @@ #include #include +#include "vpx_rtcd.h" #include "vpx/vpx_decoder.h" #include "vpx/vp8dx.h" #include "vpx/internal/vpx_codec_internal.h" #include "vpx_version.h" #include "common/onyxd.h" #include "decoder/onyxd_int.h" +#include "common/alloccommon.h" +#include "vpx_mem/vpx_mem.h" +#if CONFIG_ERROR_CONCEALMENT +#include "decoder/error_concealment.h" +#endif +#include "decoder/decoderthreading.h" #define VP8_CAP_POSTPROC (CONFIG_POSTPROC ? VPX_CODEC_CAP_POSTPROC : 0) #define VP8_CAP_ERROR_CONCEALMENT (CONFIG_ERROR_CONCEALMENT ? \ @@ -69,7 +76,7 @@ struct vpx_codec_alg_priv #endif vpx_image_t img; int img_setup; - int img_avail; + void *user_priv; }; static unsigned long vp8_priv_sz(const vpx_codec_dec_cfg_t *si, vpx_codec_flags_t flags) @@ -187,6 +194,8 @@ static vpx_codec_err_t vp8_init(vpx_codec_ctx_t *ctx, vpx_codec_err_t res = VPX_CODEC_OK; (void) data; + vpx_rtcd(); + /* This function only allocates space for the vpx_codec_alg_priv_t * structure. More memory may be required at the time the stream * information becomes known. @@ -341,16 +350,30 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t *ctx, long deadline) { vpx_codec_err_t res = VPX_CODEC_OK; - - ctx->img_avail = 0; + unsigned int resolution_change = 0; + unsigned int w, h; /* Determine the stream parameters. Note that we rely on peek_si to * validate that we have a buffer that does not wrap around the top * of the heap. 
*/ - if (!ctx->si.h) - res = ctx->base.iface->dec.peek_si(data, data_sz, &ctx->si); + w = ctx->si.w; + h = ctx->si.h; + + res = ctx->base.iface->dec.peek_si(data, data_sz, &ctx->si); + + if((res == VPX_CODEC_UNSUP_BITSTREAM) && !ctx->si.is_kf) + { + /* the peek function returns an error for non keyframes, however for + * this case, it is not an error */ + res = VPX_CODEC_OK; + } + + if(!ctx->decoder_init && !ctx->si.is_kf) + res = VPX_CODEC_UNSUP_BITSTREAM; + if ((ctx->si.h != h) || (ctx->si.w != w)) + resolution_change = 1; /* Perform deferred allocations, if required */ if (!res && ctx->defer_alloc) @@ -426,6 +449,122 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t *ctx, if (!res && ctx->pbi) { + if(resolution_change) + { + VP8D_COMP *pbi = ctx->pbi; + VP8_COMMON *const pc = & pbi->common; + MACROBLOCKD *const xd = & pbi->mb; +#if CONFIG_MULTITHREAD + int i; +#endif + pc->Width = ctx->si.w; + pc->Height = ctx->si.h; + { + int prev_mb_rows = pc->mb_rows; + + if (setjmp(pbi->common.error.jmp)) + { + pbi->common.error.setjmp = 0; + /* same return value as used in vp8dx_receive_compressed_data */ + return -1; + } + + pbi->common.error.setjmp = 1; + + if (pc->Width <= 0) + { + pc->Width = w; + vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, + "Invalid frame width"); + } + + if (pc->Height <= 0) + { + pc->Height = h; + vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, + "Invalid frame height"); + } + + if (vp8_alloc_frame_buffers(pc, pc->Width, pc->Height)) + vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR, + "Failed to allocate frame buffers"); + + xd->pre = pc->yv12_fb[pc->lst_fb_idx]; + xd->dst = pc->yv12_fb[pc->new_fb_idx]; + +#if CONFIG_MULTITHREAD + for (i = 0; i < pbi->allocated_decoding_thread_count; i++) + { + pbi->mb_row_di[i].mbd.dst = pc->yv12_fb[pc->new_fb_idx]; + vp8_build_block_doffsets(&pbi->mb_row_di[i].mbd); + } +#endif + vp8_build_block_doffsets(&pbi->mb); + + /* allocate memory for last frame MODE_INFO array */ +#if 
CONFIG_ERROR_CONCEALMENT + + if (pbi->ec_enabled) + { + /* old prev_mip was released by vp8_de_alloc_frame_buffers() + * called in vp8_alloc_frame_buffers() */ + pc->prev_mip = vpx_calloc( + (pc->mb_cols + 1) * (pc->mb_rows + 1), + sizeof(MODE_INFO)); + + if (!pc->prev_mip) + { + vp8_de_alloc_frame_buffers(pc); + vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR, + "Failed to allocate" + "last frame MODE_INFO array"); + } + + pc->prev_mi = pc->prev_mip + pc->mode_info_stride + 1; + + if (vp8_alloc_overlap_lists(pbi)) + vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR, + "Failed to allocate overlap lists " + "for error concealment"); + } + +#endif + +#if CONFIG_MULTITHREAD + if (pbi->b_multithreaded_rd) + vp8mt_alloc_temp_buffers(pbi, pc->Width, prev_mb_rows); +#else + (void)prev_mb_rows; +#endif + } + + pbi->common.error.setjmp = 0; + + /* required to get past the first get_free_fb() call */ + ctx->pbi->common.fb_idx_ref_cnt[0] = 0; + } + + ctx->user_priv = user_priv; + if (vp8dx_receive_compressed_data(ctx->pbi, data_sz, data, deadline)) + { + VP8D_COMP *pbi = (VP8D_COMP *)ctx->pbi; + res = update_error_state(ctx, &pbi->common.error); + } + } + + return res; +} + +static vpx_image_t *vp8_get_frame(vpx_codec_alg_priv_t *ctx, + vpx_codec_iter_t *iter) +{ + vpx_image_t *img = NULL; + + /* iter acts as a flip flop, so an image is only returned on the first + * call to get_frame. 
+ */ + if (!(*iter)) + { YV12_BUFFER_CONFIG sd; int64_t time_stamp = 0, time_end_stamp = 0; vp8_ppflags_t flags = {0}; @@ -451,34 +590,10 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t *ctx, #endif } - if (vp8dx_receive_compressed_data(ctx->pbi, data_sz, data, deadline)) - { - VP8D_COMP *pbi = (VP8D_COMP *)ctx->pbi; - res = update_error_state(ctx, &pbi->common.error); - } - - if (!res && 0 == vp8dx_get_raw_frame(ctx->pbi, &sd, &time_stamp, &time_end_stamp, &flags)) + if (0 == vp8dx_get_raw_frame(ctx->pbi, &sd, &time_stamp, &time_end_stamp, &flags)) { - yuvconfig2image(&ctx->img, &sd, user_priv); - ctx->img_avail = 1; - } - } + yuvconfig2image(&ctx->img, &sd, ctx->user_priv); - return res; -} - -static vpx_image_t *vp8_get_frame(vpx_codec_alg_priv_t *ctx, - vpx_codec_iter_t *iter) -{ - vpx_image_t *img = NULL; - - if (ctx->img_avail) - { - /* iter acts as a flip flop, so an image is only returned on the first - * call to get_frame. - */ - if (!(*iter)) - { img = &ctx->img; *iter = img; } diff --git a/vp8/vp8cx.mk b/vp8/vp8cx.mk index 019edbd..0ae2f10 100644 --- a/vp8/vp8cx.mk +++ b/vp8/vp8cx.mk @@ -22,16 +22,9 @@ ifeq ($(ARCH_ARM),yes) include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8cx_arm.mk endif -VP8_CX_SRCS-yes += vp8_cx_iface.c +VP8_CX_SRCS-yes += vp8cx.mk -# encoder -#INCLUDES += algo/vpx_common/vpx_mem/include -#INCLUDES += common -#INCLUDES += common -#INCLUDES += common -#INCLUDES += algo/vpx_ref/cpu_id/include -#INCLUDES += common -#INCLUDES += encoder +VP8_CX_SRCS-yes += vp8_cx_iface.c VP8_CX_SRCS-yes += encoder/asm_enc_offsets.c VP8_CX_SRCS-yes += encoder/defaultcoefcounts.h @@ -99,6 +92,14 @@ VP8_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp8_enc_stubs_mmx.c VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_sse2.asm VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/fwalsh_sse2.asm VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/quantize_sse2.asm + +ifeq ($(CONFIG_TEMPORAL_DENOISING),yes) +VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/denoising_sse2.c +ifeq ($(HAVE_SSE2),yes) 
+vp8/encoder/x86/denoising_sse2.c.o: CFLAGS += -msse2 +endif +endif + VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/subtract_sse2.asm VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/temporal_filter_apply_sse2.asm VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp8_enc_stubs_sse2.c diff --git a/vp8/vp8cx_arm.mk b/vp8/vp8cx_arm.mk index b16615d..b030ee5 100644 --- a/vp8/vp8cx_arm.mk +++ b/vp8/vp8cx_arm.mk @@ -9,7 +9,7 @@ ## -#VP8_CX_SRCS list is modified according to different platforms. +VP8_CX_SRCS-$(ARCH_ARM) += vp8cx_arm.mk #File list for arm # encoder diff --git a/vp8/vp8dx.mk b/vp8/vp8dx.mk index 2cfd280..dd39190 100644 --- a/vp8/vp8dx.mk +++ b/vp8/vp8dx.mk @@ -18,6 +18,8 @@ VP8_DX_SRCS-no += $(VP8_COMMON_SRCS-no) VP8_DX_SRCS_REMOVE-yes += $(VP8_COMMON_SRCS_REMOVE-yes) VP8_DX_SRCS_REMOVE-no += $(VP8_COMMON_SRCS_REMOVE-no) +VP8_DX_SRCS-yes += vp8dx.mk + VP8_DX_SRCS-yes += vp8_dx_iface.c # common diff --git a/vp8_multi_resolution_encoder.c b/vp8_multi_resolution_encoder.c index 78f50c2..eae36a4 100644 --- a/vp8_multi_resolution_encoder.c +++ b/vp8_multi_resolution_encoder.c @@ -164,7 +164,7 @@ static void write_ivf_file_header(FILE *outfile, mem_put_le32(header+24, frame_cnt); /* length */ mem_put_le32(header+28, 0); /* unused */ - if(fwrite(header, 1, 32, outfile)); + (void) fwrite(header, 1, 32, outfile); } static void write_ivf_frame_header(FILE *outfile, @@ -181,7 +181,7 @@ static void write_ivf_frame_header(FILE *outfile, mem_put_le32(header+4, pts&0xFFFFFFFF); mem_put_le32(header+8, pts >> 32); - if(fwrite(header, 1, 12, outfile)); + (void) fwrite(header, 1, 12, outfile); } int main(int argc, char **argv) @@ -273,7 +273,7 @@ int main(int argc, char **argv) cfg[0].g_w = width; cfg[0].g_h = height; cfg[0].g_threads = 1; /* number of threads used */ - cfg[0].rc_dropframe_thresh = 0; + cfg[0].rc_dropframe_thresh = 30; cfg[0].rc_end_usage = VPX_CBR; cfg[0].rc_resize_allowed = 0; cfg[0].rc_min_quantizer = 4; @@ -283,13 +283,17 @@ int main(int argc, char **argv) 
cfg[0].rc_buf_initial_sz = 500; cfg[0].rc_buf_optimal_sz = 600; cfg[0].rc_buf_sz = 1000; - //cfg[0].rc_dropframe_thresh = 10; cfg[0].g_error_resilient = 1; /* Enable error resilient mode */ cfg[0].g_lag_in_frames = 0; /* Disable automatic keyframe placement */ + /* Note: These 3 settings are copied to all levels. But, except the lowest + * resolution level, all other levels are set to VPX_KF_DISABLED internally. + */ //cfg[0].kf_mode = VPX_KF_DISABLED; - cfg[0].kf_min_dist = cfg[0].kf_max_dist = 1000; + cfg[0].kf_mode = VPX_KF_AUTO; + cfg[0].kf_min_dist = 3000; + cfg[0].kf_max_dist = 3000; cfg[0].rc_target_bitrate = target_bitrate[0]; /* Set target bitrate */ cfg[0].g_timebase.num = 1; /* Set fps */ @@ -361,6 +365,12 @@ int main(int argc, char **argv) if(vpx_codec_control(&codec[i], VP8E_SET_STATIC_THRESHOLD, static_thresh)) die_codec(&codec[i], "Failed to set static threshold"); } + /* Set NOISE_SENSITIVITY to do TEMPORAL_DENOISING */ + for ( i=0; i< NUM_ENCODERS; i++) + { + if(vpx_codec_control(&codec[i], VP8E_SET_NOISE_SENSITIVITY, 0)) + die_codec(&codec[i], "Failed to set noise_sensitivity"); + } frame_avail = 1; got_data = 0; @@ -405,8 +415,8 @@ int main(int argc, char **argv) switch(pkt[i]->kind) { case VPX_CODEC_CX_FRAME_PKT: write_ivf_frame_header(outfile[i], pkt[i]); - if(fwrite(pkt[i]->data.frame.buf, 1, pkt[i]->data.frame.sz, - outfile[i])); + (void) fwrite(pkt[i]->data.frame.buf, 1, + pkt[i]->data.frame.sz, outfile[i]); break; case VPX_CODEC_PSNR_PKT: if (show_psnr) diff --git a/vp8_scalable_patterns.c b/vp8_scalable_patterns.c index 4311b1a..06270fe 100644 --- a/vp8_scalable_patterns.c +++ b/vp8_scalable_patterns.c @@ -93,7 +93,7 @@ static void write_ivf_file_header(FILE *outfile, mem_put_le32(header+24, frame_cnt); /* length */ mem_put_le32(header+28, 0); /* unused */ - if(fwrite(header, 1, 32, outfile)); + (void) fwrite(header, 1, 32, outfile); } @@ -111,10 +111,10 @@ static void write_ivf_frame_header(FILE *outfile, mem_put_le32(header+4, 
pts&0xFFFFFFFF); mem_put_le32(header+8, pts >> 32); - if(fwrite(header, 1, 12, outfile)); + (void) fwrite(header, 1, 12, outfile); } -static int mode_to_num_layers[9] = {2, 2, 3, 3, 3, 3, 5, 2, 3}; +static int mode_to_num_layers[12] = {1, 2, 2, 3, 3, 3, 3, 5, 2, 3, 3, 3}; int main(int argc, char **argv) { FILE *infile, *outfile[VPX_TS_MAX_LAYERS]; @@ -129,8 +129,8 @@ int main(int argc, char **argv) { int got_data; int flags = 0; int i; - int pts = 0; // PTS starts at 0 - int frame_duration = 1; // 1 timebase tick per frame + int pts = 0; /* PTS starts at 0 */ + int frame_duration = 1; /* 1 timebase tick per frame */ int layering_mode = 0; int frames_in_layer[VPX_TS_MAX_LAYERS] = {0}; @@ -138,7 +138,7 @@ int main(int argc, char **argv) { int flag_periodicity; int max_intra_size_pct; - // Check usage and arguments + /* Check usage and arguments */ if (argc < 9) die("Usage: %s " " ... \n", argv[0]); @@ -150,43 +150,43 @@ int main(int argc, char **argv) { if (!sscanf(argv[7], "%d", &layering_mode)) die ("Invalid mode %s", argv[7]); - if (layering_mode<0 || layering_mode>8) - die ("Invalid mode (0..8) %s", argv[7]); + if (layering_mode<0 || layering_mode>11) + die ("Invalid mode (0..11) %s", argv[7]); if (argc != 8+mode_to_num_layers[layering_mode]) die ("Invalid number of arguments"); - if (!vpx_img_alloc (&raw, VPX_IMG_FMT_I420, width, height, 1)) + if (!vpx_img_alloc (&raw, VPX_IMG_FMT_I420, width, height, 32)) die ("Failed to allocate image", width, height); printf("Using %s\n",vpx_codec_iface_name(interface)); - // Populate encoder configuration + /* Populate encoder configuration */ res = vpx_codec_enc_config_default(interface, &cfg, 0); if(res) { printf("Failed to get config: %s\n", vpx_codec_err_to_string(res)); return EXIT_FAILURE; } - // Update the default configuration with our settings + /* Update the default configuration with our settings */ cfg.g_w = width; cfg.g_h = height; - // Timebase format e.g. 
30fps: numerator=1, demoninator=30 + /* Timebase format e.g. 30fps: numerator=1, demoninator=30 */ if (!sscanf (argv[5], "%d", &cfg.g_timebase.num )) die ("Invalid timebase numerator %s", argv[5]); if (!sscanf (argv[6], "%d", &cfg.g_timebase.den )) die ("Invalid timebase denominator %s", argv[6]); for (i=8; i<8+mode_to_num_layers[layering_mode]; i++) - if (!sscanf(argv[i], "%d", &cfg.ts_target_bitrate[i-8])) + if (!sscanf(argv[i], "%ud", &cfg.ts_target_bitrate[i-8])) die ("Invalid data rate %s", argv[i]); - // Real time parameters - cfg.rc_dropframe_thresh = 0; // 30 + /* Real time parameters */ + cfg.rc_dropframe_thresh = 0; cfg.rc_end_usage = VPX_CBR; cfg.rc_resize_allowed = 0; - cfg.rc_min_quantizer = 8; + cfg.rc_min_quantizer = 2; cfg.rc_max_quantizer = 56; cfg.rc_undershoot_pct = 100; cfg.rc_overshoot_pct = 15; @@ -194,25 +194,44 @@ int main(int argc, char **argv) { cfg.rc_buf_optimal_sz = 600; cfg.rc_buf_sz = 1000; - // Enable error resilient mode + /* Enable error resilient mode */ cfg.g_error_resilient = 1; cfg.g_lag_in_frames = 0; cfg.kf_mode = VPX_KF_DISABLED; - // Disable automatic keyframe placement - cfg.kf_min_dist = cfg.kf_max_dist = 1000; + /* Disable automatic keyframe placement */ + cfg.kf_min_dist = cfg.kf_max_dist = 3000; - // Temporal scaling parameters: - // NOTE: The 3 prediction frames cannot be used interchangeably due to - // differences in the way they are handled throughout the code. The - // frames should be allocated to layers in the order LAST, GF, ARF. - // Other combinations work, but may produce slightly inferior results. + /* Default setting for bitrate: used in special case of 1 layer (case 0). */ + cfg.rc_target_bitrate = cfg.ts_target_bitrate[0]; + + /* Temporal scaling parameters: */ + /* NOTE: The 3 prediction frames cannot be used interchangeably due to + * differences in the way they are handled throughout the code. The + * frames should be allocated to layers in the order LAST, GF, ARF. 
+ * Other combinations work, but may produce slightly inferior results. + */ switch (layering_mode) { - case 0: { - // 2-layers, 2-frame period + /* 1-layer */ + int ids[1] = {0}; + cfg.ts_number_layers = 1; + cfg.ts_periodicity = 1; + cfg.ts_rate_decimator[0] = 1; + memcpy(cfg.ts_layer_id, ids, sizeof(ids)); + + flag_periodicity = cfg.ts_periodicity; + + // Update L only. + layer_flags[0] = VPX_EFLAG_FORCE_KF | + VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; + break; + } + case 1: + { + /* 2-layers, 2-frame period */ int ids[2] = {0,1}; cfg.ts_number_layers = 2; cfg.ts_periodicity = 2; @@ -222,14 +241,14 @@ int main(int argc, char **argv) { flag_periodicity = cfg.ts_periodicity; #if 1 - // 0=L, 1=GF, Intra-layer prediction enabled + /* 0=L, 1=GF, Intra-layer prediction enabled */ layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF; layer_flags[1] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_REF_ARF; #else - // 0=L, 1=GF, Intra-layer prediction disabled + /* 0=L, 1=GF, Intra-layer prediction disabled */ layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF; @@ -239,9 +258,9 @@ int main(int argc, char **argv) { break; } - case 1: + case 2: { - // 2-layers, 3-frame period + /* 2-layers, 3-frame period */ int ids[3] = {0,1,1}; cfg.ts_number_layers = 2; cfg.ts_periodicity = 3; @@ -251,7 +270,7 @@ int main(int argc, char **argv) { flag_periodicity = cfg.ts_periodicity; - // 0=L, 1=GF, Intra-layer prediction enabled + /* 0=L, 1=GF, Intra-layer prediction enabled */ layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; @@ -262,9 +281,9 @@ int main(int argc, char **argv) { break; } - case 2: + case 3: { - // 3-layers, 6-frame period + /* 3-layers, 6-frame period */ int ids[6] = {0,2,2,1,2,2}; cfg.ts_number_layers = 3; cfg.ts_periodicity = 6; 
@@ -275,7 +294,7 @@ int main(int argc, char **argv) { flag_periodicity = cfg.ts_periodicity; - // 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled + /* 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled */ layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; @@ -288,9 +307,9 @@ int main(int argc, char **argv) { break; } - case 3: + case 4: { - // 3-layers, 4-frame period + /* 3-layers, 4-frame period */ int ids[4] = {0,2,1,2}; cfg.ts_number_layers = 3; cfg.ts_periodicity = 4; @@ -301,7 +320,7 @@ int main(int argc, char **argv) { flag_periodicity = cfg.ts_periodicity; - // 0=L, 1=GF, 2=ARF, Intra-layer prediction disabled + /* 0=L, 1=GF, 2=ARF, Intra-layer prediction disabled */ layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; @@ -315,9 +334,9 @@ int main(int argc, char **argv) { break; } - case 4: + case 5: { - // 3-layers, 4-frame period + /* 3-layers, 4-frame period */ int ids[4] = {0,2,1,2}; cfg.ts_number_layers = 3; cfg.ts_periodicity = 4; @@ -328,8 +347,9 @@ int main(int argc, char **argv) { flag_periodicity = cfg.ts_periodicity; - // 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled in layer 1, - // disabled in layer 2 + /* 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled in layer 1, + * disabled in layer 2 + */ layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; @@ -342,9 +362,9 @@ int main(int argc, char **argv) { break; } - case 5: + case 6: { - // 3-layers, 4-frame period + /* 3-layers, 4-frame period */ int ids[4] = {0,2,1,2}; cfg.ts_number_layers = 3; cfg.ts_periodicity = 4; @@ -355,7 +375,7 @@ int main(int argc, char **argv) { flag_periodicity = cfg.ts_periodicity; - // 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled + /* 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled */ layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_REF_GF | 
VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; @@ -366,11 +386,11 @@ int main(int argc, char **argv) { break; } - case 6: + case 7: { - // NOTE: Probably of academic interest only + /* NOTE: Probably of academic interest only */ - // 5-layers, 16-frame period + /* 5-layers, 16-frame period */ int ids[16] = {0,4,3,4,2,4,3,4,1,4,3,4,2,4,3,4}; cfg.ts_number_layers = 5; cfg.ts_periodicity = 16; @@ -405,9 +425,9 @@ int main(int argc, char **argv) { break; } - case 7: + case 8: { - // 2-layers + /* 2-layers, with sync point at first frame of layer 1. */ int ids[2] = {0,1}; cfg.ts_number_layers = 2; cfg.ts_periodicity = 2; @@ -417,30 +437,49 @@ int main(int argc, char **argv) { flag_periodicity = 8; - // 0=L, 1=GF + /* 0=L, 1=GF */ + // ARF is used as predictor for all frames, and is only updated on + // key frame. Sync point every 8 frames. + + // Layer 0: predict from L and ARF, update L and G. layer_flags[0] = VPX_EFLAG_FORCE_KF | - VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | - VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; - layer_flags[1] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | - VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ARF; - layer_flags[2] = - layer_flags[4] = - layer_flags[6] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | - VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; - layer_flags[3] = - layer_flags[5] = VP8_EFLAG_NO_REF_ARF | - VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST; - layer_flags[7] = VP8_EFLAG_NO_REF_ARF | - VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF | - VP8_EFLAG_NO_UPD_ARF | + VP8_EFLAG_NO_REF_GF | + VP8_EFLAG_NO_UPD_ARF; + + // Layer 1: sync point: predict from L and ARF, and update G. + layer_flags[1] = VP8_EFLAG_NO_REF_GF | + VP8_EFLAG_NO_UPD_LAST | + VP8_EFLAG_NO_UPD_ARF; + + // Layer 0, predict from L and ARF, update L. + layer_flags[2] = VP8_EFLAG_NO_REF_GF | + VP8_EFLAG_NO_UPD_GF | + VP8_EFLAG_NO_UPD_ARF; + + // Layer 1: predict from L, G and ARF, and update G. 
+ layer_flags[3] = VP8_EFLAG_NO_UPD_ARF | + VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ENTROPY; + + // Layer 0 + layer_flags[4] = layer_flags[2]; + + // Layer 1 + layer_flags[5] = layer_flags[3]; + + // Layer 0 + layer_flags[6] = layer_flags[4]; + + // Layer 1 + layer_flags[7] = layer_flags[5]; break; } - case 8: - default: + case 9: { - // 3-layers + /* 3-layers */ + // Sync points for layer 1 and 2 every 8 frames. + int ids[4] = {0,2,1,2}; cfg.ts_number_layers = 3; cfg.ts_periodicity = 4; @@ -451,7 +490,7 @@ int main(int argc, char **argv) { flag_periodicity = 8; - // 0=L, 1=GF, 2=ARF + /* 0=L, 1=GF, 2=ARF */ layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; @@ -470,13 +509,109 @@ int main(int argc, char **argv) { VP8_EFLAG_NO_UPD_ENTROPY; break; } + case 10: + { + // 3-layers structure where ARF is used as predictor for all frames, + // and is only updated on key frame. + // Sync points for layer 1 and 2 every 8 frames. + + int ids[4] = {0,2,1,2}; + cfg.ts_number_layers = 3; + cfg.ts_periodicity = 4; + cfg.ts_rate_decimator[0] = 4; + cfg.ts_rate_decimator[1] = 2; + cfg.ts_rate_decimator[2] = 1; + memcpy(cfg.ts_layer_id, ids, sizeof(ids)); + + flag_periodicity = 8; + + /* 0=L, 1=GF, 2=ARF */ + + // Layer 0: predict from L and ARF; update L and G. + layer_flags[0] = VPX_EFLAG_FORCE_KF | + VP8_EFLAG_NO_UPD_ARF | + VP8_EFLAG_NO_REF_GF; + + // Layer 2: sync point: predict from L and ARF; update none. + layer_flags[1] = VP8_EFLAG_NO_REF_GF | + VP8_EFLAG_NO_UPD_GF | + VP8_EFLAG_NO_UPD_ARF | + VP8_EFLAG_NO_UPD_LAST | + VP8_EFLAG_NO_UPD_ENTROPY; + + // Layer 1: sync point: predict from L and ARF; update G. + layer_flags[2] = VP8_EFLAG_NO_REF_GF | + VP8_EFLAG_NO_UPD_ARF | + VP8_EFLAG_NO_UPD_LAST; + + // Layer 2: predict from L, G, ARF; update none. 
+ layer_flags[3] = VP8_EFLAG_NO_UPD_GF | + VP8_EFLAG_NO_UPD_ARF | + VP8_EFLAG_NO_UPD_LAST | + VP8_EFLAG_NO_UPD_ENTROPY; + + // Layer 0: predict from L and ARF; update L. + layer_flags[4] = VP8_EFLAG_NO_UPD_GF | + VP8_EFLAG_NO_UPD_ARF | + VP8_EFLAG_NO_REF_GF; + + // Layer 2: predict from L, G, ARF; update none. + layer_flags[5] = layer_flags[3]; + + // Layer 1: predict from L, G, ARF; update G. + layer_flags[6] = VP8_EFLAG_NO_UPD_ARF | + VP8_EFLAG_NO_UPD_LAST; + + // Layer 2: predict from L, G, ARF; update none. + layer_flags[7] = layer_flags[3]; + break; + } + case 11: + default: + { + // 3-layers structure as in case 10, but no sync/refresh points for + // layer 1 and 2. + + int ids[4] = {0,2,1,2}; + cfg.ts_number_layers = 3; + cfg.ts_periodicity = 4; + cfg.ts_rate_decimator[0] = 4; + cfg.ts_rate_decimator[1] = 2; + cfg.ts_rate_decimator[2] = 1; + memcpy(cfg.ts_layer_id, ids, sizeof(ids)); + + flag_periodicity = 8; + + /* 0=L, 1=GF, 2=ARF */ + + // Layer 0: predict from L and ARF; update L. + layer_flags[0] = VP8_EFLAG_NO_UPD_GF | + VP8_EFLAG_NO_UPD_ARF | + VP8_EFLAG_NO_REF_GF; + layer_flags[4] = layer_flags[0]; + + // Layer 1: predict from L, G, ARF; update G. + layer_flags[2] = VP8_EFLAG_NO_UPD_ARF | + VP8_EFLAG_NO_UPD_LAST; + layer_flags[6] = layer_flags[2]; + + // Layer 2: predict from L, G, ARF; update none. 
+ layer_flags[1] = VP8_EFLAG_NO_UPD_GF | + VP8_EFLAG_NO_UPD_ARF | + VP8_EFLAG_NO_UPD_LAST | + VP8_EFLAG_NO_UPD_ENTROPY; + layer_flags[3] = layer_flags[1]; + layer_flags[5] = layer_flags[1]; + layer_flags[7] = layer_flags[1]; + break; + } } - // Open input file + /* Open input file */ if(!(infile = fopen(argv[1], "rb"))) die("Failed to open %s for reading", argv[1]); - // Open an output file for each stream + /* Open an output file for each stream */ for (i=0; i(_tokenPartitions)); frame_avail = 1; while (frame_avail || got_data) { @@ -517,8 +651,8 @@ int main(int argc, char **argv) { 1, flags, VPX_DL_REALTIME)) die_codec(&codec, "Failed to encode frame"); - // Reset KF flag - if (layering_mode != 6) + /* Reset KF flag */ + if (layering_mode != 7) layer_flags[0] &= ~VPX_EFLAG_FORCE_KF; got_data = 0; @@ -530,29 +664,25 @@ int main(int argc, char **argv) { idata.frame.buf, 1, pkt->data.frame.sz, - outfile[i])); + (void) fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, + outfile[i]); frames_in_layer[i]++; } break; default: break; } - printf (pkt->kind == VPX_CODEC_CX_FRAME_PKT - && (pkt->data.frame.flags & VPX_FRAME_IS_KEY)? "K":"."); - fflush (stdout); } frame_cnt++; pts += frame_duration; } - printf ("\n"); fclose (infile); printf ("Processed %d frames.\n",frame_cnt-1); if (vpx_codec_destroy(&codec)) die_codec (&codec, "Failed to destroy codec"); - // Try to rewrite the output file headers with the actual frame count + /* Try to rewrite the output file headers with the actual frame count */ for (i=0; inum; mr_cfg.mr_down_sampling_factor.den = dsf->den; + /* Force Key-frame synchronization. Namely, encoder at higher + * resolution always use the same frame_type chosen by the + * lowest-resolution encoder. + */ + if(mr_cfg.mr_encoder_id) + cfg->kf_mode = VPX_KF_DISABLED; + ctx->iface = iface; ctx->name = iface->name; ctx->priv = NULL; @@ -126,8 +133,20 @@ vpx_codec_err_t vpx_codec_enc_init_multi_ver(vpx_codec_ctx_t *ctx, if (res) { - ctx->err_detail = ctx->priv ? 
ctx->priv->err_detail : NULL; + const char *error_detail = + ctx->priv ? ctx->priv->err_detail : NULL; + /* Destroy current ctx */ + ctx->err_detail = error_detail; vpx_codec_destroy(ctx); + + /* Destroy already allocated high-level ctx */ + while (i) + { + ctx--; + ctx->err_detail = error_detail; + vpx_codec_destroy(ctx); + i--; + } } if (ctx->priv) diff --git a/vpx/vp8cx.h b/vpx/vp8cx.h index 0af631c..a3c95d2 100644 --- a/vpx/vp8cx.h +++ b/vpx/vp8cx.h @@ -204,8 +204,8 @@ typedef struct vpx_roi_map unsigned char *roi_map; /**< specify an id between 0 and 3 for each 16x16 region within a frame */ unsigned int rows; /**< number of rows */ unsigned int cols; /**< number of cols */ - int delta_q[4]; /**< quantizer delta [-64, 64] off baseline for regions with id between 0 and 3*/ - int delta_lf[4]; /**< loop filter strength delta [-32, 32] for regions with id between 0 and 3 */ + int delta_q[4]; /**< quantizer delta [-63, 63] off baseline for regions with id between 0 and 3*/ + int delta_lf[4]; /**< loop filter strength delta [-63, 63] for regions with id between 0 and 3 */ unsigned int static_threshold[4];/**< threshold for region to be treated as static */ } vpx_roi_map_t; @@ -234,18 +234,6 @@ typedef struct vpx_scaling_mode VPX_SCALING_MODE v_scaling_mode; /**< vertical scaling mode */ } vpx_scaling_mode_t; -/*!\brief VP8 encoding mode - * - * This defines VP8 encoding mode - * - */ -typedef enum -{ - VP8_BEST_QUALITY_ENCODING, - VP8_GOOD_QUALITY_ENCODING, - VP8_REAL_TIME_ENCODING -} vp8e_encoding_mode; - /*!\brief VP8 token partition mode * * This defines VP8 partitioning mode for compressed data, i.e., the number of @@ -298,12 +286,12 @@ VPX_CTRL_USE_TYPE(VP8E_SET_ENABLEAUTOALTREF, unsigned int) VPX_CTRL_USE_TYPE(VP8E_SET_NOISE_SENSITIVITY, unsigned int) VPX_CTRL_USE_TYPE(VP8E_SET_SHARPNESS, unsigned int) VPX_CTRL_USE_TYPE(VP8E_SET_STATIC_THRESHOLD, unsigned int) -VPX_CTRL_USE_TYPE(VP8E_SET_TOKEN_PARTITIONS, vp8e_token_partitions) 
+VPX_CTRL_USE_TYPE(VP8E_SET_TOKEN_PARTITIONS, int) /* vp8e_token_partitions */ VPX_CTRL_USE_TYPE(VP8E_SET_ARNR_MAXFRAMES, unsigned int) VPX_CTRL_USE_TYPE(VP8E_SET_ARNR_STRENGTH , unsigned int) VPX_CTRL_USE_TYPE(VP8E_SET_ARNR_TYPE , unsigned int) -VPX_CTRL_USE_TYPE(VP8E_SET_TUNING, vp8e_tuning) +VPX_CTRL_USE_TYPE(VP8E_SET_TUNING, int) /* vp8e_tuning */ VPX_CTRL_USE_TYPE(VP8E_SET_CQ_LEVEL , unsigned int) VPX_CTRL_USE_TYPE(VP8E_GET_LAST_QUANTIZER, int *) diff --git a/vpx/vpx_codec.h b/vpx/vpx_codec.h index d92e165..243b7a5 100644 --- a/vpx/vpx_codec.h +++ b/vpx/vpx_codec.h @@ -49,15 +49,22 @@ extern "C" { #ifndef DEPRECATED #if defined(__GNUC__) && __GNUC__ #define DEPRECATED __attribute__ ((deprecated)) -#define DECLSPEC_DEPRECATED /**< \copydoc #DEPRECATED */ #elif defined(_MSC_VER) #define DEPRECATED -#define DECLSPEC_DEPRECATED __declspec(deprecated) /**< \copydoc #DEPRECATED */ #else #define DEPRECATED -#define DECLSPEC_DEPRECATED /**< \copydoc #DEPRECATED */ #endif +#endif /* DEPRECATED */ + +#ifndef DECLSPEC_DEPRECATED +#if defined(__GNUC__) && __GNUC__ +#define DECLSPEC_DEPRECATED /**< \copydoc #DEPRECATED */ +#elif defined(_MSC_VER) +#define DECLSPEC_DEPRECATED __declspec(deprecated) /**< \copydoc #DEPRECATED */ +#else +#define DECLSPEC_DEPRECATED /**< \copydoc #DEPRECATED */ #endif +#endif /* DECLSPEC_DEPRECATED */ /*!\brief Decorator indicating a function is potentially unused */ #ifdef UNUSED diff --git a/vpx/vpx_codec.mk b/vpx/vpx_codec.mk index 427fd0f..ffa123f 100644 --- a/vpx/vpx_codec.mk +++ b/vpx/vpx_codec.mk @@ -11,6 +11,21 @@ API_EXPORTS += exports +API_SRCS-$(CONFIG_VP8_ENCODER) += vp8.h +API_SRCS-$(CONFIG_VP8_ENCODER) += vp8cx.h +API_DOC_SRCS-$(CONFIG_VP8_ENCODER) += vp8.h +API_DOC_SRCS-$(CONFIG_VP8_ENCODER) += vp8cx.h + +API_SRCS-$(CONFIG_VP8_DECODER) += vp8.h +API_SRCS-$(CONFIG_VP8_DECODER) += vp8dx.h +API_DOC_SRCS-$(CONFIG_VP8_DECODER) += vp8.h +API_DOC_SRCS-$(CONFIG_VP8_DECODER) += vp8dx.h + +API_DOC_SRCS-yes += vpx_codec.h +API_DOC_SRCS-yes 
+= vpx_decoder.h +API_DOC_SRCS-yes += vpx_encoder.h +API_DOC_SRCS-yes += vpx_image.h + API_SRCS-yes += src/vpx_decoder.c API_SRCS-yes += vpx_decoder.h API_SRCS-yes += src/vpx_encoder.c @@ -23,3 +38,4 @@ API_SRCS-yes += vpx_codec.mk API_SRCS-yes += vpx_codec_impl_bottom.h API_SRCS-yes += vpx_codec_impl_top.h API_SRCS-yes += vpx_image.h +API_SRCS-$(BUILD_LIBVPX) += vpx_integer.h diff --git a/vpx/vpx_decoder.h b/vpx/vpx_decoder.h index 7992cc4..1ccf1c5 100644 --- a/vpx/vpx_decoder.h +++ b/vpx/vpx_decoder.h @@ -113,6 +113,10 @@ extern "C" { * function directly, to ensure that the ABI version number parameter * is properly initialized. * + * If the library was configured with --disable-multithread, this call + * is not thread safe and should be guarded with a lock if being used + * in a multithreaded context. + * * In XMA mode (activated by setting VPX_CODEC_USE_XMA in the flags * parameter), the storage pointed to by the cfg parameter must be * kept readable and stable until all memory maps have been set. diff --git a/vpx/vpx_encoder.h b/vpx/vpx_encoder.h index 239036e..67d9033 100644 --- a/vpx/vpx_encoder.h +++ b/vpx/vpx_encoder.h @@ -655,6 +655,10 @@ extern "C" { * function directly, to ensure that the ABI version number parameter * is properly initialized. * + * If the library was configured with --disable-multithread, this call + * is not thread safe and should be guarded with a lock if being used + * in a multithreaded context. + * * In XMA mode (activated by setting VPX_CODEC_USE_XMA in the flags * parameter), the storage pointed to by the cfg parameter must be * kept readable and stable until all memory maps have been set. diff --git a/vpx_ports/arm_cpudetect.c b/vpx_ports/arm_cpudetect.c index ebe428d..8ff95a1 100644 --- a/vpx_ports/arm_cpudetect.c +++ b/vpx_ports/arm_cpudetect.c @@ -32,8 +32,33 @@ static int arm_cpu_env_mask(void) return env && *env ? 
(int)strtol(env, NULL, 0) : ~0; } +#if !CONFIG_RUNTIME_CPU_DETECT -#if defined(_MSC_VER) +int arm_cpu_caps(void) +{ + /* This function should actually be a no-op. There is no way to adjust any of + * these because the RTCD tables do not exist: the functions are called + * statically */ + int flags; + int mask; + if (!arm_cpu_env_flags(&flags)) + { + return flags; + } + mask = arm_cpu_env_mask(); +#if HAVE_EDSP + flags |= HAS_EDSP; +#endif /* HAVE_EDSP */ +#if HAVE_MEDIA + flags |= HAS_MEDIA; +#endif /* HAVE_MEDIA */ +#if HAVE_NEON + flags |= HAS_NEON; +#endif /* HAVE_NEON */ + return flags & mask; +} + +#elif defined(_MSC_VER) /* end !CONFIG_RUNTIME_CPU_DETECT */ /*For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.*/ #define WIN32_LEAN_AND_MEAN #define WIN32_EXTRA_LEAN @@ -52,7 +77,7 @@ int arm_cpu_caps(void) * instructions via their assembled hex code. * All of these instructions should be essentially nops. */ -#if defined(HAVE_EDSP) +#if HAVE_EDSP if (mask & HAS_EDSP) { __try @@ -66,7 +91,7 @@ int arm_cpu_caps(void) /*Ignore exception.*/ } } -#if defined(HAVE_MEDIA) +#if HAVE_MEDIA if (mask & HAS_MEDIA) __try { @@ -79,7 +104,7 @@ int arm_cpu_caps(void) /*Ignore exception.*/ } } -#if defined(HAVE_NEON) +#if HAVE_NEON if (mask & HAS_NEON) { __try @@ -93,14 +118,13 @@ int arm_cpu_caps(void) /*Ignore exception.*/ } } -#endif -#endif -#endif +#endif /* HAVE_NEON */ +#endif /* HAVE_MEDIA */ +#endif /* HAVE_EDSP */ return flags & mask; } -#elif defined(__linux__) -#if defined(__ANDROID__) +#elif defined(__ANDROID__) /* end _MSC_VER */ #include int arm_cpu_caps(void) @@ -115,19 +139,20 @@ int arm_cpu_caps(void) mask = arm_cpu_env_mask(); features = android_getCpuFeatures(); -#if defined(HAVE_EDSP) +#if HAVE_EDSP flags |= HAS_EDSP; -#endif -#if defined(HAVE_MEDIA) +#endif /* HAVE_EDSP */ +#if HAVE_MEDIA flags |= HAS_MEDIA; -#endif -#if defined(HAVE_NEON) +#endif /* HAVE_MEDIA */ +#if HAVE_NEON if (features & ANDROID_CPU_ARM_FEATURE_NEON) flags |= HAS_NEON; -#endif 
+#endif /* HAVE_NEON */ return flags & mask; } -#else // !defined(__ANDROID__) + +#elif defined(__linux__) /* end __ANDROID__ */ #include int arm_cpu_caps(void) @@ -153,27 +178,27 @@ int arm_cpu_caps(void) char buf[512]; while (fgets(buf, 511, fin) != NULL) { -#if defined(HAVE_EDSP) || defined(HAVE_NEON) +#if HAVE_EDSP || HAVE_NEON if (memcmp(buf, "Features", 8) == 0) { char *p; -#if defined(HAVE_EDSP) +#if HAVE_EDSP p=strstr(buf, " edsp"); if (p != NULL && (p[5] == ' ' || p[5] == '\n')) { flags |= HAS_EDSP; } -#if defined(HAVE_NEON) +#if HAVE_NEON p = strstr(buf, " neon"); if (p != NULL && (p[5] == ' ' || p[5] == '\n')) { flags |= HAS_NEON; } -#endif -#endif +#endif /* HAVE_NEON */ +#endif /* HAVE_EDSP */ } -#endif -#if defined(HAVE_MEDIA) +#endif /* HAVE_EDSP || HAVE_NEON */ +#if HAVE_MEDIA if (memcmp(buf, "CPU architecture:",17) == 0){ int version; version = atoi(buf+17); @@ -182,37 +207,13 @@ int arm_cpu_caps(void) flags |= HAS_MEDIA; } } -#endif +#endif /* HAVE_MEDIA */ } fclose(fin); } return flags & mask; } -#endif // defined(__linux__) -#elif !CONFIG_RUNTIME_CPU_DETECT - -int arm_cpu_caps(void) -{ - int flags; - int mask; - if (!arm_cpu_env_flags(&flags)) - { - return flags; - } - mask = arm_cpu_env_mask(); -#if defined(HAVE_EDSP) - flags |= HAS_EDSP; -#endif -#if defined(HAVE_MEDIA) - flags |= HAS_MEDIA; -#endif -#if defined(HAVE_NEON) - flags |= HAS_NEON; -#endif - return flags & mask; -} - -#else +#else /* end __linux__ */ #error "--enable-runtime-cpu-detect selected, but no CPU detection method " \ - "available for your platform. Reconfigure without --enable-runtime-cpu-detect." + "available for your platform. Reconfigure with --disable-runtime-cpu-detect." 
#endif diff --git a/vpx_ports/asm_offsets.h b/vpx_ports/asm_offsets.h index d3b4fc7..7b6ae4a 100644 --- a/vpx_ports/asm_offsets.h +++ b/vpx_ports/asm_offsets.h @@ -19,11 +19,11 @@ static void assert_##name(void) {switch(0){case 0:case !!(cond):;}} #if INLINE_ASM -#define DEFINE(sym, val) asm("\n" #sym " EQU %0" : : "i" (val)); +#define DEFINE(sym, val) asm("\n" #sym " EQU %0" : : "i" (val)) #define BEGIN int main(void) { #define END return 0; } #else -#define DEFINE(sym, val) int sym = val; +#define DEFINE(sym, val) const int sym = val #define BEGIN #define END #endif diff --git a/vpx_ports/emms.asm b/vpx_ports/emms.asm index 306e235..efad1a5 100644 --- a/vpx_ports/emms.asm +++ b/vpx_ports/emms.asm @@ -12,14 +12,14 @@ %include "vpx_ports/x86_abi_support.asm" section .text - global sym(vpx_reset_mmx_state) +global sym(vpx_reset_mmx_state) PRIVATE sym(vpx_reset_mmx_state): emms ret %ifidn __OUTPUT_FORMAT__,x64 -global sym(vpx_winx64_fldcw) +global sym(vpx_winx64_fldcw) PRIVATE sym(vpx_winx64_fldcw): sub rsp, 8 mov [rsp], rcx ; win x64 specific @@ -28,7 +28,7 @@ sym(vpx_winx64_fldcw): ret -global sym(vpx_winx64_fstcw) +global sym(vpx_winx64_fstcw) PRIVATE sym(vpx_winx64_fstcw): sub rsp, 8 fstcw [rsp] diff --git a/vpx_ports/mem_ops.h b/vpx_ports/mem_ops.h index 0e52368..dec28d5 100644 --- a/vpx_ports/mem_ops.h +++ b/vpx_ports/mem_ops.h @@ -145,27 +145,27 @@ static unsigned MEM_VALUE_T mem_get_le32(const void *vmem) #undef mem_get_sbe16 #define mem_get_sbe16 mem_ops_wrap_symbol(mem_get_sbe16) -mem_get_s_generic(be, 16); +mem_get_s_generic(be, 16) #undef mem_get_sbe24 #define mem_get_sbe24 mem_ops_wrap_symbol(mem_get_sbe24) -mem_get_s_generic(be, 24); +mem_get_s_generic(be, 24) #undef mem_get_sbe32 #define mem_get_sbe32 mem_ops_wrap_symbol(mem_get_sbe32) -mem_get_s_generic(be, 32); +mem_get_s_generic(be, 32) #undef mem_get_sle16 #define mem_get_sle16 mem_ops_wrap_symbol(mem_get_sle16) -mem_get_s_generic(le, 16); +mem_get_s_generic(le, 16) #undef mem_get_sle24 #define 
mem_get_sle24 mem_ops_wrap_symbol(mem_get_sle24) -mem_get_s_generic(le, 24); +mem_get_s_generic(le, 24) #undef mem_get_sle32 #define mem_get_sle32 mem_ops_wrap_symbol(mem_get_sle32) -mem_get_s_generic(le, 32); +mem_get_s_generic(le, 32) #undef mem_put_be16 #define mem_put_be16 mem_ops_wrap_symbol(mem_put_be16) diff --git a/vpx_ports/mem_ops_aligned.h b/vpx_ports/mem_ops_aligned.h index 0fbba65..fca653a 100644 --- a/vpx_ports/mem_ops_aligned.h +++ b/vpx_ports/mem_ops_aligned.h @@ -99,51 +99,51 @@ #undef mem_get_be16_aligned #define mem_get_be16_aligned mem_ops_wrap_symbol(mem_get_be16_aligned) -mem_get_be_aligned_generic(16); +mem_get_be_aligned_generic(16) #undef mem_get_be32_aligned #define mem_get_be32_aligned mem_ops_wrap_symbol(mem_get_be32_aligned) -mem_get_be_aligned_generic(32); +mem_get_be_aligned_generic(32) #undef mem_get_le16_aligned #define mem_get_le16_aligned mem_ops_wrap_symbol(mem_get_le16_aligned) -mem_get_le_aligned_generic(16); +mem_get_le_aligned_generic(16) #undef mem_get_le32_aligned #define mem_get_le32_aligned mem_ops_wrap_symbol(mem_get_le32_aligned) -mem_get_le_aligned_generic(32); +mem_get_le_aligned_generic(32) #undef mem_get_sbe16_aligned #define mem_get_sbe16_aligned mem_ops_wrap_symbol(mem_get_sbe16_aligned) -mem_get_sbe_aligned_generic(16); +mem_get_sbe_aligned_generic(16) #undef mem_get_sbe32_aligned #define mem_get_sbe32_aligned mem_ops_wrap_symbol(mem_get_sbe32_aligned) -mem_get_sbe_aligned_generic(32); +mem_get_sbe_aligned_generic(32) #undef mem_get_sle16_aligned #define mem_get_sle16_aligned mem_ops_wrap_symbol(mem_get_sle16_aligned) -mem_get_sle_aligned_generic(16); +mem_get_sle_aligned_generic(16) #undef mem_get_sle32_aligned #define mem_get_sle32_aligned mem_ops_wrap_symbol(mem_get_sle32_aligned) -mem_get_sle_aligned_generic(32); +mem_get_sle_aligned_generic(32) #undef mem_put_be16_aligned #define mem_put_be16_aligned mem_ops_wrap_symbol(mem_put_be16_aligned) -mem_put_be_aligned_generic(16); +mem_put_be_aligned_generic(16) 
#undef mem_put_be32_aligned #define mem_put_be32_aligned mem_ops_wrap_symbol(mem_put_be32_aligned) -mem_put_be_aligned_generic(32); +mem_put_be_aligned_generic(32) #undef mem_put_le16_aligned #define mem_put_le16_aligned mem_ops_wrap_symbol(mem_put_le16_aligned) -mem_put_le_aligned_generic(16); +mem_put_le_aligned_generic(16) #undef mem_put_le32_aligned #define mem_put_le32_aligned mem_ops_wrap_symbol(mem_put_le32_aligned) -mem_put_le_aligned_generic(32); +mem_put_le_aligned_generic(32) #undef mem_get_ne_aligned_generic #undef mem_get_se_aligned_generic diff --git a/vpx_ports/vpx_ports.mk b/vpx_ports/vpx_ports.mk new file mode 100644 index 0000000..e6cb52f --- /dev/null +++ b/vpx_ports/vpx_ports.mk @@ -0,0 +1,26 @@ +## +## Copyright (c) 2012 The WebM project authors. All Rights Reserved. +## +## Use of this source code is governed by a BSD-style license +## that can be found in the LICENSE file in the root of the source +## tree. An additional intellectual property rights grant can be found +## in the file PATENTS. All contributing project authors may +## be found in the AUTHORS file in the root of the source tree. 
+## + + +PORTS_SRCS-yes += vpx_ports.mk + +PORTS_SRCS-$(BUILD_LIBVPX) += asm_offsets.h +PORTS_SRCS-$(BUILD_LIBVPX) += mem.h +PORTS_SRCS-$(BUILD_LIBVPX) += vpx_timer.h + +ifeq ($(ARCH_X86)$(ARCH_X86_64),yes) +PORTS_SRCS-$(BUILD_LIBVPX) += emms.asm +PORTS_SRCS-$(BUILD_LIBVPX) += x86.h +PORTS_SRCS-$(BUILD_LIBVPX) += x86_abi_support.asm +PORTS_SRCS-$(BUILD_LIBVPX) += x86_cpuid.c +endif + +PORTS_SRCS-$(ARCH_ARM) += arm_cpudetect.c +PORTS_SRCS-$(ARCH_ARM) += arm.h diff --git a/vpx_ports/x86.h b/vpx_ports/x86.h index 1341c7f..9dd8c4b 100644 --- a/vpx_ports/x86.h +++ b/vpx_ports/x86.h @@ -162,7 +162,7 @@ x86_readtsc(void) return tsc; #else #if ARCH_X86_64 - return __rdtsc(); + return (unsigned int)__rdtsc(); #else __asm rdtsc; #endif diff --git a/vpx_ports/x86_abi_support.asm b/vpx_ports/x86_abi_support.asm index cef6a0b..0c9fe37 100644 --- a/vpx_ports/x86_abi_support.asm +++ b/vpx_ports/x86_abi_support.asm @@ -88,12 +88,41 @@ %define sym(x) x %elifidn __OUTPUT_FORMAT__,elf64 %define sym(x) x +%elifidn __OUTPUT_FORMAT__,elfx32 +%define sym(x) x %elifidn __OUTPUT_FORMAT__,x64 %define sym(x) x %else %define sym(x) _ %+ x %endif +; PRIVATE +; Macro for the attribute to hide a global symbol for the target ABI. +; This is only active if CHROMIUM is defined. +; +; Chromium doesn't like exported global symbols due to symbol clashing with +; plugins among other things. 
+; +; Requires Chromium's patched copy of yasm: +; http://src.chromium.org/viewvc/chrome?view=rev&revision=73761 +; http://www.tortall.net/projects/yasm/ticket/236 +; +%ifdef CHROMIUM + %ifidn __OUTPUT_FORMAT__,elf32 + %define PRIVATE :hidden + %elifidn __OUTPUT_FORMAT__,elf64 + %define PRIVATE :hidden + %elifidn __OUTPUT_FORMAT__,elfx32 + %define PRIVATE :hidden + %elifidn __OUTPUT_FORMAT__,x64 + %define PRIVATE + %else + %define PRIVATE :private_extern + %endif +%else + %define PRIVATE +%endif + ; arg() ; Return the address specification of the given argument ; @@ -181,7 +210,16 @@ %endmacro %endif %endif - %define HIDDEN_DATA(x) x + + %ifdef CHROMIUM + %ifidn __OUTPUT_FORMAT__,macho32 + %define HIDDEN_DATA(x) x:private_extern + %else + %define HIDDEN_DATA(x) x + %endif + %else + %define HIDDEN_DATA(x) x + %endif %else %macro GET_GOT 1 %endmacro @@ -189,6 +227,9 @@ %ifidn __OUTPUT_FORMAT__,elf64 %define WRT_PLT wrt ..plt %define HIDDEN_DATA(x) x:data hidden + %elifidn __OUTPUT_FORMAT__,elfx32 + %define WRT_PLT wrt ..plt + %define HIDDEN_DATA(x) x:data hidden %else %define HIDDEN_DATA(x) x %endif @@ -330,5 +371,8 @@ section .text %elifidn __OUTPUT_FORMAT__,elf64 section .note.GNU-stack noalloc noexec nowrite progbits section .text +%elifidn __OUTPUT_FORMAT__,elfx32 +section .note.GNU-stack noalloc noexec nowrite progbits +section .text %endif diff --git a/vpx_scale/arm/neon/yv12extend_arm.c b/vpx_scale/arm/neon/yv12extend_arm.c index 7529fc6..eabd495 100644 --- a/vpx_scale/arm/neon/yv12extend_arm.c +++ b/vpx_scale/arm/neon/yv12extend_arm.c @@ -8,18 +8,14 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ +#include "./vpx_rtcd.h" -#include "vpx_scale/yv12config.h" -#include "vpx_mem/vpx_mem.h" -#include "vpx_scale/vpxscale.h" +extern void vp8_yv12_copy_frame_func_neon(struct yv12_buffer_config *src_ybc, + struct yv12_buffer_config *dst_ybc); -extern void vp8_yv12_copy_frame_func_neon(YV12_BUFFER_CONFIG *src_ybc, - YV12_BUFFER_CONFIG *dst_ybc); +void vp8_yv12_copy_frame_neon(struct yv12_buffer_config *src_ybc, + struct yv12_buffer_config *dst_ybc) { + vp8_yv12_copy_frame_func_neon(src_ybc, dst_ybc); -void vp8_yv12_copy_frame_neon(YV12_BUFFER_CONFIG *src_ybc, - YV12_BUFFER_CONFIG *dst_ybc) -{ - vp8_yv12_copy_frame_func_neon(src_ybc, dst_ybc); - - vp8_yv12_extend_frame_borders_neon(dst_ybc); + vp8_yv12_extend_frame_borders_neon(dst_ybc); } diff --git a/vpx_scale/generic/bicubic_scaler.c b/vpx_scale/generic/bicubic_scaler.c index 4468e9d..c116740 100644 --- a/vpx_scale/generic/bicubic_scaler.c +++ b/vpx_scale/generic/bicubic_scaler.c @@ -46,245 +46,229 @@ static float a = -0.6; // 3 2 // C0 = a*t - a*t // -static short c0_fixed(unsigned int t) -{ - // put t in Q16 notation - unsigned short v1, v2; - - // Q16 - v1 = (a_i * t) >> 16; - v1 = (v1 * t) >> 16; - - // Q16 - v2 = (a_i * t) >> 16; - v2 = (v2 * t) >> 16; - v2 = (v2 * t) >> 16; - - // Q12 - return -((v1 - v2) >> 4); +static short c0_fixed(unsigned int t) { + // put t in Q16 notation + unsigned short v1, v2; + + // Q16 + v1 = (a_i * t) >> 16; + v1 = (v1 * t) >> 16; + + // Q16 + v2 = (a_i * t) >> 16; + v2 = (v2 * t) >> 16; + v2 = (v2 * t) >> 16; + + // Q12 + return -((v1 - v2) >> 4); } // 2 3 // C1 = a*t + (3-2*a)*t - (2-a)*t // -static short c1_fixed(unsigned int t) -{ - unsigned short v1, v2, v3; - unsigned short two, three; - - // Q16 - v1 = (a_i * t) >> 16; - - // Q13 - two = 2 << 13; - v2 = two - (a_i >> 3); - v2 = (v2 * t) >> 16; - v2 = (v2 * t) >> 16; - v2 = (v2 * t) >> 16; - - // Q13 - three = 3 << 13; - v3 = three - (2 * (a_i >> 3)); - v3 = (v3 * t) >> 16; - v3 = (v3 * t) >> 16; - - // Q12 - return (((v1 
>> 3) - v2 + v3) >> 1); +static short c1_fixed(unsigned int t) { + unsigned short v1, v2, v3; + unsigned short two, three; + + // Q16 + v1 = (a_i * t) >> 16; + + // Q13 + two = 2 << 13; + v2 = two - (a_i >> 3); + v2 = (v2 * t) >> 16; + v2 = (v2 * t) >> 16; + v2 = (v2 * t) >> 16; + + // Q13 + three = 3 << 13; + v3 = three - (2 * (a_i >> 3)); + v3 = (v3 * t) >> 16; + v3 = (v3 * t) >> 16; + + // Q12 + return (((v1 >> 3) - v2 + v3) >> 1); } // 2 3 // C2 = 1 - (3-a)*t + (2-a)*t // -static short c2_fixed(unsigned int t) -{ - unsigned short v1, v2, v3; - unsigned short two, three; - - // Q13 - v1 = 1 << 13; - - // Q13 - three = 3 << 13; - v2 = three - (a_i >> 3); - v2 = (v2 * t) >> 16; - v2 = (v2 * t) >> 16; - - // Q13 - two = 2 << 13; - v3 = two - (a_i >> 3); - v3 = (v3 * t) >> 16; - v3 = (v3 * t) >> 16; - v3 = (v3 * t) >> 16; - - // Q12 - return (v1 - v2 + v3) >> 1; +static short c2_fixed(unsigned int t) { + unsigned short v1, v2, v3; + unsigned short two, three; + + // Q13 + v1 = 1 << 13; + + // Q13 + three = 3 << 13; + v2 = three - (a_i >> 3); + v2 = (v2 * t) >> 16; + v2 = (v2 * t) >> 16; + + // Q13 + two = 2 << 13; + v3 = two - (a_i >> 3); + v3 = (v3 * t) >> 16; + v3 = (v3 * t) >> 16; + v3 = (v3 * t) >> 16; + + // Q12 + return (v1 - v2 + v3) >> 1; } // 2 3 // C3 = a*t - 2*a*t + a*t // -static short c3_fixed(unsigned int t) -{ - int v1, v2, v3; +static short c3_fixed(unsigned int t) { + int v1, v2, v3; - // Q16 - v1 = (a_i * t) >> 16; + // Q16 + v1 = (a_i * t) >> 16; - // Q15 - v2 = 2 * (a_i >> 1); - v2 = (v2 * t) >> 16; - v2 = (v2 * t) >> 16; + // Q15 + v2 = 2 * (a_i >> 1); + v2 = (v2 * t) >> 16; + v2 = (v2 * t) >> 16; - // Q16 - v3 = (a_i * t) >> 16; - v3 = (v3 * t) >> 16; - v3 = (v3 * t) >> 16; + // Q16 + v3 = (a_i * t) >> 16; + v3 = (v3 * t) >> 16; + v3 = (v3 * t) >> 16; - // Q12 - return ((v2 - (v1 >> 1) - (v3 >> 1)) >> 3); + // Q12 + return ((v2 - (v1 >> 1) - (v3 >> 1)) >> 3); } #else // 3 2 // C0 = -a*t + a*t // -float C0(float t) -{ - return -a * t * t * t + a 
* t * t; +float C0(float t) { + return -a * t * t * t + a * t * t; } // 2 3 // C1 = -a*t + (2*a+3)*t - (a+2)*t // -float C1(float t) -{ - return -(a + 2.0f) * t * t * t + (2.0f * a + 3.0f) * t * t - a * t; +float C1(float t) { + return -(a + 2.0f) * t * t * t + (2.0f * a + 3.0f) * t * t - a * t; } // 2 3 // C2 = 1 - (a+3)*t + (a+2)*t // -float C2(float t) -{ - return (a + 2.0f) * t * t * t - (a + 3.0f) * t * t + 1.0f; +float C2(float t) { + return (a + 2.0f) * t * t * t - (a + 3.0f) * t * t + 1.0f; } // 2 3 // C3 = a*t - 2*a*t + a*t // -float C3(float t) -{ - return a * t * t * t - 2.0f * a * t * t + a * t; +float C3(float t) { + return a * t * t * t - 2.0f * a * t * t + a * t; } #endif #if 0 -int compare_real_fixed() -{ - int i, errors = 0; - float mult = 1.0 / 10000.0; - unsigned int fixed_mult = mult * 4294967296;//65536; - unsigned int phase_offset_int; - float phase_offset_real; - - for (i = 0; i < 10000; i++) - { - int fixed0, fixed1, fixed2, fixed3, fixed_total; - int real0, real1, real2, real3, real_total; - - phase_offset_real = (float)i * mult; - phase_offset_int = (fixed_mult * i) >> 16; +int compare_real_fixed() { + int i, errors = 0; + float mult = 1.0 / 10000.0; + unsigned int fixed_mult = mult * 4294967296;// 65536; + unsigned int phase_offset_int; + float phase_offset_real; + + for (i = 0; i < 10000; i++) { + int fixed0, fixed1, fixed2, fixed3, fixed_total; + int real0, real1, real2, real3, real_total; + + phase_offset_real = (float)i * mult; + phase_offset_int = (fixed_mult * i) >> 16; // phase_offset_int = phase_offset_real * 65536; - fixed0 = c0_fixed(phase_offset_int); - real0 = C0(phase_offset_real) * 4096.0; + fixed0 = c0_fixed(phase_offset_int); + real0 = C0(phase_offset_real) * 4096.0; - if ((abs(fixed0) > (abs(real0) + 1)) || (abs(fixed0) < (abs(real0) - 1))) - errors++; + if ((abs(fixed0) > (abs(real0) + 1)) || (abs(fixed0) < (abs(real0) - 1))) + errors++; - fixed1 = c1_fixed(phase_offset_int); - real1 = C1(phase_offset_real) * 4096.0; + 
fixed1 = c1_fixed(phase_offset_int); + real1 = C1(phase_offset_real) * 4096.0; - if ((abs(fixed1) > (abs(real1) + 1)) || (abs(fixed1) < (abs(real1) - 1))) - errors++; + if ((abs(fixed1) > (abs(real1) + 1)) || (abs(fixed1) < (abs(real1) - 1))) + errors++; - fixed2 = c2_fixed(phase_offset_int); - real2 = C2(phase_offset_real) * 4096.0; + fixed2 = c2_fixed(phase_offset_int); + real2 = C2(phase_offset_real) * 4096.0; - if ((abs(fixed2) > (abs(real2) + 1)) || (abs(fixed2) < (abs(real2) - 1))) - errors++; + if ((abs(fixed2) > (abs(real2) + 1)) || (abs(fixed2) < (abs(real2) - 1))) + errors++; - fixed3 = c3_fixed(phase_offset_int); - real3 = C3(phase_offset_real) * 4096.0; + fixed3 = c3_fixed(phase_offset_int); + real3 = C3(phase_offset_real) * 4096.0; - if ((abs(fixed3) > (abs(real3) + 1)) || (abs(fixed3) < (abs(real3) - 1))) - errors++; + if ((abs(fixed3) > (abs(real3) + 1)) || (abs(fixed3) < (abs(real3) - 1))) + errors++; - fixed_total = fixed0 + fixed1 + fixed2 + fixed3; - real_total = real0 + real1 + real2 + real3; + fixed_total = fixed0 + fixed1 + fixed2 + fixed3; + real_total = real0 + real1 + real2 + real3; - if ((fixed_total > 4097) || (fixed_total < 4094)) - errors ++; + if ((fixed_total > 4097) || (fixed_total < 4094)) + errors++; - if ((real_total > 4097) || (real_total < 4095)) - errors ++; - } + if ((real_total > 4097) || (real_total < 4095)) + errors++; + } - return errors; + return errors; } #endif // Find greatest common denominator between two integers. Method used here is // slow compared to Euclid's algorithm, but does not require any division. -int gcd(int a, int b) -{ - // Problem with this algorithm is that if a or b = 0 this function - // will never exit. Don't want to return 0 because any computation - // that was based on a common denoninator and tried to reduce by - // dividing by 0 would fail. 
Best solution that could be thought of - // would to be fail by returing a 1; - if (a <= 0 || b <= 0) - return 1; - - while (a != b) - { - if (b > a) - b = b - a; - else - { - int tmp = a;//swap large and - a = b; //small - b = tmp; - } +int gcd(int a, int b) { + // Problem with this algorithm is that if a or b = 0 this function + // will never exit. Don't want to return 0 because any computation + // that was based on a common denoninator and tried to reduce by + // dividing by 0 would fail. Best solution that could be thought of + // would to be fail by returing a 1; + if (a <= 0 || b <= 0) + return 1; + + while (a != b) { + if (b > a) + b = b - a; + else { + int tmp = a;// swap large and + a = b; // small + b = tmp; } + } - return b; + return b; } -void bicubic_coefficient_init() -{ - vpx_memset(&g_b_scaler, 0, sizeof(BICUBIC_SCALER_STRUCT)); - g_first_time = 0; +void bicubic_coefficient_init() { + vpx_memset(&g_b_scaler, 0, sizeof(BICUBIC_SCALER_STRUCT)); + g_first_time = 0; } -void bicubic_coefficient_destroy() -{ - if (!g_first_time) - { - vpx_free(g_b_scaler.l_w); +void bicubic_coefficient_destroy() { + if (!g_first_time) { + vpx_free(g_b_scaler.l_w); - vpx_free(g_b_scaler.l_h); + vpx_free(g_b_scaler.l_h); - vpx_free(g_b_scaler.l_h_uv); + vpx_free(g_b_scaler.l_h_uv); - vpx_free(g_b_scaler.c_w); + vpx_free(g_b_scaler.c_w); - vpx_free(g_b_scaler.c_h); + vpx_free(g_b_scaler.c_h); - vpx_free(g_b_scaler.c_h_uv); + vpx_free(g_b_scaler.c_h_uv); - vpx_memset(&g_b_scaler, 0, sizeof(BICUBIC_SCALER_STRUCT)); - } + vpx_memset(&g_b_scaler, 0, sizeof(BICUBIC_SCALER_STRUCT)); + } } // Create the coeffients that will be used for the cubic interpolation. @@ -292,311 +276,294 @@ void bicubic_coefficient_destroy() // regimes the phase offsets will be different. There are 4 coefficents // for each point, two on each side. The layout is that there are the // 4 coefficents for each phase in the array and then the next phase. 
-int bicubic_coefficient_setup(int in_width, int in_height, int out_width, int out_height) -{ - int i; +int bicubic_coefficient_setup(int in_width, int in_height, int out_width, int out_height) { + int i; #ifdef FIXED_POINT - int phase_offset_int; - unsigned int fixed_mult; - int product_val = 0; + int phase_offset_int; + unsigned int fixed_mult; + int product_val = 0; #else - float phase_offset; + float phase_offset; #endif - int gcd_w, gcd_h, gcd_h_uv, d_w, d_h, d_h_uv; + int gcd_w, gcd_h, gcd_h_uv, d_w, d_h, d_h_uv; - if (g_first_time) - bicubic_coefficient_init(); + if (g_first_time) + bicubic_coefficient_init(); - // check to see if the coefficents have already been set up correctly - if ((in_width == g_b_scaler.in_width) && (in_height == g_b_scaler.in_height) - && (out_width == g_b_scaler.out_width) && (out_height == g_b_scaler.out_height)) - return 0; + // check to see if the coefficents have already been set up correctly + if ((in_width == g_b_scaler.in_width) && (in_height == g_b_scaler.in_height) + && (out_width == g_b_scaler.out_width) && (out_height == g_b_scaler.out_height)) + return 0; - g_b_scaler.in_width = in_width; - g_b_scaler.in_height = in_height; - g_b_scaler.out_width = out_width; - g_b_scaler.out_height = out_height; + g_b_scaler.in_width = in_width; + g_b_scaler.in_height = in_height; + g_b_scaler.out_width = out_width; + g_b_scaler.out_height = out_height; - // Don't want to allow crazy scaling, just try and prevent a catastrophic - // failure here. Want to fail after setting the member functions so if - // if the scaler is called the member functions will not scale. - if (out_width <= 0 || out_height <= 0) - return -1; + // Don't want to allow crazy scaling, just try and prevent a catastrophic + // failure here. Want to fail after setting the member functions so if + // if the scaler is called the member functions will not scale. 
+ if (out_width <= 0 || out_height <= 0) + return -1; - // reduce in/out width and height ratios using the gcd - gcd_w = gcd(out_width, in_width); - gcd_h = gcd(out_height, in_height); - gcd_h_uv = gcd(out_height, in_height / 2); + // reduce in/out width and height ratios using the gcd + gcd_w = gcd(out_width, in_width); + gcd_h = gcd(out_height, in_height); + gcd_h_uv = gcd(out_height, in_height / 2); - // the numerator width and height are to be saved in - // globals so they can be used during the scaling process - // without having to be recalculated. - g_b_scaler.nw = out_width / gcd_w; - d_w = in_width / gcd_w; + // the numerator width and height are to be saved in + // globals so they can be used during the scaling process + // without having to be recalculated. + g_b_scaler.nw = out_width / gcd_w; + d_w = in_width / gcd_w; - g_b_scaler.nh = out_height / gcd_h; - d_h = in_height / gcd_h; + g_b_scaler.nh = out_height / gcd_h; + d_h = in_height / gcd_h; - g_b_scaler.nh_uv = out_height / gcd_h_uv; - d_h_uv = (in_height / 2) / gcd_h_uv; + g_b_scaler.nh_uv = out_height / gcd_h_uv; + d_h_uv = (in_height / 2) / gcd_h_uv; - // allocate memory for the coefficents - vpx_free(g_b_scaler.l_w); + // allocate memory for the coefficents + vpx_free(g_b_scaler.l_w); - vpx_free(g_b_scaler.l_h); + vpx_free(g_b_scaler.l_h); - vpx_free(g_b_scaler.l_h_uv); + vpx_free(g_b_scaler.l_h_uv); - g_b_scaler.l_w = (short *)vpx_memalign(32, out_width * 2); - g_b_scaler.l_h = (short *)vpx_memalign(32, out_height * 2); - g_b_scaler.l_h_uv = (short *)vpx_memalign(32, out_height * 2); + g_b_scaler.l_w = (short *)vpx_memalign(32, out_width * 2); + g_b_scaler.l_h = (short *)vpx_memalign(32, out_height * 2); + g_b_scaler.l_h_uv = (short *)vpx_memalign(32, out_height * 2); - vpx_free(g_b_scaler.c_w); + vpx_free(g_b_scaler.c_w); - vpx_free(g_b_scaler.c_h); + vpx_free(g_b_scaler.c_h); - vpx_free(g_b_scaler.c_h_uv); + vpx_free(g_b_scaler.c_h_uv); - g_b_scaler.c_w = (short *)vpx_memalign(32, 
g_b_scaler.nw * 4 * 2); - g_b_scaler.c_h = (short *)vpx_memalign(32, g_b_scaler.nh * 4 * 2); - g_b_scaler.c_h_uv = (short *)vpx_memalign(32, g_b_scaler.nh_uv * 4 * 2); + g_b_scaler.c_w = (short *)vpx_memalign(32, g_b_scaler.nw * 4 * 2); + g_b_scaler.c_h = (short *)vpx_memalign(32, g_b_scaler.nh * 4 * 2); + g_b_scaler.c_h_uv = (short *)vpx_memalign(32, g_b_scaler.nh_uv * 4 * 2); - g_b_scaler.hbuf = g_hbuf; - g_b_scaler.hbuf_uv = g_hbuf_uv; + g_b_scaler.hbuf = g_hbuf; + g_b_scaler.hbuf_uv = g_hbuf_uv; - // Set up polyphase filter taps. This needs to be done before - // the scaling because of the floating point math required. The - // coefficients are multiplied by 2^12 so that fixed point math - // can be used in the main scaling loop. + // Set up polyphase filter taps. This needs to be done before + // the scaling because of the floating point math required. The + // coefficients are multiplied by 2^12 so that fixed point math + // can be used in the main scaling loop. #ifdef FIXED_POINT - fixed_mult = (1.0 / (float)g_b_scaler.nw) * 4294967296; + fixed_mult = (1.0 / (float)g_b_scaler.nw) * 4294967296; - product_val = 0; + product_val = 0; - for (i = 0; i < g_b_scaler.nw; i++) - { - if (product_val > g_b_scaler.nw) - product_val -= g_b_scaler.nw; + for (i = 0; i < g_b_scaler.nw; i++) { + if (product_val > g_b_scaler.nw) + product_val -= g_b_scaler.nw; - phase_offset_int = (fixed_mult * product_val) >> 16; + phase_offset_int = (fixed_mult * product_val) >> 16; - g_b_scaler.c_w[i*4] = c3_fixed(phase_offset_int); - g_b_scaler.c_w[i*4+1] = c2_fixed(phase_offset_int); - g_b_scaler.c_w[i*4+2] = c1_fixed(phase_offset_int); - g_b_scaler.c_w[i*4+3] = c0_fixed(phase_offset_int); + g_b_scaler.c_w[i * 4] = c3_fixed(phase_offset_int); + g_b_scaler.c_w[i * 4 + 1] = c2_fixed(phase_offset_int); + g_b_scaler.c_w[i * 4 + 2] = c1_fixed(phase_offset_int); + g_b_scaler.c_w[i * 4 + 3] = c0_fixed(phase_offset_int); - product_val += d_w; - } + product_val += d_w; + } - fixed_mult = (1.0 / 
(float)g_b_scaler.nh) * 4294967296; + fixed_mult = (1.0 / (float)g_b_scaler.nh) * 4294967296; - product_val = 0; + product_val = 0; - for (i = 0; i < g_b_scaler.nh; i++) - { - if (product_val > g_b_scaler.nh) - product_val -= g_b_scaler.nh; + for (i = 0; i < g_b_scaler.nh; i++) { + if (product_val > g_b_scaler.nh) + product_val -= g_b_scaler.nh; - phase_offset_int = (fixed_mult * product_val) >> 16; + phase_offset_int = (fixed_mult * product_val) >> 16; - g_b_scaler.c_h[i*4] = c0_fixed(phase_offset_int); - g_b_scaler.c_h[i*4+1] = c1_fixed(phase_offset_int); - g_b_scaler.c_h[i*4+2] = c2_fixed(phase_offset_int); - g_b_scaler.c_h[i*4+3] = c3_fixed(phase_offset_int); + g_b_scaler.c_h[i * 4] = c0_fixed(phase_offset_int); + g_b_scaler.c_h[i * 4 + 1] = c1_fixed(phase_offset_int); + g_b_scaler.c_h[i * 4 + 2] = c2_fixed(phase_offset_int); + g_b_scaler.c_h[i * 4 + 3] = c3_fixed(phase_offset_int); - product_val += d_h; - } + product_val += d_h; + } - fixed_mult = (1.0 / (float)g_b_scaler.nh_uv) * 4294967296; + fixed_mult = (1.0 / (float)g_b_scaler.nh_uv) * 4294967296; - product_val = 0; + product_val = 0; - for (i = 0; i < g_b_scaler.nh_uv; i++) - { - if (product_val > g_b_scaler.nh_uv) - product_val -= g_b_scaler.nh_uv; + for (i = 0; i < g_b_scaler.nh_uv; i++) { + if (product_val > g_b_scaler.nh_uv) + product_val -= g_b_scaler.nh_uv; - phase_offset_int = (fixed_mult * product_val) >> 16; + phase_offset_int = (fixed_mult * product_val) >> 16; - g_b_scaler.c_h_uv[i*4] = c0_fixed(phase_offset_int); - g_b_scaler.c_h_uv[i*4+1] = c1_fixed(phase_offset_int); - g_b_scaler.c_h_uv[i*4+2] = c2_fixed(phase_offset_int); - g_b_scaler.c_h_uv[i*4+3] = c3_fixed(phase_offset_int); + g_b_scaler.c_h_uv[i * 4] = c0_fixed(phase_offset_int); + g_b_scaler.c_h_uv[i * 4 + 1] = c1_fixed(phase_offset_int); + g_b_scaler.c_h_uv[i * 4 + 2] = c2_fixed(phase_offset_int); + g_b_scaler.c_h_uv[i * 4 + 3] = c3_fixed(phase_offset_int); - product_val += d_h_uv; - } + product_val += d_h_uv; + } #else - for (i = 0; 
i < g_nw; i++) - { - phase_offset = (float)((i * d_w) % g_nw) / (float)g_nw; - g_c_w[i*4] = (C3(phase_offset) * 4096.0); - g_c_w[i*4+1] = (C2(phase_offset) * 4096.0); - g_c_w[i*4+2] = (C1(phase_offset) * 4096.0); - g_c_w[i*4+3] = (C0(phase_offset) * 4096.0); - } - - for (i = 0; i < g_nh; i++) - { - phase_offset = (float)((i * d_h) % g_nh) / (float)g_nh; - g_c_h[i*4] = (C0(phase_offset) * 4096.0); - g_c_h[i*4+1] = (C1(phase_offset) * 4096.0); - g_c_h[i*4+2] = (C2(phase_offset) * 4096.0); - g_c_h[i*4+3] = (C3(phase_offset) * 4096.0); - } - - for (i = 0; i < g_nh_uv; i++) - { - phase_offset = (float)((i * d_h_uv) % g_nh_uv) / (float)g_nh_uv; - g_c_h_uv[i*4] = (C0(phase_offset) * 4096.0); - g_c_h_uv[i*4+1] = (C1(phase_offset) * 4096.0); - g_c_h_uv[i*4+2] = (C2(phase_offset) * 4096.0); - g_c_h_uv[i*4+3] = (C3(phase_offset) * 4096.0); - } + for (i = 0; i < g_nw; i++) { + phase_offset = (float)((i * d_w) % g_nw) / (float)g_nw; + g_c_w[i * 4] = (C3(phase_offset) * 4096.0); + g_c_w[i * 4 + 1] = (C2(phase_offset) * 4096.0); + g_c_w[i * 4 + 2] = (C1(phase_offset) * 4096.0); + g_c_w[i * 4 + 3] = (C0(phase_offset) * 4096.0); + } + + for (i = 0; i < g_nh; i++) { + phase_offset = (float)((i * d_h) % g_nh) / (float)g_nh; + g_c_h[i * 4] = (C0(phase_offset) * 4096.0); + g_c_h[i * 4 + 1] = (C1(phase_offset) * 4096.0); + g_c_h[i * 4 + 2] = (C2(phase_offset) * 4096.0); + g_c_h[i * 4 + 3] = (C3(phase_offset) * 4096.0); + } + + for (i = 0; i < g_nh_uv; i++) { + phase_offset = (float)((i * d_h_uv) % g_nh_uv) / (float)g_nh_uv; + g_c_h_uv[i * 4] = (C0(phase_offset) * 4096.0); + g_c_h_uv[i * 4 + 1] = (C1(phase_offset) * 4096.0); + g_c_h_uv[i * 4 + 2] = (C2(phase_offset) * 4096.0); + g_c_h_uv[i * 4 + 3] = (C3(phase_offset) * 4096.0); + } #endif - // Create an array that corresponds input lines to output lines. - // This doesn't require floating point math, but it does require - // a division and because hardware division is not present that - // is a call. 
- for (i = 0; i < out_width; i++) - { - g_b_scaler.l_w[i] = (i * d_w) / g_b_scaler.nw; + // Create an array that corresponds input lines to output lines. + // This doesn't require floating point math, but it does require + // a division and because hardware division is not present that + // is a call. + for (i = 0; i < out_width; i++) { + g_b_scaler.l_w[i] = (i * d_w) / g_b_scaler.nw; - if ((g_b_scaler.l_w[i] + 2) <= in_width) - g_b_scaler.max_usable_out_width = i; + if ((g_b_scaler.l_w[i] + 2) <= in_width) + g_b_scaler.max_usable_out_width = i; - } + } - for (i = 0; i < out_height + 1; i++) - { - g_b_scaler.l_h[i] = (i * d_h) / g_b_scaler.nh; - g_b_scaler.l_h_uv[i] = (i * d_h_uv) / g_b_scaler.nh_uv; - } + for (i = 0; i < out_height + 1; i++) { + g_b_scaler.l_h[i] = (i * d_h) / g_b_scaler.nh; + g_b_scaler.l_h_uv[i] = (i * d_h_uv) / g_b_scaler.nh_uv; + } - return 0; + return 0; } int bicubic_scale(int in_width, int in_height, int in_stride, int out_width, int out_height, int out_stride, - unsigned char *input_image, unsigned char *output_image) -{ - short *RESTRICT l_w, * RESTRICT l_h; - short *RESTRICT c_w, * RESTRICT c_h; - unsigned char *RESTRICT ip, * RESTRICT op; - unsigned char *RESTRICT hbuf; - int h, w, lw, lh; - int temp_sum; - int phase_offset_w, phase_offset_h; - - c_w = g_b_scaler.c_w; - c_h = g_b_scaler.c_h; - - op = output_image; - - l_w = g_b_scaler.l_w; - l_h = g_b_scaler.l_h; - - phase_offset_h = 0; - - for (h = 0; h < out_height; h++) - { - // select the row to work on - lh = l_h[h]; - ip = input_image + (in_stride * lh); - - // vp8_filter the row vertically into an temporary buffer. - // If the phase offset == 0 then all the multiplication - // is going to result in the output equalling the input. - // So instead point the temporary buffer to the input. - // Also handle the boundry condition of not being able to - // filter that last lines. 
- if (phase_offset_h && (lh < in_height - 2)) - { - hbuf = g_b_scaler.hbuf; - - for (w = 0; w < in_width; w++) - { - temp_sum = c_h[phase_offset_h*4+3] * ip[w - in_stride]; - temp_sum += c_h[phase_offset_h*4+2] * ip[w]; - temp_sum += c_h[phase_offset_h*4+1] * ip[w + in_stride]; - temp_sum += c_h[phase_offset_h*4] * ip[w + 2*in_stride]; - - hbuf[w] = temp_sum >> 12; - } - } - else - hbuf = ip; - - // increase the phase offset for the next time around. - if (++phase_offset_h >= g_b_scaler.nh) - phase_offset_h = 0; - - // now filter and expand it horizontally into the final - // output buffer + unsigned char *input_image, unsigned char *output_image) { + short *RESTRICT l_w, * RESTRICT l_h; + short *RESTRICT c_w, * RESTRICT c_h; + unsigned char *RESTRICT ip, * RESTRICT op; + unsigned char *RESTRICT hbuf; + int h, w, lw, lh; + int temp_sum; + int phase_offset_w, phase_offset_h; + + c_w = g_b_scaler.c_w; + c_h = g_b_scaler.c_h; + + op = output_image; + + l_w = g_b_scaler.l_w; + l_h = g_b_scaler.l_h; + + phase_offset_h = 0; + + for (h = 0; h < out_height; h++) { + // select the row to work on + lh = l_h[h]; + ip = input_image + (in_stride * lh); + + // vp8_filter the row vertically into an temporary buffer. + // If the phase offset == 0 then all the multiplication + // is going to result in the output equalling the input. + // So instead point the temporary buffer to the input. + // Also handle the boundry condition of not being able to + // filter that last lines. + if (phase_offset_h && (lh < in_height - 2)) { + hbuf = g_b_scaler.hbuf; + + for (w = 0; w < in_width; w++) { + temp_sum = c_h[phase_offset_h * 4 + 3] * ip[w - in_stride]; + temp_sum += c_h[phase_offset_h * 4 + 2] * ip[w]; + temp_sum += c_h[phase_offset_h * 4 + 1] * ip[w + in_stride]; + temp_sum += c_h[phase_offset_h * 4] * ip[w + 2 * in_stride]; + + hbuf[w] = temp_sum >> 12; + } + } else + hbuf = ip; + + // increase the phase offset for the next time around. 
+ if (++phase_offset_h >= g_b_scaler.nh) + phase_offset_h = 0; + + // now filter and expand it horizontally into the final + // output buffer + phase_offset_w = 0; + + for (w = 0; w < out_width; w++) { + // get the index to use to expand the image + lw = l_w[w]; + + temp_sum = c_w[phase_offset_w * 4] * hbuf[lw - 1]; + temp_sum += c_w[phase_offset_w * 4 + 1] * hbuf[lw]; + temp_sum += c_w[phase_offset_w * 4 + 2] * hbuf[lw + 1]; + temp_sum += c_w[phase_offset_w * 4 + 3] * hbuf[lw + 2]; + temp_sum = temp_sum >> 12; + + if (++phase_offset_w >= g_b_scaler.nw) phase_offset_w = 0; - for (w = 0; w < out_width; w++) - { - // get the index to use to expand the image - lw = l_w[w]; - - temp_sum = c_w[phase_offset_w*4] * hbuf[lw - 1]; - temp_sum += c_w[phase_offset_w*4+1] * hbuf[lw]; - temp_sum += c_w[phase_offset_w*4+2] * hbuf[lw + 1]; - temp_sum += c_w[phase_offset_w*4+3] * hbuf[lw + 2]; - temp_sum = temp_sum >> 12; + // boundry conditions + if ((lw + 2) >= in_width) + temp_sum = hbuf[lw]; - if (++phase_offset_w >= g_b_scaler.nw) - phase_offset_w = 0; + if (lw == 0) + temp_sum = hbuf[0]; - // boundry conditions - if ((lw + 2) >= in_width) - temp_sum = hbuf[lw]; - - if (lw == 0) - temp_sum = hbuf[0]; - - op[w] = temp_sum; - } - - op += out_stride; + op[w] = temp_sum; } - return 0; + op += out_stride; + } + + return 0; } -void bicubic_scale_frame_reset() -{ - g_b_scaler.out_width = 0; - g_b_scaler.out_height = 0; +void bicubic_scale_frame_reset() { + g_b_scaler.out_width = 0; + g_b_scaler.out_height = 0; } void bicubic_scale_frame(YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, - int new_width, int new_height) -{ + int new_width, int new_height) { - dst->y_width = new_width; - dst->y_height = new_height; - dst->uv_width = new_width / 2; - dst->uv_height = new_height / 2; + dst->y_width = new_width; + dst->y_height = new_height; + dst->uv_width = new_width / 2; + dst->uv_height = new_height / 2; - dst->y_stride = dst->y_width; - dst->uv_stride = dst->uv_width; + dst->y_stride 
= dst->y_width; + dst->uv_stride = dst->uv_width; - bicubic_scale(src->y_width, src->y_height, src->y_stride, - new_width, new_height, dst->y_stride, - src->y_buffer, dst->y_buffer); + bicubic_scale(src->y_width, src->y_height, src->y_stride, + new_width, new_height, dst->y_stride, + src->y_buffer, dst->y_buffer); - bicubic_scale(src->uv_width, src->uv_height, src->uv_stride, - new_width / 2, new_height / 2, dst->uv_stride, - src->u_buffer, dst->u_buffer); + bicubic_scale(src->uv_width, src->uv_height, src->uv_stride, + new_width / 2, new_height / 2, dst->uv_stride, + src->u_buffer, dst->u_buffer); - bicubic_scale(src->uv_width, src->uv_height, src->uv_stride, - new_width / 2, new_height / 2, dst->uv_stride, - src->v_buffer, dst->v_buffer); + bicubic_scale(src->uv_width, src->uv_height, src->uv_stride, + new_width / 2, new_height / 2, dst->uv_stride, + src->v_buffer, dst->v_buffer); } diff --git a/vpx_scale/generic/gen_scalers.c b/vpx_scale/generic/gen_scalers.c index 9beb162..60c21fb 100644 --- a/vpx_scale/generic/gen_scalers.c +++ b/vpx_scale/generic/gen_scalers.c @@ -34,47 +34,42 @@ * SPECIAL NOTES : None. 
* ****************************************************************************/ -void vp8_horizontal_line_4_5_scale_c -( - const unsigned char *source, - unsigned int source_width, - unsigned char *dest, - unsigned int dest_width -) -{ - unsigned i; - unsigned int a, b, c; - unsigned char *des = dest; - const unsigned char *src = source; - - (void) dest_width; - - for (i = 0; i < source_width - 4; i += 4) - { - a = src[0]; - b = src[1]; - des [0] = (unsigned char) a; - des [1] = (unsigned char)((a * 51 + 205 * b + 128) >> 8); - c = src[2] * 154; - a = src[3]; - des [2] = (unsigned char)((b * 102 + c + 128) >> 8); - des [3] = (unsigned char)((c + 102 * a + 128) >> 8); - b = src[4]; - des [4] = (unsigned char)((a * 205 + 51 * b + 128) >> 8); - - src += 4; - des += 5; - } - +void vp8_horizontal_line_4_5_scale_c(const unsigned char *source, + unsigned int source_width, + unsigned char *dest, + unsigned int dest_width) { + unsigned i; + unsigned int a, b, c; + unsigned char *des = dest; + const unsigned char *src = source; + + (void) dest_width; + + for (i = 0; i < source_width - 4; i += 4) { a = src[0]; b = src[1]; - des [0] = (unsigned char)(a); + des [0] = (unsigned char) a; des [1] = (unsigned char)((a * 51 + 205 * b + 128) >> 8); c = src[2] * 154; a = src[3]; des [2] = (unsigned char)((b * 102 + c + 128) >> 8); des [3] = (unsigned char)((c + 102 * a + 128) >> 8); - des [4] = (unsigned char)(a); + b = src[4]; + des [4] = (unsigned char)((a * 205 + 51 * b + 128) >> 8); + + src += 4; + des += 5; + } + + a = src[0]; + b = src[1]; + des [0] = (unsigned char)(a); + des [1] = (unsigned char)((a * 51 + 205 * b + 128) >> 8); + c = src[2] * 154; + a = src[3]; + des [2] = (unsigned char)((b * 102 + c + 128) >> 8); + des [3] = (unsigned char)((c + 102 * a + 128) >> 8); + des [4] = (unsigned char)(a); } @@ -97,31 +92,31 @@ void vp8_horizontal_line_4_5_scale_c * the current band. 
* ****************************************************************************/ -void vp8_vertical_band_4_5_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) -{ - unsigned int i; - unsigned int a, b, c, d; - unsigned char *des = dest; +void vp8_vertical_band_4_5_scale_c(unsigned char *dest, + unsigned int dest_pitch, + unsigned int dest_width) { + unsigned int i; + unsigned int a, b, c, d; + unsigned char *des = dest; - for (i = 0; i < dest_width; i++) - { - a = des [0]; - b = des [dest_pitch]; + for (i = 0; i < dest_width; i++) { + a = des [0]; + b = des [dest_pitch]; - des[dest_pitch] = (unsigned char)((a * 51 + 205 * b + 128) >> 8); + des[dest_pitch] = (unsigned char)((a * 51 + 205 * b + 128) >> 8); - c = des[dest_pitch*2] * 154; - d = des[dest_pitch*3]; + c = des[dest_pitch * 2] * 154; + d = des[dest_pitch * 3]; - des [dest_pitch*2] = (unsigned char)((b * 102 + c + 128) >> 8); - des [dest_pitch*3] = (unsigned char)((c + 102 * d + 128) >> 8); + des [dest_pitch * 2] = (unsigned char)((b * 102 + c + 128) >> 8); + des [dest_pitch * 3] = (unsigned char)((c + 102 * d + 128) >> 8); - /* First line in next band */ - a = des [dest_pitch * 5]; - des [dest_pitch * 4] = (unsigned char)((d * 205 + 51 * a + 128) >> 8); + /* First line in next band */ + a = des [dest_pitch * 5]; + des [dest_pitch * 4] = (unsigned char)((d * 205 + 51 * a + 128) >> 8); - des ++; - } + des++; + } } /**************************************************************************** @@ -144,30 +139,30 @@ void vp8_vertical_band_4_5_scale_c(unsigned char *dest, unsigned int dest_pitch, * last band. 
* ****************************************************************************/ -void vp8_last_vertical_band_4_5_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) -{ - unsigned int i; - unsigned int a, b, c, d; - unsigned char *des = dest; +void vp8_last_vertical_band_4_5_scale_c(unsigned char *dest, + unsigned int dest_pitch, + unsigned int dest_width) { + unsigned int i; + unsigned int a, b, c, d; + unsigned char *des = dest; - for (i = 0; i < dest_width; ++i) - { - a = des[0]; - b = des[dest_pitch]; + for (i = 0; i < dest_width; ++i) { + a = des[0]; + b = des[dest_pitch]; - des[dest_pitch] = (unsigned char)((a * 51 + 205 * b + 128) >> 8); + des[dest_pitch] = (unsigned char)((a * 51 + 205 * b + 128) >> 8); - c = des[dest_pitch*2] * 154; - d = des[dest_pitch*3]; + c = des[dest_pitch * 2] * 154; + d = des[dest_pitch * 3]; - des [dest_pitch*2] = (unsigned char)((b * 102 + c + 128) >> 8); - des [dest_pitch*3] = (unsigned char)((c + 102 * d + 128) >> 8); + des [dest_pitch * 2] = (unsigned char)((b * 102 + c + 128) >> 8); + des [dest_pitch * 3] = (unsigned char)((c + 102 * d + 128) >> 8); - /* No other line for interplation of this line, so .. */ - des[dest_pitch*4] = (unsigned char) d; + /* No other line for interplation of this line, so .. 
*/ + des[dest_pitch * 4] = (unsigned char) d; - des++; - } + des++; + } } /**************************************************************************** @@ -190,40 +185,35 @@ void vp8_last_vertical_band_4_5_scale_c(unsigned char *dest, unsigned int dest_p * * ****************************************************************************/ -void vp8_horizontal_line_2_3_scale_c -( - const unsigned char *source, - unsigned int source_width, - unsigned char *dest, - unsigned int dest_width -) -{ - unsigned int i; - unsigned int a, b, c; - unsigned char *des = dest; - const unsigned char *src = source; - - (void) dest_width; - - for (i = 0; i < source_width - 2; i += 2) - { - a = src[0]; - b = src[1]; - c = src[2]; - - des [0] = (unsigned char)(a); - des [1] = (unsigned char)((a * 85 + 171 * b + 128) >> 8); - des [2] = (unsigned char)((b * 171 + 85 * c + 128) >> 8); - - src += 2; - des += 3; - } - +void vp8_horizontal_line_2_3_scale_c(const unsigned char *source, + unsigned int source_width, + unsigned char *dest, + unsigned int dest_width) { + unsigned int i; + unsigned int a, b, c; + unsigned char *des = dest; + const unsigned char *src = source; + + (void) dest_width; + + for (i = 0; i < source_width - 2; i += 2) { a = src[0]; b = src[1]; + c = src[2]; + des [0] = (unsigned char)(a); des [1] = (unsigned char)((a * 85 + 171 * b + 128) >> 8); - des [2] = (unsigned char)(b); + des [2] = (unsigned char)((b * 171 + 85 * c + 128) >> 8); + + src += 2; + des += 3; + } + + a = src[0]; + b = src[1]; + des [0] = (unsigned char)(a); + des [1] = (unsigned char)((a * 85 + 171 * b + 128) >> 8); + des [2] = (unsigned char)(b); } @@ -246,22 +236,22 @@ void vp8_horizontal_line_2_3_scale_c * the current band. 
* ****************************************************************************/ -void vp8_vertical_band_2_3_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) -{ - unsigned int i; - unsigned int a, b, c; - unsigned char *des = dest; - - for (i = 0; i < dest_width; i++) - { - a = des [0]; - b = des [dest_pitch]; - c = des[dest_pitch*3]; - des [dest_pitch ] = (unsigned char)((a * 85 + 171 * b + 128) >> 8); - des [dest_pitch*2] = (unsigned char)((b * 171 + 85 * c + 128) >> 8); - - des++; - } +void vp8_vertical_band_2_3_scale_c(unsigned char *dest, + unsigned int dest_pitch, + unsigned int dest_width) { + unsigned int i; + unsigned int a, b, c; + unsigned char *des = dest; + + for (i = 0; i < dest_width; i++) { + a = des [0]; + b = des [dest_pitch]; + c = des[dest_pitch * 3]; + des [dest_pitch ] = (unsigned char)((a * 85 + 171 * b + 128) >> 8); + des [dest_pitch * 2] = (unsigned char)((b * 171 + 85 * c + 128) >> 8); + + des++; + } } /**************************************************************************** @@ -284,21 +274,21 @@ void vp8_vertical_band_2_3_scale_c(unsigned char *dest, unsigned int dest_pitch, * last band. 
* ****************************************************************************/ -void vp8_last_vertical_band_2_3_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) -{ - unsigned int i; - unsigned int a, b; - unsigned char *des = dest; - - for (i = 0; i < dest_width; ++i) - { - a = des [0]; - b = des [dest_pitch]; - - des [dest_pitch ] = (unsigned char)((a * 85 + 171 * b + 128) >> 8); - des [dest_pitch*2] = (unsigned char)(b); - des++; - } +void vp8_last_vertical_band_2_3_scale_c(unsigned char *dest, + unsigned int dest_pitch, + unsigned int dest_width) { + unsigned int i; + unsigned int a, b; + unsigned char *des = dest; + + for (i = 0; i < dest_width; ++i) { + a = des [0]; + b = des [dest_pitch]; + + des [dest_pitch ] = (unsigned char)((a * 85 + 171 * b + 128) >> 8); + des [dest_pitch * 2] = (unsigned char)(b); + des++; + } } /**************************************************************************** @@ -321,49 +311,44 @@ void vp8_last_vertical_band_2_3_scale_c(unsigned char *dest, unsigned int dest_p * * ****************************************************************************/ -void vp8_horizontal_line_3_5_scale_c -( - const unsigned char *source, - unsigned int source_width, - unsigned char *dest, - unsigned int dest_width -) -{ - unsigned int i; - unsigned int a, b, c; - unsigned char *des = dest; - const unsigned char *src = source; - - (void) dest_width; - - for (i = 0; i < source_width - 3; i += 3) - { - a = src[0]; - b = src[1]; - des [0] = (unsigned char)(a); - des [1] = (unsigned char)((a * 102 + 154 * b + 128) >> 8); - - c = src[2] ; - des [2] = (unsigned char)((b * 205 + c * 51 + 128) >> 8); - des [3] = (unsigned char)((b * 51 + c * 205 + 128) >> 8); - - a = src[3]; - des [4] = (unsigned char)((c * 154 + a * 102 + 128) >> 8); - - src += 3; - des += 5; - } - +void vp8_horizontal_line_3_5_scale_c(const unsigned char *source, + unsigned int source_width, + unsigned char *dest, + unsigned int dest_width) { + unsigned int i; 
+ unsigned int a, b, c; + unsigned char *des = dest; + const unsigned char *src = source; + + (void) dest_width; + + for (i = 0; i < source_width - 3; i += 3) { a = src[0]; b = src[1]; des [0] = (unsigned char)(a); - des [1] = (unsigned char)((a * 102 + 154 * b + 128) >> 8); - c = src[2] ; + + c = src[2]; des [2] = (unsigned char)((b * 205 + c * 51 + 128) >> 8); des [3] = (unsigned char)((b * 51 + c * 205 + 128) >> 8); - des [4] = (unsigned char)(c); + a = src[3]; + des [4] = (unsigned char)((c * 154 + a * 102 + 128) >> 8); + + src += 3; + des += 5; + } + + a = src[0]; + b = src[1]; + des [0] = (unsigned char)(a); + + des [1] = (unsigned char)((a * 102 + 154 * b + 128) >> 8); + c = src[2]; + des [2] = (unsigned char)((b * 205 + c * 51 + 128) >> 8); + des [3] = (unsigned char)((b * 51 + c * 205 + 128) >> 8); + + des [4] = (unsigned char)(c); } /**************************************************************************** @@ -385,28 +370,28 @@ void vp8_horizontal_line_3_5_scale_c * the current band. * ****************************************************************************/ -void vp8_vertical_band_3_5_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) -{ - unsigned int i; - unsigned int a, b, c; - unsigned char *des = dest; - - for (i = 0; i < dest_width; i++) - { - a = des [0]; - b = des [dest_pitch]; - des [dest_pitch] = (unsigned char)((a * 102 + 154 * b + 128) >> 8); - - c = des[dest_pitch*2]; - des [dest_pitch*2] = (unsigned char)((b * 205 + c * 51 + 128) >> 8); - des [dest_pitch*3] = (unsigned char)((b * 51 + c * 205 + 128) >> 8); - - /* First line in next band... 
*/ - a = des [dest_pitch * 5]; - des [dest_pitch * 4] = (unsigned char)((c * 154 + a * 102 + 128) >> 8); - - des++; - } +void vp8_vertical_band_3_5_scale_c(unsigned char *dest, + unsigned int dest_pitch, + unsigned int dest_width) { + unsigned int i; + unsigned int a, b, c; + unsigned char *des = dest; + + for (i = 0; i < dest_width; i++) { + a = des [0]; + b = des [dest_pitch]; + des [dest_pitch] = (unsigned char)((a * 102 + 154 * b + 128) >> 8); + + c = des[dest_pitch * 2]; + des [dest_pitch * 2] = (unsigned char)((b * 205 + c * 51 + 128) >> 8); + des [dest_pitch * 3] = (unsigned char)((b * 51 + c * 205 + 128) >> 8); + + /* First line in next band... */ + a = des [dest_pitch * 5]; + des [dest_pitch * 4] = (unsigned char)((c * 154 + a * 102 + 128) >> 8); + + des++; + } } /**************************************************************************** @@ -429,28 +414,28 @@ void vp8_vertical_band_3_5_scale_c(unsigned char *dest, unsigned int dest_pitch, * last band. * ****************************************************************************/ -void vp8_last_vertical_band_3_5_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) -{ - unsigned int i; - unsigned int a, b, c; - unsigned char *des = dest; +void vp8_last_vertical_band_3_5_scale_c(unsigned char *dest, + unsigned int dest_pitch, + unsigned int dest_width) { + unsigned int i; + unsigned int a, b, c; + unsigned char *des = dest; - for (i = 0; i < dest_width; ++i) - { - a = des [0]; - b = des [dest_pitch]; + for (i = 0; i < dest_width; ++i) { + a = des [0]; + b = des [dest_pitch]; - des [ dest_pitch ] = (unsigned char)((a * 102 + 154 * b + 128) >> 8); + des [ dest_pitch ] = (unsigned char)((a * 102 + 154 * b + 128) >> 8); - c = des[dest_pitch*2]; - des [dest_pitch*2] = (unsigned char)((b * 205 + c * 51 + 128) >> 8); - des [dest_pitch*3] = (unsigned char)((b * 51 + c * 205 + 128) >> 8); + c = des[dest_pitch * 2]; + des [dest_pitch * 2] = (unsigned char)((b * 205 + c * 51 + 128) >> 8); + 
des [dest_pitch * 3] = (unsigned char)((b * 51 + c * 205 + 128) >> 8); - /* No other line for interplation of this line, so .. */ - des [ dest_pitch * 4 ] = (unsigned char)(c) ; + /* No other line for interplation of this line, so .. */ + des [ dest_pitch * 4 ] = (unsigned char)(c); - des++; - } + des++; + } } /**************************************************************************** @@ -473,46 +458,41 @@ void vp8_last_vertical_band_3_5_scale_c(unsigned char *dest, unsigned int dest_p * * ****************************************************************************/ -void vp8_horizontal_line_3_4_scale_c -( - const unsigned char *source, - unsigned int source_width, - unsigned char *dest, - unsigned int dest_width -) -{ - unsigned int i; - unsigned int a, b, c; - unsigned char *des = dest; - const unsigned char *src = source; - - (void) dest_width; - - for (i = 0; i < source_width - 3; i += 3) - { - a = src[0]; - b = src[1]; - des [0] = (unsigned char)(a); - des [1] = (unsigned char)((a * 64 + b * 192 + 128) >> 8); - - c = src[2]; - des [2] = (unsigned char)((b + c + 1) >> 1); - - a = src[3]; - des [3] = (unsigned char)((c * 192 + a * 64 + 128) >> 8); - - src += 3; - des += 4; - } - +void vp8_horizontal_line_3_4_scale_c(const unsigned char *source, + unsigned int source_width, + unsigned char *dest, + unsigned int dest_width) { + unsigned int i; + unsigned int a, b, c; + unsigned char *des = dest; + const unsigned char *src = source; + + (void) dest_width; + + for (i = 0; i < source_width - 3; i += 3) { a = src[0]; b = src[1]; des [0] = (unsigned char)(a); des [1] = (unsigned char)((a * 64 + b * 192 + 128) >> 8); - c = src[2] ; + c = src[2]; des [2] = (unsigned char)((b + c + 1) >> 1); - des [3] = (unsigned char)(c); + + a = src[3]; + des [3] = (unsigned char)((c * 192 + a * 64 + 128) >> 8); + + src += 3; + des += 4; + } + + a = src[0]; + b = src[1]; + des [0] = (unsigned char)(a); + des [1] = (unsigned char)((a * 64 + b * 192 + 128) >> 8); + + c = src[2]; + des 
[2] = (unsigned char)((b + c + 1) >> 1); + des [3] = (unsigned char)(c); } /**************************************************************************** @@ -534,27 +514,27 @@ void vp8_horizontal_line_3_4_scale_c * the current band. * ****************************************************************************/ -void vp8_vertical_band_3_4_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) -{ - unsigned int i; - unsigned int a, b, c; - unsigned char *des = dest; - - for (i = 0; i < dest_width; i++) - { - a = des [0]; - b = des [dest_pitch]; - des [dest_pitch] = (unsigned char)((a * 64 + b * 192 + 128) >> 8); - - c = des[dest_pitch*2]; - des [dest_pitch*2] = (unsigned char)((b + c + 1) >> 1); - - /* First line in next band... */ - a = des [dest_pitch*4]; - des [dest_pitch*3] = (unsigned char)((c * 192 + a * 64 + 128) >> 8); - - des++; - } +void vp8_vertical_band_3_4_scale_c(unsigned char *dest, + unsigned int dest_pitch, + unsigned int dest_width) { + unsigned int i; + unsigned int a, b, c; + unsigned char *des = dest; + + for (i = 0; i < dest_width; i++) { + a = des [0]; + b = des [dest_pitch]; + des [dest_pitch] = (unsigned char)((a * 64 + b * 192 + 128) >> 8); + + c = des[dest_pitch * 2]; + des [dest_pitch * 2] = (unsigned char)((b + c + 1) >> 1); + + /* First line in next band... */ + a = des [dest_pitch * 4]; + des [dest_pitch * 3] = (unsigned char)((c * 192 + a * 64 + 128) >> 8); + + des++; + } } /**************************************************************************** @@ -577,27 +557,27 @@ void vp8_vertical_band_3_4_scale_c(unsigned char *dest, unsigned int dest_pitch, * last band. 
* ****************************************************************************/ -void vp8_last_vertical_band_3_4_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) -{ - unsigned int i; - unsigned int a, b, c; - unsigned char *des = dest; +void vp8_last_vertical_band_3_4_scale_c(unsigned char *dest, + unsigned int dest_pitch, + unsigned int dest_width) { + unsigned int i; + unsigned int a, b, c; + unsigned char *des = dest; - for (i = 0; i < dest_width; ++i) - { - a = des [0]; - b = des [dest_pitch]; + for (i = 0; i < dest_width; ++i) { + a = des [0]; + b = des [dest_pitch]; - des [dest_pitch] = (unsigned char)((a * 64 + b * 192 + 128) >> 8); + des [dest_pitch] = (unsigned char)((a * 64 + b * 192 + 128) >> 8); - c = des[dest_pitch*2]; - des [dest_pitch*2] = (unsigned char)((b + c + 1) >> 1); + c = des[dest_pitch * 2]; + des [dest_pitch * 2] = (unsigned char)((b + c + 1) >> 1); - /* No other line for interplation of this line, so .. */ - des [dest_pitch*3] = (unsigned char)(c); + /* No other line for interplation of this line, so .. */ + des [dest_pitch * 3] = (unsigned char)(c); - des++; - } + des++; + } } /**************************************************************************** @@ -619,34 +599,29 @@ void vp8_last_vertical_band_3_4_scale_c(unsigned char *dest, unsigned int dest_p * SPECIAL NOTES : None. 
* ****************************************************************************/ -void vp8_horizontal_line_1_2_scale_c -( - const unsigned char *source, - unsigned int source_width, - unsigned char *dest, - unsigned int dest_width -) -{ - unsigned int i; - unsigned int a, b; - unsigned char *des = dest; - const unsigned char *src = source; - - (void) dest_width; - - for (i = 0; i < source_width - 1; i += 1) - { - a = src[0]; - b = src[1]; - des [0] = (unsigned char)(a); - des [1] = (unsigned char)((a + b + 1) >> 1); - src += 1; - des += 2; - } - +void vp8_horizontal_line_1_2_scale_c(const unsigned char *source, + unsigned int source_width, + unsigned char *dest, + unsigned int dest_width) { + unsigned int i; + unsigned int a, b; + unsigned char *des = dest; + const unsigned char *src = source; + + (void) dest_width; + + for (i = 0; i < source_width - 1; i += 1) { a = src[0]; + b = src[1]; des [0] = (unsigned char)(a); - des [1] = (unsigned char)(a); + des [1] = (unsigned char)((a + b + 1) >> 1); + src += 1; + des += 2; + } + + a = src[0]; + des [0] = (unsigned char)(a); + des [1] = (unsigned char)(a); } /**************************************************************************** @@ -668,21 +643,21 @@ void vp8_horizontal_line_1_2_scale_c * the current band. 
* ****************************************************************************/ -void vp8_vertical_band_1_2_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) -{ - unsigned int i; - unsigned int a, b; - unsigned char *des = dest; +void vp8_vertical_band_1_2_scale_c(unsigned char *dest, + unsigned int dest_pitch, + unsigned int dest_width) { + unsigned int i; + unsigned int a, b; + unsigned char *des = dest; - for (i = 0; i < dest_width; i++) - { - a = des [0]; - b = des [dest_pitch * 2]; + for (i = 0; i < dest_width; i++) { + a = des [0]; + b = des [dest_pitch * 2]; - des[dest_pitch] = (unsigned char)((a + b + 1) >> 1); + des[dest_pitch] = (unsigned char)((a + b + 1) >> 1); - des++; - } + des++; + } } /**************************************************************************** @@ -705,16 +680,16 @@ void vp8_vertical_band_1_2_scale_c(unsigned char *dest, unsigned int dest_pitch, * last band. * ****************************************************************************/ -void vp8_last_vertical_band_1_2_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) -{ - unsigned int i; - unsigned char *des = dest; - - for (i = 0; i < dest_width; ++i) - { - des[dest_pitch] = des[0]; - des++; - } +void vp8_last_vertical_band_1_2_scale_c(unsigned char *dest, + unsigned int dest_pitch, + unsigned int dest_width) { + unsigned int i; + unsigned char *des = dest; + + for (i = 0; i < dest_width; ++i) { + des[dest_pitch] = des[0]; + des++; + } } @@ -740,67 +715,64 @@ void vp8_last_vertical_band_1_2_scale_c(unsigned char *dest, unsigned int dest_p * SPECIAL NOTES : None. 
* ****************************************************************************/ -void vp8_horizontal_line_5_4_scale_c -( - const unsigned char *source, - unsigned int source_width, - unsigned char *dest, - unsigned int dest_width -) -{ - unsigned i; - unsigned int a, b, c, d, e; - unsigned char *des = dest; - const unsigned char *src = source; - - (void) dest_width; - - for (i = 0; i < source_width; i += 5) - { - a = src[0]; - b = src[1]; - c = src[2]; - d = src[3]; - e = src[4]; - - des[0] = (unsigned char) a; - des[1] = (unsigned char)((b * 192 + c * 64 + 128) >> 8); - des[2] = (unsigned char)((c * 128 + d * 128 + 128) >> 8); - des[3] = (unsigned char)((d * 64 + e * 192 + 128) >> 8); - - src += 5; - des += 4; - } +void vp8_horizontal_line_5_4_scale_c(const unsigned char *source, + unsigned int source_width, + unsigned char *dest, + unsigned int dest_width) { + unsigned i; + unsigned int a, b, c, d, e; + unsigned char *des = dest; + const unsigned char *src = source; + + (void) dest_width; + + for (i = 0; i < source_width; i += 5) { + a = src[0]; + b = src[1]; + c = src[2]; + d = src[3]; + e = src[4]; + + des[0] = (unsigned char) a; + des[1] = (unsigned char)((b * 192 + c * 64 + 128) >> 8); + des[2] = (unsigned char)((c * 128 + d * 128 + 128) >> 8); + des[3] = (unsigned char)((d * 64 + e * 192 + 128) >> 8); + + src += 5; + des += 4; + } } -void vp8_vertical_band_5_4_scale_c(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) -{ - unsigned int i; - unsigned int a, b, c, d, e; - unsigned char *des = dest; - unsigned char *src = source; +void vp8_vertical_band_5_4_scale_c(unsigned char *source, + unsigned int src_pitch, + unsigned char *dest, + unsigned int dest_pitch, + unsigned int dest_width) { + unsigned int i; + unsigned int a, b, c, d, e; + unsigned char *des = dest; + unsigned char *src = source; - for (i = 0; i < dest_width; i++) - { + for (i = 0; i < dest_width; i++) { - a = src[0 * 
src_pitch]; - b = src[1 * src_pitch]; - c = src[2 * src_pitch]; - d = src[3 * src_pitch]; - e = src[4 * src_pitch]; + a = src[0 * src_pitch]; + b = src[1 * src_pitch]; + c = src[2 * src_pitch]; + d = src[3 * src_pitch]; + e = src[4 * src_pitch]; - des[0 * dest_pitch] = (unsigned char) a; - des[1 * dest_pitch] = (unsigned char)((b * 192 + c * 64 + 128) >> 8); - des[2 * dest_pitch] = (unsigned char)((c * 128 + d * 128 + 128) >> 8); - des[3 * dest_pitch] = (unsigned char)((d * 64 + e * 192 + 128) >> 8); + des[0 * dest_pitch] = (unsigned char) a; + des[1 * dest_pitch] = (unsigned char)((b * 192 + c * 64 + 128) >> 8); + des[2 * dest_pitch] = (unsigned char)((c * 128 + d * 128 + 128) >> 8); + des[3 * dest_pitch] = (unsigned char)((d * 64 + e * 192 + 128) >> 8); - src ++; - des ++; + src++; + des++; - } + } } @@ -824,63 +796,60 @@ void vp8_vertical_band_5_4_scale_c(unsigned char *source, unsigned int src_pitch * * ****************************************************************************/ -void vp8_horizontal_line_5_3_scale_c -( - const unsigned char *source, - unsigned int source_width, - unsigned char *dest, - unsigned int dest_width -) -{ - unsigned int i; - unsigned int a, b, c, d , e; - unsigned char *des = dest; - const unsigned char *src = source; - - (void) dest_width; - - for (i = 0; i < source_width; i += 5) - { - a = src[0]; - b = src[1]; - c = src[2]; - d = src[3]; - e = src[4]; - - des[0] = (unsigned char) a; - des[1] = (unsigned char)((b * 85 + c * 171 + 128) >> 8); - des[2] = (unsigned char)((d * 171 + e * 85 + 128) >> 8); - - src += 5; - des += 3; - } +void vp8_horizontal_line_5_3_scale_c(const unsigned char *source, + unsigned int source_width, + unsigned char *dest, + unsigned int dest_width) { + unsigned int i; + unsigned int a, b, c, d, e; + unsigned char *des = dest; + const unsigned char *src = source; + + (void) dest_width; + + for (i = 0; i < source_width; i += 5) { + a = src[0]; + b = src[1]; + c = src[2]; + d = src[3]; + e = src[4]; + + des[0] 
= (unsigned char) a; + des[1] = (unsigned char)((b * 85 + c * 171 + 128) >> 8); + des[2] = (unsigned char)((d * 171 + e * 85 + 128) >> 8); + + src += 5; + des += 3; + } } -void vp8_vertical_band_5_3_scale_c(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) -{ - unsigned int i; - unsigned int a, b, c, d, e; - unsigned char *des = dest; - unsigned char *src = source; +void vp8_vertical_band_5_3_scale_c(unsigned char *source, + unsigned int src_pitch, + unsigned char *dest, + unsigned int dest_pitch, + unsigned int dest_width) { + unsigned int i; + unsigned int a, b, c, d, e; + unsigned char *des = dest; + unsigned char *src = source; - for (i = 0; i < dest_width; i++) - { + for (i = 0; i < dest_width; i++) { - a = src[0 * src_pitch]; - b = src[1 * src_pitch]; - c = src[2 * src_pitch]; - d = src[3 * src_pitch]; - e = src[4 * src_pitch]; + a = src[0 * src_pitch]; + b = src[1 * src_pitch]; + c = src[2 * src_pitch]; + d = src[3 * src_pitch]; + e = src[4 * src_pitch]; - des[0 * dest_pitch] = (unsigned char) a; - des[1 * dest_pitch] = (unsigned char)((b * 85 + c * 171 + 128) >> 8); - des[2 * dest_pitch] = (unsigned char)((d * 171 + e * 85 + 128) >> 8); + des[0 * dest_pitch] = (unsigned char) a; + des[1 * dest_pitch] = (unsigned char)((b * 85 + c * 171 + 128) >> 8); + des[2 * dest_pitch] = (unsigned char)((d * 171 + e * 85 + 128) >> 8); - src ++; - des ++; + src++; + des++; - } + } } /**************************************************************************** @@ -902,55 +871,52 @@ void vp8_vertical_band_5_3_scale_c(unsigned char *source, unsigned int src_pitch * SPECIAL NOTES : None. 
* ****************************************************************************/ -void vp8_horizontal_line_2_1_scale_c -( - const unsigned char *source, - unsigned int source_width, - unsigned char *dest, - unsigned int dest_width -) -{ - unsigned int i; - unsigned int a; - unsigned char *des = dest; - const unsigned char *src = source; - - (void) dest_width; - - for (i = 0; i < source_width; i += 2) - { - a = src[0]; - des [0] = (unsigned char)(a); - src += 2; - des += 1; - } - - - -} -void vp8_vertical_band_2_1_scale_c(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) -{ - (void) dest_pitch; - (void) src_pitch; - vpx_memcpy(dest, source, dest_width); +void vp8_horizontal_line_2_1_scale_c(const unsigned char *source, + unsigned int source_width, + unsigned char *dest, + unsigned int dest_width) { + unsigned int i; + unsigned int a; + unsigned char *des = dest; + const unsigned char *src = source; + + (void) dest_width; + + for (i = 0; i < source_width; i += 2) { + a = src[0]; + des [0] = (unsigned char)(a); + src += 2; + des += 1; + } } -void vp8_vertical_band_2_1_scale_i_c(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) -{ - int i; - int temp; - int width = dest_width; - - (void) dest_pitch; - - for (i = 0; i < width; i++) - { - temp = 8; - temp += source[i-(int)src_pitch] * 3; - temp += source[i] * 10; - temp += source[i+src_pitch] * 3; - temp >>= 4 ; - dest[i] = (unsigned char)(temp); - } +void vp8_vertical_band_2_1_scale_c(unsigned char *source, + unsigned int src_pitch, + unsigned char *dest, + unsigned int dest_pitch, + unsigned int dest_width) { + (void) dest_pitch; + (void) src_pitch; + vpx_memcpy(dest, source, dest_width); +} +void vp8_vertical_band_2_1_scale_i_c(unsigned char *source, + unsigned int src_pitch, + unsigned char *dest, + unsigned int dest_pitch, + unsigned int dest_width) { + int i; + int temp; + int width 
= dest_width; + + (void) dest_pitch; + + for (i = 0; i < width; i++) { + temp = 8; + temp += source[i - (int)src_pitch] * 3; + temp += source[i] * 10; + temp += source[i + src_pitch] * 3; + temp >>= 4; + dest[i] = (unsigned char)(temp); + } } diff --git a/vpx_scale/generic/vpxscale.c b/vpx_scale/generic/vpxscale.c index c02e4ff..7de85ca 100644 --- a/vpx_scale/generic/vpxscale.c +++ b/vpx_scale/generic/vpxscale.c @@ -20,23 +20,22 @@ /**************************************************************************** * Header Files ****************************************************************************/ -#include "vpx_rtcd.h" +#include "./vpx_rtcd.h" #include "vpx_mem/vpx_mem.h" #include "vpx_scale/yv12config.h" #include "vpx_scale/scale_mode.h" -typedef struct -{ - int expanded_frame_width; - int expanded_frame_height; +typedef struct { + int expanded_frame_width; + int expanded_frame_height; - int HScale; - int HRatio; - int VScale; - int VRatio; + int HScale; + int HRatio; + int VScale; + int VRatio; - YV12_BUFFER_CONFIG *src_yuv_config; - YV12_BUFFER_CONFIG *dst_yuv_config; + YV12_BUFFER_CONFIG *src_yuv_config; + YV12_BUFFER_CONFIG *dst_yuv_config; } SCALE_VARS; @@ -60,15 +59,14 @@ typedef struct ****************************************************************************/ static void horizontal_line_copy( - const unsigned char *source, - unsigned int source_width, - unsigned char *dest, - unsigned int dest_width -) -{ - (void) dest_width; - - duck_memcpy(dest, source, source_width); + const unsigned char *source, + unsigned int source_width, + unsigned char *dest, + unsigned int dest_width +) { + (void) dest_width; + + duck_memcpy(dest, source, source_width); } /**************************************************************************** * @@ -90,16 +88,15 @@ void horizontal_line_copy( ****************************************************************************/ static void null_scale( - unsigned char *dest, - unsigned int dest_pitch, - unsigned int dest_width -) 
-{ - (void) dest; - (void) dest_pitch; - (void) dest_width; - - return; + unsigned char *dest, + unsigned int dest_pitch, + unsigned int dest_width +) { + (void) dest; + (void) dest_pitch; + (void) dest_width; + + return; } /**************************************************************************** @@ -127,35 +124,33 @@ void null_scale( static void scale1d_2t1_i ( - const unsigned char *source, - int source_step, - unsigned int source_scale, - unsigned int source_length, - unsigned char *dest, - int dest_step, - unsigned int dest_scale, - unsigned int dest_length -) -{ - unsigned int i, j; - unsigned int temp; - int source_pitch = source_step; - (void) source_length; - (void) source_scale; - (void) dest_scale; - - source_step *= 2; - dest[0] = source[0]; - - for (i = dest_step, j = source_step; i < dest_length * dest_step; i += dest_step, j += source_step) - { - temp = 8; - temp += 3 * source[j-source_pitch]; - temp += 10 * source[j]; - temp += 3 * source[j+source_pitch]; - temp >>= 4; - dest[i] = (char)(temp); - } + const unsigned char *source, + int source_step, + unsigned int source_scale, + unsigned int source_length, + unsigned char *dest, + int dest_step, + unsigned int dest_scale, + unsigned int dest_length +) { + unsigned int i, j; + unsigned int temp; + int source_pitch = source_step; + (void) source_length; + (void) source_scale; + (void) dest_scale; + + source_step *= 2; + dest[0] = source[0]; + + for (i = dest_step, j = source_step; i < dest_length * dest_step; i += dest_step, j += source_step) { + temp = 8; + temp += 3 * source[j - source_pitch]; + temp += 10 * source[j]; + temp += 3 * source[j + source_pitch]; + temp >>= 4; + dest[i] = (char)(temp); + } } /**************************************************************************** @@ -183,27 +178,26 @@ void scale1d_2t1_i static void scale1d_2t1_ps ( - const unsigned char *source, - int source_step, - unsigned int source_scale, - unsigned int source_length, - unsigned char *dest, - int dest_step, - 
unsigned int dest_scale, - unsigned int dest_length -) -{ - unsigned int i, j; - - (void) source_length; - (void) source_scale; - (void) dest_scale; - - source_step *= 2; - j = 0; - - for (i = 0; i < dest_length * dest_step; i += dest_step, j += source_step) - dest[i] = source[j]; + const unsigned char *source, + int source_step, + unsigned int source_scale, + unsigned int source_length, + unsigned char *dest, + int dest_step, + unsigned int dest_scale, + unsigned int dest_length +) { + unsigned int i, j; + + (void) source_length; + (void) source_scale; + (void) dest_scale; + + source_step *= 2; + j = 0; + + for (i = 0; i < dest_length * dest_step; i += dest_step, j += source_step) + dest[i] = source[j]; } /**************************************************************************** * @@ -230,45 +224,42 @@ void scale1d_2t1_ps static void scale1d_c ( - const unsigned char *source, - int source_step, - unsigned int source_scale, - unsigned int source_length, - unsigned char *dest, - int dest_step, - unsigned int dest_scale, - unsigned int dest_length -) -{ - unsigned int i; - unsigned int round_value = dest_scale / 2; - unsigned int left_modifier = dest_scale; - unsigned int right_modifier = 0; - unsigned char left_pixel = *source; - unsigned char right_pixel = *(source + source_step); - - (void) source_length; - - /* These asserts are needed if there are boundary issues... 
*/ - /*assert ( dest_scale > source_scale );*/ - /*assert ( (source_length-1) * dest_scale >= (dest_length-1) * source_scale );*/ - - for (i = 0; i < dest_length * dest_step; i += dest_step) - { - dest[i] = (char)((left_modifier * left_pixel + right_modifier * right_pixel + round_value) / dest_scale); - - right_modifier += source_scale; - - while (right_modifier > dest_scale) - { - right_modifier -= dest_scale; - source += source_step; - left_pixel = *source; - right_pixel = *(source + source_step); - } - - left_modifier = dest_scale - right_modifier; + const unsigned char *source, + int source_step, + unsigned int source_scale, + unsigned int source_length, + unsigned char *dest, + int dest_step, + unsigned int dest_scale, + unsigned int dest_length +) { + unsigned int i; + unsigned int round_value = dest_scale / 2; + unsigned int left_modifier = dest_scale; + unsigned int right_modifier = 0; + unsigned char left_pixel = *source; + unsigned char right_pixel = *(source + source_step); + + (void) source_length; + + /* These asserts are needed if there are boundary issues... 
*/ + /*assert ( dest_scale > source_scale );*/ + /*assert ( (source_length-1) * dest_scale >= (dest_length-1) * source_scale );*/ + + for (i = 0; i < dest_length * dest_step; i += dest_step) { + dest[i] = (char)((left_modifier * left_pixel + right_modifier * right_pixel + round_value) / dest_scale); + + right_modifier += source_scale; + + while (right_modifier > dest_scale) { + right_modifier -= dest_scale; + source += source_step; + left_pixel = *source; + right_pixel = *(source + source_step); } + + left_modifier = dest_scale - right_modifier; + } } /**************************************************************************** @@ -304,246 +295,221 @@ void scale1d_c static void Scale2D ( - /*const*/ - unsigned char *source, - int source_pitch, - unsigned int source_width, - unsigned int source_height, - unsigned char *dest, - int dest_pitch, - unsigned int dest_width, - unsigned int dest_height, - unsigned char *temp_area, - unsigned char temp_area_height, - unsigned int hscale, - unsigned int hratio, - unsigned int vscale, - unsigned int vratio, - unsigned int interlaced -) -{ - /*unsigned*/ - int i, j, k; - int bands; - int dest_band_height; - int source_band_height; - - typedef void (*Scale1D)(const unsigned char * source, int source_step, unsigned int source_scale, unsigned int source_length, - unsigned char * dest, int dest_step, unsigned int dest_scale, unsigned int dest_length); - - Scale1D Scale1Dv = scale1d_c; - Scale1D Scale1Dh = scale1d_c; - - void (*horiz_line_scale)(const unsigned char *, unsigned int, unsigned char *, unsigned int) = NULL; - void (*vert_band_scale)(unsigned char *, unsigned int, unsigned char *, unsigned int, unsigned int) = NULL; - - int ratio_scalable = 1; - int interpolation = 0; - - unsigned char *source_base; /* = (unsigned char *) ((source_pitch >= 0) ? 
source : (source + ((source_height-1) * source_pitch))); */ - unsigned char *line_src; - - - source_base = (unsigned char *)source; - - if (source_pitch < 0) - { - int offset; - - offset = (source_height - 1); - offset *= source_pitch; - - source_base += offset; - } - - /* find out the ratio for each direction */ - switch (hratio * 10 / hscale) - { + /*const*/ + unsigned char *source, + int source_pitch, + unsigned int source_width, + unsigned int source_height, + unsigned char *dest, + int dest_pitch, + unsigned int dest_width, + unsigned int dest_height, + unsigned char *temp_area, + unsigned char temp_area_height, + unsigned int hscale, + unsigned int hratio, + unsigned int vscale, + unsigned int vratio, + unsigned int interlaced +) { + /*unsigned*/ + int i, j, k; + int bands; + int dest_band_height; + int source_band_height; + + typedef void (*Scale1D)(const unsigned char * source, int source_step, unsigned int source_scale, unsigned int source_length, + unsigned char * dest, int dest_step, unsigned int dest_scale, unsigned int dest_length); + + Scale1D Scale1Dv = scale1d_c; + Scale1D Scale1Dh = scale1d_c; + + void (*horiz_line_scale)(const unsigned char *, unsigned int, unsigned char *, unsigned int) = NULL; + void (*vert_band_scale)(unsigned char *, unsigned int, unsigned char *, unsigned int, unsigned int) = NULL; + + int ratio_scalable = 1; + int interpolation = 0; + + unsigned char *source_base; /* = (unsigned char *) ((source_pitch >= 0) ? 
source : (source + ((source_height-1) * source_pitch))); */ + unsigned char *line_src; + + + source_base = (unsigned char *)source; + + if (source_pitch < 0) { + int offset; + + offset = (source_height - 1); + offset *= source_pitch; + + source_base += offset; + } + + /* find out the ratio for each direction */ + switch (hratio * 10 / hscale) { case 8: - /* 4-5 Scale in Width direction */ - horiz_line_scale = vp8_horizontal_line_5_4_scale; - break; + /* 4-5 Scale in Width direction */ + horiz_line_scale = vp8_horizontal_line_5_4_scale; + break; case 6: - /* 3-5 Scale in Width direction */ - horiz_line_scale = vp8_horizontal_line_5_3_scale; - break; + /* 3-5 Scale in Width direction */ + horiz_line_scale = vp8_horizontal_line_5_3_scale; + break; case 5: - /* 1-2 Scale in Width direction */ - horiz_line_scale = vp8_horizontal_line_2_1_scale; - break; + /* 1-2 Scale in Width direction */ + horiz_line_scale = vp8_horizontal_line_2_1_scale; + break; default: - /* The ratio is not acceptable now */ - /* throw("The ratio is not acceptable for now!"); */ - ratio_scalable = 0; - break; - } + /* The ratio is not acceptable now */ + /* throw("The ratio is not acceptable for now!"); */ + ratio_scalable = 0; + break; + } - switch (vratio * 10 / vscale) - { + switch (vratio * 10 / vscale) { case 8: - /* 4-5 Scale in vertical direction */ - vert_band_scale = vp8_vertical_band_5_4_scale; - source_band_height = 5; - dest_band_height = 4; - break; + /* 4-5 Scale in vertical direction */ + vert_band_scale = vp8_vertical_band_5_4_scale; + source_band_height = 5; + dest_band_height = 4; + break; case 6: - /* 3-5 Scale in vertical direction */ - vert_band_scale = vp8_vertical_band_5_3_scale; - source_band_height = 5; - dest_band_height = 3; - break; + /* 3-5 Scale in vertical direction */ + vert_band_scale = vp8_vertical_band_5_3_scale; + source_band_height = 5; + dest_band_height = 3; + break; case 5: - /* 1-2 Scale in vertical direction */ + /* 1-2 Scale in vertical direction */ - if 
(interlaced) - { - /* if the content is interlaced, point sampling is used */ - vert_band_scale = vp8_vertical_band_2_1_scale; - } - else - { + if (interlaced) { + /* if the content is interlaced, point sampling is used */ + vert_band_scale = vp8_vertical_band_2_1_scale; + } else { - interpolation = 1; - /* if the content is progressive, interplo */ - vert_band_scale = vp8_vertical_band_2_1_scale_i; + interpolation = 1; + /* if the content is progressive, interplo */ + vert_band_scale = vp8_vertical_band_2_1_scale_i; - } + } - source_band_height = 2; - dest_band_height = 1; - break; + source_band_height = 2; + dest_band_height = 1; + break; default: - /* The ratio is not acceptable now */ - /* throw("The ratio is not acceptable for now!"); */ - ratio_scalable = 0; - break; + /* The ratio is not acceptable now */ + /* throw("The ratio is not acceptable for now!"); */ + ratio_scalable = 0; + break; + } + + if (ratio_scalable) { + if (source_height == dest_height) { + /* for each band of the image */ + for (k = 0; k < (int)dest_height; k++) { + horiz_line_scale(source, source_width, dest, dest_width); + source += source_pitch; + dest += dest_pitch; + } + + return; } - if (ratio_scalable) - { - if (source_height == dest_height) - { - /* for each band of the image */ - for (k = 0; k < (int)dest_height; k++) - { - horiz_line_scale(source, source_width, dest, dest_width); - source += source_pitch; - dest += dest_pitch; - } - - return; - } - - if (interpolation) - { - if (source < source_base) - source = source_base; - - horiz_line_scale(source, source_width, temp_area, dest_width); - } - - for (k = 0; k < (int)(dest_height + dest_band_height - 1) / dest_band_height; k++) - { - /* scale one band horizontally */ - for (i = 0; i < source_band_height; i++) - { - /* Trap case where we could read off the base of the source buffer */ - - line_src = (unsigned char *)source + i * source_pitch; - - if (line_src < source_base) - line_src = source_base; - - horiz_line_scale(line_src, 
source_width, - temp_area + (i + 1)*dest_pitch, dest_width); - } - - /* Vertical scaling is in place */ - vert_band_scale(temp_area + dest_pitch, dest_pitch, dest, dest_pitch, dest_width); - - if (interpolation) - vpx_memcpy(temp_area, temp_area + source_band_height * dest_pitch, dest_width); - - /* Next band... */ - source += (unsigned long) source_band_height * source_pitch; - dest += (unsigned long) dest_band_height * dest_pitch; - } - - return; + if (interpolation) { + if (source < source_base) + source = source_base; + + horiz_line_scale(source, source_width, temp_area, dest_width); } - if (hscale == 2 && hratio == 1) - Scale1Dh = scale1d_2t1_ps; + for (k = 0; k < (int)(dest_height + dest_band_height - 1) / dest_band_height; k++) { + /* scale one band horizontally */ + for (i = 0; i < source_band_height; i++) { + /* Trap case where we could read off the base of the source buffer */ - if (vscale == 2 && vratio == 1) - { - if (interlaced) - Scale1Dv = scale1d_2t1_ps; - else - Scale1Dv = scale1d_2t1_i; - } + line_src = (unsigned char *)source + i * source_pitch; - if (source_height == dest_height) - { - /* for each band of the image */ - for (k = 0; k < (int)dest_height; k++) - { - Scale1Dh(source, 1, hscale, source_width + 1, dest, 1, hratio, dest_width); - source += source_pitch; - dest += dest_pitch; - } - - return; - } + if (line_src < source_base) + line_src = source_base; + + horiz_line_scale(line_src, source_width, + temp_area + (i + 1)*dest_pitch, dest_width); + } + + /* Vertical scaling is in place */ + vert_band_scale(temp_area + dest_pitch, dest_pitch, dest, dest_pitch, dest_width); - if (dest_height > source_height) - { - dest_band_height = temp_area_height - 1; - source_band_height = dest_band_height * source_height / dest_height; + if (interpolation) + vpx_memcpy(temp_area, temp_area + source_band_height * dest_pitch, dest_width); + + /* Next band... 
*/ + source += (unsigned long) source_band_height * source_pitch; + dest += (unsigned long) dest_band_height * dest_pitch; } + + return; + } + + if (hscale == 2 && hratio == 1) + Scale1Dh = scale1d_2t1_ps; + + if (vscale == 2 && vratio == 1) { + if (interlaced) + Scale1Dv = scale1d_2t1_ps; else - { - source_band_height = temp_area_height - 1; - dest_band_height = source_band_height * vratio / vscale; + Scale1Dv = scale1d_2t1_i; + } + + if (source_height == dest_height) { + /* for each band of the image */ + for (k = 0; k < (int)dest_height; k++) { + Scale1Dh(source, 1, hscale, source_width + 1, dest, 1, hratio, dest_width); + source += source_pitch; + dest += dest_pitch; } - /* first row needs to be done so that we can stay one row ahead for vertical zoom */ - Scale1Dh(source, 1, hscale, source_width + 1, temp_area, 1, hratio, dest_width); + return; + } + + if (dest_height > source_height) { + dest_band_height = temp_area_height - 1; + source_band_height = dest_band_height * source_height / dest_height; + } else { + source_band_height = temp_area_height - 1; + dest_band_height = source_band_height * vratio / vscale; + } - /* for each band of the image */ - bands = (dest_height + dest_band_height - 1) / dest_band_height; - - for (k = 0; k < bands; k++) - { - /* scale one band horizontally */ - for (i = 1; i < source_band_height + 1; i++) - { - if (k * source_band_height + i < (int) source_height) - { - Scale1Dh(source + i * source_pitch, 1, hscale, source_width + 1, - temp_area + i * dest_pitch, 1, hratio, dest_width); - } - else /* Duplicate the last row */ - { - /* copy temp_area row 0 over from last row in the past */ - duck_memcpy(temp_area + i * dest_pitch, temp_area + (i - 1)*dest_pitch, dest_pitch); - } - } - - /* scale one band vertically */ - for (j = 0; j < (int)dest_width; j++) - { - Scale1Dv(&temp_area[j], dest_pitch, vscale, source_band_height + 1, - &dest[j], dest_pitch, vratio, dest_band_height); - } + /* first row needs to be done so that we can stay 
one row ahead for vertical zoom */ + Scale1Dh(source, 1, hscale, source_width + 1, temp_area, 1, hratio, dest_width); + /* for each band of the image */ + bands = (dest_height + dest_band_height - 1) / dest_band_height; + + for (k = 0; k < bands; k++) { + /* scale one band horizontally */ + for (i = 1; i < source_band_height + 1; i++) { + if (k * source_band_height + i < (int) source_height) { + Scale1Dh(source + i * source_pitch, 1, hscale, source_width + 1, + temp_area + i * dest_pitch, 1, hratio, dest_width); + } else { /* Duplicate the last row */ /* copy temp_area row 0 over from last row in the past */ - duck_memcpy(temp_area, temp_area + source_band_height * dest_pitch, dest_pitch); + duck_memcpy(temp_area + i * dest_pitch, temp_area + (i - 1)*dest_pitch, dest_pitch); + } + } - /* move to the next band */ - source += source_band_height * source_pitch; - dest += dest_band_height * dest_pitch; + /* scale one band vertically */ + for (j = 0; j < (int)dest_width; j++) { + Scale1Dv(&temp_area[j], dest_pitch, vscale, source_band_height + 1, + &dest[j], dest_pitch, vratio, dest_band_height); } + + /* copy temp_area row 0 over from last row in the past */ + duck_memcpy(temp_area, temp_area + source_band_height * dest_pitch, dest_pitch); + + /* move to the next band */ + source += source_band_height * source_pitch; + dest += dest_band_height * dest_pitch; + } } /**************************************************************************** @@ -572,57 +538,56 @@ void Scale2D ****************************************************************************/ void vp8_scale_frame ( - YV12_BUFFER_CONFIG *src, - YV12_BUFFER_CONFIG *dst, - unsigned char *temp_area, - unsigned char temp_height, - unsigned int hscale, - unsigned int hratio, - unsigned int vscale, - unsigned int vratio, - unsigned int interlaced -) -{ - int i; - int dw = (hscale - 1 + src->y_width * hratio) / hscale; - int dh = (vscale - 1 + src->y_height * vratio) / vscale; - - /* call our internal scaling 
routines!! */ - Scale2D((unsigned char *) src->y_buffer, src->y_stride, src->y_width, src->y_height, - (unsigned char *) dst->y_buffer, dst->y_stride, dw, dh, - temp_area, temp_height, hscale, hratio, vscale, vratio, interlaced); - - if (dw < (int)dst->y_width) - for (i = 0; i < dh; i++) - duck_memset(dst->y_buffer + i * dst->y_stride + dw - 1, dst->y_buffer[i*dst->y_stride+dw-2], dst->y_width - dw + 1); - - if (dh < (int)dst->y_height) - for (i = dh - 1; i < (int)dst->y_height; i++) - duck_memcpy(dst->y_buffer + i * dst->y_stride, dst->y_buffer + (dh - 2) * dst->y_stride, dst->y_width + 1); - - Scale2D((unsigned char *) src->u_buffer, src->uv_stride, src->uv_width, src->uv_height, - (unsigned char *) dst->u_buffer, dst->uv_stride, dw / 2, dh / 2, - temp_area, temp_height, hscale, hratio, vscale, vratio, interlaced); - - if (dw / 2 < (int)dst->uv_width) - for (i = 0; i < dst->uv_height; i++) - duck_memset(dst->u_buffer + i * dst->uv_stride + dw / 2 - 1, dst->u_buffer[i*dst->uv_stride+dw/2-2], dst->uv_width - dw / 2 + 1); - - if (dh / 2 < (int)dst->uv_height) - for (i = dh / 2 - 1; i < (int)dst->y_height / 2; i++) - duck_memcpy(dst->u_buffer + i * dst->uv_stride, dst->u_buffer + (dh / 2 - 2)*dst->uv_stride, dst->uv_width); - - Scale2D((unsigned char *) src->v_buffer, src->uv_stride, src->uv_width, src->uv_height, - (unsigned char *) dst->v_buffer, dst->uv_stride, dw / 2, dh / 2, - temp_area, temp_height, hscale, hratio, vscale, vratio, interlaced); - - if (dw / 2 < (int)dst->uv_width) - for (i = 0; i < dst->uv_height; i++) - duck_memset(dst->v_buffer + i * dst->uv_stride + dw / 2 - 1, dst->v_buffer[i*dst->uv_stride+dw/2-2], dst->uv_width - dw / 2 + 1); - - if (dh / 2 < (int) dst->uv_height) - for (i = dh / 2 - 1; i < (int)dst->y_height / 2; i++) - duck_memcpy(dst->v_buffer + i * dst->uv_stride, dst->v_buffer + (dh / 2 - 2)*dst->uv_stride, dst->uv_width); + YV12_BUFFER_CONFIG *src, + YV12_BUFFER_CONFIG *dst, + unsigned char *temp_area, + unsigned char temp_height, + 
unsigned int hscale, + unsigned int hratio, + unsigned int vscale, + unsigned int vratio, + unsigned int interlaced +) { + int i; + int dw = (hscale - 1 + src->y_width * hratio) / hscale; + int dh = (vscale - 1 + src->y_height * vratio) / vscale; + + /* call our internal scaling routines!! */ + Scale2D((unsigned char *) src->y_buffer, src->y_stride, src->y_width, src->y_height, + (unsigned char *) dst->y_buffer, dst->y_stride, dw, dh, + temp_area, temp_height, hscale, hratio, vscale, vratio, interlaced); + + if (dw < (int)dst->y_width) + for (i = 0; i < dh; i++) + duck_memset(dst->y_buffer + i * dst->y_stride + dw - 1, dst->y_buffer[i * dst->y_stride + dw - 2], dst->y_width - dw + 1); + + if (dh < (int)dst->y_height) + for (i = dh - 1; i < (int)dst->y_height; i++) + duck_memcpy(dst->y_buffer + i * dst->y_stride, dst->y_buffer + (dh - 2) * dst->y_stride, dst->y_width + 1); + + Scale2D((unsigned char *) src->u_buffer, src->uv_stride, src->uv_width, src->uv_height, + (unsigned char *) dst->u_buffer, dst->uv_stride, dw / 2, dh / 2, + temp_area, temp_height, hscale, hratio, vscale, vratio, interlaced); + + if (dw / 2 < (int)dst->uv_width) + for (i = 0; i < dst->uv_height; i++) + duck_memset(dst->u_buffer + i * dst->uv_stride + dw / 2 - 1, dst->u_buffer[i * dst->uv_stride + dw / 2 - 2], dst->uv_width - dw / 2 + 1); + + if (dh / 2 < (int)dst->uv_height) + for (i = dh / 2 - 1; i < (int)dst->y_height / 2; i++) + duck_memcpy(dst->u_buffer + i * dst->uv_stride, dst->u_buffer + (dh / 2 - 2)*dst->uv_stride, dst->uv_width); + + Scale2D((unsigned char *) src->v_buffer, src->uv_stride, src->uv_width, src->uv_height, + (unsigned char *) dst->v_buffer, dst->uv_stride, dw / 2, dh / 2, + temp_area, temp_height, hscale, hratio, vscale, vratio, interlaced); + + if (dw / 2 < (int)dst->uv_width) + for (i = 0; i < dst->uv_height; i++) + duck_memset(dst->v_buffer + i * dst->uv_stride + dw / 2 - 1, dst->v_buffer[i * dst->uv_stride + dw / 2 - 2], dst->uv_width - dw / 2 + 1); + + if (dh / 2 < 
(int) dst->uv_height) + for (i = dh / 2 - 1; i < (int)dst->y_height / 2; i++) + duck_memcpy(dst->v_buffer + i * dst->uv_stride, dst->v_buffer + (dh / 2 - 2)*dst->uv_stride, dst->uv_width); } /**************************************************************************** * @@ -651,183 +616,177 @@ void vp8_scale_frame static int any_ratio_2d_scale ( - SCALE_VARS *si, - const unsigned char *source, - int source_pitch, - unsigned int source_width, - unsigned int source_height, - unsigned char *dest, - unsigned int dest_pitch, - unsigned int dest_width, - unsigned int dest_height -) -{ - unsigned int i, k; - unsigned int src_band_height = 0; - unsigned int dest_band_height = 0; - - /* suggested scale factors */ - int hs = si->HScale; - int hr = si->HRatio; - int vs = si->VScale; - int vr = si->VRatio; - - /* assume the ratios are scalable instead of should be centered */ - int ratio_scalable = 1; - - const unsigned char *source_base = ((source_pitch >= 0) ? source : (source + ((source_height - 1) * source_pitch))); - const unsigned char *line_src; - - void (*horiz_line_scale)(const unsigned char *, unsigned int, unsigned char *, unsigned int) = NULL; - void (*vert_band_scale)(unsigned char *, unsigned int, unsigned int) = NULL; - void (*last_vert_band_scale)(unsigned char *, unsigned int, unsigned int) = NULL; - - (void) si; - - /* find out the ratio for each direction */ - switch (hr * 30 / hs) - { + SCALE_VARS *si, + const unsigned char *source, + int source_pitch, + unsigned int source_width, + unsigned int source_height, + unsigned char *dest, + unsigned int dest_pitch, + unsigned int dest_width, + unsigned int dest_height +) { + unsigned int i, k; + unsigned int src_band_height = 0; + unsigned int dest_band_height = 0; + + /* suggested scale factors */ + int hs = si->HScale; + int hr = si->HRatio; + int vs = si->VScale; + int vr = si->VRatio; + + /* assume the ratios are scalable instead of should be centered */ + int ratio_scalable = 1; + + const unsigned char 
*source_base = ((source_pitch >= 0) ? source : (source + ((source_height - 1) * source_pitch))); + const unsigned char *line_src; + + void (*horiz_line_scale)(const unsigned char *, unsigned int, unsigned char *, unsigned int) = NULL; + void (*vert_band_scale)(unsigned char *, unsigned int, unsigned int) = NULL; + void (*last_vert_band_scale)(unsigned char *, unsigned int, unsigned int) = NULL; + + (void) si; + + /* find out the ratio for each direction */ + switch (hr * 30 / hs) { case 24: - /* 4-5 Scale in Width direction */ - horiz_line_scale = vp8_horizontal_line_4_5_scale; - break; + /* 4-5 Scale in Width direction */ + horiz_line_scale = vp8_horizontal_line_4_5_scale; + break; case 22: - /* 3-4 Scale in Width direction */ - horiz_line_scale = vp8_horizontal_line_3_4_scale; - break; + /* 3-4 Scale in Width direction */ + horiz_line_scale = vp8_horizontal_line_3_4_scale; + break; case 20: - /* 4-5 Scale in Width direction */ - horiz_line_scale = vp8_horizontal_line_2_3_scale; - break; + /* 4-5 Scale in Width direction */ + horiz_line_scale = vp8_horizontal_line_2_3_scale; + break; case 18: - /* 3-5 Scale in Width direction */ - horiz_line_scale = vp8_horizontal_line_3_5_scale; - break; + /* 3-5 Scale in Width direction */ + horiz_line_scale = vp8_horizontal_line_3_5_scale; + break; case 15: - /* 1-2 Scale in Width direction */ - horiz_line_scale = vp8_horizontal_line_1_2_scale; - break; + /* 1-2 Scale in Width direction */ + horiz_line_scale = vp8_horizontal_line_1_2_scale; + break; case 30: - /* no scale in Width direction */ - horiz_line_scale = horizontal_line_copy; - break; + /* no scale in Width direction */ + horiz_line_scale = horizontal_line_copy; + break; default: - /* The ratio is not acceptable now */ - /* throw("The ratio is not acceptable for now!"); */ - ratio_scalable = 0; - break; - } + /* The ratio is not acceptable now */ + /* throw("The ratio is not acceptable for now!"); */ + ratio_scalable = 0; + break; + } - switch (vr * 30 / vs) - { + 
switch (vr * 30 / vs) { case 24: - /* 4-5 Scale in vertical direction */ - vert_band_scale = vp8_vertical_band_4_5_scale; - last_vert_band_scale = vp8_last_vertical_band_4_5_scale; - src_band_height = 4; - dest_band_height = 5; - break; + /* 4-5 Scale in vertical direction */ + vert_band_scale = vp8_vertical_band_4_5_scale; + last_vert_band_scale = vp8_last_vertical_band_4_5_scale; + src_band_height = 4; + dest_band_height = 5; + break; case 22: - /* 3-4 Scale in vertical direction */ - vert_band_scale = vp8_vertical_band_3_4_scale; - last_vert_band_scale = vp8_last_vertical_band_3_4_scale; - src_band_height = 3; - dest_band_height = 4; - break; + /* 3-4 Scale in vertical direction */ + vert_band_scale = vp8_vertical_band_3_4_scale; + last_vert_band_scale = vp8_last_vertical_band_3_4_scale; + src_band_height = 3; + dest_band_height = 4; + break; case 20: - /* 2-3 Scale in vertical direction */ - vert_band_scale = vp8_vertical_band_2_3_scale; - last_vert_band_scale = vp8_last_vertical_band_2_3_scale; - src_band_height = 2; - dest_band_height = 3; - break; + /* 2-3 Scale in vertical direction */ + vert_band_scale = vp8_vertical_band_2_3_scale; + last_vert_band_scale = vp8_last_vertical_band_2_3_scale; + src_band_height = 2; + dest_band_height = 3; + break; case 18: - /* 3-5 Scale in vertical direction */ - vert_band_scale = vp8_vertical_band_3_5_scale; - last_vert_band_scale = vp8_last_vertical_band_3_5_scale; - src_band_height = 3; - dest_band_height = 5; - break; + /* 3-5 Scale in vertical direction */ + vert_band_scale = vp8_vertical_band_3_5_scale; + last_vert_band_scale = vp8_last_vertical_band_3_5_scale; + src_band_height = 3; + dest_band_height = 5; + break; case 15: - /* 1-2 Scale in vertical direction */ - vert_band_scale = vp8_vertical_band_1_2_scale; - last_vert_band_scale = vp8_last_vertical_band_1_2_scale; - src_band_height = 1; - dest_band_height = 2; - break; + /* 1-2 Scale in vertical direction */ + vert_band_scale = vp8_vertical_band_1_2_scale; + 
last_vert_band_scale = vp8_last_vertical_band_1_2_scale; + src_band_height = 1; + dest_band_height = 2; + break; case 30: - /* no scale in Width direction */ - vert_band_scale = null_scale; - last_vert_band_scale = null_scale; - src_band_height = 4; - dest_band_height = 4; - break; + /* no scale in Width direction */ + vert_band_scale = null_scale; + last_vert_band_scale = null_scale; + src_band_height = 4; + dest_band_height = 4; + break; default: - /* The ratio is not acceptable now */ - /* throw("The ratio is not acceptable for now!"); */ - ratio_scalable = 0; - break; - } + /* The ratio is not acceptable now */ + /* throw("The ratio is not acceptable for now!"); */ + ratio_scalable = 0; + break; + } - if (ratio_scalable == 0) - return ratio_scalable; + if (ratio_scalable == 0) + return ratio_scalable; - horiz_line_scale(source, source_width, dest, dest_width); + horiz_line_scale(source, source_width, dest, dest_width); - /* except last band */ - for (k = 0; k < (dest_height + dest_band_height - 1) / dest_band_height - 1; k++) - { - /* scale one band horizontally */ - for (i = 1; i < src_band_height; i++) - { - /* Trap case where we could read off the base of the source buffer */ - line_src = source + i * source_pitch; + /* except last band */ + for (k = 0; k < (dest_height + dest_band_height - 1) / dest_band_height - 1; k++) { + /* scale one band horizontally */ + for (i = 1; i < src_band_height; i++) { + /* Trap case where we could read off the base of the source buffer */ + line_src = source + i * source_pitch; - if (line_src < source_base) - line_src = source_base; + if (line_src < source_base) + line_src = source_base; - horiz_line_scale(line_src, source_width, - dest + i * dest_pitch, dest_width); - } + horiz_line_scale(line_src, source_width, + dest + i * dest_pitch, dest_width); + } - /* first line of next band */ - /* Trap case where we could read off the base of the source buffer */ - line_src = source + src_band_height * source_pitch; + /* first line 
of next band */ + /* Trap case where we could read off the base of the source buffer */ + line_src = source + src_band_height * source_pitch; - if (line_src < source_base) - line_src = source_base; + if (line_src < source_base) + line_src = source_base; - horiz_line_scale(line_src, source_width, - dest + dest_band_height * dest_pitch, - dest_width); + horiz_line_scale(line_src, source_width, + dest + dest_band_height * dest_pitch, + dest_width); - /* Vertical scaling is in place */ - vert_band_scale(dest, dest_pitch, dest_width); + /* Vertical scaling is in place */ + vert_band_scale(dest, dest_pitch, dest_width); - /* Next band... */ - source += src_band_height * source_pitch; - dest += dest_band_height * dest_pitch; - } + /* Next band... */ + source += src_band_height * source_pitch; + dest += dest_band_height * dest_pitch; + } - /* scale one band horizontally */ - for (i = 1; i < src_band_height; i++) - { - /* Trap case where we could read off the base of the source buffer */ - line_src = source + i * source_pitch; + /* scale one band horizontally */ + for (i = 1; i < src_band_height; i++) { + /* Trap case where we could read off the base of the source buffer */ + line_src = source + i * source_pitch; - if (line_src < source_base) - line_src = source_base; + if (line_src < source_base) + line_src = source_base; - horiz_line_scale(line_src, source_width, - dest + i * dest_pitch, - dest_width); - } + horiz_line_scale(line_src, source_width, + dest + i * dest_pitch, + dest_width); + } - /* Vertical scaling is in place */ - last_vert_band_scale(dest, dest_pitch, dest_width); + /* Vertical scaling is in place */ + last_vert_band_scale(dest, dest_pitch, dest_width); - return ratio_scalable; + return ratio_scalable; } /**************************************************************************** @@ -849,70 +808,69 @@ int any_ratio_2d_scale * ****************************************************************************/ static -int any_ratio_frame_scale(SCALE_VARS 
*scale_vars, int YOffset, int UVOffset) -{ - int i; - int ew; - int eh; - - /* suggested scale factors */ - int hs = scale_vars->HScale; - int hr = scale_vars->HRatio; - int vs = scale_vars->VScale; - int vr = scale_vars->VRatio; - - int ratio_scalable = 1; - - int sw = (scale_vars->expanded_frame_width * hr + hs - 1) / hs; - int sh = (scale_vars->expanded_frame_height * vr + vs - 1) / vs; - int dw = scale_vars->expanded_frame_width; - int dh = scale_vars->expanded_frame_height; - YV12_BUFFER_CONFIG *src_yuv_config = scale_vars->src_yuv_config; - YV12_BUFFER_CONFIG *dst_yuv_config = scale_vars->dst_yuv_config; - - if (hr == 3) - ew = (sw + 2) / 3 * 3 * hs / hr; - else - ew = (sw + 7) / 8 * 8 * hs / hr; - - if (vr == 3) - eh = (sh + 2) / 3 * 3 * vs / vr; - else - eh = (sh + 7) / 8 * 8 * vs / vr; - - ratio_scalable = any_ratio_2d_scale(scale_vars, - (const unsigned char *)src_yuv_config->y_buffer, - src_yuv_config->y_stride, sw, sh, - (unsigned char *) dst_yuv_config->y_buffer + YOffset, - dst_yuv_config->y_stride, dw, dh); - - for (i = 0; i < eh; i++) - duck_memset(dst_yuv_config->y_buffer + YOffset + i * dst_yuv_config->y_stride + dw, 0, ew - dw); - - for (i = dh; i < eh; i++) - duck_memset(dst_yuv_config->y_buffer + YOffset + i * dst_yuv_config->y_stride, 0, ew); - - if (ratio_scalable == 0) - return ratio_scalable; +int any_ratio_frame_scale(SCALE_VARS *scale_vars, int YOffset, int UVOffset) { + int i; + int ew; + int eh; + + /* suggested scale factors */ + int hs = scale_vars->HScale; + int hr = scale_vars->HRatio; + int vs = scale_vars->VScale; + int vr = scale_vars->VRatio; + + int ratio_scalable = 1; + + int sw = (scale_vars->expanded_frame_width * hr + hs - 1) / hs; + int sh = (scale_vars->expanded_frame_height * vr + vs - 1) / vs; + int dw = scale_vars->expanded_frame_width; + int dh = scale_vars->expanded_frame_height; + YV12_BUFFER_CONFIG *src_yuv_config = scale_vars->src_yuv_config; + YV12_BUFFER_CONFIG *dst_yuv_config = scale_vars->dst_yuv_config; + + 
if (hr == 3) + ew = (sw + 2) / 3 * 3 * hs / hr; + else + ew = (sw + 7) / 8 * 8 * hs / hr; + + if (vr == 3) + eh = (sh + 2) / 3 * 3 * vs / vr; + else + eh = (sh + 7) / 8 * 8 * vs / vr; + + ratio_scalable = any_ratio_2d_scale(scale_vars, + (const unsigned char *)src_yuv_config->y_buffer, + src_yuv_config->y_stride, sw, sh, + (unsigned char *) dst_yuv_config->y_buffer + YOffset, + dst_yuv_config->y_stride, dw, dh); + + for (i = 0; i < eh; i++) + duck_memset(dst_yuv_config->y_buffer + YOffset + i * dst_yuv_config->y_stride + dw, 0, ew - dw); + + for (i = dh; i < eh; i++) + duck_memset(dst_yuv_config->y_buffer + YOffset + i * dst_yuv_config->y_stride, 0, ew); + + if (ratio_scalable == 0) + return ratio_scalable; - sw = (sw + 1) >> 1; - sh = (sh + 1) >> 1; - dw = (dw + 1) >> 1; - dh = (dh + 1) >> 1; + sw = (sw + 1) >> 1; + sh = (sh + 1) >> 1; + dw = (dw + 1) >> 1; + dh = (dh + 1) >> 1; - any_ratio_2d_scale(scale_vars, - (const unsigned char *)src_yuv_config->u_buffer, - src_yuv_config->y_stride / 2, sw, sh, - (unsigned char *)dst_yuv_config->u_buffer + UVOffset, - dst_yuv_config->uv_stride, dw, dh); + any_ratio_2d_scale(scale_vars, + (const unsigned char *)src_yuv_config->u_buffer, + src_yuv_config->y_stride / 2, sw, sh, + (unsigned char *)dst_yuv_config->u_buffer + UVOffset, + dst_yuv_config->uv_stride, dw, dh); - any_ratio_2d_scale(scale_vars, - (const unsigned char *)src_yuv_config->v_buffer, - src_yuv_config->y_stride / 2, sw, sh, - (unsigned char *)dst_yuv_config->v_buffer + UVOffset, - dst_yuv_config->uv_stride, dw, dh); + any_ratio_2d_scale(scale_vars, + (const unsigned char *)src_yuv_config->v_buffer, + src_yuv_config->y_stride / 2, sw, sh, + (unsigned char *)dst_yuv_config->v_buffer + UVOffset, + dst_yuv_config->uv_stride, dw, dh); - return ratio_scalable; + return ratio_scalable; } /**************************************************************************** @@ -931,52 +889,48 @@ int any_ratio_frame_scale(SCALE_VARS *scale_vars, int YOffset, int UVOffset) * 
****************************************************************************/ static void -center_image(YV12_BUFFER_CONFIG *src_yuv_config, YV12_BUFFER_CONFIG *dst_yuv_config) -{ - int i; - int row_offset, col_offset; - unsigned char *src_data_pointer; - unsigned char *dst_data_pointer; - - /* center values */ - row_offset = (dst_yuv_config->y_height - src_yuv_config->y_height) / 2; - col_offset = (dst_yuv_config->y_width - src_yuv_config->y_width) / 2; - - /* Y's */ - src_data_pointer = src_yuv_config->y_buffer; - dst_data_pointer = (unsigned char *)dst_yuv_config->y_buffer + (row_offset * dst_yuv_config->y_stride) + col_offset; - - for (i = 0; i < src_yuv_config->y_height; i++) - { - duck_memcpy(dst_data_pointer, src_data_pointer, src_yuv_config->y_width); - dst_data_pointer += dst_yuv_config->y_stride; - src_data_pointer += src_yuv_config->y_stride; - } - - row_offset /= 2; - col_offset /= 2; - - /* U's */ - src_data_pointer = src_yuv_config->u_buffer; - dst_data_pointer = (unsigned char *)dst_yuv_config->u_buffer + (row_offset * dst_yuv_config->uv_stride) + col_offset; - - for (i = 0; i < src_yuv_config->uv_height; i++) - { - duck_memcpy(dst_data_pointer, src_data_pointer, src_yuv_config->uv_width); - dst_data_pointer += dst_yuv_config->uv_stride; - src_data_pointer += src_yuv_config->uv_stride; - } - - /* V's */ - src_data_pointer = src_yuv_config->v_buffer; - dst_data_pointer = (unsigned char *)dst_yuv_config->v_buffer + (row_offset * dst_yuv_config->uv_stride) + col_offset; - - for (i = 0; i < src_yuv_config->uv_height; i++) - { - duck_memcpy(dst_data_pointer, src_data_pointer, src_yuv_config->uv_width); - dst_data_pointer += dst_yuv_config->uv_stride; - src_data_pointer += src_yuv_config->uv_stride; - } +center_image(YV12_BUFFER_CONFIG *src_yuv_config, YV12_BUFFER_CONFIG *dst_yuv_config) { + int i; + int row_offset, col_offset; + unsigned char *src_data_pointer; + unsigned char *dst_data_pointer; + + /* center values */ + row_offset = 
(dst_yuv_config->y_height - src_yuv_config->y_height) / 2; + col_offset = (dst_yuv_config->y_width - src_yuv_config->y_width) / 2; + + /* Y's */ + src_data_pointer = src_yuv_config->y_buffer; + dst_data_pointer = (unsigned char *)dst_yuv_config->y_buffer + (row_offset * dst_yuv_config->y_stride) + col_offset; + + for (i = 0; i < src_yuv_config->y_height; i++) { + duck_memcpy(dst_data_pointer, src_data_pointer, src_yuv_config->y_width); + dst_data_pointer += dst_yuv_config->y_stride; + src_data_pointer += src_yuv_config->y_stride; + } + + row_offset /= 2; + col_offset /= 2; + + /* U's */ + src_data_pointer = src_yuv_config->u_buffer; + dst_data_pointer = (unsigned char *)dst_yuv_config->u_buffer + (row_offset * dst_yuv_config->uv_stride) + col_offset; + + for (i = 0; i < src_yuv_config->uv_height; i++) { + duck_memcpy(dst_data_pointer, src_data_pointer, src_yuv_config->uv_width); + dst_data_pointer += dst_yuv_config->uv_stride; + src_data_pointer += src_yuv_config->uv_stride; + } + + /* V's */ + src_data_pointer = src_yuv_config->v_buffer; + dst_data_pointer = (unsigned char *)dst_yuv_config->v_buffer + (row_offset * dst_yuv_config->uv_stride) + col_offset; + + for (i = 0; i < src_yuv_config->uv_height; i++) { + duck_memcpy(dst_data_pointer, src_data_pointer, src_yuv_config->uv_width); + dst_data_pointer += dst_yuv_config->uv_stride; + src_data_pointer += src_yuv_config->uv_stride; + } } /**************************************************************************** @@ -999,61 +953,58 @@ center_image(YV12_BUFFER_CONFIG *src_yuv_config, YV12_BUFFER_CONFIG *dst_yuv_con void vp8_yv12_scale_or_center ( - YV12_BUFFER_CONFIG *src_yuv_config, - YV12_BUFFER_CONFIG *dst_yuv_config, - int expanded_frame_width, - int expanded_frame_height, - int scaling_mode, - int HScale, - int HRatio, - int VScale, - int VRatio -) -{ - /*if ( ppi->post_processing_level ) - update_umvborder ( ppi, frame_buffer );*/ - - - switch (scaling_mode) - { + YV12_BUFFER_CONFIG *src_yuv_config, + 
YV12_BUFFER_CONFIG *dst_yuv_config, + int expanded_frame_width, + int expanded_frame_height, + int scaling_mode, + int HScale, + int HRatio, + int VScale, + int VRatio +) { + /*if ( ppi->post_processing_level ) + update_umvborder ( ppi, frame_buffer );*/ + + + switch (scaling_mode) { case SCALE_TO_FIT: - case MAINTAIN_ASPECT_RATIO: - { - SCALE_VARS scale_vars; - /* center values */ + case MAINTAIN_ASPECT_RATIO: { + SCALE_VARS scale_vars; + /* center values */ #if 1 - int row = (dst_yuv_config->y_height - expanded_frame_height) / 2; - int col = (dst_yuv_config->y_width - expanded_frame_width) / 2; - /*int YOffset = row * dst_yuv_config->y_width + col; - int UVOffset = (row>>1) * dst_yuv_config->uv_width + (col>>1);*/ - int YOffset = row * dst_yuv_config->y_stride + col; - int UVOffset = (row >> 1) * dst_yuv_config->uv_stride + (col >> 1); + int row = (dst_yuv_config->y_height - expanded_frame_height) / 2; + int col = (dst_yuv_config->y_width - expanded_frame_width) / 2; + /*int YOffset = row * dst_yuv_config->y_width + col; + int UVOffset = (row>>1) * dst_yuv_config->uv_width + (col>>1);*/ + int YOffset = row * dst_yuv_config->y_stride + col; + int UVOffset = (row >> 1) * dst_yuv_config->uv_stride + (col >> 1); #else - int row = (src_yuv_config->y_height - expanded_frame_height) / 2; - int col = (src_yuv_config->y_width - expanded_frame_width) / 2; - int YOffset = row * src_yuv_config->y_width + col; - int UVOffset = (row >> 1) * src_yuv_config->uv_width + (col >> 1); + int row = (src_yuv_config->y_height - expanded_frame_height) / 2; + int col = (src_yuv_config->y_width - expanded_frame_width) / 2; + int YOffset = row * src_yuv_config->y_width + col; + int UVOffset = (row >> 1) * src_yuv_config->uv_width + (col >> 1); #endif - scale_vars.dst_yuv_config = dst_yuv_config; - scale_vars.src_yuv_config = src_yuv_config; - scale_vars.HScale = HScale; - scale_vars.HRatio = HRatio; - scale_vars.VScale = VScale; - scale_vars.VRatio = VRatio; - 
scale_vars.expanded_frame_width = expanded_frame_width; - scale_vars.expanded_frame_height = expanded_frame_height; + scale_vars.dst_yuv_config = dst_yuv_config; + scale_vars.src_yuv_config = src_yuv_config; + scale_vars.HScale = HScale; + scale_vars.HRatio = HRatio; + scale_vars.VScale = VScale; + scale_vars.VRatio = VRatio; + scale_vars.expanded_frame_width = expanded_frame_width; + scale_vars.expanded_frame_height = expanded_frame_height; - /* perform center and scale */ - any_ratio_frame_scale(&scale_vars, YOffset, UVOffset); + /* perform center and scale */ + any_ratio_frame_scale(&scale_vars, YOffset, UVOffset); - break; + break; } case CENTER: - center_image(src_yuv_config, dst_yuv_config); - break; + center_image(src_yuv_config, dst_yuv_config); + break; default: - break; - } + break; + } } diff --git a/vpx_scale/generic/yv12config.c b/vpx_scale/generic/yv12config.c index eff594e..4cb2a41 100644 --- a/vpx_scale/generic/yv12config.c +++ b/vpx_scale/generic/yv12config.c @@ -20,81 +20,73 @@ * ****************************************************************************/ int -vp8_yv12_de_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf) -{ - if (ybf) - { - vpx_free(ybf->buffer_alloc); - - /* buffer_alloc isn't accessed by most functions. Rather y_buffer, - u_buffer and v_buffer point to buffer_alloc and are used. Clear out - all of this so that a freed pointer isn't inadvertently used */ - vpx_memset (ybf, 0, sizeof (YV12_BUFFER_CONFIG)); - } - else - { - return -1; - } - - return 0; +vp8_yv12_de_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf) { + if (ybf) { + vpx_free(ybf->buffer_alloc); + + /* buffer_alloc isn't accessed by most functions. Rather y_buffer, + u_buffer and v_buffer point to buffer_alloc and are used. 
Clear out + all of this so that a freed pointer isn't inadvertently used */ + vpx_memset(ybf, 0, sizeof(YV12_BUFFER_CONFIG)); + } else { + return -1; + } + + return 0; } /**************************************************************************** * ****************************************************************************/ int -vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height, int border) -{ -/*NOTE:*/ - - if (ybf) - { - int y_stride = ((width + 2 * border) + 31) & ~31; - int yplane_size = (height + 2 * border) * y_stride; - int uv_width = width >> 1; - int uv_height = height >> 1; - /** There is currently a bunch of code which assumes - * uv_stride == y_stride/2, so enforce this here. */ - int uv_stride = y_stride >> 1; - int uvplane_size = (uv_height + border) * uv_stride; - - vp8_yv12_de_alloc_frame_buffer(ybf); - - /** Only support allocating buffers that have a height and width that - * are multiples of 16, and a border that's a multiple of 32. - * The border restriction is required to get 16-byte alignment of the - * start of the chroma rows without intoducing an arbitrary gap - * between planes, which would break the semantics of things like - * vpx_img_set_rect(). 
*/ - if ((width & 0xf) | (height & 0xf) | (border & 0x1f)) - return -3; - - ybf->y_width = width; - ybf->y_height = height; - ybf->y_stride = y_stride; - - ybf->uv_width = uv_width; - ybf->uv_height = uv_height; - ybf->uv_stride = uv_stride; - - ybf->border = border; - ybf->frame_size = yplane_size + 2 * uvplane_size; - - ybf->buffer_alloc = (unsigned char *) vpx_memalign(32, ybf->frame_size); - - if (ybf->buffer_alloc == NULL) - return -1; - - ybf->y_buffer = ybf->buffer_alloc + (border * y_stride) + border; - ybf->u_buffer = ybf->buffer_alloc + yplane_size + (border / 2 * uv_stride) + border / 2; - ybf->v_buffer = ybf->buffer_alloc + yplane_size + uvplane_size + (border / 2 * uv_stride) + border / 2; - - ybf->corrupted = 0; /* assume not currupted by errors */ - } - else - { - return -2; - } - - return 0; +vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height, int border) { + /*NOTE:*/ + + if (ybf) { + int y_stride = ((width + 2 * border) + 31) & ~31; + int yplane_size = (height + 2 * border) * y_stride; + int uv_width = width >> 1; + int uv_height = height >> 1; + /** There is currently a bunch of code which assumes + * uv_stride == y_stride/2, so enforce this here. */ + int uv_stride = y_stride >> 1; + int uvplane_size = (uv_height + border) * uv_stride; + + vp8_yv12_de_alloc_frame_buffer(ybf); + + /** Only support allocating buffers that have a height and width that + * are multiples of 16, and a border that's a multiple of 32. + * The border restriction is required to get 16-byte alignment of the + * start of the chroma rows without intoducing an arbitrary gap + * between planes, which would break the semantics of things like + * vpx_img_set_rect(). 
*/ + if ((width & 0xf) | (height & 0xf) | (border & 0x1f)) + return -3; + + ybf->y_width = width; + ybf->y_height = height; + ybf->y_stride = y_stride; + + ybf->uv_width = uv_width; + ybf->uv_height = uv_height; + ybf->uv_stride = uv_stride; + + ybf->border = border; + ybf->frame_size = yplane_size + 2 * uvplane_size; + + ybf->buffer_alloc = (unsigned char *) vpx_memalign(32, ybf->frame_size); + + if (ybf->buffer_alloc == NULL) + return -1; + + ybf->y_buffer = ybf->buffer_alloc + (border * y_stride) + border; + ybf->u_buffer = ybf->buffer_alloc + yplane_size + (border / 2 * uv_stride) + border / 2; + ybf->v_buffer = ybf->buffer_alloc + yplane_size + uvplane_size + (border / 2 * uv_stride) + border / 2; + + ybf->corrupted = 0; /* assume not currupted by errors */ + } else { + return -2; + } + + return 0; } diff --git a/vpx_scale/generic/yv12extend.c b/vpx_scale/generic/yv12extend.c index 638633b..247078c 100644 --- a/vpx_scale/generic/yv12extend.c +++ b/vpx_scale/generic/yv12extend.c @@ -21,184 +21,174 @@ * ****************************************************************************/ void -vp8_yv12_extend_frame_borders_c(YV12_BUFFER_CONFIG *ybf) -{ - int i; - unsigned char *src_ptr1, *src_ptr2; - unsigned char *dest_ptr1, *dest_ptr2; - - unsigned int Border; - int plane_stride; - int plane_height; - int plane_width; - - /***********/ - /* Y Plane */ - /***********/ - Border = ybf->border; - plane_stride = ybf->y_stride; - plane_height = ybf->y_height; - plane_width = ybf->y_width; - - /* copy the left and right most columns out */ - src_ptr1 = ybf->y_buffer; - src_ptr2 = src_ptr1 + plane_width - 1; - dest_ptr1 = src_ptr1 - Border; - dest_ptr2 = src_ptr2 + 1; - - for (i = 0; i < plane_height; i++) - { - vpx_memset(dest_ptr1, src_ptr1[0], Border); - vpx_memset(dest_ptr2, src_ptr2[0], Border); - src_ptr1 += plane_stride; - src_ptr2 += plane_stride; - dest_ptr1 += plane_stride; - dest_ptr2 += plane_stride; - } - - /* Now copy the top and bottom source lines into each 
line of the respective borders */ - src_ptr1 = ybf->y_buffer - Border; - src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; - dest_ptr1 = src_ptr1 - (Border * plane_stride); - dest_ptr2 = src_ptr2 + plane_stride; - - for (i = 0; i < (int)Border; i++) - { - vpx_memcpy(dest_ptr1, src_ptr1, plane_stride); - vpx_memcpy(dest_ptr2, src_ptr2, plane_stride); - dest_ptr1 += plane_stride; - dest_ptr2 += plane_stride; - } - - - /***********/ - /* U Plane */ - /***********/ - plane_stride = ybf->uv_stride; - plane_height = ybf->uv_height; - plane_width = ybf->uv_width; - Border /= 2; - - /* copy the left and right most columns out */ - src_ptr1 = ybf->u_buffer; - src_ptr2 = src_ptr1 + plane_width - 1; - dest_ptr1 = src_ptr1 - Border; - dest_ptr2 = src_ptr2 + 1; - - for (i = 0; i < plane_height; i++) - { - vpx_memset(dest_ptr1, src_ptr1[0], Border); - vpx_memset(dest_ptr2, src_ptr2[0], Border); - src_ptr1 += plane_stride; - src_ptr2 += plane_stride; - dest_ptr1 += plane_stride; - dest_ptr2 += plane_stride; - } - - /* Now copy the top and bottom source lines into each line of the respective borders */ - src_ptr1 = ybf->u_buffer - Border; - src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; - dest_ptr1 = src_ptr1 - (Border * plane_stride); - dest_ptr2 = src_ptr2 + plane_stride; - - for (i = 0; i < (int)(Border); i++) - { - vpx_memcpy(dest_ptr1, src_ptr1, plane_stride); - vpx_memcpy(dest_ptr2, src_ptr2, plane_stride); - dest_ptr1 += plane_stride; - dest_ptr2 += plane_stride; - } - - /***********/ - /* V Plane */ - /***********/ - - /* copy the left and right most columns out */ - src_ptr1 = ybf->v_buffer; - src_ptr2 = src_ptr1 + plane_width - 1; - dest_ptr1 = src_ptr1 - Border; - dest_ptr2 = src_ptr2 + 1; - - for (i = 0; i < plane_height; i++) - { - vpx_memset(dest_ptr1, src_ptr1[0], Border); - vpx_memset(dest_ptr2, src_ptr2[0], Border); - src_ptr1 += plane_stride; - src_ptr2 += plane_stride; - dest_ptr1 += plane_stride; - dest_ptr2 += 
plane_stride; - } - - /* Now copy the top and bottom source lines into each line of the respective borders */ - src_ptr1 = ybf->v_buffer - Border; - src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; - dest_ptr1 = src_ptr1 - (Border * plane_stride); - dest_ptr2 = src_ptr2 + plane_stride; - - for (i = 0; i < (int)(Border); i++) - { - vpx_memcpy(dest_ptr1, src_ptr1, plane_stride); - vpx_memcpy(dest_ptr2, src_ptr2, plane_stride); - dest_ptr1 += plane_stride; - dest_ptr2 += plane_stride; - } +vp8_yv12_extend_frame_borders_c(YV12_BUFFER_CONFIG *ybf) { + int i; + unsigned char *src_ptr1, *src_ptr2; + unsigned char *dest_ptr1, *dest_ptr2; + + unsigned int Border; + int plane_stride; + int plane_height; + int plane_width; + + /***********/ + /* Y Plane */ + /***********/ + Border = ybf->border; + plane_stride = ybf->y_stride; + plane_height = ybf->y_height; + plane_width = ybf->y_width; + + /* copy the left and right most columns out */ + src_ptr1 = ybf->y_buffer; + src_ptr2 = src_ptr1 + plane_width - 1; + dest_ptr1 = src_ptr1 - Border; + dest_ptr2 = src_ptr2 + 1; + + for (i = 0; i < plane_height; i++) { + vpx_memset(dest_ptr1, src_ptr1[0], Border); + vpx_memset(dest_ptr2, src_ptr2[0], Border); + src_ptr1 += plane_stride; + src_ptr2 += plane_stride; + dest_ptr1 += plane_stride; + dest_ptr2 += plane_stride; + } + + /* Now copy the top and bottom source lines into each line of the respective borders */ + src_ptr1 = ybf->y_buffer - Border; + src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; + dest_ptr1 = src_ptr1 - (Border * plane_stride); + dest_ptr2 = src_ptr2 + plane_stride; + + for (i = 0; i < (int)Border; i++) { + vpx_memcpy(dest_ptr1, src_ptr1, plane_stride); + vpx_memcpy(dest_ptr2, src_ptr2, plane_stride); + dest_ptr1 += plane_stride; + dest_ptr2 += plane_stride; + } + + + /***********/ + /* U Plane */ + /***********/ + plane_stride = ybf->uv_stride; + plane_height = ybf->uv_height; + plane_width = ybf->uv_width; + Border /= 2; + + /* 
copy the left and right most columns out */ + src_ptr1 = ybf->u_buffer; + src_ptr2 = src_ptr1 + plane_width - 1; + dest_ptr1 = src_ptr1 - Border; + dest_ptr2 = src_ptr2 + 1; + + for (i = 0; i < plane_height; i++) { + vpx_memset(dest_ptr1, src_ptr1[0], Border); + vpx_memset(dest_ptr2, src_ptr2[0], Border); + src_ptr1 += plane_stride; + src_ptr2 += plane_stride; + dest_ptr1 += plane_stride; + dest_ptr2 += plane_stride; + } + + /* Now copy the top and bottom source lines into each line of the respective borders */ + src_ptr1 = ybf->u_buffer - Border; + src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; + dest_ptr1 = src_ptr1 - (Border * plane_stride); + dest_ptr2 = src_ptr2 + plane_stride; + + for (i = 0; i < (int)(Border); i++) { + vpx_memcpy(dest_ptr1, src_ptr1, plane_stride); + vpx_memcpy(dest_ptr2, src_ptr2, plane_stride); + dest_ptr1 += plane_stride; + dest_ptr2 += plane_stride; + } + + /***********/ + /* V Plane */ + /***********/ + + /* copy the left and right most columns out */ + src_ptr1 = ybf->v_buffer; + src_ptr2 = src_ptr1 + plane_width - 1; + dest_ptr1 = src_ptr1 - Border; + dest_ptr2 = src_ptr2 + 1; + + for (i = 0; i < plane_height; i++) { + vpx_memset(dest_ptr1, src_ptr1[0], Border); + vpx_memset(dest_ptr2, src_ptr2[0], Border); + src_ptr1 += plane_stride; + src_ptr2 += plane_stride; + dest_ptr1 += plane_stride; + dest_ptr2 += plane_stride; + } + + /* Now copy the top and bottom source lines into each line of the respective borders */ + src_ptr1 = ybf->v_buffer - Border; + src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; + dest_ptr1 = src_ptr1 - (Border * plane_stride); + dest_ptr2 = src_ptr2 + plane_stride; + + for (i = 0; i < (int)(Border); i++) { + vpx_memcpy(dest_ptr1, src_ptr1, plane_stride); + vpx_memcpy(dest_ptr2, src_ptr2, plane_stride); + dest_ptr1 += plane_stride; + dest_ptr2 += plane_stride; + } } static void -extend_frame_borders_yonly_c(YV12_BUFFER_CONFIG *ybf) -{ - int i; - unsigned char *src_ptr1, 
*src_ptr2; - unsigned char *dest_ptr1, *dest_ptr2; - - unsigned int Border; - int plane_stride; - int plane_height; - int plane_width; - - /***********/ - /* Y Plane */ - /***********/ - Border = ybf->border; - plane_stride = ybf->y_stride; - plane_height = ybf->y_height; - plane_width = ybf->y_width; - - /* copy the left and right most columns out */ - src_ptr1 = ybf->y_buffer; - src_ptr2 = src_ptr1 + plane_width - 1; - dest_ptr1 = src_ptr1 - Border; - dest_ptr2 = src_ptr2 + 1; - - for (i = 0; i < plane_height; i++) - { - vpx_memset(dest_ptr1, src_ptr1[0], Border); - vpx_memset(dest_ptr2, src_ptr2[0], Border); - src_ptr1 += plane_stride; - src_ptr2 += plane_stride; - dest_ptr1 += plane_stride; - dest_ptr2 += plane_stride; - } - - /* Now copy the top and bottom source lines into each line of the respective borders */ - src_ptr1 = ybf->y_buffer - Border; - src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; - dest_ptr1 = src_ptr1 - (Border * plane_stride); - dest_ptr2 = src_ptr2 + plane_stride; - - for (i = 0; i < (int)Border; i++) - { - vpx_memcpy(dest_ptr1, src_ptr1, plane_stride); - vpx_memcpy(dest_ptr2, src_ptr2, plane_stride); - dest_ptr1 += plane_stride; - dest_ptr2 += plane_stride; - } - - plane_stride /= 2; - plane_height /= 2; - plane_width /= 2; - Border /= 2; +extend_frame_borders_yonly_c(YV12_BUFFER_CONFIG *ybf) { + int i; + unsigned char *src_ptr1, *src_ptr2; + unsigned char *dest_ptr1, *dest_ptr2; + + unsigned int Border; + int plane_stride; + int plane_height; + int plane_width; + + /***********/ + /* Y Plane */ + /***********/ + Border = ybf->border; + plane_stride = ybf->y_stride; + plane_height = ybf->y_height; + plane_width = ybf->y_width; + + /* copy the left and right most columns out */ + src_ptr1 = ybf->y_buffer; + src_ptr2 = src_ptr1 + plane_width - 1; + dest_ptr1 = src_ptr1 - Border; + dest_ptr2 = src_ptr2 + 1; + + for (i = 0; i < plane_height; i++) { + vpx_memset(dest_ptr1, src_ptr1[0], Border); + vpx_memset(dest_ptr2, 
src_ptr2[0], Border); + src_ptr1 += plane_stride; + src_ptr2 += plane_stride; + dest_ptr1 += plane_stride; + dest_ptr2 += plane_stride; + } + + /* Now copy the top and bottom source lines into each line of the respective borders */ + src_ptr1 = ybf->y_buffer - Border; + src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; + dest_ptr1 = src_ptr1 - (Border * plane_stride); + dest_ptr2 = src_ptr2 + plane_stride; + + for (i = 0; i < (int)Border; i++) { + vpx_memcpy(dest_ptr1, src_ptr1, plane_stride); + vpx_memcpy(dest_ptr2, src_ptr2, plane_stride); + dest_ptr1 += plane_stride; + dest_ptr2 += plane_stride; + } + + plane_stride /= 2; + plane_height /= 2; + plane_width /= 2; + Border /= 2; } @@ -221,57 +211,53 @@ extend_frame_borders_yonly_c(YV12_BUFFER_CONFIG *ybf) * ****************************************************************************/ void -vp8_yv12_copy_frame_c(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc) -{ - int row; - unsigned char *source, *dest; - - source = src_ybc->y_buffer; - dest = dst_ybc->y_buffer; - - for (row = 0; row < src_ybc->y_height; row++) - { - vpx_memcpy(dest, source, src_ybc->y_width); - source += src_ybc->y_stride; - dest += dst_ybc->y_stride; - } - - source = src_ybc->u_buffer; - dest = dst_ybc->u_buffer; - - for (row = 0; row < src_ybc->uv_height; row++) - { - vpx_memcpy(dest, source, src_ybc->uv_width); - source += src_ybc->uv_stride; - dest += dst_ybc->uv_stride; - } - - source = src_ybc->v_buffer; - dest = dst_ybc->v_buffer; - - for (row = 0; row < src_ybc->uv_height; row++) - { - vpx_memcpy(dest, source, src_ybc->uv_width); - source += src_ybc->uv_stride; - dest += dst_ybc->uv_stride; - } - - vp8_yv12_extend_frame_borders_c(dst_ybc); +vp8_yv12_copy_frame_c(YV12_BUFFER_CONFIG *src_ybc, + YV12_BUFFER_CONFIG *dst_ybc) { + int row; + unsigned char *source, *dest; + + source = src_ybc->y_buffer; + dest = dst_ybc->y_buffer; + + for (row = 0; row < src_ybc->y_height; row++) { + vpx_memcpy(dest, source, 
src_ybc->y_width); + source += src_ybc->y_stride; + dest += dst_ybc->y_stride; + } + + source = src_ybc->u_buffer; + dest = dst_ybc->u_buffer; + + for (row = 0; row < src_ybc->uv_height; row++) { + vpx_memcpy(dest, source, src_ybc->uv_width); + source += src_ybc->uv_stride; + dest += dst_ybc->uv_stride; + } + + source = src_ybc->v_buffer; + dest = dst_ybc->v_buffer; + + for (row = 0; row < src_ybc->uv_height; row++) { + vpx_memcpy(dest, source, src_ybc->uv_width); + source += src_ybc->uv_stride; + dest += dst_ybc->uv_stride; + } + + vp8_yv12_extend_frame_borders_c(dst_ybc); } -void vp8_yv12_copy_y_c(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc) -{ - int row; - unsigned char *source, *dest; +void vp8_yv12_copy_y_c(YV12_BUFFER_CONFIG *src_ybc, + YV12_BUFFER_CONFIG *dst_ybc) { + int row; + unsigned char *source, *dest; - source = src_ybc->y_buffer; - dest = dst_ybc->y_buffer; + source = src_ybc->y_buffer; + dest = dst_ybc->y_buffer; - for (row = 0; row < src_ybc->y_height; row++) - { - vpx_memcpy(dest, source, src_ybc->y_width); - source += src_ybc->y_stride; - dest += dst_ybc->y_stride; - } + for (row = 0; row < src_ybc->y_height; row++) { + vpx_memcpy(dest, source, src_ybc->y_width); + source += src_ybc->y_stride; + dest += dst_ybc->y_stride; + } } diff --git a/vpx_scale/include/generic/vpxscale_arbitrary.h b/vpx_scale/include/generic/vpxscale_arbitrary.h index 39de181..c535252 100644 --- a/vpx_scale/include/generic/vpxscale_arbitrary.h +++ b/vpx_scale/include/generic/vpxscale_arbitrary.h @@ -14,33 +14,32 @@ #include "vpx_scale/yv12config.h" -typedef struct -{ - int in_width; - int in_height; - - int out_width; - int out_height; - int max_usable_out_width; - - // numerator for the width and height - int nw; - int nh; - int nh_uv; - - // output to input correspondance array - short *l_w; - short *l_h; - short *l_h_uv; - - // polyphase coefficients - short *c_w; - short *c_h; - short *c_h_uv; - - // buffer for horizontal filtering. 
- unsigned char *hbuf; - unsigned char *hbuf_uv; +typedef struct { + int in_width; + int in_height; + + int out_width; + int out_height; + int max_usable_out_width; + + // numerator for the width and height + int nw; + int nh; + int nh_uv; + + // output to input correspondance array + short *l_w; + short *l_h; + short *l_h_uv; + + // polyphase coefficients + short *c_w; + short *c_h; + short *c_h_uv; + + // buffer for horizontal filtering. + unsigned char *hbuf; + unsigned char *hbuf_uv; } BICUBIC_SCALER_STRUCT; int bicubic_coefficient_setup(int in_width, int in_height, int out_width, int out_height); diff --git a/vpx_scale/scale_mode.h b/vpx_scale/scale_mode.h index 1476e64..5581385 100644 --- a/vpx_scale/scale_mode.h +++ b/vpx_scale/scale_mode.h @@ -17,12 +17,11 @@ #ifndef SCALE_MODE_H #define SCALE_MODE_H -typedef enum -{ - MAINTAIN_ASPECT_RATIO = 0x0, - SCALE_TO_FIT = 0x1, - CENTER = 0x2, - OTHER = 0x3 +typedef enum { + MAINTAIN_ASPECT_RATIO = 0x0, + SCALE_TO_FIT = 0x1, + CENTER = 0x2, + OTHER = 0x3 } SCALE_MODE; diff --git a/vpx_scale/vpxscale.h b/vpx_scale/vpxscale.h index 8919a24..3c2194d 100644 --- a/vpx_scale/vpxscale.h +++ b/vpx_scale/vpxscale.h @@ -14,29 +14,24 @@ #include "vpx_scale/yv12config.h" -extern void vp8_yv12_scale_or_center -( - YV12_BUFFER_CONFIG *src_yuv_config, - YV12_BUFFER_CONFIG *dst_yuv_config, - int expanded_frame_width, - int expanded_frame_height, - int scaling_mode, - int HScale, - int HRatio, - int VScale, - int VRatio -); -extern void vp8_scale_frame -( - YV12_BUFFER_CONFIG *src, - YV12_BUFFER_CONFIG *dst, - unsigned char *temp_area, - unsigned char temp_height, - unsigned int hscale, - unsigned int hratio, - unsigned int vscale, - unsigned int vratio, - unsigned int interlaced -); +extern void vp8_yv12_scale_or_center(YV12_BUFFER_CONFIG *src_yuv_config, + YV12_BUFFER_CONFIG *dst_yuv_config, + int expanded_frame_width, + int expanded_frame_height, + int scaling_mode, + int HScale, + int HRatio, + int VScale, + int VRatio); + 
+extern void vp8_scale_frame(YV12_BUFFER_CONFIG *src, + YV12_BUFFER_CONFIG *dst, + unsigned char *temp_area, + unsigned char temp_height, + unsigned int hscale, + unsigned int hratio, + unsigned int vscale, + unsigned int vratio, + unsigned int interlaced); #endif diff --git a/vpx_scale/win32/scaleopt.c b/vpx_scale/win32/scaleopt.c index 3711fe5..2d96cc7 100644 --- a/vpx_scale/win32/scaleopt.c +++ b/vpx_scale/win32/scaleopt.c @@ -61,114 +61,112 @@ __declspec(align(16)) const static unsigned short const35_1[] = { 102, 205, 51, static void horizontal_line_3_5_scale_mmx ( - const unsigned char *source, - unsigned int source_width, - unsigned char *dest, - unsigned int dest_width -) -{ - (void) dest_width; + const unsigned char *source, + unsigned int source_width, + unsigned char *dest, + unsigned int dest_width +) { + (void) dest_width; - __asm - { + __asm { - push ebx + push ebx - mov esi, source - mov edi, dest + mov esi, source + mov edi, dest - mov ecx, source_width - lea edx, [esi+ecx-3]; + mov ecx, source_width + lea edx, [esi+ecx-3]; - movq mm5, const35_1 // mm5 = 66 xx cd xx 33 xx 9a xx - movq mm6, const35_2 // mm6 = 9a xx 33 xx cd xx 66 xx + movq mm5, const35_1 // mm5 = 66 xx cd xx 33 xx 9a xx + movq mm6, const35_2 // mm6 = 9a xx 33 xx cd xx 66 xx - movq mm4, round_values // mm4 = 80 xx 80 xx 80 xx 80 xx - pxor mm7, mm7 // clear mm7 + movq mm4, round_values // mm4 = 80 xx 80 xx 80 xx 80 xx + pxor mm7, mm7 // clear mm7 - horiz_line_3_5_loop: + horiz_line_3_5_loop: - mov eax, DWORD PTR [esi] // eax = 00 01 02 03 - mov ebx, eax + mov eax, DWORD PTR [esi] // eax = 00 01 02 03 + mov ebx, eax - and ebx, 0xffff00 // ebx = xx 01 02 xx - mov ecx, eax // ecx = 00 01 02 03 + and ebx, 0xffff00 // ebx = xx 01 02 xx + mov ecx, eax // ecx = 00 01 02 03 - and eax, 0xffff0000 // eax = xx xx 02 03 - xor ecx, eax // ecx = 00 01 xx xx + and eax, 0xffff0000 // eax = xx xx 02 03 + xor ecx, eax // ecx = 00 01 xx xx - shr ebx, 8 // ebx = 01 02 xx xx - or eax, ebx // eax = 01 02 02 
03 + shr ebx, 8 // ebx = 01 02 xx xx + or eax, ebx // eax = 01 02 02 03 - shl ebx, 16 // ebx = xx xx 01 02 - movd mm1, eax // mm1 = 01 02 02 03 xx xx xx xx + shl ebx, 16 // ebx = xx xx 01 02 + movd mm1, eax // mm1 = 01 02 02 03 xx xx xx xx - or ebx, ecx // ebx = 00 01 01 02 - punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 02 xx 03 xx + or ebx, ecx // ebx = 00 01 01 02 + punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 02 xx 03 xx - movd mm0, ebx // mm0 = 00 01 01 02 - pmullw mm1, mm6 // + movd mm0, ebx // mm0 = 00 01 01 02 + pmullw mm1, mm6 // - punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 01 xx 02 xx - pmullw mm0, mm5 // + punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 01 xx 02 xx + pmullw mm0, mm5 // - mov [edi], ebx // writeoutput 00 xx xx xx - add esi, 3 + mov [edi], ebx // writeoutput 00 xx xx xx + add esi, 3 - add edi, 5 - paddw mm0, mm1 + add edi, 5 + paddw mm0, mm1 - paddw mm0, mm4 - psrlw mm0, 8 + paddw mm0, mm4 + psrlw mm0, 8 - cmp esi, edx - packuswb mm0, mm7 + cmp esi, edx + packuswb mm0, mm7 - movd DWORD Ptr [edi-4], mm0 - jl horiz_line_3_5_loop + movd DWORD Ptr [edi-4], mm0 + jl horiz_line_3_5_loop -//Exit: - mov eax, DWORD PTR [esi] // eax = 00 01 02 03 - mov ebx, eax +// Exit: + mov eax, DWORD PTR [esi] // eax = 00 01 02 03 + mov ebx, eax - and ebx, 0xffff00 // ebx = xx 01 02 xx - mov ecx, eax // ecx = 00 01 02 03 + and ebx, 0xffff00 // ebx = xx 01 02 xx + mov ecx, eax // ecx = 00 01 02 03 - and eax, 0xffff0000 // eax = xx xx 02 03 - xor ecx, eax // ecx = 00 01 xx xx + and eax, 0xffff0000 // eax = xx xx 02 03 + xor ecx, eax // ecx = 00 01 xx xx - shr ebx, 8 // ebx = 01 02 xx xx - or eax, ebx // eax = 01 02 02 03 + shr ebx, 8 // ebx = 01 02 xx xx + or eax, ebx // eax = 01 02 02 03 - shl eax, 8 // eax = xx 01 02 02 - and eax, 0xffff0000 // eax = xx xx 02 02 + shl eax, 8 // eax = xx 01 02 02 + and eax, 0xffff0000 // eax = xx xx 02 02 - or eax, ebx // eax = 01 02 02 02 + or eax, ebx // eax = 01 02 02 02 - shl ebx, 16 // ebx = xx xx 01 02 - movd mm1, eax // mm1 = 01 02 02 02 xx xx 
xx xx + shl ebx, 16 // ebx = xx xx 01 02 + movd mm1, eax // mm1 = 01 02 02 02 xx xx xx xx - or ebx, ecx // ebx = 00 01 01 02 - punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 02 xx 02 xx + or ebx, ecx // ebx = 00 01 01 02 + punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 02 xx 02 xx - movd mm0, ebx // mm0 = 00 01 01 02 - pmullw mm1, mm6 // + movd mm0, ebx // mm0 = 00 01 01 02 + pmullw mm1, mm6 // - punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 01 xx 02 xx - pmullw mm0, mm5 // + punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 01 xx 02 xx + pmullw mm0, mm5 // - mov [edi], ebx // writeoutput 00 xx xx xx - paddw mm0, mm1 + mov [edi], ebx // writeoutput 00 xx xx xx + paddw mm0, mm1 - paddw mm0, mm4 - psrlw mm0, 8 + paddw mm0, mm4 + psrlw mm0, 8 - packuswb mm0, mm7 - movd DWORD Ptr [edi+1], mm0 + packuswb mm0, mm7 + movd DWORD Ptr [edi+1], mm0 - pop ebx + pop ebx - } + } } @@ -194,120 +192,118 @@ void horizontal_line_3_5_scale_mmx static void horizontal_line_4_5_scale_mmx ( - const unsigned char *source, - unsigned int source_width, - unsigned char *dest, - unsigned int dest_width -) -{ - (void)dest_width; + const unsigned char *source, + unsigned int source_width, + unsigned char *dest, + unsigned int dest_width +) { + (void)dest_width; - __asm - { + __asm { - mov esi, source - mov edi, dest + mov esi, source + mov edi, dest - mov ecx, source_width - lea edx, [esi+ecx-8]; + mov ecx, source_width + lea edx, [esi+ecx-8]; - movq mm5, const45_1 // mm5 = 33 xx 66 xx 9a xx cd xx - movq mm6, const45_2 // mm6 = cd xx 9a xx 66 xx 33 xx + movq mm5, const45_1 // mm5 = 33 xx 66 xx 9a xx cd xx + movq mm6, const45_2 // mm6 = cd xx 9a xx 66 xx 33 xx - movq mm4, round_values // mm4 = 80 xx 80 xx 80 xx 80 xx - pxor mm7, mm7 // clear mm7 + movq mm4, round_values // mm4 = 80 xx 80 xx 80 xx 80 xx + pxor mm7, mm7 // clear mm7 - horiz_line_4_5_loop: + horiz_line_4_5_loop: - movq mm0, QWORD PTR [esi] // mm0 = 00 01 02 03 04 05 06 07 - movq mm1, QWORD PTR [esi+1]; // mm1 = 01 02 03 04 05 06 07 08 + movq mm0, QWORD PTR 
[esi] // mm0 = 00 01 02 03 04 05 06 07 + movq mm1, QWORD PTR [esi+1]; // mm1 = 01 02 03 04 05 06 07 08 - movq mm2, mm0 // mm2 = 00 01 02 03 04 05 06 07 - movq mm3, mm1 // mm3 = 01 02 03 04 05 06 07 08 + movq mm2, mm0 // mm2 = 00 01 02 03 04 05 06 07 + movq mm3, mm1 // mm3 = 01 02 03 04 05 06 07 08 - movd DWORD PTR [edi], mm0 // write output 00 xx xx xx - punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 02 xx 03 xx + movd DWORD PTR [edi], mm0 // write output 00 xx xx xx + punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 02 xx 03 xx - punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 03 xx 04 xx - pmullw mm0, mm5 // 00* 51 01*102 02*154 03*205 + punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 03 xx 04 xx + pmullw mm0, mm5 // 00* 51 01*102 02*154 03*205 - pmullw mm1, mm6 // 01*205 02*154 03*102 04* 51 - punpckhbw mm2, mm7 // mm2 = 04 xx 05 xx 06 xx 07 xx + pmullw mm1, mm6 // 01*205 02*154 03*102 04* 51 + punpckhbw mm2, mm7 // mm2 = 04 xx 05 xx 06 xx 07 xx - movd DWORD PTR [edi+5], mm2 // write ouput 05 xx xx xx - pmullw mm2, mm5 // 04* 51 05*102 06*154 07*205 + movd DWORD PTR [edi+5], mm2 // write ouput 05 xx xx xx + pmullw mm2, mm5 // 04* 51 05*102 06*154 07*205 - punpckhbw mm3, mm7 // mm3 = 05 xx 06 xx 07 xx 08 xx - pmullw mm3, mm6 // 05*205 06*154 07*102 08* 51 + punpckhbw mm3, mm7 // mm3 = 05 xx 06 xx 07 xx 08 xx + pmullw mm3, mm6 // 05*205 06*154 07*102 08* 51 - paddw mm0, mm1 // added round values - paddw mm0, mm4 + paddw mm0, mm1 // added round values + paddw mm0, mm4 - psrlw mm0, 8 // output: 01 xx 02 xx 03 xx 04 xx - packuswb mm0, mm7 + psrlw mm0, 8 // output: 01 xx 02 xx 03 xx 04 xx + packuswb mm0, mm7 - movd DWORD PTR [edi+1], mm0 // write output 01 02 03 04 - add edi, 10 + movd DWORD PTR [edi+1], mm0 // write output 01 02 03 04 + add edi, 10 - add esi, 8 - paddw mm2, mm3 // + add esi, 8 + paddw mm2, mm3 // - paddw mm2, mm4 // added round values - cmp esi, edx + paddw mm2, mm4 // added round values + cmp esi, edx - psrlw mm2, 8 - packuswb mm2, mm7 + psrlw mm2, 8 + packuswb mm2, mm7 - movd 
DWORD PTR [edi-4], mm2 // writeoutput 06 07 08 09 - jl horiz_line_4_5_loop + movd DWORD PTR [edi-4], mm2 // writeoutput 06 07 08 09 + jl horiz_line_4_5_loop -//Exit: - movq mm0, [esi] // mm0 = 00 01 02 03 04 05 06 07 - movq mm1, mm0 // mm1 = 00 01 02 03 04 05 06 07 +// Exit: + movq mm0, [esi] // mm0 = 00 01 02 03 04 05 06 07 + movq mm1, mm0 // mm1 = 00 01 02 03 04 05 06 07 - movq mm2, mm0 // mm2 = 00 01 02 03 04 05 06 07 - psrlq mm1, 8 // mm1 = 01 02 03 04 05 06 07 00 + movq mm2, mm0 // mm2 = 00 01 02 03 04 05 06 07 + psrlq mm1, 8 // mm1 = 01 02 03 04 05 06 07 00 - movq mm3, mask45 // mm3 = 00 00 00 00 00 00 ff 00 - pand mm3, mm1 // mm3 = 00 00 00 00 00 00 07 00 + movq mm3, mask45 // mm3 = 00 00 00 00 00 00 ff 00 + pand mm3, mm1 // mm3 = 00 00 00 00 00 00 07 00 - psllq mm3, 8 // mm3 = 00 00 00 00 00 00 00 07 - por mm1, mm3 // mm1 = 01 02 03 04 05 06 07 07 + psllq mm3, 8 // mm3 = 00 00 00 00 00 00 00 07 + por mm1, mm3 // mm1 = 01 02 03 04 05 06 07 07 - movq mm3, mm1 + movq mm3, mm1 - movd DWORD PTR [edi], mm0 // write output 00 xx xx xx - punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 02 xx 03 xx + movd DWORD PTR [edi], mm0 // write output 00 xx xx xx + punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 02 xx 03 xx - punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 03 xx 04 xx - pmullw mm0, mm5 // 00* 51 01*102 02*154 03*205 + punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 03 xx 04 xx + pmullw mm0, mm5 // 00* 51 01*102 02*154 03*205 - pmullw mm1, mm6 // 01*205 02*154 03*102 04* 51 - punpckhbw mm2, mm7 // mm2 = 04 xx 05 xx 06 xx 07 xx + pmullw mm1, mm6 // 01*205 02*154 03*102 04* 51 + punpckhbw mm2, mm7 // mm2 = 04 xx 05 xx 06 xx 07 xx - movd DWORD PTR [edi+5], mm2 // write ouput 05 xx xx xx - pmullw mm2, mm5 // 04* 51 05*102 06*154 07*205 + movd DWORD PTR [edi+5], mm2 // write ouput 05 xx xx xx + pmullw mm2, mm5 // 04* 51 05*102 06*154 07*205 - punpckhbw mm3, mm7 // mm3 = 05 xx 06 xx 07 xx 08 xx - pmullw mm3, mm6 // 05*205 06*154 07*102 07* 51 + punpckhbw mm3, mm7 // mm3 = 05 xx 06 xx 07 xx 08 xx + 
pmullw mm3, mm6 // 05*205 06*154 07*102 07* 51 - paddw mm0, mm1 // added round values - paddw mm0, mm4 + paddw mm0, mm1 // added round values + paddw mm0, mm4 - psrlw mm0, 8 // output: 01 xx 02 xx 03 xx 04 xx - packuswb mm0, mm7 // 01 02 03 04 xx xx xx xx + psrlw mm0, 8 // output: 01 xx 02 xx 03 xx 04 xx + packuswb mm0, mm7 // 01 02 03 04 xx xx xx xx - movd DWORD PTR [edi+1], mm0 // write output 01 02 03 04 - paddw mm2, mm3 // + movd DWORD PTR [edi+1], mm0 // write output 01 02 03 04 + paddw mm2, mm3 // - paddw mm2, mm4 // added round values - psrlw mm2, 8 + paddw mm2, mm4 // added round values + psrlw mm2, 8 - packuswb mm2, mm7 - movd DWORD PTR [edi+6], mm2 // writeoutput 06 07 08 09 + packuswb mm2, mm7 + movd DWORD PTR [edi+6], mm2 // writeoutput 06 07 08 09 - } + } } /**************************************************************************** @@ -332,167 +328,165 @@ void horizontal_line_4_5_scale_mmx static void vertical_band_4_5_scale_mmx ( - unsigned char *dest, - unsigned int dest_pitch, - unsigned int dest_width -) -{ - __asm - { + unsigned char *dest, + unsigned int dest_pitch, + unsigned int dest_width +) { + __asm { - mov esi, dest // Get the source and destination pointer - mov ecx, dest_pitch // Get the pitch size + mov esi, dest // Get the source and destination pointer + mov ecx, dest_pitch // Get the pitch size - lea edi, [esi+ecx*2] // tow lines below - add edi, ecx // three lines below + lea edi, [esi+ecx*2] // tow lines below + add edi, ecx // three lines below - pxor mm7, mm7 // clear out mm7 - mov edx, dest_width // Loop counter + pxor mm7, mm7 // clear out mm7 + mov edx, dest_width // Loop counter - vs_4_5_loop: + vs_4_5_loop: - movq mm0, QWORD ptr [esi] // src[0]; - movq mm1, QWORD ptr [esi+ecx] // src[1]; + movq mm0, QWORD ptr [esi] // src[0]; + movq mm1, QWORD ptr [esi+ecx] // src[1]; - movq mm2, mm0 // Make a copy - punpcklbw mm0, mm7 // unpack low to word + movq mm2, mm0 // Make a copy + punpcklbw mm0, mm7 // unpack low to word - movq 
mm5, one_fifth - punpckhbw mm2, mm7 // unpack high to word + movq mm5, one_fifth + punpckhbw mm2, mm7 // unpack high to word - pmullw mm0, mm5 // a * 1/5 + pmullw mm0, mm5 // a * 1/5 - movq mm3, mm1 // make a copy - punpcklbw mm1, mm7 // unpack low to word + movq mm3, mm1 // make a copy + punpcklbw mm1, mm7 // unpack low to word - pmullw mm2, mm5 // a * 1/5 - movq mm6, four_fifths // constan + pmullw mm2, mm5 // a * 1/5 + movq mm6, four_fifths // constan - movq mm4, mm1 // copy of low b - pmullw mm4, mm6 // b * 4/5 + movq mm4, mm1 // copy of low b + pmullw mm4, mm6 // b * 4/5 - punpckhbw mm3, mm7 // unpack high to word - movq mm5, mm3 // copy of high b + punpckhbw mm3, mm7 // unpack high to word + movq mm5, mm3 // copy of high b - pmullw mm5, mm6 // b * 4/5 - paddw mm0, mm4 // a * 1/5 + b * 4/5 + pmullw mm5, mm6 // b * 4/5 + paddw mm0, mm4 // a * 1/5 + b * 4/5 - paddw mm2, mm5 // a * 1/5 + b * 4/5 - paddw mm0, round_values // + 128 + paddw mm2, mm5 // a * 1/5 + b * 4/5 + paddw mm0, round_values // + 128 - paddw mm2, round_values // + 128 - psrlw mm0, 8 + paddw mm2, round_values // + 128 + psrlw mm0, 8 - psrlw mm2, 8 - packuswb mm0, mm2 // des [1] + psrlw mm2, 8 + packuswb mm0, mm2 // des [1] - movq QWORD ptr [esi+ecx], mm0 // write des[1] - movq mm0, [esi+ecx*2] // mm0 = src[2] + movq QWORD ptr [esi+ecx], mm0 // write des[1] + movq mm0, [esi+ecx*2] // mm0 = src[2] - // mm1, mm3 --- Src[1] - // mm0 --- Src[2] - // mm7 for unpacking + // mm1, mm3 --- Src[1] + // mm0 --- Src[2] + // mm7 for unpacking - movq mm5, two_fifths - movq mm2, mm0 // make a copy + movq mm5, two_fifths + movq mm2, mm0 // make a copy - pmullw mm1, mm5 // b * 2/5 - movq mm6, three_fifths + pmullw mm1, mm5 // b * 2/5 + movq mm6, three_fifths - punpcklbw mm0, mm7 // unpack low to word - pmullw mm3, mm5 // b * 2/5 + punpcklbw mm0, mm7 // unpack low to word + pmullw mm3, mm5 // b * 2/5 - movq mm4, mm0 // make copy of c - punpckhbw mm2, mm7 // unpack high to word + movq mm4, mm0 // make copy of c + 
punpckhbw mm2, mm7 // unpack high to word - pmullw mm4, mm6 // c * 3/5 - movq mm5, mm2 + pmullw mm4, mm6 // c * 3/5 + movq mm5, mm2 - pmullw mm5, mm6 // c * 3/5 - paddw mm1, mm4 // b * 2/5 + c * 3/5 + pmullw mm5, mm6 // c * 3/5 + paddw mm1, mm4 // b * 2/5 + c * 3/5 - paddw mm3, mm5 // b * 2/5 + c * 3/5 - paddw mm1, round_values // + 128 + paddw mm3, mm5 // b * 2/5 + c * 3/5 + paddw mm1, round_values // + 128 - paddw mm3, round_values // + 128 - psrlw mm1, 8 + paddw mm3, round_values // + 128 + psrlw mm1, 8 - psrlw mm3, 8 - packuswb mm1, mm3 // des[2] + psrlw mm3, 8 + packuswb mm1, mm3 // des[2] - movq QWORD ptr [esi+ecx*2], mm1 // write des[2] - movq mm1, [edi] // mm1=Src[3]; + movq QWORD ptr [esi+ecx*2], mm1 // write des[2] + movq mm1, [edi] // mm1=Src[3]; - // mm0, mm2 --- Src[2] - // mm1 --- Src[3] - // mm6 --- 3/5 - // mm7 for unpacking + // mm0, mm2 --- Src[2] + // mm1 --- Src[3] + // mm6 --- 3/5 + // mm7 for unpacking - pmullw mm0, mm6 // c * 3/5 - movq mm5, two_fifths // mm5 = 2/5 + pmullw mm0, mm6 // c * 3/5 + movq mm5, two_fifths // mm5 = 2/5 - movq mm3, mm1 // make a copy - pmullw mm2, mm6 // c * 3/5 + movq mm3, mm1 // make a copy + pmullw mm2, mm6 // c * 3/5 - punpcklbw mm1, mm7 // unpack low - movq mm4, mm1 // make a copy + punpcklbw mm1, mm7 // unpack low + movq mm4, mm1 // make a copy - punpckhbw mm3, mm7 // unpack high - pmullw mm4, mm5 // d * 2/5 + punpckhbw mm3, mm7 // unpack high + pmullw mm4, mm5 // d * 2/5 - movq mm6, mm3 // make a copy - pmullw mm6, mm5 // d * 2/5 + movq mm6, mm3 // make a copy + pmullw mm6, mm5 // d * 2/5 - paddw mm0, mm4 // c * 3/5 + d * 2/5 - paddw mm2, mm6 // c * 3/5 + d * 2/5 + paddw mm0, mm4 // c * 3/5 + d * 2/5 + paddw mm2, mm6 // c * 3/5 + d * 2/5 - paddw mm0, round_values // + 128 - paddw mm2, round_values // + 128 + paddw mm0, round_values // + 128 + paddw mm2, round_values // + 128 - psrlw mm0, 8 - psrlw mm2, 8 + psrlw mm0, 8 + psrlw mm2, 8 - packuswb mm0, mm2 // des[3] - movq QWORD ptr [edi], mm0 // write des[3] + 
packuswb mm0, mm2 // des[3] + movq QWORD ptr [edi], mm0 // write des[3] - // mm1, mm3 --- Src[3] - // mm7 -- cleared for unpacking + // mm1, mm3 --- Src[3] + // mm7 -- cleared for unpacking - movq mm0, [edi+ecx*2] // mm0, Src[0] of the next group + movq mm0, [edi+ecx*2] // mm0, Src[0] of the next group - movq mm5, four_fifths // mm5 = 4/5 - pmullw mm1, mm5 // d * 4/5 + movq mm5, four_fifths // mm5 = 4/5 + pmullw mm1, mm5 // d * 4/5 - movq mm6, one_fifth // mm6 = 1/5 - movq mm2, mm0 // make a copy + movq mm6, one_fifth // mm6 = 1/5 + movq mm2, mm0 // make a copy - pmullw mm3, mm5 // d * 4/5 - punpcklbw mm0, mm7 // unpack low + pmullw mm3, mm5 // d * 4/5 + punpcklbw mm0, mm7 // unpack low - pmullw mm0, mm6 // an * 1/5 - punpckhbw mm2, mm7 // unpack high + pmullw mm0, mm6 // an * 1/5 + punpckhbw mm2, mm7 // unpack high - paddw mm1, mm0 // d * 4/5 + an * 1/5 - pmullw mm2, mm6 // an * 1/5 + paddw mm1, mm0 // d * 4/5 + an * 1/5 + pmullw mm2, mm6 // an * 1/5 - paddw mm3, mm2 // d * 4/5 + an * 1/5 - paddw mm1, round_values // + 128 + paddw mm3, mm2 // d * 4/5 + an * 1/5 + paddw mm1, round_values // + 128 - paddw mm3, round_values // + 128 - psrlw mm1, 8 + paddw mm3, round_values // + 128 + psrlw mm1, 8 - psrlw mm3, 8 - packuswb mm1, mm3 // des[4] + psrlw mm3, 8 + packuswb mm1, mm3 // des[4] - movq QWORD ptr [edi+ecx], mm1 // write des[4] + movq QWORD ptr [edi+ecx], mm1 // write des[4] - add edi, 8 - add esi, 8 + add edi, 8 + add esi, 8 - sub edx, 8 - jg vs_4_5_loop - } + sub edx, 8 + jg vs_4_5_loop + } } /**************************************************************************** @@ -517,139 +511,137 @@ void vertical_band_4_5_scale_mmx static void last_vertical_band_4_5_scale_mmx ( - unsigned char *dest, - unsigned int dest_pitch, - unsigned int dest_width -) -{ - __asm - { - mov esi, dest // Get the source and destination pointer - mov ecx, dest_pitch // Get the pitch size + unsigned char *dest, + unsigned int dest_pitch, + unsigned int dest_width +) { + __asm { + mov 
esi, dest // Get the source and destination pointer + mov ecx, dest_pitch // Get the pitch size - lea edi, [esi+ecx*2] // tow lines below - add edi, ecx // three lines below + lea edi, [esi+ecx*2] // tow lines below + add edi, ecx // three lines below - pxor mm7, mm7 // clear out mm7 - mov edx, dest_width // Loop counter + pxor mm7, mm7 // clear out mm7 + mov edx, dest_width // Loop counter - last_vs_4_5_loop: + last_vs_4_5_loop: - movq mm0, QWORD ptr [esi] // src[0]; - movq mm1, QWORD ptr [esi+ecx] // src[1]; + movq mm0, QWORD ptr [esi] // src[0]; + movq mm1, QWORD ptr [esi+ecx] // src[1]; - movq mm2, mm0 // Make a copy - punpcklbw mm0, mm7 // unpack low to word + movq mm2, mm0 // Make a copy + punpcklbw mm0, mm7 // unpack low to word - movq mm5, one_fifth - punpckhbw mm2, mm7 // unpack high to word + movq mm5, one_fifth + punpckhbw mm2, mm7 // unpack high to word - pmullw mm0, mm5 // a * 1/5 + pmullw mm0, mm5 // a * 1/5 - movq mm3, mm1 // make a copy - punpcklbw mm1, mm7 // unpack low to word + movq mm3, mm1 // make a copy + punpcklbw mm1, mm7 // unpack low to word - pmullw mm2, mm5 // a * 1/5 - movq mm6, four_fifths // constan + pmullw mm2, mm5 // a * 1/5 + movq mm6, four_fifths // constan - movq mm4, mm1 // copy of low b - pmullw mm4, mm6 // b * 4/5 + movq mm4, mm1 // copy of low b + pmullw mm4, mm6 // b * 4/5 - punpckhbw mm3, mm7 // unpack high to word - movq mm5, mm3 // copy of high b + punpckhbw mm3, mm7 // unpack high to word + movq mm5, mm3 // copy of high b - pmullw mm5, mm6 // b * 4/5 - paddw mm0, mm4 // a * 1/5 + b * 4/5 + pmullw mm5, mm6 // b * 4/5 + paddw mm0, mm4 // a * 1/5 + b * 4/5 - paddw mm2, mm5 // a * 1/5 + b * 4/5 - paddw mm0, round_values // + 128 + paddw mm2, mm5 // a * 1/5 + b * 4/5 + paddw mm0, round_values // + 128 - paddw mm2, round_values // + 128 - psrlw mm0, 8 + paddw mm2, round_values // + 128 + psrlw mm0, 8 - psrlw mm2, 8 - packuswb mm0, mm2 // des [1] + psrlw mm2, 8 + packuswb mm0, mm2 // des [1] - movq QWORD ptr [esi+ecx], mm0 // 
write des[1] - movq mm0, [esi+ecx*2] // mm0 = src[2] + movq QWORD ptr [esi+ecx], mm0 // write des[1] + movq mm0, [esi+ecx*2] // mm0 = src[2] - // mm1, mm3 --- Src[1] - // mm0 --- Src[2] - // mm7 for unpacking + // mm1, mm3 --- Src[1] + // mm0 --- Src[2] + // mm7 for unpacking - movq mm5, two_fifths - movq mm2, mm0 // make a copy + movq mm5, two_fifths + movq mm2, mm0 // make a copy - pmullw mm1, mm5 // b * 2/5 - movq mm6, three_fifths + pmullw mm1, mm5 // b * 2/5 + movq mm6, three_fifths - punpcklbw mm0, mm7 // unpack low to word - pmullw mm3, mm5 // b * 2/5 + punpcklbw mm0, mm7 // unpack low to word + pmullw mm3, mm5 // b * 2/5 - movq mm4, mm0 // make copy of c - punpckhbw mm2, mm7 // unpack high to word + movq mm4, mm0 // make copy of c + punpckhbw mm2, mm7 // unpack high to word - pmullw mm4, mm6 // c * 3/5 - movq mm5, mm2 + pmullw mm4, mm6 // c * 3/5 + movq mm5, mm2 - pmullw mm5, mm6 // c * 3/5 - paddw mm1, mm4 // b * 2/5 + c * 3/5 + pmullw mm5, mm6 // c * 3/5 + paddw mm1, mm4 // b * 2/5 + c * 3/5 - paddw mm3, mm5 // b * 2/5 + c * 3/5 - paddw mm1, round_values // + 128 + paddw mm3, mm5 // b * 2/5 + c * 3/5 + paddw mm1, round_values // + 128 - paddw mm3, round_values // + 128 - psrlw mm1, 8 + paddw mm3, round_values // + 128 + psrlw mm1, 8 - psrlw mm3, 8 - packuswb mm1, mm3 // des[2] + psrlw mm3, 8 + packuswb mm1, mm3 // des[2] - movq QWORD ptr [esi+ecx*2], mm1 // write des[2] - movq mm1, [edi] // mm1=Src[3]; + movq QWORD ptr [esi+ecx*2], mm1 // write des[2] + movq mm1, [edi] // mm1=Src[3]; - movq QWORD ptr [edi+ecx], mm1 // write des[4]; + movq QWORD ptr [edi+ecx], mm1 // write des[4]; - // mm0, mm2 --- Src[2] - // mm1 --- Src[3] - // mm6 --- 3/5 - // mm7 for unpacking + // mm0, mm2 --- Src[2] + // mm1 --- Src[3] + // mm6 --- 3/5 + // mm7 for unpacking - pmullw mm0, mm6 // c * 3/5 - movq mm5, two_fifths // mm5 = 2/5 + pmullw mm0, mm6 // c * 3/5 + movq mm5, two_fifths // mm5 = 2/5 - movq mm3, mm1 // make a copy - pmullw mm2, mm6 // c * 3/5 + movq mm3, mm1 // 
make a copy + pmullw mm2, mm6 // c * 3/5 - punpcklbw mm1, mm7 // unpack low - movq mm4, mm1 // make a copy + punpcklbw mm1, mm7 // unpack low + movq mm4, mm1 // make a copy - punpckhbw mm3, mm7 // unpack high - pmullw mm4, mm5 // d * 2/5 + punpckhbw mm3, mm7 // unpack high + pmullw mm4, mm5 // d * 2/5 - movq mm6, mm3 // make a copy - pmullw mm6, mm5 // d * 2/5 + movq mm6, mm3 // make a copy + pmullw mm6, mm5 // d * 2/5 - paddw mm0, mm4 // c * 3/5 + d * 2/5 - paddw mm2, mm6 // c * 3/5 + d * 2/5 + paddw mm0, mm4 // c * 3/5 + d * 2/5 + paddw mm2, mm6 // c * 3/5 + d * 2/5 - paddw mm0, round_values // + 128 - paddw mm2, round_values // + 128 + paddw mm0, round_values // + 128 + paddw mm2, round_values // + 128 - psrlw mm0, 8 - psrlw mm2, 8 + psrlw mm0, 8 + psrlw mm2, 8 - packuswb mm0, mm2 // des[3] - movq QWORD ptr [edi], mm0 // write des[3] + packuswb mm0, mm2 // des[3] + movq QWORD ptr [edi], mm0 // write des[3] - // mm1, mm3 --- Src[3] - // mm7 -- cleared for unpacking - add edi, 8 - add esi, 8 + // mm1, mm3 --- Src[3] + // mm7 -- cleared for unpacking + add edi, 8 + add esi, 8 - sub edx, 8 - jg last_vs_4_5_loop - } + sub edx, 8 + jg last_vs_4_5_loop + } } /**************************************************************************** @@ -674,153 +666,151 @@ void last_vertical_band_4_5_scale_mmx static void vertical_band_3_5_scale_mmx ( - unsigned char *dest, - unsigned int dest_pitch, - unsigned int dest_width -) -{ - __asm - { - mov esi, dest // Get the source and destination pointer - mov ecx, dest_pitch // Get the pitch size + unsigned char *dest, + unsigned int dest_pitch, + unsigned int dest_width +) { + __asm { + mov esi, dest // Get the source and destination pointer + mov ecx, dest_pitch // Get the pitch size - lea edi, [esi+ecx*2] // tow lines below - add edi, ecx // three lines below + lea edi, [esi+ecx*2] // tow lines below + add edi, ecx // three lines below - pxor mm7, mm7 // clear out mm7 - mov edx, dest_width // Loop counter + pxor mm7, mm7 // clear out 
mm7 + mov edx, dest_width // Loop counter - vs_3_5_loop: + vs_3_5_loop: - movq mm0, QWORD ptr [esi] // src[0]; - movq mm1, QWORD ptr [esi+ecx] // src[1]; + movq mm0, QWORD ptr [esi] // src[0]; + movq mm1, QWORD ptr [esi+ecx] // src[1]; - movq mm2, mm0 // Make a copy - punpcklbw mm0, mm7 // unpack low to word + movq mm2, mm0 // Make a copy + punpcklbw mm0, mm7 // unpack low to word - movq mm5, two_fifths // mm5 = 2/5 - punpckhbw mm2, mm7 // unpack high to word + movq mm5, two_fifths // mm5 = 2/5 + punpckhbw mm2, mm7 // unpack high to word - pmullw mm0, mm5 // a * 2/5 + pmullw mm0, mm5 // a * 2/5 - movq mm3, mm1 // make a copy - punpcklbw mm1, mm7 // unpack low to word + movq mm3, mm1 // make a copy + punpcklbw mm1, mm7 // unpack low to word - pmullw mm2, mm5 // a * 2/5 - movq mm6, three_fifths // mm6 = 3/5 + pmullw mm2, mm5 // a * 2/5 + movq mm6, three_fifths // mm6 = 3/5 - movq mm4, mm1 // copy of low b - pmullw mm4, mm6 // b * 3/5 + movq mm4, mm1 // copy of low b + pmullw mm4, mm6 // b * 3/5 - punpckhbw mm3, mm7 // unpack high to word - movq mm5, mm3 // copy of high b + punpckhbw mm3, mm7 // unpack high to word + movq mm5, mm3 // copy of high b - pmullw mm5, mm6 // b * 3/5 - paddw mm0, mm4 // a * 2/5 + b * 3/5 + pmullw mm5, mm6 // b * 3/5 + paddw mm0, mm4 // a * 2/5 + b * 3/5 - paddw mm2, mm5 // a * 2/5 + b * 3/5 - paddw mm0, round_values // + 128 + paddw mm2, mm5 // a * 2/5 + b * 3/5 + paddw mm0, round_values // + 128 - paddw mm2, round_values // + 128 - psrlw mm0, 8 + paddw mm2, round_values // + 128 + psrlw mm0, 8 - psrlw mm2, 8 - packuswb mm0, mm2 // des [1] + psrlw mm2, 8 + packuswb mm0, mm2 // des [1] - movq QWORD ptr [esi+ecx], mm0 // write des[1] - movq mm0, [esi+ecx*2] // mm0 = src[2] + movq QWORD ptr [esi+ecx], mm0 // write des[1] + movq mm0, [esi+ecx*2] // mm0 = src[2] - // mm1, mm3 --- Src[1] - // mm0 --- Src[2] - // mm7 for unpacking + // mm1, mm3 --- Src[1] + // mm0 --- Src[2] + // mm7 for unpacking - movq mm4, mm1 // b low - pmullw mm1, four_fifths 
// b * 4/5 low + movq mm4, mm1 // b low + pmullw mm1, four_fifths // b * 4/5 low - movq mm5, mm3 // b high - pmullw mm3, four_fifths // b * 4/5 high + movq mm5, mm3 // b high + pmullw mm3, four_fifths // b * 4/5 high - movq mm2, mm0 // c - pmullw mm4, one_fifth // b * 1/5 + movq mm2, mm0 // c + pmullw mm4, one_fifth // b * 1/5 - punpcklbw mm0, mm7 // c low - pmullw mm5, one_fifth // b * 1/5 + punpcklbw mm0, mm7 // c low + pmullw mm5, one_fifth // b * 1/5 - movq mm6, mm0 // make copy of c low - punpckhbw mm2, mm7 // c high + movq mm6, mm0 // make copy of c low + punpckhbw mm2, mm7 // c high - pmullw mm6, one_fifth // c * 1/5 low - movq mm7, mm2 // make copy of c high + pmullw mm6, one_fifth // c * 1/5 low + movq mm7, mm2 // make copy of c high - pmullw mm7, one_fifth // c * 1/5 high - paddw mm1, mm6 // b * 4/5 + c * 1/5 low + pmullw mm7, one_fifth // c * 1/5 high + paddw mm1, mm6 // b * 4/5 + c * 1/5 low - paddw mm3, mm7 // b * 4/5 + c * 1/5 high - movq mm6, mm0 // make copy of c low + paddw mm3, mm7 // b * 4/5 + c * 1/5 high + movq mm6, mm0 // make copy of c low - pmullw mm6, four_fifths // c * 4/5 low - movq mm7, mm2 // make copy of c high + pmullw mm6, four_fifths // c * 4/5 low + movq mm7, mm2 // make copy of c high - pmullw mm7, four_fifths // c * 4/5 high + pmullw mm7, four_fifths // c * 4/5 high - paddw mm4, mm6 // b * 1/5 + c * 4/5 low - paddw mm5, mm7 // b * 1/5 + c * 4/5 high + paddw mm4, mm6 // b * 1/5 + c * 4/5 low + paddw mm5, mm7 // b * 1/5 + c * 4/5 high - paddw mm1, round_values // + 128 - paddw mm3, round_values // + 128 + paddw mm1, round_values // + 128 + paddw mm3, round_values // + 128 - psrlw mm1, 8 - psrlw mm3, 8 + psrlw mm1, 8 + psrlw mm3, 8 - packuswb mm1, mm3 // des[2] - movq QWORD ptr [esi+ecx*2], mm1 // write des[2] + packuswb mm1, mm3 // des[2] + movq QWORD ptr [esi+ecx*2], mm1 // write des[2] - paddw mm4, round_values // + 128 - paddw mm5, round_values // + 128 + paddw mm4, round_values // + 128 + paddw mm5, round_values // + 128 - 
psrlw mm4, 8 - psrlw mm5, 8 + psrlw mm4, 8 + psrlw mm5, 8 - packuswb mm4, mm5 // des[3] - movq QWORD ptr [edi], mm4 // write des[3] + packuswb mm4, mm5 // des[3] + movq QWORD ptr [edi], mm4 // write des[3] - // mm0, mm2 --- Src[3] + // mm0, mm2 --- Src[3] - pxor mm7, mm7 // clear mm7 for unpacking - movq mm1, [edi+ecx*2] // mm1 = Src[0] of the next group + pxor mm7, mm7 // clear mm7 for unpacking + movq mm1, [edi+ecx*2] // mm1 = Src[0] of the next group - movq mm5, three_fifths // mm5 = 3/5 - pmullw mm0, mm5 // d * 3/5 + movq mm5, three_fifths // mm5 = 3/5 + pmullw mm0, mm5 // d * 3/5 - movq mm6, two_fifths // mm6 = 2/5 - movq mm3, mm1 // make a copy + movq mm6, two_fifths // mm6 = 2/5 + movq mm3, mm1 // make a copy - pmullw mm2, mm5 // d * 3/5 - punpcklbw mm1, mm7 // unpack low + pmullw mm2, mm5 // d * 3/5 + punpcklbw mm1, mm7 // unpack low - pmullw mm1, mm6 // an * 2/5 - punpckhbw mm3, mm7 // unpack high + pmullw mm1, mm6 // an * 2/5 + punpckhbw mm3, mm7 // unpack high - paddw mm0, mm1 // d * 3/5 + an * 2/5 - pmullw mm3, mm6 // an * 2/5 + paddw mm0, mm1 // d * 3/5 + an * 2/5 + pmullw mm3, mm6 // an * 2/5 - paddw mm2, mm3 // d * 3/5 + an * 2/5 - paddw mm0, round_values // + 128 + paddw mm2, mm3 // d * 3/5 + an * 2/5 + paddw mm0, round_values // + 128 - paddw mm2, round_values // + 128 - psrlw mm0, 8 + paddw mm2, round_values // + 128 + psrlw mm0, 8 - psrlw mm2, 8 - packuswb mm0, mm2 // des[4] + psrlw mm2, 8 + packuswb mm0, mm2 // des[4] - movq QWORD ptr [edi+ecx], mm0 // write des[4] + movq QWORD ptr [edi+ecx], mm0 // write des[4] - add edi, 8 - add esi, 8 + add edi, 8 + add esi, 8 - sub edx, 8 - jg vs_3_5_loop - } + sub edx, 8 + jg vs_3_5_loop + } } /**************************************************************************** @@ -845,129 +835,127 @@ void vertical_band_3_5_scale_mmx static void last_vertical_band_3_5_scale_mmx ( - unsigned char *dest, - unsigned int dest_pitch, - unsigned int dest_width -) -{ - __asm - { - mov esi, dest // Get the source and 
destination pointer - mov ecx, dest_pitch // Get the pitch size + unsigned char *dest, + unsigned int dest_pitch, + unsigned int dest_width +) { + __asm { + mov esi, dest // Get the source and destination pointer + mov ecx, dest_pitch // Get the pitch size - lea edi, [esi+ecx*2] // tow lines below - add edi, ecx // three lines below + lea edi, [esi+ecx*2] // tow lines below + add edi, ecx // three lines below - pxor mm7, mm7 // clear out mm7 - mov edx, dest_width // Loop counter + pxor mm7, mm7 // clear out mm7 + mov edx, dest_width // Loop counter - last_vs_3_5_loop: + last_vs_3_5_loop: - movq mm0, QWORD ptr [esi] // src[0]; - movq mm1, QWORD ptr [esi+ecx] // src[1]; + movq mm0, QWORD ptr [esi] // src[0]; + movq mm1, QWORD ptr [esi+ecx] // src[1]; - movq mm2, mm0 // Make a copy - punpcklbw mm0, mm7 // unpack low to word + movq mm2, mm0 // Make a copy + punpcklbw mm0, mm7 // unpack low to word - movq mm5, two_fifths // mm5 = 2/5 - punpckhbw mm2, mm7 // unpack high to word + movq mm5, two_fifths // mm5 = 2/5 + punpckhbw mm2, mm7 // unpack high to word - pmullw mm0, mm5 // a * 2/5 + pmullw mm0, mm5 // a * 2/5 - movq mm3, mm1 // make a copy - punpcklbw mm1, mm7 // unpack low to word + movq mm3, mm1 // make a copy + punpcklbw mm1, mm7 // unpack low to word - pmullw mm2, mm5 // a * 2/5 - movq mm6, three_fifths // mm6 = 3/5 + pmullw mm2, mm5 // a * 2/5 + movq mm6, three_fifths // mm6 = 3/5 - movq mm4, mm1 // copy of low b - pmullw mm4, mm6 // b * 3/5 + movq mm4, mm1 // copy of low b + pmullw mm4, mm6 // b * 3/5 - punpckhbw mm3, mm7 // unpack high to word - movq mm5, mm3 // copy of high b + punpckhbw mm3, mm7 // unpack high to word + movq mm5, mm3 // copy of high b - pmullw mm5, mm6 // b * 3/5 - paddw mm0, mm4 // a * 2/5 + b * 3/5 + pmullw mm5, mm6 // b * 3/5 + paddw mm0, mm4 // a * 2/5 + b * 3/5 - paddw mm2, mm5 // a * 2/5 + b * 3/5 - paddw mm0, round_values // + 128 + paddw mm2, mm5 // a * 2/5 + b * 3/5 + paddw mm0, round_values // + 128 - paddw mm2, round_values // + 
128 - psrlw mm0, 8 + paddw mm2, round_values // + 128 + psrlw mm0, 8 - psrlw mm2, 8 - packuswb mm0, mm2 // des [1] + psrlw mm2, 8 + packuswb mm0, mm2 // des [1] - movq QWORD ptr [esi+ecx], mm0 // write des[1] - movq mm0, [esi+ecx*2] // mm0 = src[2] + movq QWORD ptr [esi+ecx], mm0 // write des[1] + movq mm0, [esi+ecx*2] // mm0 = src[2] - // mm1, mm3 --- Src[1] - // mm0 --- Src[2] - // mm7 for unpacking + // mm1, mm3 --- Src[1] + // mm0 --- Src[2] + // mm7 for unpacking - movq mm4, mm1 // b low - pmullw mm1, four_fifths // b * 4/5 low + movq mm4, mm1 // b low + pmullw mm1, four_fifths // b * 4/5 low - movq QWORD ptr [edi+ecx], mm0 // write des[4] + movq QWORD ptr [edi+ecx], mm0 // write des[4] - movq mm5, mm3 // b high - pmullw mm3, four_fifths // b * 4/5 high + movq mm5, mm3 // b high + pmullw mm3, four_fifths // b * 4/5 high - movq mm2, mm0 // c - pmullw mm4, one_fifth // b * 1/5 + movq mm2, mm0 // c + pmullw mm4, one_fifth // b * 1/5 - punpcklbw mm0, mm7 // c low - pmullw mm5, one_fifth // b * 1/5 + punpcklbw mm0, mm7 // c low + pmullw mm5, one_fifth // b * 1/5 - movq mm6, mm0 // make copy of c low - punpckhbw mm2, mm7 // c high + movq mm6, mm0 // make copy of c low + punpckhbw mm2, mm7 // c high - pmullw mm6, one_fifth // c * 1/5 low - movq mm7, mm2 // make copy of c high + pmullw mm6, one_fifth // c * 1/5 low + movq mm7, mm2 // make copy of c high - pmullw mm7, one_fifth // c * 1/5 high - paddw mm1, mm6 // b * 4/5 + c * 1/5 low + pmullw mm7, one_fifth // c * 1/5 high + paddw mm1, mm6 // b * 4/5 + c * 1/5 low - paddw mm3, mm7 // b * 4/5 + c * 1/5 high - movq mm6, mm0 // make copy of c low + paddw mm3, mm7 // b * 4/5 + c * 1/5 high + movq mm6, mm0 // make copy of c low - pmullw mm6, four_fifths // c * 4/5 low - movq mm7, mm2 // make copy of c high + pmullw mm6, four_fifths // c * 4/5 low + movq mm7, mm2 // make copy of c high - pmullw mm7, four_fifths // c * 4/5 high + pmullw mm7, four_fifths // c * 4/5 high - paddw mm4, mm6 // b * 1/5 + c * 4/5 low - paddw mm5, 
mm7 // b * 1/5 + c * 4/5 high + paddw mm4, mm6 // b * 1/5 + c * 4/5 low + paddw mm5, mm7 // b * 1/5 + c * 4/5 high - paddw mm1, round_values // + 128 - paddw mm3, round_values // + 128 + paddw mm1, round_values // + 128 + paddw mm3, round_values // + 128 - psrlw mm1, 8 - psrlw mm3, 8 + psrlw mm1, 8 + psrlw mm3, 8 - packuswb mm1, mm3 // des[2] - movq QWORD ptr [esi+ecx*2], mm1 // write des[2] + packuswb mm1, mm3 // des[2] + movq QWORD ptr [esi+ecx*2], mm1 // write des[2] - paddw mm4, round_values // + 128 - paddw mm5, round_values // + 128 + paddw mm4, round_values // + 128 + paddw mm5, round_values // + 128 - psrlw mm4, 8 - psrlw mm5, 8 + psrlw mm4, 8 + psrlw mm5, 8 - packuswb mm4, mm5 // des[3] - movq QWORD ptr [edi], mm4 // write des[3] + packuswb mm4, mm5 // des[3] + movq QWORD ptr [edi], mm4 // write des[3] - // mm0, mm2 --- Src[3] + // mm0, mm2 --- Src[3] - add edi, 8 - add esi, 8 + add edi, 8 + add esi, 8 - sub edx, 8 - jg last_vs_3_5_loop - } + sub edx, 8 + jg last_vs_3_5_loop + } } /**************************************************************************** @@ -992,52 +980,50 @@ void last_vertical_band_3_5_scale_mmx static void vertical_band_1_2_scale_mmx ( - unsigned char *dest, - unsigned int dest_pitch, - unsigned int dest_width -) -{ - __asm - { + unsigned char *dest, + unsigned int dest_pitch, + unsigned int dest_width +) { + __asm { - mov esi, dest // Get the source and destination pointer - mov ecx, dest_pitch // Get the pitch size + mov esi, dest // Get the source and destination pointer + mov ecx, dest_pitch // Get the pitch size - pxor mm7, mm7 // clear out mm7 - mov edx, dest_width // Loop counter + pxor mm7, mm7 // clear out mm7 + mov edx, dest_width // Loop counter - vs_1_2_loop: + vs_1_2_loop: - movq mm0, [esi] // get Src[0] - movq mm1, [esi + ecx * 2] // get Src[1] + movq mm0, [esi] // get Src[0] + movq mm1, [esi + ecx * 2] // get Src[1] - movq mm2, mm0 // make copy before unpack - movq mm3, mm1 // make copy before unpack + movq mm2, mm0 // 
make copy before unpack + movq mm3, mm1 // make copy before unpack - punpcklbw mm0, mm7 // low Src[0] - movq mm6, four_ones // mm6= 1, 1, 1, 1 + punpcklbw mm0, mm7 // low Src[0] + movq mm6, four_ones // mm6= 1, 1, 1, 1 - punpcklbw mm1, mm7 // low Src[1] - paddw mm0, mm1 // low (a + b) + punpcklbw mm1, mm7 // low Src[1] + paddw mm0, mm1 // low (a + b) - punpckhbw mm2, mm7 // high Src[0] - paddw mm0, mm6 // low (a + b + 1) + punpckhbw mm2, mm7 // high Src[0] + paddw mm0, mm6 // low (a + b + 1) - punpckhbw mm3, mm7 - paddw mm2, mm3 // high (a + b ) + punpckhbw mm3, mm7 + paddw mm2, mm3 // high (a + b ) - psraw mm0, 1 // low (a + b +1 )/2 - paddw mm2, mm6 // high (a + b + 1) + psraw mm0, 1 // low (a + b +1 )/2 + paddw mm2, mm6 // high (a + b + 1) - psraw mm2, 1 // high (a + b + 1)/2 - packuswb mm0, mm2 // pack results + psraw mm2, 1 // high (a + b + 1)/2 + packuswb mm0, mm2 // pack results - movq [esi+ecx], mm0 // write out eight bytes - add esi, 8 + movq [esi+ecx], mm0 // write out eight bytes + add esi, 8 - sub edx, 8 - jg vs_1_2_loop - } + sub edx, 8 + jg vs_1_2_loop + } } @@ -1063,28 +1049,26 @@ void vertical_band_1_2_scale_mmx static void last_vertical_band_1_2_scale_mmx ( - unsigned char *dest, - unsigned int dest_pitch, - unsigned int dest_width -) -{ - __asm - { - mov esi, dest // Get the source and destination pointer - mov ecx, dest_pitch // Get the pitch size + unsigned char *dest, + unsigned int dest_pitch, + unsigned int dest_width +) { + __asm { + mov esi, dest // Get the source and destination pointer + mov ecx, dest_pitch // Get the pitch size - mov edx, dest_width // Loop counter + mov edx, dest_width // Loop counter - last_vs_1_2_loop: + last_vs_1_2_loop: - movq mm0, [esi] // get Src[0] - movq [esi+ecx], mm0 // write out eight bytes + movq mm0, [esi] // get Src[0] + movq [esi+ecx], mm0 // write out eight bytes - add esi, 8 - sub edx, 8 + add esi, 8 + sub edx, 8 - jg last_vs_1_2_loop - } + jg last_vs_1_2_loop + } } 
/**************************************************************************** @@ -1108,106 +1092,104 @@ void last_vertical_band_1_2_scale_mmx static void horizontal_line_1_2_scale_mmx ( - const unsigned char *source, - unsigned int source_width, - unsigned char *dest, - unsigned int dest_width -) -{ - (void) dest_width; + const unsigned char *source, + unsigned int source_width, + unsigned char *dest, + unsigned int dest_width +) { + (void) dest_width; - __asm - { - mov esi, source - mov edi, dest + __asm { + mov esi, source + mov edi, dest - pxor mm7, mm7 - movq mm6, four_ones + pxor mm7, mm7 + movq mm6, four_ones - mov ecx, source_width + mov ecx, source_width - hs_1_2_loop: + hs_1_2_loop: - movq mm0, [esi] - movq mm1, [esi+1] + movq mm0, [esi] + movq mm1, [esi+1] - movq mm2, mm0 - movq mm3, mm1 + movq mm2, mm0 + movq mm3, mm1 - movq mm4, mm0 - punpcklbw mm0, mm7 + movq mm4, mm0 + punpcklbw mm0, mm7 - punpcklbw mm1, mm7 - paddw mm0, mm1 + punpcklbw mm1, mm7 + paddw mm0, mm1 - paddw mm0, mm6 - punpckhbw mm2, mm7 + paddw mm0, mm6 + punpckhbw mm2, mm7 - punpckhbw mm3, mm7 - paddw mm2, mm3 + punpckhbw mm3, mm7 + paddw mm2, mm3 - paddw mm2, mm6 - psraw mm0, 1 + paddw mm2, mm6 + psraw mm0, 1 - psraw mm2, 1 - packuswb mm0, mm2 + psraw mm2, 1 + packuswb mm0, mm2 - movq mm2, mm4 - punpcklbw mm2, mm0 + movq mm2, mm4 + punpcklbw mm2, mm0 - movq [edi], mm2 - punpckhbw mm4, mm0 + movq [edi], mm2 + punpckhbw mm4, mm0 - movq [edi+8], mm4 - add esi, 8 + movq [edi+8], mm4 + add esi, 8 - add edi, 16 - sub ecx, 8 + add edi, 16 + sub ecx, 8 - cmp ecx, 8 - jg hs_1_2_loop + cmp ecx, 8 + jg hs_1_2_loop // last eight pixel - movq mm0, [esi] - movq mm1, mm0 + movq mm0, [esi] + movq mm1, mm0 - movq mm2, mm0 - movq mm3, mm1 + movq mm2, mm0 + movq mm3, mm1 - psrlq mm1, 8 - psrlq mm3, 56 + psrlq mm1, 8 + psrlq mm3, 56 - psllq mm3, 56 - por mm1, mm3 + psllq mm3, 56 + por mm1, mm3 - movq mm3, mm1 - movq mm4, mm0 + movq mm3, mm1 + movq mm4, mm0 - punpcklbw mm0, mm7 - punpcklbw mm1, mm7 + 
punpcklbw mm0, mm7 + punpcklbw mm1, mm7 - paddw mm0, mm1 - paddw mm0, mm6 + paddw mm0, mm1 + paddw mm0, mm6 - punpckhbw mm2, mm7 - punpckhbw mm3, mm7 + punpckhbw mm2, mm7 + punpckhbw mm3, mm7 - paddw mm2, mm3 - paddw mm2, mm6 + paddw mm2, mm3 + paddw mm2, mm6 - psraw mm0, 1 - psraw mm2, 1 + psraw mm0, 1 + psraw mm2, 1 - packuswb mm0, mm2 - movq mm2, mm4 + packuswb mm0, mm2 + movq mm2, mm4 - punpcklbw mm2, mm0 - movq [edi], mm2 + punpcklbw mm2, mm0 + movq [edi], mm2 - punpckhbw mm4, mm0 - movq [edi+8], mm4 - } + punpckhbw mm4, mm0 + movq [edi+8], mm4 + } } @@ -1240,86 +1222,84 @@ __declspec(align(16)) const static unsigned short const54_1[] = {256, 192, 128, static void horizontal_line_5_4_scale_mmx ( - const unsigned char *source, - unsigned int source_width, - unsigned char *dest, - unsigned int dest_width -) -{ - /* - unsigned i; - unsigned int a, b, c, d, e; - unsigned char *des = dest; - const unsigned char *src = source; + const unsigned char *source, + unsigned int source_width, + unsigned char *dest, + unsigned int dest_width +) { + /* + unsigned i; + unsigned int a, b, c, d, e; + unsigned char *des = dest; + const unsigned char *src = source; - (void) dest_width; + (void) dest_width; - for ( i=0; i>8); - des[2] = ((c*128 + d*128 + 128)>>8); - des[3] = ((d* 64 + e*192 + 128)>>8); + des[0] = a; + des[1] = ((b*192 + c* 64 + 128)>>8); + des[2] = ((c*128 + d*128 + 128)>>8); + des[3] = ((d* 64 + e*192 + 128)>>8); - src += 5; - des += 4; - } - */ - (void) dest_width; + src += 5; + des += 4; + } + */ + (void) dest_width; - __asm - { + __asm { - mov esi, source ; - mov edi, dest ; + mov esi, source; + mov edi, dest; - mov ecx, source_width ; - movq mm5, const54_1 ; + mov ecx, source_width; + movq mm5, const54_1; - pxor mm7, mm7 ; - movq mm6, const54_2 ; + pxor mm7, mm7; + movq mm6, const54_2; - movq mm4, round_values ; - lea edx, [esi+ecx] ; - horizontal_line_5_4_loop: + movq mm4, round_values; + lea edx, [esi+ecx]; + horizontal_line_5_4_loop: - movq mm0, QWORD PTR 
[esi] ; - 00 01 02 03 04 05 06 07 - movq mm1, mm0 ; - 00 01 02 03 04 05 06 07 + movq mm0, QWORD PTR [esi]; + 00 01 02 03 04 05 06 07 + movq mm1, mm0; + 00 01 02 03 04 05 06 07 - psrlq mm0, 8 ; - 01 02 03 04 05 06 07 xx - punpcklbw mm1, mm7 ; - xx 00 xx 01 xx 02 xx 03 + psrlq mm0, 8; + 01 02 03 04 05 06 07 xx + punpcklbw mm1, mm7; + xx 00 xx 01 xx 02 xx 03 - punpcklbw mm0, mm7 ; - xx 01 xx 02 xx 03 xx 04 - pmullw mm1, mm5 + punpcklbw mm0, mm7; + xx 01 xx 02 xx 03 xx 04 + pmullw mm1, mm5 - pmullw mm0, mm6 - add esi, 5 + pmullw mm0, mm6 + add esi, 5 - add edi, 4 - paddw mm1, mm0 + add edi, 4 + paddw mm1, mm0 - paddw mm1, mm4 - psrlw mm1, 8 + paddw mm1, mm4 + psrlw mm1, 8 - cmp esi, edx - packuswb mm1, mm7 + cmp esi, edx + packuswb mm1, mm7 - movd DWORD PTR [edi-4], mm1 + movd DWORD PTR [edi-4], mm1 - jl horizontal_line_5_4_loop + jl horizontal_line_5_4_loop - } + } } __declspec(align(16)) const static unsigned short one_fourths[] = { 64, 64, 64, 64 }; @@ -1327,86 +1307,84 @@ __declspec(align(16)) const static unsigned short two_fourths[] = { 128, 128, __declspec(align(16)) const static unsigned short three_fourths[] = { 192, 192, 192, 192 }; static -void vertical_band_5_4_scale_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) -{ +void vertical_band_5_4_scale_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) { - __asm - { - push ebx + __asm { + push ebx - mov esi, source // Get the source and destination pointer - mov ecx, src_pitch // Get the pitch size + mov esi, source // Get the source and destination pointer + mov ecx, src_pitch // Get the pitch size - mov edi, dest // tow lines below - pxor mm7, mm7 // clear out mm7 + mov edi, dest // tow lines below + pxor mm7, mm7 // clear out mm7 - mov edx, dest_pitch // Loop counter - mov ebx, dest_width + mov edx, dest_pitch // Loop counter + mov ebx, dest_width - vs_5_4_loop: + 
vs_5_4_loop: - movd mm0, DWORD ptr [esi] // src[0]; - movd mm1, DWORD ptr [esi+ecx] // src[1]; + movd mm0, DWORD ptr [esi] // src[0]; + movd mm1, DWORD ptr [esi+ecx] // src[1]; - movd mm2, DWORD ptr [esi+ecx*2] - lea eax, [esi+ecx*2] // + movd mm2, DWORD ptr [esi+ecx*2] + lea eax, [esi+ecx*2] // - punpcklbw mm1, mm7 - punpcklbw mm2, mm7 + punpcklbw mm1, mm7 + punpcklbw mm2, mm7 - movq mm3, mm2 - pmullw mm1, three_fourths + movq mm3, mm2 + pmullw mm1, three_fourths - pmullw mm2, one_fourths - movd mm4, [eax+ecx] + pmullw mm2, one_fourths + movd mm4, [eax+ecx] - pmullw mm3, two_fourths - punpcklbw mm4, mm7 + pmullw mm3, two_fourths + punpcklbw mm4, mm7 - movq mm5, mm4 - pmullw mm4, two_fourths + movq mm5, mm4 + pmullw mm4, two_fourths - paddw mm1, mm2 - movd mm6, [eax+ecx*2] + paddw mm1, mm2 + movd mm6, [eax+ecx*2] - pmullw mm5, one_fourths - paddw mm1, round_values; + pmullw mm5, one_fourths + paddw mm1, round_values; - paddw mm3, mm4 - psrlw mm1, 8 + paddw mm3, mm4 + psrlw mm1, 8 - punpcklbw mm6, mm7 - paddw mm3, round_values + punpcklbw mm6, mm7 + paddw mm3, round_values - pmullw mm6, three_fourths - psrlw mm3, 8 + pmullw mm6, three_fourths + psrlw mm3, 8 - packuswb mm1, mm7 - packuswb mm3, mm7 + packuswb mm1, mm7 + packuswb mm3, mm7 - movd DWORD PTR [edi], mm0 - movd DWORD PTR [edi+edx], mm1 + movd DWORD PTR [edi], mm0 + movd DWORD PTR [edi+edx], mm1 - paddw mm5, mm6 - movd DWORD PTR [edi+edx*2], mm3 + paddw mm5, mm6 + movd DWORD PTR [edi+edx*2], mm3 - lea eax, [edi+edx*2] - paddw mm5, round_values + lea eax, [edi+edx*2] + paddw mm5, round_values - psrlw mm5, 8 - add edi, 4 + psrlw mm5, 8 + add edi, 4 - packuswb mm5, mm7 - movd DWORD PTR [eax+edx], mm5 + packuswb mm5, mm7 + movd DWORD PTR [eax+edx], mm5 - add esi, 4 - sub ebx, 4 + add esi, 4 + sub ebx, 4 - jg vs_5_4_loop + jg vs_5_4_loop - pop ebx - } + pop ebx + } } @@ -1417,96 +1395,94 @@ __declspec(align(16)) const static unsigned short const53_2[] = {256, 171, 85, static void horizontal_line_5_3_scale_mmx ( - 
const unsigned char *source, - unsigned int source_width, - unsigned char *dest, - unsigned int dest_width -) -{ + const unsigned char *source, + unsigned int source_width, + unsigned char *dest, + unsigned int dest_width +) { - (void) dest_width; - __asm - { + (void) dest_width; + __asm { - mov esi, source ; - mov edi, dest ; + mov esi, source; + mov edi, dest; - mov ecx, source_width ; - movq mm5, const53_1 ; + mov ecx, source_width; + movq mm5, const53_1; - pxor mm7, mm7 ; - movq mm6, const53_2 ; + pxor mm7, mm7; + movq mm6, const53_2; - movq mm4, round_values ; - lea edx, [esi+ecx-5] ; - horizontal_line_5_3_loop: + movq mm4, round_values; + lea edx, [esi+ecx-5]; + horizontal_line_5_3_loop: - movq mm0, QWORD PTR [esi] ; - 00 01 02 03 04 05 06 07 - movq mm1, mm0 ; - 00 01 02 03 04 05 06 07 + movq mm0, QWORD PTR [esi]; + 00 01 02 03 04 05 06 07 + movq mm1, mm0; + 00 01 02 03 04 05 06 07 - psllw mm0, 8 ; - xx 00 xx 02 xx 04 xx 06 - psrlw mm1, 8 ; - 01 xx 03 xx 05 xx 07 xx + psllw mm0, 8; + xx 00 xx 02 xx 04 xx 06 + psrlw mm1, 8; + 01 xx 03 xx 05 xx 07 xx - psrlw mm0, 8 ; - 00 xx 02 xx 04 xx 06 xx - psllq mm1, 16 ; - xx xx 01 xx 03 xx 05 xx + psrlw mm0, 8; + 00 xx 02 xx 04 xx 06 xx + psllq mm1, 16; + xx xx 01 xx 03 xx 05 xx - pmullw mm0, mm6 + pmullw mm0, mm6 - pmullw mm1, mm5 - add esi, 5 + pmullw mm1, mm5 + add esi, 5 - add edi, 3 - paddw mm1, mm0 + add edi, 3 + paddw mm1, mm0 - paddw mm1, mm4 - psrlw mm1, 8 + paddw mm1, mm4 + psrlw mm1, 8 - cmp esi, edx - packuswb mm1, mm7 + cmp esi, edx + packuswb mm1, mm7 - movd DWORD PTR [edi-3], mm1 - jl horizontal_line_5_3_loop + movd DWORD PTR [edi-3], mm1 + jl horizontal_line_5_3_loop -//exit condition - movq mm0, QWORD PTR [esi] ; - 00 01 02 03 04 05 06 07 - movq mm1, mm0 ; - 00 01 02 03 04 05 06 07 +// exit condition + movq mm0, QWORD PTR [esi]; + 00 01 02 03 04 05 06 07 + movq mm1, mm0; + 00 01 02 03 04 05 06 07 - psllw mm0, 8 ; - xx 00 xx 02 xx 04 xx 06 - psrlw mm1, 8 ; - 01 xx 03 xx 05 xx 07 xx + psllw mm0, 8; + xx 00 
xx 02 xx 04 xx 06 + psrlw mm1, 8; + 01 xx 03 xx 05 xx 07 xx - psrlw mm0, 8 ; - 00 xx 02 xx 04 xx 06 xx - psllq mm1, 16 ; - xx xx 01 xx 03 xx 05 xx + psrlw mm0, 8; + 00 xx 02 xx 04 xx 06 xx + psllq mm1, 16; + xx xx 01 xx 03 xx 05 xx - pmullw mm0, mm6 + pmullw mm0, mm6 - pmullw mm1, mm5 - paddw mm1, mm0 + pmullw mm1, mm5 + paddw mm1, mm0 - paddw mm1, mm4 - psrlw mm1, 8 + paddw mm1, mm4 + psrlw mm1, 8 - packuswb mm1, mm7 - movd eax, mm1 + packuswb mm1, mm7 + movd eax, mm1 - mov edx, eax - shr edx, 16 + mov edx, eax + shr edx, 16 - mov WORD PTR[edi], ax - mov BYTE PTR[edi+2], dl + mov WORD PTR[edi], ax + mov BYTE PTR[edi+2], dl - } + } } @@ -1514,75 +1490,73 @@ __declspec(align(16)) const static unsigned short one_thirds[] = { 85, 85, 85 __declspec(align(16)) const static unsigned short two_thirds[] = { 171, 171, 171, 171 }; static -void vertical_band_5_3_scale_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) -{ +void vertical_band_5_3_scale_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) { - __asm - { - push ebx + __asm { + push ebx - mov esi, source // Get the source and destination pointer - mov ecx, src_pitch // Get the pitch size + mov esi, source // Get the source and destination pointer + mov ecx, src_pitch // Get the pitch size - mov edi, dest // tow lines below - pxor mm7, mm7 // clear out mm7 + mov edi, dest // tow lines below + pxor mm7, mm7 // clear out mm7 - mov edx, dest_pitch // Loop counter - movq mm5, one_thirds + mov edx, dest_pitch // Loop counter + movq mm5, one_thirds - movq mm6, two_thirds - mov ebx, dest_width; + movq mm6, two_thirds + mov ebx, dest_width; - vs_5_3_loop: + vs_5_3_loop: - movd mm0, DWORD ptr [esi] // src[0]; - movd mm1, DWORD ptr [esi+ecx] // src[1]; + movd mm0, DWORD ptr [esi] // src[0]; + movd mm1, DWORD ptr [esi+ecx] // src[1]; - movd mm2, DWORD ptr [esi+ecx*2] - lea eax, [esi+ecx*2] 
// + movd mm2, DWORD ptr [esi+ecx*2] + lea eax, [esi+ecx*2] // - punpcklbw mm1, mm7 - punpcklbw mm2, mm7 + punpcklbw mm1, mm7 + punpcklbw mm2, mm7 - pmullw mm1, mm5 - pmullw mm2, mm6 + pmullw mm1, mm5 + pmullw mm2, mm6 - movd mm3, DWORD ptr [eax+ecx] - movd mm4, DWORD ptr [eax+ecx*2] + movd mm3, DWORD ptr [eax+ecx] + movd mm4, DWORD ptr [eax+ecx*2] - punpcklbw mm3, mm7 - punpcklbw mm4, mm7 + punpcklbw mm3, mm7 + punpcklbw mm4, mm7 - pmullw mm3, mm6 - pmullw mm4, mm5 + pmullw mm3, mm6 + pmullw mm4, mm5 - movd DWORD PTR [edi], mm0 - paddw mm1, mm2 + movd DWORD PTR [edi], mm0 + paddw mm1, mm2 - paddw mm1, round_values - psrlw mm1, 8 + paddw mm1, round_values + psrlw mm1, 8 - packuswb mm1, mm7 - paddw mm3, mm4 + packuswb mm1, mm7 + paddw mm3, mm4 - paddw mm3, round_values - movd DWORD PTR [edi+edx], mm1 + paddw mm3, round_values + movd DWORD PTR [edi+edx], mm1 - psrlw mm3, 8 - packuswb mm3, mm7 + psrlw mm3, 8 + packuswb mm3, mm7 - movd DWORD PTR [edi+edx*2], mm3 + movd DWORD PTR [edi+edx*2], mm3 - add edi, 4 - add esi, 4 + add edi, 4 + add esi, 4 - sub ebx, 4 - jg vs_5_3_loop + sub ebx, 4 + jg vs_5_3_loop - pop ebx - } + pop ebx + } } @@ -1609,48 +1583,45 @@ void vertical_band_5_3_scale_mmx(unsigned char *source, unsigned int src_pitch, static void horizontal_line_2_1_scale_mmx ( - const unsigned char *source, - unsigned int source_width, - unsigned char *dest, - unsigned int dest_width -) -{ - (void) dest_width; - (void) source_width; - __asm - { - mov esi, source - mov edi, dest - - pxor mm7, mm7 - mov ecx, dest_width - - xor edx, edx - hs_2_1_loop: - - movq mm0, [esi+edx*2] - psllw mm0, 8 - - psrlw mm0, 8 - packuswb mm0, mm7 - - movd DWORD Ptr [edi+edx], mm0; - add edx, 4 - - cmp edx, ecx - jl hs_2_1_loop - - } + const unsigned char *source, + unsigned int source_width, + unsigned char *dest, + unsigned int dest_width +) { + (void) dest_width; + (void) source_width; + __asm { + mov esi, source + mov edi, dest + + pxor mm7, mm7 + mov ecx, dest_width + + xor edx, edx 
+ hs_2_1_loop: + + movq mm0, [esi+edx*2] + psllw mm0, 8 + + psrlw mm0, 8 + packuswb mm0, mm7 + + movd DWORD Ptr [edi+edx], mm0; + add edx, 4 + + cmp edx, ecx + jl hs_2_1_loop + + } } static -void vertical_band_2_1_scale_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) -{ - (void) dest_pitch; - (void) src_pitch; - vpx_memcpy(dest, source, dest_width); +void vertical_band_2_1_scale_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) { + (void) dest_pitch; + (void) src_pitch; + vpx_memcpy(dest, source, dest_width); } @@ -1658,91 +1629,88 @@ __declspec(align(16)) const static unsigned short three_sixteenths[] = { 48, 4 __declspec(align(16)) const static unsigned short ten_sixteenths[] = { 160, 160, 160, 160 }; static -void vertical_band_2_1_scale_i_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) -{ +void vertical_band_2_1_scale_i_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) { - (void) dest_pitch; - __asm - { - mov esi, source - mov edi, dest + (void) dest_pitch; + __asm { + mov esi, source + mov edi, dest - mov eax, src_pitch - mov edx, dest_width + mov eax, src_pitch + mov edx, dest_width - pxor mm7, mm7 - sub esi, eax //back one line + pxor mm7, mm7 + sub esi, eax // back one line - lea ecx, [esi+edx]; - movq mm6, round_values; + lea ecx, [esi+edx]; + movq mm6, round_values; - movq mm5, three_sixteenths; - movq mm4, ten_sixteenths; + movq mm5, three_sixteenths; + movq mm4, ten_sixteenths; - vs_2_1_i_loop: - movd mm0, [esi] // - movd mm1, [esi+eax] // + vs_2_1_i_loop: + movd mm0, [esi] // + movd mm1, [esi+eax] // - movd mm2, [esi+eax*2] // - punpcklbw mm0, mm7 + movd mm2, [esi+eax*2] // + punpcklbw mm0, mm7 - pmullw mm0, mm5 - punpcklbw mm1, mm7 + pmullw mm0, mm5 + punpcklbw mm1, 
mm7 - pmullw mm1, mm4 - punpcklbw mm2, mm7 + pmullw mm1, mm4 + punpcklbw mm2, mm7 - pmullw mm2, mm5 - paddw mm0, round_values + pmullw mm2, mm5 + paddw mm0, round_values - paddw mm1, mm2 - paddw mm0, mm1 + paddw mm1, mm2 + paddw mm0, mm1 - psrlw mm0, 8 - packuswb mm0, mm7 + psrlw mm0, 8 + packuswb mm0, mm7 - movd DWORD PTR [edi], mm0 - add esi, 4 + movd DWORD PTR [edi], mm0 + add esi, 4 - add edi, 4; - cmp esi, ecx - jl vs_2_1_i_loop + add edi, 4; + cmp esi, ecx + jl vs_2_1_i_loop - } + } } void -register_mmxscalers(void) -{ - vp8_horizontal_line_1_2_scale = horizontal_line_1_2_scale_mmx; - vp8_vertical_band_1_2_scale = vertical_band_1_2_scale_mmx; - vp8_last_vertical_band_1_2_scale = last_vertical_band_1_2_scale_mmx; - vp8_horizontal_line_3_5_scale = horizontal_line_3_5_scale_mmx; - vp8_vertical_band_3_5_scale = vertical_band_3_5_scale_mmx; - vp8_last_vertical_band_3_5_scale = last_vertical_band_3_5_scale_mmx; - vp8_horizontal_line_4_5_scale = horizontal_line_4_5_scale_mmx; - vp8_vertical_band_4_5_scale = vertical_band_4_5_scale_mmx; - vp8_last_vertical_band_4_5_scale = last_vertical_band_4_5_scale_mmx; - - vp8_horizontal_line_3_4_scale = vp8cx_horizontal_line_3_4_scale_c; - vp8_vertical_band_3_4_scale = vp8cx_vertical_band_3_4_scale_c; - vp8_last_vertical_band_3_4_scale = vp8cx_last_vertical_band_3_4_scale_c; - vp8_horizontal_line_2_3_scale = vp8cx_horizontal_line_2_3_scale_c; - vp8_vertical_band_2_3_scale = vp8cx_vertical_band_2_3_scale_c; - vp8_last_vertical_band_2_3_scale = vp8cx_last_vertical_band_2_3_scale_c; - - - - vp8_vertical_band_5_4_scale = vertical_band_5_4_scale_mmx; - vp8_vertical_band_5_3_scale = vertical_band_5_3_scale_mmx; - vp8_vertical_band_2_1_scale = vertical_band_2_1_scale_mmx; - vp8_vertical_band_2_1_scale_i = vertical_band_2_1_scale_i_mmx; - vp8_horizontal_line_2_1_scale = horizontal_line_2_1_scale_mmx; - vp8_horizontal_line_5_3_scale = horizontal_line_5_3_scale_mmx; - vp8_horizontal_line_5_4_scale = horizontal_line_5_4_scale_mmx; 
+register_mmxscalers(void) { + vp8_horizontal_line_1_2_scale = horizontal_line_1_2_scale_mmx; + vp8_vertical_band_1_2_scale = vertical_band_1_2_scale_mmx; + vp8_last_vertical_band_1_2_scale = last_vertical_band_1_2_scale_mmx; + vp8_horizontal_line_3_5_scale = horizontal_line_3_5_scale_mmx; + vp8_vertical_band_3_5_scale = vertical_band_3_5_scale_mmx; + vp8_last_vertical_band_3_5_scale = last_vertical_band_3_5_scale_mmx; + vp8_horizontal_line_4_5_scale = horizontal_line_4_5_scale_mmx; + vp8_vertical_band_4_5_scale = vertical_band_4_5_scale_mmx; + vp8_last_vertical_band_4_5_scale = last_vertical_band_4_5_scale_mmx; + + vp8_horizontal_line_3_4_scale = vp8cx_horizontal_line_3_4_scale_c; + vp8_vertical_band_3_4_scale = vp8cx_vertical_band_3_4_scale_c; + vp8_last_vertical_band_3_4_scale = vp8cx_last_vertical_band_3_4_scale_c; + vp8_horizontal_line_2_3_scale = vp8cx_horizontal_line_2_3_scale_c; + vp8_vertical_band_2_3_scale = vp8cx_vertical_band_2_3_scale_c; + vp8_last_vertical_band_2_3_scale = vp8cx_last_vertical_band_2_3_scale_c; + + + + vp8_vertical_band_5_4_scale = vertical_band_5_4_scale_mmx; + vp8_vertical_band_5_3_scale = vertical_band_5_3_scale_mmx; + vp8_vertical_band_2_1_scale = vertical_band_2_1_scale_mmx; + vp8_vertical_band_2_1_scale_i = vertical_band_2_1_scale_i_mmx; + vp8_horizontal_line_2_1_scale = horizontal_line_2_1_scale_mmx; + vp8_horizontal_line_5_3_scale = horizontal_line_5_3_scale_mmx; + vp8_horizontal_line_5_4_scale = horizontal_line_5_4_scale_mmx; diff --git a/vpx_scale/win32/scalesystemdependent.c b/vpx_scale/win32/scalesystemdependent.c index 19e61c3..98913d1 100644 --- a/vpx_scale/win32/scalesystemdependent.c +++ b/vpx_scale/win32/scalesystemdependent.c @@ -46,46 +46,42 @@ extern void register_mmxscalers(void); * ****************************************************************************/ void -vp8_scale_machine_specific_config(void) -{ - // If MMX supported then set to use MMX versions of functions else - // use original 'C' versions. 
- int mmx_enabled; - int xmm_enabled; - int wmt_enabled; +vp8_scale_machine_specific_config(void) { + // If MMX supported then set to use MMX versions of functions else + // use original 'C' versions. + int mmx_enabled; + int xmm_enabled; + int wmt_enabled; - vpx_get_processor_flags(&mmx_enabled, &xmm_enabled, &wmt_enabled); + vpx_get_processor_flags(&mmx_enabled, &xmm_enabled, &wmt_enabled); - if (mmx_enabled || xmm_enabled || wmt_enabled) - { - register_mmxscalers(); - } - else - { - vp8_horizontal_line_1_2_scale = vp8cx_horizontal_line_1_2_scale_c; - vp8_vertical_band_1_2_scale = vp8cx_vertical_band_1_2_scale_c; - vp8_last_vertical_band_1_2_scale = vp8cx_last_vertical_band_1_2_scale_c; - vp8_horizontal_line_3_5_scale = vp8cx_horizontal_line_3_5_scale_c; - vp8_vertical_band_3_5_scale = vp8cx_vertical_band_3_5_scale_c; - vp8_last_vertical_band_3_5_scale = vp8cx_last_vertical_band_3_5_scale_c; - vp8_horizontal_line_3_4_scale = vp8cx_horizontal_line_3_4_scale_c; - vp8_vertical_band_3_4_scale = vp8cx_vertical_band_3_4_scale_c; - vp8_last_vertical_band_3_4_scale = vp8cx_last_vertical_band_3_4_scale_c; - vp8_horizontal_line_2_3_scale = vp8cx_horizontal_line_2_3_scale_c; - vp8_vertical_band_2_3_scale = vp8cx_vertical_band_2_3_scale_c; - vp8_last_vertical_band_2_3_scale = vp8cx_last_vertical_band_2_3_scale_c; - vp8_horizontal_line_4_5_scale = vp8cx_horizontal_line_4_5_scale_c; - vp8_vertical_band_4_5_scale = vp8cx_vertical_band_4_5_scale_c; - vp8_last_vertical_band_4_5_scale = vp8cx_last_vertical_band_4_5_scale_c; + if (mmx_enabled || xmm_enabled || wmt_enabled) { + register_mmxscalers(); + } else { + vp8_horizontal_line_1_2_scale = vp8cx_horizontal_line_1_2_scale_c; + vp8_vertical_band_1_2_scale = vp8cx_vertical_band_1_2_scale_c; + vp8_last_vertical_band_1_2_scale = vp8cx_last_vertical_band_1_2_scale_c; + vp8_horizontal_line_3_5_scale = vp8cx_horizontal_line_3_5_scale_c; + vp8_vertical_band_3_5_scale = vp8cx_vertical_band_3_5_scale_c; + vp8_last_vertical_band_3_5_scale 
= vp8cx_last_vertical_band_3_5_scale_c; + vp8_horizontal_line_3_4_scale = vp8cx_horizontal_line_3_4_scale_c; + vp8_vertical_band_3_4_scale = vp8cx_vertical_band_3_4_scale_c; + vp8_last_vertical_band_3_4_scale = vp8cx_last_vertical_band_3_4_scale_c; + vp8_horizontal_line_2_3_scale = vp8cx_horizontal_line_2_3_scale_c; + vp8_vertical_band_2_3_scale = vp8cx_vertical_band_2_3_scale_c; + vp8_last_vertical_band_2_3_scale = vp8cx_last_vertical_band_2_3_scale_c; + vp8_horizontal_line_4_5_scale = vp8cx_horizontal_line_4_5_scale_c; + vp8_vertical_band_4_5_scale = vp8cx_vertical_band_4_5_scale_c; + vp8_last_vertical_band_4_5_scale = vp8cx_last_vertical_band_4_5_scale_c; - vp8_vertical_band_5_4_scale = vp8cx_vertical_band_5_4_scale_c; - vp8_vertical_band_5_3_scale = vp8cx_vertical_band_5_3_scale_c; - vp8_vertical_band_2_1_scale = vp8cx_vertical_band_2_1_scale_c; - vp8_vertical_band_2_1_scale_i = vp8cx_vertical_band_2_1_scale_i_c; - vp8_horizontal_line_2_1_scale = vp8cx_horizontal_line_2_1_scale_c; - vp8_horizontal_line_5_3_scale = vp8cx_horizontal_line_5_3_scale_c; - vp8_horizontal_line_5_4_scale = vp8cx_horizontal_line_5_4_scale_c; + vp8_vertical_band_5_4_scale = vp8cx_vertical_band_5_4_scale_c; + vp8_vertical_band_5_3_scale = vp8cx_vertical_band_5_3_scale_c; + vp8_vertical_band_2_1_scale = vp8cx_vertical_band_2_1_scale_c; + vp8_vertical_band_2_1_scale_i = vp8cx_vertical_band_2_1_scale_i_c; + vp8_horizontal_line_2_1_scale = vp8cx_horizontal_line_2_1_scale_c; + vp8_horizontal_line_5_3_scale = vp8cx_horizontal_line_5_3_scale_c; + vp8_horizontal_line_5_4_scale = vp8cx_horizontal_line_5_4_scale_c; - } + } } diff --git a/vpx_scale/yv12config.h b/vpx_scale/yv12config.h index 800f700..6a8a1fc 100644 --- a/vpx_scale/yv12config.h +++ b/vpx_scale/yv12config.h @@ -16,54 +16,54 @@ extern "C" { #endif -#define VP7BORDERINPIXELS 48 #define VP8BORDERINPIXELS 32 +#define VP9BORDERINPIXELS 64 +#define VP9_INTERP_EXTEND 4 - /************************************* - For INT_YUV: + 
/************************************* + For INT_YUV: - Y = (R+G*2+B)/4; - U = (R-B)/2; - V = (G*2 - R - B)/4; - And - R = Y+U-V; - G = Y+V; - B = Y-U-V; - ************************************/ - typedef enum - { - REG_YUV = 0, /* Regular yuv */ - INT_YUV = 1 /* The type of yuv that can be tranfer to and from RGB through integer transform */ - } - YUV_TYPE; + Y = (R+G*2+B)/4; + U = (R-B)/2; + V = (G*2 - R - B)/4; + And + R = Y+U-V; + G = Y+V; + B = Y-U-V; + ************************************/ + typedef enum + { + REG_YUV = 0, /* Regular yuv */ + INT_YUV = 1 /* The type of yuv that can be tranfer to and from RGB through integer transform */ + } + YUV_TYPE; - typedef struct yv12_buffer_config - { - int y_width; - int y_height; - int y_stride; -/* int yinternal_width; */ + typedef struct yv12_buffer_config { + int y_width; + int y_height; + int y_stride; + /* int yinternal_width; */ - int uv_width; - int uv_height; - int uv_stride; -/* int uvinternal_width; */ + int uv_width; + int uv_height; + int uv_stride; + /* int uvinternal_width; */ - unsigned char *y_buffer; - unsigned char *u_buffer; - unsigned char *v_buffer; + unsigned char *y_buffer; + unsigned char *u_buffer; + unsigned char *v_buffer; - unsigned char *buffer_alloc; - int border; - int frame_size; - YUV_TYPE clrtype; + unsigned char *buffer_alloc; + int border; + int frame_size; + YUV_TYPE clrtype; - int corrupted; - int flags; - } YV12_BUFFER_CONFIG; + int corrupted; + int flags; + } YV12_BUFFER_CONFIG; - int vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height, int border); - int vp8_yv12_de_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf); + int vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height, int border); + int vp8_yv12_de_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf); #ifdef __cplusplus } diff --git a/vpxdec.c b/vpxdec.c index 4482f3d..9b728bf 100644 --- a/vpxdec.c +++ b/vpxdec.c @@ -52,7 +52,7 @@ static const char *exec_name; static const struct { char 
const *name; - const vpx_codec_iface_t *iface; + vpx_codec_iface_t *iface; unsigned int fourcc; unsigned int fourcc_mask; } ifaces[] = @@ -152,7 +152,8 @@ static void usage_exit() "write to. If the\n argument does not include any escape " "characters, the output will be\n written to a single file. " "Otherwise, the filename will be calculated by\n expanding " - "the following escape characters:\n" + "the following escape characters:\n"); + fprintf(stderr, "\n\t%%w - Frame width" "\n\t%%h - Frame height" "\n\t%% - Frame number, zero padded to places (1..9)" @@ -356,7 +357,7 @@ void out_put(void *out, const uint8_t *buf, unsigned int len, int do_md5) } else { - if(fwrite(buf, 1, len, out)); + (void) fwrite(buf, 1, len, out); } } @@ -502,7 +503,7 @@ nestegg_seek_cb(int64_t offset, int whence, void * userdata) case NESTEGG_SEEK_CUR: whence = SEEK_CUR; break; case NESTEGG_SEEK_END: whence = SEEK_END; break; }; - return fseek(userdata, offset, whence)? -1 : 0; + return fseek(userdata, (long)offset, whence)? 
-1 : 0; } @@ -559,7 +560,7 @@ webm_guess_framerate(struct input_ctx *input, goto fail; *fps_num = (i - 1) * 1000000; - *fps_den = tstamp / 1000; + *fps_den = (unsigned int)(tstamp / 1000); return 0; fail: nestegg_destroy(input->nestegg_ctx); @@ -580,10 +581,10 @@ file_is_webm(struct input_ctx *input, unsigned int i, n; int track_type = -1; - nestegg_io io = {nestegg_read_cb, nestegg_seek_cb, nestegg_tell_cb, - input->infile}; + nestegg_io io = {nestegg_read_cb, nestegg_seek_cb, nestegg_tell_cb, 0}; nestegg_video_params params; + io.userdata = input->infile; if(nestegg_init(&input->nestegg_ctx, io, NULL)) goto fail; @@ -647,7 +648,7 @@ void generate_filename(const char *pattern, char *out, size_t q_len, { size_t pat_len; - // parse the pattern + /* parse the pattern */ q[q_len - 1] = '\0'; switch(p[1]) { @@ -677,7 +678,7 @@ void generate_filename(const char *pattern, char *out, size_t q_len, { size_t copy_len; - // copy the next segment + /* copy the next segment */ if(!next_pat) copy_len = strlen(p); else @@ -922,7 +923,7 @@ int main(int argc, const char **argv_) p = strchr(p, '%'); if(p && p[1] >= '1' && p[1] <= '9') { - // pattern contains sequence number, so it's not unique. + /* pattern contains sequence number, so it's not unique. */ single_file = 0; break; } @@ -962,7 +963,8 @@ int main(int argc, const char **argv_) That will have to wait until these tools support WebM natively.*/ sprintf(buffer, "YUV4MPEG2 C%s W%u H%u F%u:%u I%c\n", "420jpeg", width, height, fps_num, fps_den, 'p'); - out_put(out, (unsigned char *)buffer, strlen(buffer), do_md5); + out_put(out, (unsigned char *)buffer, + (unsigned int)strlen(buffer), do_md5); } /* Try to determine the codec from the fourcc. 
*/ @@ -1040,7 +1042,7 @@ int main(int argc, const char **argv_) vpx_usec_timer_start(&timer); - if (vpx_codec_decode(&decoder, buf, buf_sz, NULL, 0)) + if (vpx_codec_decode(&decoder, buf, (unsigned int)buf_sz, NULL, 0)) { const char *detail = vpx_codec_error_detail(&decoder); fprintf(stderr, "Failed to decode frame: %s\n", vpx_codec_error(&decoder)); @@ -1052,7 +1054,7 @@ int main(int argc, const char **argv_) } vpx_usec_timer_mark(&timer); - dx_time += vpx_usec_timer_elapsed(&timer); + dx_time += (unsigned int)vpx_usec_timer_elapsed(&timer); ++frame_in; @@ -1064,9 +1066,14 @@ int main(int argc, const char **argv_) } frames_corrupted += corrupted; + vpx_usec_timer_start(&timer); + if ((img = vpx_codec_get_frame(&decoder, &iter))) ++frame_out; + vpx_usec_timer_mark(&timer); + dx_time += (unsigned int)vpx_usec_timer_elapsed(&timer); + if (progress) show_progress(frame_in, frame_out, dx_time); diff --git a/vpxenc.c b/vpxenc.c index d32b21b..c9547ea 100644 --- a/vpxenc.c +++ b/vpxenc.c @@ -54,11 +54,7 @@ typedef __int64 off_t; #define off_t off64_t #endif -#if defined(_MSC_VER) -#define LITERALU64(n) n -#else -#define LITERALU64(n) n##LLU -#endif +#define LITERALU64(hi,lo) ((((uint64_t)hi)<<32)|lo) /* We should use 32-bit file operations in WebM file format * when building ARM executable file (.axf) with RVCT */ @@ -68,12 +64,28 @@ typedef long off_t; #define ftello ftell #endif +/* Swallow warnings about unused results of fread/fwrite */ +static size_t wrap_fread(void *ptr, size_t size, size_t nmemb, + FILE *stream) +{ + return fread(ptr, size, nmemb, stream); +} +#define fread wrap_fread + +static size_t wrap_fwrite(const void *ptr, size_t size, size_t nmemb, + FILE *stream) +{ + return fwrite(ptr, size, nmemb, stream); +} +#define fwrite wrap_fwrite + + static const char *exec_name; static const struct codec_item { char const *name; - const vpx_codec_iface_t *iface; + vpx_codec_iface_t *iface; unsigned int fourcc; } codecs[] = { @@ -245,7 +257,7 @@ void 
stats_write(stats_io_t *stats, const void *pkt, size_t len) { if (stats->file) { - if(fwrite(pkt, 1, len, stats->file)); + (void) fwrite(pkt, 1, len, stats->file); } else { @@ -338,7 +350,7 @@ static int read_frame(struct input_state *input, vpx_image_t *img) * write_ivf_frame_header() for documentation on the frame header * layout. */ - if(fread(junk, 1, IVF_FRAME_HDR_SZ, f)); + (void) fread(junk, 1, IVF_FRAME_HDR_SZ, f); } for (plane = 0; plane < 3; plane++) @@ -468,7 +480,7 @@ static void write_ivf_file_header(FILE *outfile, mem_put_le32(header + 24, frame_cnt); /* length */ mem_put_le32(header + 28, 0); /* unused */ - if(fwrite(header, 1, 32, outfile)); + (void) fwrite(header, 1, 32, outfile); } @@ -482,18 +494,18 @@ static void write_ivf_frame_header(FILE *outfile, return; pts = pkt->data.frame.pts; - mem_put_le32(header, pkt->data.frame.sz); + mem_put_le32(header, (int)pkt->data.frame.sz); mem_put_le32(header + 4, pts & 0xFFFFFFFF); mem_put_le32(header + 8, pts >> 32); - if(fwrite(header, 1, 12, outfile)); + (void) fwrite(header, 1, 12, outfile); } static void write_ivf_frame_size(FILE *outfile, size_t size) { char header[4]; - mem_put_le32(header, size); - fwrite(header, 1, 4, outfile); + mem_put_le32(header, (int)size); + (void) fwrite(header, 1, 4, outfile); } @@ -541,13 +553,13 @@ struct EbmlGlobal void Ebml_Write(EbmlGlobal *glob, const void *buffer_in, unsigned long len) { - if(fwrite(buffer_in, 1, len, glob->stream)); + (void) fwrite(buffer_in, 1, len, glob->stream); } #define WRITE_BUFFER(s) \ for(i = len-1; i>=0; i--)\ { \ - x = *(const s *)buffer_in >> (i * CHAR_BIT); \ + x = (char)(*(const s *)buffer_in >> (i * CHAR_BIT)); \ Ebml_Write(glob, &x, 1); \ } void Ebml_Serialize(EbmlGlobal *glob, const void *buffer_in, int buffer_size, unsigned long len) @@ -597,9 +609,9 @@ static void Ebml_StartSubElement(EbmlGlobal *glob, EbmlLoc *ebmlLoc, unsigned long class_id) { - //todo this is always taking 8 bytes, this may need later optimization - //this is a 
key that says length unknown - uint64_t unknownLen = LITERALU64(0x01FFFFFFFFFFFFFF); + /* todo this is always taking 8 bytes, this may need later optimization */ + /* this is a key that says length unknown */ + uint64_t unknownLen = LITERALU64(0x01FFFFFF, 0xFFFFFFFF); Ebml_WriteID(glob, class_id); *ebmlLoc = ftello(glob->stream); @@ -617,7 +629,7 @@ Ebml_EndSubElement(EbmlGlobal *glob, EbmlLoc *ebmlLoc) /* Calculate the size of this element */ size = pos - *ebmlLoc - 8; - size |= LITERALU64(0x0100000000000000); + size |= LITERALU64(0x01000000,0x00000000); /* Seek back to the beginning of the element and write the new size */ fseeko(glob->stream, *ebmlLoc, SEEK_SET); @@ -664,7 +676,7 @@ write_webm_seek_info(EbmlGlobal *ebml) Ebml_EndSubElement(ebml, &start); } { - //segment info + /* segment info */ EbmlLoc startInfo; uint64_t frame_time; char version_string[64]; @@ -686,7 +698,7 @@ write_webm_seek_info(EbmlGlobal *ebml) Ebml_StartSubElement(ebml, &startInfo, Info); Ebml_SerializeUnsigned(ebml, TimecodeScale, 1000000); Ebml_SerializeFloat(ebml, Segment_Duration, - ebml->last_pts_ms + frame_time); + (double)(ebml->last_pts_ms + frame_time)); Ebml_SerializeString(ebml, 0x4D80, version_string); Ebml_SerializeString(ebml, 0x5741, version_string); Ebml_EndSubElement(ebml, &startInfo); @@ -704,16 +716,16 @@ write_webm_file_header(EbmlGlobal *glob, EbmlLoc start; Ebml_StartSubElement(glob, &start, EBML); Ebml_SerializeUnsigned(glob, EBMLVersion, 1); - Ebml_SerializeUnsigned(glob, EBMLReadVersion, 1); //EBML Read Version - Ebml_SerializeUnsigned(glob, EBMLMaxIDLength, 4); //EBML Max ID Length - Ebml_SerializeUnsigned(glob, EBMLMaxSizeLength, 8); //EBML Max Size Length - Ebml_SerializeString(glob, DocType, "webm"); //Doc Type - Ebml_SerializeUnsigned(glob, DocTypeVersion, 2); //Doc Type Version - Ebml_SerializeUnsigned(glob, DocTypeReadVersion, 2); //Doc Type Read Version + Ebml_SerializeUnsigned(glob, EBMLReadVersion, 1); + Ebml_SerializeUnsigned(glob, EBMLMaxIDLength, 4); 
+ Ebml_SerializeUnsigned(glob, EBMLMaxSizeLength, 8); + Ebml_SerializeString(glob, DocType, "webm"); + Ebml_SerializeUnsigned(glob, DocTypeVersion, 2); + Ebml_SerializeUnsigned(glob, DocTypeReadVersion, 2); Ebml_EndSubElement(glob, &start); } { - Ebml_StartSubElement(glob, &glob->startSegment, Segment); //segment + Ebml_StartSubElement(glob, &glob->startSegment, Segment); glob->position_reference = ftello(glob->stream); glob->framerate = *fps; write_webm_seek_info(glob); @@ -731,7 +743,7 @@ write_webm_file_header(EbmlGlobal *glob, Ebml_SerializeUnsigned(glob, TrackNumber, trackNumber); glob->track_id_pos = ftello(glob->stream); Ebml_SerializeUnsigned32(glob, TrackUID, trackID); - Ebml_SerializeUnsigned(glob, TrackType, 1); //video is always 1 + Ebml_SerializeUnsigned(glob, TrackType, 1); Ebml_SerializeString(glob, CodecID, "V_VP8"); { unsigned int pixelWidth = cfg->g_w; @@ -744,13 +756,13 @@ write_webm_file_header(EbmlGlobal *glob, Ebml_SerializeUnsigned(glob, PixelHeight, pixelHeight); Ebml_SerializeUnsigned(glob, StereoMode, stereo_fmt); Ebml_SerializeFloat(glob, FrameRate, frameRate); - Ebml_EndSubElement(glob, &videoStart); //Video + Ebml_EndSubElement(glob, &videoStart); } - Ebml_EndSubElement(glob, &start); //Track Entry + Ebml_EndSubElement(glob, &start); /* Track Entry */ } Ebml_EndSubElement(glob, &trackStart); } - // segment element is open + /* segment element is open */ } } @@ -778,7 +790,7 @@ write_webm_block(EbmlGlobal *glob, if(pts_ms - glob->cluster_timecode > SHRT_MAX) start_cluster = 1; else - block_timecode = pts_ms - glob->cluster_timecode; + block_timecode = (unsigned short)pts_ms - glob->cluster_timecode; is_keyframe = (pkt->data.frame.flags & VPX_FRAME_IS_KEY); if(start_cluster || is_keyframe) @@ -789,9 +801,9 @@ write_webm_block(EbmlGlobal *glob, /* Open the new cluster */ block_timecode = 0; glob->cluster_open = 1; - glob->cluster_timecode = pts_ms; + glob->cluster_timecode = (uint32_t)pts_ms; glob->cluster_pos = ftello(glob->stream); - 
Ebml_StartSubElement(glob, &glob->startCluster, Cluster); //cluster + Ebml_StartSubElement(glob, &glob->startCluster, Cluster); /* cluster */ Ebml_SerializeUnsigned(glob, Timecode, glob->cluster_timecode); /* Save a cue point if this is a keyframe. */ @@ -816,7 +828,7 @@ write_webm_block(EbmlGlobal *glob, /* Write the Simple Block */ Ebml_WriteID(glob, SimpleBlock); - block_length = pkt->data.frame.sz + 4; + block_length = (unsigned long)pkt->data.frame.sz + 4; block_length |= 0x10000000; Ebml_Serialize(glob, &block_length, sizeof(block_length), 4); @@ -833,7 +845,7 @@ write_webm_block(EbmlGlobal *glob, flags |= 0x08; Ebml_Write(glob, &flags, 1); - Ebml_Write(glob, pkt->data.frame.buf, pkt->data.frame.sz); + Ebml_Write(glob, pkt->data.frame.buf, (unsigned long)pkt->data.frame.sz); } @@ -865,7 +877,6 @@ write_webm_file_footer(EbmlGlobal *glob, long hash) Ebml_SerializeUnsigned(glob, CueTrack, 1); Ebml_SerializeUnsigned64(glob, CueClusterPosition, cue->loc - glob->position_reference); - //Ebml_SerializeUnsigned(glob, CueBlockNumber, cue->blockNumber); Ebml_EndSubElement(glob, &start); } Ebml_EndSubElement(glob, &start); @@ -942,7 +953,7 @@ static double vp8_mse2psnr(double Samples, double Peak, double Mse) if ((double)Mse > 0.0) psnr = 10.0 * log10(Peak * Peak * Samples / Mse); else - psnr = 60; // Limit to prevent / 0 + psnr = 60; /* Limit to prevent / 0 */ if (psnr > 60) psnr = 60; @@ -978,6 +989,8 @@ static const arg_def_t good_dl = ARG_DEF(NULL, "good", 0, "Use Good Quality Deadline"); static const arg_def_t rt_dl = ARG_DEF(NULL, "rt", 0, "Use Realtime Quality Deadline"); +static const arg_def_t quietarg = ARG_DEF("q", "quiet", 0, + "Do not print encode progress"); static const arg_def_t verbosearg = ARG_DEF("v", "verbose", 0, "Show encoder parameters"); static const arg_def_t psnrarg = ARG_DEF(NULL, "psnr", 0, @@ -997,7 +1010,7 @@ static const arg_def_t *main_args[] = &debugmode, &outputfile, &codecarg, &passes, &pass_arg, &fpf_name, &limit, &deadline, &best_dl, 
&good_dl, &rt_dl, - &verbosearg, &psnrarg, &use_ivf, &out_part, &q_hist_n, &rate_hist_n, + &quietarg, &verbosearg, &psnrarg, &use_ivf, &out_part, &q_hist_n, &rate_hist_n, NULL }; @@ -1225,7 +1238,7 @@ static int merge_hist_buckets(struct hist_bucket *bucket, { int last_bucket = buckets - 1; - // merge the small bucket with an adjacent one. + /* merge the small bucket with an adjacent one. */ if(small_bucket == 0) merge_bucket = 1; else if(small_bucket == last_bucket) @@ -1325,7 +1338,7 @@ static void show_histogram(const struct hist_bucket *bucket, int j; float pct; - pct = 100.0 * (float)bucket[i].count / (float)total; + pct = (float)(100.0 * bucket[i].count / total); len = HIST_BAR_MAX * bucket[i].count / scale; if(len < 1) len = 1; @@ -1393,7 +1406,7 @@ static void init_rate_histogram(struct rate_hist *hist, */ hist->samples = cfg->rc_buf_sz * 5 / 4 * fps->num / fps->den / 1000; - // prevent division by zero + /* prevent division by zero */ if (hist->samples == 0) hist->samples=1; @@ -1427,7 +1440,7 @@ static void update_rate_histogram(struct rate_hist *hist, idx = hist->frames++ % hist->samples; hist->pts[idx] = now; - hist->sz[idx] = pkt->data.frame.sz; + hist->sz[idx] = (int)pkt->data.frame.sz; if(now < cfg->rc_buf_initial_sz) return; @@ -1449,15 +1462,15 @@ static void update_rate_histogram(struct rate_hist *hist, return; avg_bitrate = sum_sz * 8 * 1000 / (now - then); - idx = avg_bitrate * (RATE_BINS/2) / (cfg->rc_target_bitrate * 1000); + idx = (int)(avg_bitrate * (RATE_BINS/2) / (cfg->rc_target_bitrate * 1000)); if(idx < 0) idx = 0; if(idx > RATE_BINS-1) idx = RATE_BINS-1; if(hist->bucket[idx].low > avg_bitrate) - hist->bucket[idx].low = avg_bitrate; + hist->bucket[idx].low = (int)avg_bitrate; if(hist->bucket[idx].high < avg_bitrate) - hist->bucket[idx].high = avg_bitrate; + hist->bucket[idx].high = (int)avg_bitrate; hist->bucket[idx].count++; hist->total++; } @@ -1495,6 +1508,7 @@ struct global_config int usage; int deadline; int use_i420; + int quiet; 
int verbose; int limit; int show_psnr; @@ -1619,6 +1633,8 @@ static void parse_global_config(struct global_config *global, char **argv) global->use_i420 = 0; else if (arg_match(&arg, &use_i420, argi)) global->use_i420 = 1; + else if (arg_match(&arg, &quietarg, argi)) + global->quiet = 1; else if (arg_match(&arg, &verbosearg, argi)) global->verbose = 1; else if (arg_match(&arg, &limit, argi)) @@ -2000,7 +2016,7 @@ static void set_default_kf_interval(struct stream_state *stream, { double framerate = (double)global->framerate.num/global->framerate.den; if (framerate > 0.0) - stream->config.cfg.kf_max_dist = 5.0*framerate; + stream->config.cfg.kf_max_dist = (unsigned int)(5.0*framerate); } } @@ -2180,7 +2196,7 @@ static void encode_frame(struct stream_state *stream, / cfg->g_timebase.num / global->framerate.num; vpx_usec_timer_start(&timer); vpx_codec_encode(&stream->encoder, img, frame_start, - next_frame_start - frame_start, + (unsigned long)(next_frame_start - frame_start), 0, global->deadline); vpx_usec_timer_mark(&timer); stream->cx_time += vpx_usec_timer_elapsed(&timer); @@ -2224,8 +2240,9 @@ static void get_cx_data(struct stream_state *stream, { stream->frames_out++; } - fprintf(stderr, " %6luF", - (unsigned long)pkt->data.frame.sz); + if (!global->quiet) + fprintf(stderr, " %6luF", + (unsigned long)pkt->data.frame.sz); update_rate_histogram(&stream->rate_hist, cfg, pkt); if(stream->config.write_webm) @@ -2233,7 +2250,8 @@ static void get_cx_data(struct stream_state *stream, /* Update the hash */ if(!stream->ebml.debug) stream->hash = murmur(pkt->data.frame.buf, - pkt->data.frame.sz, stream->hash); + (int)pkt->data.frame.sz, + stream->hash); write_webm_block(&stream->ebml, cfg, pkt); } @@ -2259,15 +2277,16 @@ static void get_cx_data(struct stream_state *stream, } } - fwrite(pkt->data.frame.buf, 1, - pkt->data.frame.sz, stream->file); + (void) fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, + stream->file); } stream->nbytes += pkt->data.raw.sz; break; case 
VPX_CODEC_STATS_PKT: stream->frames_out++; - fprintf(stderr, " %6luS", - (unsigned long)pkt->data.twopass_stats.sz); + if (!global->quiet) + fprintf(stderr, " %6luS", + (unsigned long)pkt->data.twopass_stats.sz); stats_write(&stream->stats, pkt->data.twopass_stats.buf, pkt->data.twopass_stats.sz); @@ -2283,7 +2302,8 @@ static void get_cx_data(struct stream_state *stream, stream->psnr_samples_total += pkt->data.psnr.samples[0]; for (i = 0; i < 4; i++) { - fprintf(stderr, "%.3lf ", pkt->data.psnr.psnr[i]); + if (!global->quiet) + fprintf(stderr, "%.3f ", pkt->data.psnr.psnr[i]); stream->psnr_totals[i] += pkt->data.psnr.psnr[i]; } stream->psnr_count++; @@ -2306,13 +2326,13 @@ static void show_psnr(struct stream_state *stream) return; fprintf(stderr, "Stream %d PSNR (Overall/Avg/Y/U/V)", stream->index); - ovpsnr = vp8_mse2psnr(stream->psnr_samples_total, 255.0, - stream->psnr_sse_total); - fprintf(stderr, " %.3lf", ovpsnr); + ovpsnr = vp8_mse2psnr((double)stream->psnr_samples_total, 255.0, + (double)stream->psnr_sse_total); + fprintf(stderr, " %.3f", ovpsnr); for (i = 0; i < 4; i++) { - fprintf(stderr, " %.3lf", stream->psnr_totals[i]/stream->psnr_count); + fprintf(stderr, " %.3f", stream->psnr_totals[i]/stream->psnr_count); } fprintf(stderr, "\n"); } @@ -2320,7 +2340,7 @@ static void show_psnr(struct stream_state *stream) float usec_to_fps(uint64_t usec, unsigned int frames) { - return usec > 0 ? (float)frames * 1000000.0 / (float)usec : 0; + return (float)(usec > 0 ? frames * 1000000.0 / (float)usec : 0); } @@ -2437,7 +2457,7 @@ int main(int argc, const char **argv_) vpx_img_alloc(&raw, input.use_i420 ? 
VPX_IMG_FMT_I420 : VPX_IMG_FMT_YV12, - input.w, input.h, 1); + input.w, input.h, 32); FOREACH_STREAM(init_rate_histogram(&stream->rate_hist, &stream->config.cfg, @@ -2462,18 +2482,21 @@ int main(int argc, const char **argv_) if (frame_avail) frames_in++; - if(stream_cnt == 1) - fprintf(stderr, - "\rPass %d/%d frame %4d/%-4d %7"PRId64"B \033[K", - pass + 1, global.passes, frames_in, - streams->frames_out, (int64_t)streams->nbytes); - else - fprintf(stderr, - "\rPass %d/%d frame %4d %7lu %s (%.2f fps)\033[K", - pass + 1, global.passes, frames_in, - cx_time > 9999999 ? cx_time / 1000 : cx_time, - cx_time > 9999999 ? "ms" : "us", - usec_to_fps(cx_time, frames_in)); + if (!global.quiet) + { + if(stream_cnt == 1) + fprintf(stderr, + "\rPass %d/%d frame %4d/%-4d %7"PRId64"B \033[K", + pass + 1, global.passes, frames_in, + streams->frames_out, (int64_t)streams->nbytes); + else + fprintf(stderr, + "\rPass %d/%d frame %4d %7lu %s (%.2f fps)\033[K", + pass + 1, global.passes, frames_in, + cx_time > 9999999 ? cx_time / 1000 : cx_time, + cx_time > 9999999 ? "ms" : "us", + usec_to_fps(cx_time, frames_in)); + } } else @@ -2484,7 +2507,7 @@ int main(int argc, const char **argv_) frame_avail ? &raw : NULL, frames_in)); vpx_usec_timer_mark(&timer); - cx_time += vpx_usec_timer_elapsed(&timer); + cx_time += (unsigned long)vpx_usec_timer_elapsed(&timer); FOREACH_STREAM(update_quantizer_histogram(stream)); @@ -2497,20 +2520,21 @@ int main(int argc, const char **argv_) if(stream_cnt > 1) fprintf(stderr, "\n"); - FOREACH_STREAM(fprintf( - stderr, - "\rPass %d/%d frame %4d/%-4d %7"PRId64"B %7lub/f %7"PRId64"b/s" - " %7"PRId64" %s (%.2f fps)\033[K\n", pass + 1, - global.passes, frames_in, stream->frames_out, (int64_t)stream->nbytes, - frames_in ? (unsigned long)(stream->nbytes * 8 / frames_in) : 0, - frames_in ? (int64_t)stream->nbytes * 8 - * (int64_t)global.framerate.num / global.framerate.den - / frames_in - : 0, - stream->cx_time > 9999999 ? 
stream->cx_time / 1000 : stream->cx_time, - stream->cx_time > 9999999 ? "ms" : "us", - usec_to_fps(stream->cx_time, frames_in)); - ); + if (!global.quiet) + FOREACH_STREAM(fprintf( + stderr, + "\rPass %d/%d frame %4d/%-4d %7"PRId64"B %7lub/f %7"PRId64"b/s" + " %7"PRId64" %s (%.2f fps)\033[K\n", pass + 1, + global.passes, frames_in, stream->frames_out, (int64_t)stream->nbytes, + frames_in ? (unsigned long)(stream->nbytes * 8 / frames_in) : 0, + frames_in ? (int64_t)stream->nbytes * 8 + * (int64_t)global.framerate.num / global.framerate.den + / frames_in + : 0, + stream->cx_time > 9999999 ? stream->cx_time / 1000 : stream->cx_time, + stream->cx_time > 9999999 ? "ms" : "us", + usec_to_fps(stream->cx_time, frames_in)); + ); if (global.show_psnr) FOREACH_STREAM(show_psnr(stream)); diff --git a/y4minput.c b/y4minput.c index dd51421..ff9ffbc 100644 --- a/y4minput.c +++ b/y4minput.c @@ -662,7 +662,7 @@ int y4m_input_open(y4m_input *_y4m,FILE *_fin,char *_skip,int _nskip){ _nskip--; } else{ - ret=fread(buffer+i,1,1,_fin); + ret=(int)fread(buffer+i,1,1,_fin); if(ret<1)return -1; } if(buffer[i]=='\n')break; @@ -818,7 +818,7 @@ int y4m_input_fetch_frame(y4m_input *_y4m,FILE *_fin,vpx_image_t *_img){ int c_sz; int ret; /*Read and skip the frame header.*/ - ret=fread(frame,1,6,_fin); + ret=(int)fread(frame,1,6,_fin); if(ret<6)return 0; if(memcmp(frame,"FRAME",5)){ fprintf(stderr,"Loss of framing in Y4M input data\n");