x86: conditionally compile H.264 QPEL optimizations
author Diego Biurrun <diego@biurrun.de>
Sun, 18 Dec 2011 13:10:33 +0000 (14:10 +0100)
committer Diego Biurrun <diego@biurrun.de>
Sun, 25 Mar 2012 09:50:45 +0000 (11:50 +0200)
configure
libavcodec/x86/Makefile
libavcodec/x86/dsputil_mmx.c

index 8c227aa..89a4fce 100755 (executable)
--- a/configure
+++ b/configure
@@ -1165,6 +1165,7 @@ CONFIG_EXTRA="
     h264chroma
     h264dsp
     h264pred
+    h264qpel
     huffman
     lgplv3
     lpc
@@ -1311,7 +1312,7 @@ h263_encoder_select="aandct"
 h263_vaapi_hwaccel_select="vaapi h263_decoder"
 h263i_decoder_select="h263_decoder"
 h263p_encoder_select="h263_encoder"
-h264_decoder_select="golomb h264chroma h264dsp h264pred"
+h264_decoder_select="golomb h264chroma h264dsp h264pred h264qpel"
 h264_dxva2_hwaccel_deps="dxva2api_h"
 h264_dxva2_hwaccel_select="dxva2 h264_decoder"
 h264_vaapi_hwaccel_select="vaapi h264_decoder"
@@ -1366,14 +1367,14 @@ rv10_decoder_select="h263_decoder"
 rv10_encoder_select="h263_encoder"
 rv20_decoder_select="h263_decoder"
 rv20_encoder_select="h263_encoder"
-rv30_decoder_select="golomb h264chroma h264pred"
-rv40_decoder_select="golomb h264chroma h264pred"
+rv30_decoder_select="golomb h264chroma h264pred h264qpel"
+rv40_decoder_select="golomb h264chroma h264pred h264qpel"
 shorten_decoder_select="golomb"
 sipr_decoder_select="lsp"
 snow_decoder_select="dwt"
 snow_encoder_select="aandct dwt"
 svq1_encoder_select="aandct"
-svq3_decoder_select="golomb h264chroma h264dsp h264pred"
+svq3_decoder_select="golomb h264chroma h264dsp h264pred h264qpel"
 svq3_decoder_suggest="zlib"
 theora_decoder_select="vp3_decoder"
 tiff_decoder_suggest="zlib"
@@ -1381,7 +1382,7 @@ tiff_encoder_suggest="zlib"
 truehd_decoder_select="mlp_decoder"
 tscc_decoder_select="zlib"
 twinvq_decoder_select="mdct lsp sinewin"
-vc1_decoder_select="h263_decoder h264chroma"
+vc1_decoder_select="h263_decoder h264chroma h264qpel"
 vc1_dxva2_hwaccel_deps="dxva2api_h"
 vc1_dxva2_hwaccel_select="dxva2 vc1_decoder"
 vc1_vaapi_hwaccel_select="vaapi vc1_decoder"
@@ -1392,7 +1393,7 @@ vorbis_encoder_select="mdct"
 vp6_decoder_select="huffman"
 vp6a_decoder_select="vp6_decoder"
 vp6f_decoder_select="vp6_decoder"
-vp8_decoder_select="h264pred"
+vp8_decoder_select="h264pred h264qpel"
 wmapro_decoder_select="mdct sinewin"
 wmav1_decoder_select="mdct sinewin"
 wmav1_encoder_select="mdct sinewin"
@@ -1419,7 +1420,7 @@ vda_deps="VideoDecodeAcceleration_VDADecoder_h pthreads"
 vdpau_deps="vdpau_vdpau_h vdpau_vdpau_x11_h"
 
 # parsers
-h264_parser_select="golomb h264chroma h264dsp h264pred"
+h264_parser_select="golomb h264chroma h264dsp h264pred h264qpel"
 
 # external libraries
 libdirac_decoder_deps="libdirac !libschroedinger"
index e64697a..7944799 100644 (file)
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -23,6 +23,7 @@ YASM-OBJS-$(CONFIG_H264DSP)            += x86/h264_deblock.o            \
 YASM-OBJS-$(CONFIG_H264PRED)           += x86/h264_intrapred.o          \
                                           x86/h264_intrapred_10bit.o
 MMX-OBJS-$(CONFIG_H264PRED)            += x86/h264_intrapred_init.o
+YASM-OBJS-$(CONFIG_H264QPEL)           += x86/h264_qpel_10bit.o
 
 MMX-OBJS-$(CONFIG_RV30_DECODER)        += x86/rv34dsp_init.o
 YASM-OBJS-$(CONFIG_RV30_DECODER)       += x86/rv34dsp.o
@@ -62,7 +63,6 @@ MMX-OBJS-$(CONFIG_VP8_DECODER)         += x86/vp8dsp-init.o
 MMX-OBJS-$(HAVE_YASM)                  += x86/dsputil_yasm.o            \
                                           x86/deinterlace.o             \
                                           x86/fmtconvert.o              \
-                                          x86/h264_qpel_10bit.o         \
                                           $(YASM-OBJS-yes)
 
 MMX-OBJS-$(CONFIG_FFT)                 += x86/fft.o
index 7d9bb99..665eec9 100644 (file)
--- a/libavcodec/x86/dsputil_mmx.c
+++ b/libavcodec/x86/dsputil_mmx.c
@@ -2479,6 +2479,7 @@ static void dsputil_init_mmx2(DSPContext *c, AVCodecContext *avctx,
         c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_exact_mmx2;
     }
 
+    if (CONFIG_H264QPEL) {
     SET_QPEL_FUNCS(put_qpel, 0, 16, mmx2, );
     SET_QPEL_FUNCS(put_qpel, 1, 8, mmx2, );
     SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, mmx2, );
@@ -2510,6 +2511,7 @@ static void dsputil_init_mmx2(DSPContext *c, AVCodecContext *avctx,
     SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, mmx2, );
     SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, mmx2, );
     SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, mmx2, );
+    }
 
 #if HAVE_YASM
     if (!high_bit_depth && CONFIG_H264CHROMA) {
@@ -2577,6 +2579,7 @@ static void dsputil_init_3dnow(DSPContext *c, AVCodecContext *avctx,
         c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_exact_3dnow;
     }
 
+    if (CONFIG_H264QPEL) {
     SET_QPEL_FUNCS(put_qpel, 0, 16, 3dnow, );
     SET_QPEL_FUNCS(put_qpel, 1, 8, 3dnow, );
     SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, 3dnow, );
@@ -2597,6 +2600,7 @@ static void dsputil_init_3dnow(DSPContext *c, AVCodecContext *avctx,
     SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, 3dnow, );
     SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, 3dnow, );
     SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, 3dnow, );
+    }
 
 #if HAVE_YASM
     if (!high_bit_depth && CONFIG_H264CHROMA) {
@@ -2671,11 +2675,12 @@ static void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx,
             c->put_pixels_tab[0][0]        = put_pixels16_sse2;
             c->put_no_rnd_pixels_tab[0][0] = put_pixels16_sse2;
             c->avg_pixels_tab[0][0]        = avg_pixels16_sse2;
-            H264_QPEL_FUNCS(0, 0, sse2);
+            if (CONFIG_H264QPEL)
+                H264_QPEL_FUNCS(0, 0, sse2);
         }
     }
 
-    if (!high_bit_depth) {
+    if (!high_bit_depth && CONFIG_H264QPEL) {
         H264_QPEL_FUNCS(0, 1, sse2);
         H264_QPEL_FUNCS(0, 2, sse2);
         H264_QPEL_FUNCS(0, 3, sse2);
@@ -2692,6 +2697,7 @@ static void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx,
 
 #if HAVE_YASM
     if (bit_depth == 10) {
+        if (CONFIG_H264QPEL) {
         SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_sse2, ff_);
         SET_QPEL_FUNCS(put_h264_qpel, 1, 8,  10_sse2, ff_);
         SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_sse2, ff_);
@@ -2699,7 +2705,7 @@ static void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx,
         H264_QPEL_FUNCS_10(1, 0, sse2_cache64);
         H264_QPEL_FUNCS_10(2, 0, sse2_cache64);
         H264_QPEL_FUNCS_10(3, 0, sse2_cache64);
-
+        }
         if (CONFIG_H264CHROMA) {
             c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_10_sse2;
             c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_10_sse2;
@@ -2729,7 +2735,7 @@ static void dsputil_init_ssse3(DSPContext *c, AVCodecContext *avctx,
     const int high_bit_depth = avctx->bits_per_raw_sample > 8;
     const int bit_depth      = avctx->bits_per_raw_sample;
 
-    if (!high_bit_depth) {
+    if (!high_bit_depth && CONFIG_H264QPEL) {
         H264_QPEL_FUNCS(1, 0, ssse3);
         H264_QPEL_FUNCS(1, 1, ssse3);
         H264_QPEL_FUNCS(1, 2, ssse3);
@@ -2744,7 +2750,7 @@ static void dsputil_init_ssse3(DSPContext *c, AVCodecContext *avctx,
         H264_QPEL_FUNCS(3, 3, ssse3);
     }
 #if HAVE_YASM
-    else if (bit_depth == 10) {
+    else if (bit_depth == 10 && CONFIG_H264QPEL) {
         H264_QPEL_FUNCS_10(1, 0, ssse3_cache64);
         H264_QPEL_FUNCS_10(2, 0, ssse3_cache64);
         H264_QPEL_FUNCS_10(3, 0, ssse3_cache64);
@@ -2788,9 +2794,11 @@ static void dsputil_init_avx(DSPContext *c, AVCodecContext *avctx, int mm_flags)
     if (bit_depth == 10) {
         // AVX implies !cache64.
         // TODO: Port cache(32|64) detection from x264.
+        if (CONFIG_H264QPEL) {
         H264_QPEL_FUNCS_10(1, 0, sse2);
         H264_QPEL_FUNCS_10(2, 0, sse2);
         H264_QPEL_FUNCS_10(3, 0, sse2);
+        }
 
         if (CONFIG_H264CHROMA) {
             c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_10_avx;