x86/synth_filter: add synth_filter_sse
[libav.git] / libavcodec / x86 / dcadsp_init.c
index 5f6e8c5..5b77985 100644 (file)
 #include "libavutil/x86/cpu.h"
 #include "libavcodec/dcadsp.h"
 
-void ff_int8x8_fmul_int32_sse(float *dst, const int8_t *src, int scale);
-void ff_int8x8_fmul_int32_sse2(float *dst, const int8_t *src, int scale);
-void ff_int8x8_fmul_int32_sse4(float *dst, const int8_t *src, int scale);
+void ff_decode_hf_sse(float dst[DCA_SUBBANDS][8], const int vq_num[DCA_SUBBANDS],
+                      const int8_t hf_vq[1024][32], intptr_t vq_offset,
+                      int scale[DCA_SUBBANDS][2], intptr_t start, intptr_t end);
+void ff_decode_hf_sse2(float dst[DCA_SUBBANDS][8], const int vq_num[DCA_SUBBANDS],
+                       const int8_t hf_vq[1024][32], intptr_t vq_offset,
+                       int scale[DCA_SUBBANDS][2], intptr_t start, intptr_t end);
+void ff_decode_hf_sse4(float dst[DCA_SUBBANDS][8], const int vq_num[DCA_SUBBANDS],
+                       const int8_t hf_vq[1024][32], intptr_t vq_offset,
+                       int scale[DCA_SUBBANDS][2], intptr_t start, intptr_t end);
 void ff_dca_lfe_fir0_sse(float *out, const float *in, const float *coefs);
 void ff_dca_lfe_fir1_sse(float *out, const float *in, const float *coefs);
 
@@ -35,45 +41,60 @@ av_cold void ff_dcadsp_init_x86(DCADSPContext *s)
 
     if (EXTERNAL_SSE(cpu_flags)) {
 #if ARCH_X86_32
-        s->int8x8_fmul_int32 = ff_int8x8_fmul_int32_sse;
+        s->decode_hf = ff_decode_hf_sse;
 #endif
         s->lfe_fir[0]        = ff_dca_lfe_fir0_sse;
         s->lfe_fir[1]        = ff_dca_lfe_fir1_sse;
     }
 
     if (EXTERNAL_SSE2(cpu_flags)) {
-        s->int8x8_fmul_int32 = ff_int8x8_fmul_int32_sse2;
+        s->decode_hf = ff_decode_hf_sse2;
     }
 
     if (EXTERNAL_SSE4(cpu_flags)) {
-        s->int8x8_fmul_int32 = ff_int8x8_fmul_int32_sse4;
+        s->decode_hf = ff_decode_hf_sse4;
     }
 }
 
-void ff_synth_filter_inner_sse2(float *synth_buf_ptr, float synth_buf2[32],
-                                const float window[512],
-                                float out[32], intptr_t offset, float scale);
 
-static void synth_filter_sse2(FFTContext *imdct,
-                              float *synth_buf_ptr, int *synth_buf_offset,
-                              float synth_buf2[32], const float window[512],
-                              float out[32], const float in[32], float scale)
-{
-    float *synth_buf= synth_buf_ptr + *synth_buf_offset;
-
-    imdct->imdct_half(imdct, synth_buf, in);
-
-    ff_synth_filter_inner_sse2(synth_buf, synth_buf2, window,
-                               out, *synth_buf_offset, scale);
+#define SYNTH_FILTER_FUNC(opt) /* prototype + wrapper for one SIMD flavour */ \
+void ff_synth_filter_inner_##opt(float *synth_buf_ptr, float synth_buf2[32],   \
+                                 const float window[512],                      \
+                                 float out[32], intptr_t offset, float scale); \
+static void synth_filter_##opt(FFTContext *imdct,                              \
+                               float *synth_buf_ptr, int *synth_buf_offset,    \
+                               float synth_buf2[32], const float window[512],  \
+                               float out[32], const float in[32], float scale) \
+{                                                                              \
+    float *synth_buf= synth_buf_ptr + *synth_buf_offset; /* base + offset */   \
+                                                                               \
+    imdct->imdct_half(imdct, synth_buf, in);                                   \
+                                                                               \
+    ff_synth_filter_inner_##opt(synth_buf, synth_buf2, window,                 \
+                                out, *synth_buf_offset, scale);                \
+                                                                               \
+    *synth_buf_offset = (*synth_buf_offset - 32) & 511; /* back 32, mod 512 */ \
+}                                                                              \
 
-    *synth_buf_offset = (*synth_buf_offset - 32) & 511;
-}
+#if HAVE_YASM
+#if ARCH_X86_32
+SYNTH_FILTER_FUNC(sse)
+#endif
+SYNTH_FILTER_FUNC(sse2)
+#endif /* HAVE_YASM */
 
 av_cold void ff_synth_filter_init_x86(SynthFilterContext *s)
 {
+#if HAVE_YASM /* the synth_filter_* wrappers are only defined when HAVE_YASM is set */
     int cpu_flags = av_get_cpu_flags();
 
+#if ARCH_X86_32 /* synth_filter_sse is only instantiated under ARCH_X86_32 */
+    if (EXTERNAL_SSE(cpu_flags)) {
+        s->synth_filter_float = synth_filter_sse; /* overridden below when EXTERNAL_SSE2 also holds */
+    }
+#endif
     if (EXTERNAL_SSE2(cpu_flags)) {
         s->synth_filter_float = synth_filter_sse2;
     }
+#endif /* HAVE_YASM */
 }