ADI Blackfin optimizations
author Michael Benjamin <neuroptik@gmail.com>
Sat, 16 Sep 2006 22:26:09 +0000 (22:26 +0000)
committer Diego Biurrun <diego@biurrun.de>
Sat, 16 Sep 2006 22:26:09 +0000 (22:26 +0000)
patch by Michael Benjamin, neuroptik gmail com

Originally committed as revision 6282 to svn://svn.ffmpeg.org/ffmpeg/trunk

libavcodec/Makefile
libavcodec/bfin/dsputil_bfin.c [new file with mode: 0644]
libavcodec/dsputil.c
libavcodec/dsputil.h

index dd56970..e7efa15 100644 (file)
@@ -357,6 +357,7 @@ OBJS-$(TARGET_ALTIVEC)                 += ppc/dsputil_altivec.o      \
                                           ppc/snow_altivec.o        \
                                           ppc/vc1dsp_altivec.o       \
                                           ppc/float_altivec.o
+OBJS-$(TARGET_ARCH_BFIN)               += bfin/dsputil_bfin.o
 
 CFLAGS += $(CFLAGS-yes)
 OBJS += $(OBJS-yes)
diff --git a/libavcodec/bfin/dsputil_bfin.c b/libavcodec/bfin/dsputil_bfin.c
new file mode 100644 (file)
index 0000000..d066745
--- /dev/null
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2006 Michael Benjamin
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "../avcodec.h"
+#include "../dsputil.h"
+
+/*
+ * Sum of absolute differences between two pixel blocks, written as
+ * Blackfin inline assembly and installed as the 8x8 SAD routine by
+ * dsputil_init_bfin().
+ *
+ * c         - not referenced by this function.
+ * blk1      - first block; its address is copied into index register I0.
+ * blk2      - second block; its address is copied into index register I1.
+ * line_size - not referenced: the only pointer updates are the
+ *             post-increments of I0/I1 inside the loop.
+ * h         - copied to P2 and from there to M0; M0 is not read by any
+ *             later instruction, so h does not influence the loop count.
+ *
+ * Returns R2 + R3 (saturating), where R2 and R3 are the sums of the low
+ * and high halves of accumulators A0 and A1 respectively.
+ *
+ * NOTE(review): the hardware-loop count is the constant 32 and no row
+ * stride is applied, so the routine appears to walk a contiguous region
+ * rather than an 8-pixel-wide block with pitch line_size - confirm
+ * against the callers' expectations.
+ * NOTE(review): P3 and M0 are written below but are absent from the
+ * clobber list; the loop registers set up by LSETUP are not listed
+ * either - verify against the GCC Blackfin port's clobber rules.
+ * NOTE(review): the loop labels are plain global names; if this asm were
+ * ever emitted more than once in a translation unit they would presumably
+ * collide - confirm.
+ */
+static int sad8x8_bfin( void *c, uint8_t *blk1, uint8_t *blk2, int line_size, int h )
+{
+    int sum;
+    __asm__ __volatile__ (
+    /* Fetch the arguments from memory into pointer registers. */
+    "P0 = %1;" // blk1
+    "P1 = %2;" // blk2
+    "P2 = %3;\n" // h
+    /* The loop addresses the blocks through index registers I0/I1. */
+    "I0 = P0;"
+    "I1 = P1;\n"
+    /* Clear both accumulators before accumulating. */
+    "A0 = 0;"
+    "A1 = 0;\n"
+    /* M0 receives h but is never used afterwards. */
+    "M0 = P2;\n"
+    /* Loop counter: LC0 is loaded from P3, i.e. 32 iterations. */
+    "P3 = 32;\n"
+    "LSETUP (sad8x8LoopBegin, sad8x8LoopEnd) LC0=P3;\n"
+    "sad8x8LoopBegin:\n"
+    /* Load one 32-bit word from each block without advancing ... */
+    "  DISALGNEXCPT || R0 = [I0] || R2 = [I1];\n"
+    /* ... then load again with post-increment of both index registers. */
+    "  DISALGNEXCPT || R1 = [I0++] || R3 = [I1++];\n"
+    "sad8x8LoopEnd:\n"
+    /* Last instruction of the loop body: SAA on the two register pairs
+     * (Blackfin subtract-absolute-accumulate into A0/A1). */
+    "  SAA ( R1:0 , R3:2 );\n"
+    /* Fold the four 16-bit accumulator halves into two partial sums ... */
+    "R3 = A1.L + A1.H, R2 = A0.L + A0.H;\n"
+    /* ... and add them with saturation to form the result. */
+    "%0 = R2 + R3 (S);\n"
+    : "=&d" (sum)                           /* %0: early-clobber output */
+    : "m"(blk1), "m"(blk2), "m"(h)          /* %1..%3: memory operands  */
+    : "P0","P1","P2","I0","I1","A0","A1","R0","R1","R2","R3");
+    return sum;
+}
+
+/*
+ * Hook the Blackfin-specific routines into a DSPContext.
+ *
+ * c     - context whose function-pointer tables are updated; slot 1 of
+ *         the sad table and entry [1][0] of pix_abs are pointed at
+ *         sad8x8_bfin.
+ * avctx - not referenced by this function.
+ */
+void dsputil_init_bfin( DSPContext* c, AVCodecContext *avctx )
+{
+    c->sad[1]        = sad8x8_bfin;
+    c->pix_abs[1][0] = sad8x8_bfin;
+}
index 5fdd2ed..dcf6a54 100644 (file)
@@ -4189,6 +4189,9 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
 #ifdef ARCH_SH4
     dsputil_init_sh4(c,avctx);
 #endif
+#ifdef ARCH_BFIN
+    dsputil_init_bfin(c,avctx);
+#endif
 
     switch(c->idct_permutation_type){
     case FF_NO_IDCT_PERM:
index eea7e94..3df2f37 100644 (file)
@@ -571,6 +571,13 @@ void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx);
 
 void dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx);
 
+#elif defined(ARCH_BFIN)
+
+#define DECLARE_ALIGNED_8(t,v)    t v __attribute__ ((aligned (8)))
+#define STRIDE_ALIGN 8
+
+void dsputil_init_bfin(DSPContext* c, AVCodecContext *avctx);
+
 #else
 
 #define DECLARE_ALIGNED_8(t,v)    t v __attribute__ ((aligned (8)))