arm: dsputil: fix overreads in put/avg_pixels functions

author     Mans Rullgard <mans@mansr.com>
           Wed, 9 May 2012 23:55:18 +0000 (00:55 +0100)
committer  Mans Rullgard <mans@mansr.com>
           Thu, 10 May 2012 13:39:34 +0000 (14:39 +0100)
The vertically interpolating variants of these functions read
one line ahead of the current position to optimise the loop.
On the last line processed, this read-ahead can fall outside
the source buffer.  Fix these invalid reads by processing the
last line outside the loop.

Signed-off-by: Mans Rullgard <mans@mansr.com>
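
To illustrate the structure of the fix, here is a minimal scalar C
model of the patched loop (hypothetical function and variable names,
not the actual FFmpeg code, which is the NEON assembly below; the
model also processes one output line per pass where the assembly
does two).  A vertical half-pel filter producing h output lines
needs exactly h + 1 source lines; because the loop loads the next
source line at the top of each pass, the final pass must be peeled
out of the loop, or that load would touch line h + 1, one line past
the end of the buffer:

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    void put_pixels16_y2_c_model(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride, int h)
    {
        uint8_t cur[16], nxt[16];

        memcpy(cur, src, 16); src += stride;   /* source line 0 */
        memcpy(nxt, src, 16); src += stride;   /* source line 1 */

        /* Main loop: one pass fewer than before the fix, so the
         * read-ahead load below only ever touches lines 2..h. */
        for (int y = 0; y < h - 1; y++) {
            for (int x = 0; x < 16; x++)
                dst[x] = (cur[x] + nxt[x] + 1) >> 1;  /* rounded avg */
            memcpy(cur, nxt, 16);
            memcpy(nxt, src, 16);                     /* read ahead  */
            src += stride;
            dst += stride;
        }

        /* Last output line, peeled out of the loop: it consumes
         * the two lines already loaded and reads nothing further. */
        for (int x = 0; x < 16; x++)
            dst[x] = (cur[x] + nxt[x] + 1) >> 1;
    }

The same peeling is applied to each of the four macros in the patch;
in the two-lines-per-pass assembly it shows up as the new
"sub r3, r3, #2" before each loop and a duplicated loop body after it.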
libavcodec/arm/dsputil_neon.S

diff --git a/libavcodec/arm/dsputil_neon.S b/libavcodec/arm/dsputil_neon.S
index d49aedd..4bdcd95 100644
--- a/libavcodec/arm/dsputil_neon.S
+++ b/libavcodec/arm/dsputil_neon.S
@@ -95,6 +95,7 @@ endfunc
 .endm
 
 .macro  pixels16_y2     rnd=1, avg=0
+        sub             r3,  r3,  #2
         vld1.64         {q0},     [r1], r2
         vld1.64         {q1},     [r1], r2
 1:      subs            r3,  r3,  #2
@@ -114,10 +115,25 @@ endfunc
         vst1.64         {q2},     [r0,:128], r2
         vst1.64         {q3},     [r0,:128], r2
         bne             1b
+
+        avg             q2,  q0,  q1
+        vld1.64         {q0},     [r1], r2
+        avg             q3,  q0,  q1
+  .if \avg
+        vld1.8          {q8},     [r0,:128], r2
+        vld1.8          {q9},     [r0,:128]
+        vrhadd.u8       q2,  q2,  q8
+        vrhadd.u8       q3,  q3,  q9
+        sub             r0,  r0,  r2
+  .endif
+        vst1.64         {q2},     [r0,:128], r2
+        vst1.64         {q3},     [r0,:128], r2
+
         bx              lr
 .endm
 
 .macro  pixels16_xy2    rnd=1, avg=0
+        sub             r3,  r3,  #2
         vld1.64         {d0-d2},  [r1], r2
         vld1.64         {d4-d6},  [r1], r2
   .ifeq \rnd
@@ -173,6 +189,42 @@ endfunc
         vaddl.u8        q11, d3,  d5
         vst1.64         {q15},    [r0,:128], r2
         bgt             1b
+
+        vld1.64         {d0-d2},  [r1], r2
+        vadd.u16        q12, q8,  q9
+  .ifeq \rnd
+        vadd.u16        q12, q12, q13
+  .endif
+        vext.8          q15, q0,  q1,  #1
+        vadd.u16        q1 , q10, q11
+        shrn            d28, q12, #2
+  .ifeq \rnd
+        vadd.u16        q1,  q1,  q13
+  .endif
+        shrn            d29, q1,  #2
+  .if \avg
+        vld1.8          {q8},     [r0,:128]
+        vrhadd.u8       q14, q14, q8
+  .endif
+        vaddl.u8        q8,  d0,  d30
+        vaddl.u8        q10, d1,  d31
+        vst1.64         {q14},    [r0,:128], r2
+        vadd.u16        q12, q8,  q9
+  .ifeq \rnd
+        vadd.u16        q12, q12, q13
+  .endif
+        vadd.u16        q0,  q10, q11
+        shrn            d30, q12, #2
+  .ifeq \rnd
+        vadd.u16        q0,  q0,  q13
+  .endif
+        shrn            d31, q0,  #2
+  .if \avg
+        vld1.8          {q9},     [r0,:128]
+        vrhadd.u8       q15, q15, q9
+  .endif
+        vst1.64         {q15},    [r0,:128], r2
+
         bx              lr
 .endm
 
@@ -228,6 +280,7 @@ endfunc
 .endm
 
 .macro  pixels8_y2      rnd=1, avg=0
+        sub             r3,  r3,  #2
         vld1.64         {d0},     [r1], r2
         vld1.64         {d1},     [r1], r2
 1:      subs            r3,  r3,  #2
@@ -246,10 +299,24 @@ endfunc
         vst1.64         {d4},     [r0,:64], r2
         vst1.64         {d5},     [r0,:64], r2
         bne             1b
+
+        avg             d4,  d0,  d1
+        vld1.64         {d0},     [r1], r2
+        avg             d5,  d0,  d1
+  .if \avg
+        vld1.8          {d2},     [r0,:64], r2
+        vld1.8          {d3},     [r0,:64]
+        vrhadd.u8       q2,  q2,  q1
+        sub             r0,  r0,  r2
+  .endif
+        vst1.64         {d4},     [r0,:64], r2
+        vst1.64         {d5},     [r0,:64], r2
+
         bx              lr
 .endm
 
 .macro  pixels8_xy2     rnd=1, avg=0
+        sub             r3,  r3,  #2
         vld1.64         {q0},     [r1], r2
         vld1.64         {q1},     [r1], r2
   .ifeq \rnd
@@ -291,6 +358,31 @@ endfunc
         vaddl.u8        q9,  d2,  d6
         vst1.64         {d7},     [r0,:64], r2
         bgt             1b
+
+        vld1.64         {q0},     [r1], r2
+        vadd.u16        q10, q8,  q9
+        vext.8          d4,  d0,  d1,  #1
+  .ifeq \rnd
+        vadd.u16        q10, q10, q11
+  .endif
+        vaddl.u8        q8,  d0,  d4
+        shrn            d5,  q10, #2
+        vadd.u16        q10, q8,  q9
+  .if \avg
+        vld1.8          {d7},     [r0,:64]
+        vrhadd.u8       d5,  d5,  d7
+  .endif
+  .ifeq \rnd
+        vadd.u16        q10, q10, q11
+  .endif
+        vst1.64         {d5},     [r0,:64], r2
+        shrn            d7,  q10, #2
+  .if \avg
+        vld1.8          {d5},     [r0,:64]
+        vrhadd.u8       d7,  d7,  d5
+  .endif
+        vst1.64         {d7},     [r0,:64], r2
+
         bx              lr
 .endm