arm: hpeldsp: prevent overreads in armv6 asm
[libav.git] / libavcodec / arm / hpeldsp_armv6.S
1 /*
2 * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
3 *
4 * This file is part of Libav.
5 *
6 * Libav is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * Libav is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with Libav; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include "libavutil/arm/asm.S"
22
/*
 * call_2x_pixels type, subp
 *
 * Emits the exported function ff_<type>_pixels16<subp>_armv6, which runs
 * ff_<type>_pixels8<subp>_armv6 twice: first with the incoming r0-r3, then
 * with the same r2/r3 but r0 and r1 each advanced by 8.
 *
 * NOTE(review): five words are pushed, so sp is not a multiple of 8 while
 * the first call runs; the 8-wide routines visible in this file only use
 * push/pop on the stack. Re-check if a callee ever needs an aligned sp.
 */
23 .macro call_2x_pixels type, subp
24 function ff_\type\()_pixels16\subp\()_armv6, export=1
25 push {r0-r3, lr} /* the callee advances r0/r1 and counts r3 down; keep the originals and lr */
26 bl X(ff_\type\()_pixels8\subp\()_armv6)
27 pop {r0-r3, lr} /* original arguments and return address back in place */
28 add r0, r0, #8 /* second call starts 8 bytes further into each row */
29 add r1, r1, #8
30 b X(ff_\type\()_pixels8\subp\()_armv6) /* plain branch with lr restored: the callee returns to our caller */
31 endfunc
32 .endm
33
/*
 * Instantiate the 16-wide wrappers. Each line expands to
 * ff_<type>_pixels16<subp>_armv6 built on the matching 8-wide routine:
 * ff_avg_pixels16_armv6, ff_put_pixels16_x2_armv6, ff_put_pixels16_y2_armv6,
 * ff_put_pixels16_x2_no_rnd_armv6 and ff_put_pixels16_y2_no_rnd_armv6.
 */
34 call_2x_pixels avg
35 call_2x_pixels put, _x2
36 call_2x_pixels put, _y2
37 call_2x_pixels put, _x2_no_rnd
38 call_2x_pixels put, _y2_no_rnd
39
/*
 * ff_put_pixels16_armv6
 *
 * Copies 16 bytes per row from r1 to r0, two rows per loop iteration.
 *
 * Registers on entry, as used by the code:
 *   r0  destination pointer, written with strd / strd_post
 *   r1  source pointer, read with word loads
 *   r2  byte step applied to r0 and r1 after every row
 *   r3  row count; decremented by 2 per iteration and the loop only exits
 *       when it reaches exactly zero
 *
 * ldr_post and strd_post are macros from libavutil/arm/asm.S (not visible
 * here); the notes below assume they access [r1] / [r0] and then add r2 to
 * the base register -- confirm there.
 * NOTE(review): the strd stores presumably need r0 suitably aligned on
 * ARMv6 -- confirm against the callers.
 */
40 function ff_put_pixels16_armv6, export=1
41 push {r4-r11}
42 1:
43 ldr r5, [r1, #4] /* first row: words 1-3 are fetched before r1 moves ... */
44 ldr r6, [r1, #8]
45 ldr r7, [r1, #12]
46 ldr_post r4, r1, r2 /* ... word 0 last, which also steps r1 to the next row */
47 strd r6, r7, [r0, #8] /* upper 8 bytes of the first destination row */
48 ldr r9, [r1, #4] /* second row loads are interleaved with the first row stores */
49 strd_post r4, r5, r0, r2 /* lower 8 bytes, then r0 steps to the next row */
50 ldr r10, [r1, #8]
51 ldr r11, [r1, #12]
52 ldr_post r8, r1, r2
53 strd r10, r11, [r0, #8]
54 subs r3, r3, #2 /* two rows done; the flags are consumed by the bne below */
55 strd_post r8, r9, r0, r2
56 bne 1b
57
58 pop {r4-r11}
59 bx lr
60 endfunc
61
/*
 * ff_put_pixels8_armv6
 *
 * Copies 8 bytes per row from r1 to r0, two rows per loop iteration.
 *
 * Registers on entry, as used by the code:
 *   r0  destination pointer, written with strd_post
 *   r1  source pointer, read with word loads
 *   r2  byte step applied to r0 and r1 after every row
 *   r3  row count; decremented by 2 per iteration and the loop only exits
 *       when it reaches exactly zero
 *
 * ldr_post and strd_post are macros from libavutil/arm/asm.S (not visible
 * here); the notes below assume they access [r1] / [r0] and then add r2 to
 * the base register -- confirm there.
 */
62 function ff_put_pixels8_armv6, export=1
63 push {r4-r7}
64 1:
65 ldr r5, [r1, #4] /* high word first, while r1 still points at this row */
66 ldr_post r4, r1, r2 /* low word, then r1 steps to the next row */
67 ldr r7, [r1, #4]
68 strd_post r4, r5, r0, r2
69 ldr_post r6, r1, r2
70 subs r3, r3, #2 /* two rows done; the flags are consumed by the bne below */
71 strd_post r6, r7, r0, r2
72 bne 1b
73
74 pop {r4-r7}
75 bx lr
76 endfunc
77
/*
 * ff_put_pixels8_x2_armv6
 *
 * For each row, writes 8 bytes to r0 where byte i is the rounded-up average
 * of source bytes i and i+1, i.e. (a + b + 1) >> 1. Nine source bytes are
 * read per row (offsets 0-8 from r1).
 *
 * Registers on entry, as used by the code:
 *   r0  destination pointer, written with strd_post
 *   r1  source pointer
 *   r2  byte step applied to r0 and r1 after every row
 *   r3  row count; two rows are produced per iteration and the loop only
 *       exits when r3 reaches exactly zero
 *
 * Rounding: uhadd8 yields (a + b) >> 1 per byte; (a ^ b) & 1 is the bit that
 * shift dropped, and uadd8 adds it back.
 *
 * ldr_pre and strd_post are macros from libavutil/arm/asm.S (not visible
 * here); the notes below assume ldr_pre loads from r1 + r2 and leaves r1
 * updated, and strd_post stores at r0 and then adds r2 -- confirm there.
 */
78 function ff_put_pixels8_x2_armv6, export=1
79 push {r4-r11, lr}
80 mov r12, #1
81 orr r12, r12, r12, lsl #8
82 orr r12, r12, r12, lsl #16 /* r12 = 0x01010101: bit 0 of every byte lane */
83 1:
84 ldr r4, [r1] /* first row, bytes 0-3 */
85 subs r3, r3, #2 /* flags stay live until the bne at the bottom */
86 ldr r5, [r1, #4] /* bytes 4-7 */
87 ldr r7, [r1, #5] /* bytes 5-8: right-hand neighbours of r5 (unaligned word load) */
88 lsr r6, r4, #8
89 ldr_pre r8, r1, r2 /* second row, bytes 0-3 */
90 orr r6, r6, r5, lsl #24 /* r6 = bytes 1-4, assuming little-endian word loads */
91 ldr r9, [r1, #4]
92 ldr r11, [r1, #5]
93 lsr r10, r8, #8
94 add r1, r1, r2 /* r1 now points at the row for the next iteration */
95 orr r10, r10, r9, lsl #24 /* r10 = second-row bytes 1-4 */
96 eor r14, r4, r6
97 uhadd8 r4, r4, r6
98 eor r6, r5, r7
99 uhadd8 r5, r5, r7
100 and r14, r14, r12 /* rounding bits for the low word */
101 and r6, r6, r12 /* rounding bits for the high word */
102 uadd8 r4, r4, r14
103 eor r14, r8, r10
104 uadd8 r5, r5, r6
105 eor r6, r9, r11
106 uhadd8 r8, r8, r10
107 and r14, r14, r12
108 uhadd8 r9, r9, r11
109 and r6, r6, r12
110 uadd8 r8, r8, r14
111 strd_post r4, r5, r0, r2 /* first output row */
112 uadd8 r9, r9, r6
113 strd_post r8, r9, r0, r2 /* second output row */
114 bne 1b
115
116 pop {r4-r11, pc}
117 endfunc
118
/*
 * ff_put_pixels8_y2_armv6
 *
 * Vertical half-pel interpolation with rounding on an 8-byte-wide block:
 * output row n is the per-byte (a + b + 1) >> 1 of source rows n and n+1.
 * Producing h rows therefore reads exactly h + 1 source rows.
 *
 * Registers on entry, as used by the code:
 *   r0  destination pointer, written with strd_post
 *   r1  source pointer
 *   r2  byte step applied to r0 and r1 after every row
 *   r3  row count h; two rows are produced per iteration and the loop only
 *       exits when r3 reaches exactly zero, so h must be even and non-zero
 *
 * Loop invariant at label 1: r4:r5 hold source row n, r6:r7 hold row n+1 and
 * r1 points at row n+1.
 *
 * Overread handling: the first load inside the loop (row n+2) is needed
 * even on the final iteration, because the last output row is the average
 * of source rows h-1 and h. It must therefore be unconditional. Only the
 * second load (row n+3) would step past row h on the final iteration, so
 * only that one is predicated on "more rows remain" (ne, from the subs).
 * Nothing between the subs and the bne alters the N/Z/C/V flags.
 *
 * Rounding: uhadd8 yields (a + b) >> 1 per byte; (a ^ b) & 0x01 is the bit
 * that shift dropped, and uadd8 adds it back.
 *
 * ldr_pre, ldrc_pre and strd_post come from libavutil/arm/asm.S.
 */
function ff_put_pixels8_y2_armv6, export=1
        push            {r4-r11}
        mov             r12, #1
        orr             r12, r12, r12, lsl #8
        orr             r12, r12, r12, lsl #16  /* r12 = 0x01010101 */
        ldr             r4,  [r1]               /* r4:r5 = source row 0 */
        ldr             r5,  [r1, #4]
        ldr_pre         r6,  r1,  r2            /* r6:r7 = source row 1 */
        ldr             r7,  [r1, #4]
1:
        subs            r3,  r3,  #2            /* Z set on the final pair of rows */
        uhadd8          r8,  r4,  r6
        eor             r10, r4,  r6
        uhadd8          r9,  r5,  r7
        eor             r11, r5,  r7
        and             r10, r10, r12
        ldr_pre         r4,  r1,  r2            /* row n+2: always required */
        uadd8           r8,  r8,  r10           /* r8:r9 = rnd_avg(row n, row n+1) */
        and             r11, r11, r12
        uadd8           r9,  r9,  r11
        ldr             r5,  [r1, #4]
        uhadd8          r10, r4,  r6
        eor             r6,  r4,  r6            /* r6/r7 become scratch from here on */
        uhadd8          r11, r5,  r7
        and             r6,  r6,  r12
        eor             r7,  r5,  r7
        uadd8           r10, r10, r6            /* r10:r11 = rnd_avg(row n+1, row n+2) */
        and             r7,  r7,  r12
        ldrc_pre        ne,  r6,  r1,  r2       /* row n+3: only if another pass follows */
        uadd8           r11, r11, r7
        strd_post       r8,  r9,  r0,  r2
        it              ne
        ldrne           r7,  [r1, #4]
        strd_post       r10, r11, r0,  r2
        bne             1b

        pop             {r4-r11}
        bx              lr
endfunc
159
/*
 * ff_put_pixels8_x2_no_rnd_armv6
 *
 * For each row, writes 8 bytes to r0 where byte i is the truncating average
 * (a + b) >> 1 of source bytes i and i+1 (uhadd8 only, no rounding
 * correction). Nine source bytes are read per row (offsets 0-8 from r1).
 *
 * Registers on entry, as used by the code:
 *   r0  destination pointer, written with stm
 *   r1  source pointer
 *   r2  byte step applied to r0 and r1 after every row
 *   r3  row count; two rows are produced per iteration and the loop only
 *       exits when r3 reaches exactly zero
 *
 * ldr_pre is a macro from libavutil/arm/asm.S (not visible here); the notes
 * below assume it loads from r1 + r2 and leaves r1 updated -- confirm there.
 */
160 function ff_put_pixels8_x2_no_rnd_armv6, export=1
161 push {r4-r9, lr}
162 1:
163 subs r3, r3, #2 /* flags stay live until the bne at the bottom */
164 ldr r4, [r1] /* first row, bytes 0-3 */
165 ldr r5, [r1, #4] /* bytes 4-7 */
166 ldr r7, [r1, #5] /* bytes 5-8: right-hand neighbours of r5 (unaligned word load) */
167 ldr_pre r8, r1, r2 /* second row, bytes 0-3 */
168 ldr r9, [r1, #4]
169 ldr r14, [r1, #5]
170 add r1, r1, r2 /* r1 now points at the row for the next iteration */
171 lsr r6, r4, #8
172 orr r6, r6, r5, lsl #24 /* r6 = first-row bytes 1-4, assuming little-endian word loads */
173 lsr r12, r8, #8
174 orr r12, r12, r9, lsl #24 /* r12 = second-row bytes 1-4 */
175 uhadd8 r4, r4, r6
176 uhadd8 r5, r5, r7
177 uhadd8 r8, r8, r12
178 uhadd8 r9, r9, r14
179 stm r0, {r4,r5} /* first output row */
180 add r0, r0, r2
181 stm r0, {r8,r9} /* second output row */
182 add r0, r0, r2
183 bne 1b
184
185 pop {r4-r9, pc}
186 endfunc
187
/*
 * ff_put_pixels8_y2_no_rnd_armv6
 *
 * Vertical half-pel interpolation without rounding on an 8-byte-wide block:
 * output row n is the per-byte truncating average (a + b) >> 1 of source
 * rows n and n+1. Producing h rows therefore reads exactly h + 1 source rows.
 *
 * Registers on entry, as used by the code:
 *   r0  destination pointer, written with stm
 *   r1  source pointer
 *   r2  byte step applied to r0 and r1 after every row
 *   r3  row count h; two rows are produced per iteration and the loop only
 *       exits when r3 reaches exactly zero, so h must be even and non-zero
 *
 * Loop invariant at label 1: r4:r5 hold source row n, r6:r7 hold row n+1 and
 * r1 points at row n+1.
 *
 * Overread handling: the first load inside the loop (row n+2) is needed
 * even on the final iteration, because the last output row is the average
 * of source rows h-1 and h. It must therefore be unconditional. Only the
 * second load (row n+3) would step past row h on the final iteration, so
 * only that one is predicated on "more rows remain" (ne, from the subs).
 * Nothing between the subs and the bne alters the N/Z/C/V flags.
 *
 * ldr_pre and ldrc_pre come from libavutil/arm/asm.S.
 */
function ff_put_pixels8_y2_no_rnd_armv6, export=1
        push            {r4-r9, lr}
        ldr             r4,  [r1]               /* r4:r5 = source row 0 */
        ldr             r5,  [r1, #4]
        ldr_pre         r6,  r1,  r2            /* r6:r7 = source row 1 */
        ldr             r7,  [r1, #4]
1:
        subs            r3,  r3,  #2            /* Z set on the final pair of rows */
        uhadd8          r8,  r4,  r6            /* r8:r9 = avg(row n, row n+1) */
        ldr_pre         r4,  r1,  r2            /* row n+2: always required */
        uhadd8          r9,  r5,  r7
        ldr             r5,  [r1, #4]
        uhadd8          r12, r4,  r6            /* r12:r14 = avg(row n+1, row n+2) */
        ldrc_pre        ne,  r6,  r1,  r2       /* row n+3: only if another pass follows */
        uhadd8          r14, r5,  r7
        it              ne
        ldrne           r7,  [r1, #4]
        stm             r0,  {r8,r9}
        add             r0,  r0,  r2
        stm             r0,  {r12,r14}
        add             r0,  r0,  r2
        bne             1b

        pop             {r4-r9, pc}
endfunc
214
/*
 * ff_avg_pixels8_armv6
 *
 * For each row, replaces the 8 bytes at r0 with the per-byte rounded-up
 * average (a + b + 1) >> 1 of those bytes and the 8 bytes read from r1.
 *
 * Registers on entry, as used by the code:
 *   r0  destination pointer, both read (ldrd / ldrd_reg) and written
 *   r1  source pointer
 *   r2  byte step applied to r0 and r1 after every row
 *   r3  row count; decremented by 2 per pair of rows, and the routine only
 *       finishes when it reaches exactly zero
 *
 * The loop is software-pipelined: loads for the following rows are issued
 * while the current rows are still being combined. When the subs has brought
 * r3 to zero, the beq leaves for label 2, which finishes the second row of
 * the pair without issuing any further loads.
 *
 * Rounding: uhadd8 yields (a + b) >> 1 per byte; (a ^ b) & 1 is the bit that
 * shift dropped, and uadd8 adds it back.
 *
 * ldr_post, strd_post and ldrd_reg are macros from libavutil/arm/asm.S (not
 * visible here); the notes below assume ldr_post / strd_post access the base
 * and then add r2 to it, and ldrd_reg loads from r0 + r2 without changing
 * r0 -- confirm there.
 */
215 function ff_avg_pixels8_armv6, export=1
216 pld [r1, r2] /* preload hint for the next source row */
217 push {r4-r10, lr}
218 mov lr, #1
219 orr lr, lr, lr, lsl #8
220 orr lr, lr, lr, lsl #16 /* lr = 0x01010101: bit 0 of every byte lane */
221 ldrd r4, r5, [r0] /* r4:r5 = first destination row */
222 ldr r10, [r1, #4]
223 ldr_post r9, r1, r2 /* r9:r10 = first source row; r1 steps to the next row */
224 subs r3, r3, #2 /* flags are tested by the beq inside the loop */
225 1:
226 pld [r1, r2]
227 eor r8, r4, r9
228 uhadd8 r4, r4, r9
229 eor r12, r5, r10
230 ldrd_reg r6, r7, r0, r2 /* r6:r7 = destination row after the current one */
231 uhadd8 r5, r5, r10
232 and r8, r8, lr
233 ldr r10, [r1, #4]
234 and r12, r12, lr
235 uadd8 r4, r4, r8
236 ldr_post r9, r1, r2 /* r9:r10 = source row for r6:r7 */
237 eor r8, r6, r9
238 uadd8 r5, r5, r12
239 pld [r1, r2, lsl #1]
240 eor r12, r7, r10
241 uhadd8 r6, r6, r9
242 strd_post r4, r5, r0, r2 /* first row of the pair written back */
243 uhadd8 r7, r7, r10
244 beq 2f /* last pair: finish at 2 without loading further rows */
245 and r8, r8, lr
246 ldrd_reg r4, r5, r0, r2 /* r0 is on the second row of the pair; fetch the destination row after it */
247 uadd8 r6, r6, r8
248 ldr r10, [r1, #4]
249 and r12, r12, lr
250 subs r3, r3, #2 /* count the next pair; tested by the beq on the next pass */
251 uadd8 r7, r7, r12
252 ldr_post r9, r1, r2
253 strd_post r6, r7, r0, r2 /* second row of the pair written back */
254 b 1b
255 2:
256 and r8, r8, lr
257 and r12, r12, lr
258 uadd8 r6, r6, r8
259 uadd8 r7, r7, r12
260 strd_post r6, r7, r0, r2 /* final row */
261
262 pop {r4-r10, pc}
263 endfunc