4a76a08fade98a22fc73f21dc9181a52231afabb
[libav.git] / libavcodec / arm / dsputil_arm_s.S
1 @
2 @ ARMv4 optimized DSP utils
3 @ Copyright (c) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
4 @
5 @ This file is part of FFmpeg.
6 @
7 @ FFmpeg is free software; you can redistribute it and/or
8 @ modify it under the terms of the GNU Lesser General Public
9 @ License as published by the Free Software Foundation; either
10 @ version 2.1 of the License, or (at your option) any later version.
11 @
12 @ FFmpeg is distributed in the hope that it will be useful,
13 @ but WITHOUT ANY WARRANTY; without even the implied warranty of
14 @ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 @ Lesser General Public License for more details.
16 @
17 @ You should have received a copy of the GNU Lesser General Public
18 @ License along with FFmpeg; if not, write to the Free Software
19 @ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 @
21
22 #include "config.h"
23 #include "asm.S"
24
25 preserve8
26
#if !HAVE_PLD
@ On cores without the PLD (preload) instruction, define "pld" as an
@ empty macro so the prefetch hints throughout this file assemble away.
.macro pld reg
.endm
#endif
31
#if HAVE_ARMV5TE
@ void ff_prefetch_arm(const uint8_t *mem /*r0*/, int stride /*r1*/, int h /*r2*/)
@ Issue a cache-preload hint for h lines spaced stride bytes apart.
@ Loops by branching back to the function entry; r2 reaches 0 on exit.
function ff_prefetch_arm, export=1
        subs r2, r2, #1
        pld [r0]
        add r0, r0, r1
        bne ff_prefetch_arm
        bx lr
        .endfunc
#endif
41
@ Realign 16 unaligned bytes held in 5 loaded words into 4 aligned words:
@   Rd[i] = (Rn[i] >> (shift*8)) | (Rn[i+1] << (32 - shift*8))
@ shift = source byte misalignment (1..3).  Funnel-shifts each pair of
@ consecutive words; assumes little-endian byte order (bytes move down
@ from the next-higher word) -- NOTE(review): confirm for big-endian builds.
.macro ADJ_ALIGN_QUADWORD_D shift, Rd0, Rd1, Rd2, Rd3, Rn0, Rn1, Rn2, Rn3, Rn4
        mov \Rd0, \Rn0, lsr #(\shift * 8)
        mov \Rd1, \Rn1, lsr #(\shift * 8)
        mov \Rd2, \Rn2, lsr #(\shift * 8)
        mov \Rd3, \Rn3, lsr #(\shift * 8)
        orr \Rd0, \Rd0, \Rn1, lsl #(32 - \shift * 8)
        orr \Rd1, \Rd1, \Rn2, lsl #(32 - \shift * 8)
        orr \Rd2, \Rd2, \Rn3, lsl #(32 - \shift * 8)
        orr \Rd3, \Rd3, \Rn4, lsl #(32 - \shift * 8)
.endm
@ In-place variant for 8 unaligned bytes in 3 words: shift R0/R1 down by
@ shift bytes, pulling the vacated bytes in from the next-higher word.
@ R0 and R1 are rewritten; R2 supplies only spill bytes and is preserved.
.macro ADJ_ALIGN_DOUBLEWORD shift, R0, R1, R2
        mov \R0, \R0, lsr #(\shift * 8)
        orr \R0, \R0, \R1, lsl #(32 - \shift * 8)
        mov \R1, \R1, lsr #(\shift * 8)
        orr \R1, \R1, \R2, lsl #(32 - \shift * 8)
.endm
@ Non-destructive doubleword realign: like ADJ_ALIGN_DOUBLEWORD but writes
@ the shifted result to separate destination registers, leaving the three
@ source words intact (so several shift amounts can be derived from one load).
.macro ADJ_ALIGN_DOUBLEWORD_D shift, Rdst0, Rdst1, Rsrc0, Rsrc1, Rsrc2
        mov \Rdst0, \Rsrc0, lsr #(\shift * 8)
        mov \Rdst1, \Rsrc1, lsr #(\shift * 8)
        orr \Rdst0, \Rdst0, \Rsrc1, lsl #(32 - (\shift * 8))
        orr \Rdst1, \Rdst1, \Rsrc2, lsl #(32 - (\shift * 8))
.endm
64
@ Packed per-byte ROUNDED average of two 8-byte rows:
@ Rd = (Rn | Rm) - (((Rn ^ Rm) & ~0x01010101) >> 1)
@    = for each byte: (a + b + 1) >> 1
@ Rmask must hold 0xFEFEFEFE (= ~0x01010101, keeps shifted bits in-lane)
@ Rn0/Rn1 are destroyed (overwritten with Rn | Rm)
.macro RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
        eor \Rd0, \Rn0, \Rm0
        eor \Rd1, \Rn1, \Rm1
        orr \Rn0, \Rn0, \Rm0
        orr \Rn1, \Rn1, \Rm1
        and \Rd0, \Rd0, \Rmask
        and \Rd1, \Rd1, \Rmask
        sub \Rd0, \Rn0, \Rd0, lsr #1
        sub \Rd1, \Rn1, \Rd1, lsr #1
.endm
78
@ Packed per-byte TRUNCATING (no-rounding) average of two 8-byte rows:
@ Rd = (Rn & Rm) + (((Rn ^ Rm) & ~0x01010101) >> 1)
@    = for each byte: (a + b) >> 1
@ (The code adds the halved XOR to the AND; an earlier comment showed '-'.)
@ Rmask must hold 0xFEFEFEFE
@ Rn0/Rn1 are destroyed (overwritten with Rn & Rm)
.macro NO_RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
        eor \Rd0, \Rn0, \Rm0
        eor \Rd1, \Rn1, \Rm1
        and \Rn0, \Rn0, \Rm0
        and \Rn1, \Rn1, \Rm1
        and \Rd0, \Rd0, \Rmask
        and \Rd1, \Rd1, \Rmask
        add \Rd0, \Rn0, \Rd0, lsr #1
        add \Rd1, \Rn1, \Rd1, lsr #1
.endm
92
@ Dispatch on the byte alignment of \reg (its low 2 bits) and word-align it:
@   alignment 0 -> falls to local label 1f, 1 -> 2f, 2 -> 3f, 3 -> 4f.
@ NOTE(review): the label1/label2/label3 arguments are never referenced;
@ the macro branches to the hard-coded numeric labels 1f-4f that every
@ call site defines.  \tmp is clobbered; flags are clobbered.
.macro JMP_ALIGN tmp, reg, label1, label2, label3
        ands \tmp, \reg, #3
        bic \reg, \reg, #3
        beq 1f
        subs \tmp, \tmp, #1
        beq 2f
        subs \tmp, \tmp, #1
        beq 3f
        b 4f
.endm
103
104 @ ----------------------------------------------------------------
        .align 5
function put_pixels16_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        @ Straight 16xh copy; one unrolled loop per source byte alignment (0-3).
        pld [r1]
        stmfd sp!, {r4-r11, lr} @ R14 is also called LR
        JMP_ALIGN r5, r1, 2f, 3f, 4f
1:
        @ alignment 0: plain 16-byte load/store per line
        ldmia r1, {r4-r7}
        add r1, r1, r2
        stmia r0, {r4-r7}
        pld [r1]
        subs r3, r3, #1
        add r0, r0, r2
        bne 1b
        ldmfd sp!, {r4-r11, pc}
        .align 5
2:
        @ alignment 1: load 5 words, funnel-shift into 4 aligned words
        ldmia r1, {r4-r8}
        add r1, r1, r2
        ADJ_ALIGN_QUADWORD_D 1, r9, r10, r11, r12, r4, r5, r6, r7, r8
        pld [r1]
        subs r3, r3, #1
        stmia r0, {r9-r12}
        add r0, r0, r2
        bne 2b
        ldmfd sp!, {r4-r11, pc}
        .align 5
3:
        @ alignment 2
        ldmia r1, {r4-r8}
        add r1, r1, r2
        ADJ_ALIGN_QUADWORD_D 2, r9, r10, r11, r12, r4, r5, r6, r7, r8
        pld [r1]
        subs r3, r3, #1
        stmia r0, {r9-r12}
        add r0, r0, r2
        bne 3b
        ldmfd sp!, {r4-r11, pc}
        .align 5
4:
        @ alignment 3
        ldmia r1, {r4-r8}
        add r1, r1, r2
        ADJ_ALIGN_QUADWORD_D 3, r9, r10, r11, r12, r4, r5, r6, r7, r8
        pld [r1]
        subs r3, r3, #1
        stmia r0, {r9-r12}
        add r0, r0, r2
        bne 4b
        ldmfd sp!, {r4-r11,pc}
        .endfunc
155
156 @ ----------------------------------------------------------------
        .align 5
function put_pixels8_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        @ Straight 8xh copy; one loop per source byte alignment (0-3).
        pld [r1]
        stmfd sp!, {r4-r5,lr} @ R14 is also called LR
        JMP_ALIGN r5, r1, 2f, 3f, 4f
1:
        @ alignment 0: plain 8-byte load/store per line
        ldmia r1, {r4-r5}
        add r1, r1, r2
        subs r3, r3, #1
        pld [r1]
        stmia r0, {r4-r5}
        add r0, r0, r2
        bne 1b
        ldmfd sp!, {r4-r5,pc}
        .align 5
2:
        @ alignment 1: load 3 words, realign to 2
        ldmia r1, {r4-r5, r12}
        add r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r12
        pld [r1]
        subs r3, r3, #1
        stmia r0, {r4-r5}
        add r0, r0, r2
        bne 2b
        ldmfd sp!, {r4-r5,pc}
        .align 5
3:
        @ alignment 2
        ldmia r1, {r4-r5, r12}
        add r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r12
        pld [r1]
        subs r3, r3, #1
        stmia r0, {r4-r5}
        add r0, r0, r2
        bne 3b
        ldmfd sp!, {r4-r5,pc}
        .align 5
4:
        @ alignment 3
        ldmia r1, {r4-r5, r12}
        add r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r12
        pld [r1]
        subs r3, r3, #1
        stmia r0, {r4-r5}
        add r0, r0, r2
        bne 4b
        ldmfd sp!, {r4-r5,pc}
        .endfunc
207
208 @ ----------------------------------------------------------------
        .align 5
function put_pixels8_x2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        @ Horizontal half-pel with rounding: dst[i] = avg(src[i], src[i+1]),
        @ per-byte rounded average of the row and the row shifted one byte.
        pld [r1]
        stmfd sp!, {r4-r10,lr} @ R14 is also called LR
        ldr r12, =0xfefefefe   @ mask for the packed-average macros
        JMP_ALIGN r5, r1, 2f, 3f, 4f
1:
        @ alignment 0: average shift-0 ({r4,r5}) with shift-1 ({r6,r7})
        ldmia r1, {r4-r5, r10}
        add r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
        pld [r1]
        RND_AVG32 r8, r9, r4, r5, r6, r7, r12
        subs r3, r3, #1
        stmia r0, {r8-r9}
        add r0, r0, r2
        bne 1b
        ldmfd sp!, {r4-r10,pc}
        .align 5
2:
        @ alignment 1: average shift-1 with shift-2
        ldmia r1, {r4-r5, r10}
        add r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
        ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10
        pld [r1]
        RND_AVG32 r4, r5, r6, r7, r8, r9, r12
        subs r3, r3, #1
        stmia r0, {r4-r5}
        add r0, r0, r2
        bne 2b
        ldmfd sp!, {r4-r10,pc}
        .align 5
3:
        @ alignment 2: average shift-2 with shift-3
        ldmia r1, {r4-r5, r10}
        add r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10
        ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10
        pld [r1]
        RND_AVG32 r4, r5, r6, r7, r8, r9, r12
        subs r3, r3, #1
        stmia r0, {r4-r5}
        add r0, r0, r2
        bne 3b
        ldmfd sp!, {r4-r10,pc}
        .align 5
4:
        @ alignment 3: average shift-3 with shift-4 (= next word, {r5,r10})
        ldmia r1, {r4-r5, r10}
        add r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10
        pld [r1]
        RND_AVG32 r8, r9, r6, r7, r5, r10, r12
        subs r3, r3, #1
        stmia r0, {r8-r9}
        add r0, r0, r2
        bne 4b
        ldmfd sp!, {r4-r10,pc} @@ update PC with LR content.
        .endfunc
267
        .align 5
function put_no_rnd_pixels8_x2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        @ Same as put_pixels8_x2_arm but with the truncating (no-rounding)
        @ average: dst[i] = (src[i] + src[i+1]) >> 1.
        pld [r1]
        stmfd sp!, {r4-r10,lr} @ R14 is also called LR
        ldr r12, =0xfefefefe   @ mask for the packed-average macros
        JMP_ALIGN r5, r1, 2f, 3f, 4f
1:
        @ alignment 0: average shift-0 with shift-1
        ldmia r1, {r4-r5, r10}
        add r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
        pld [r1]
        NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12
        subs r3, r3, #1
        stmia r0, {r8-r9}
        add r0, r0, r2
        bne 1b
        ldmfd sp!, {r4-r10,pc}
        .align 5
2:
        @ alignment 1: average shift-1 with shift-2
        ldmia r1, {r4-r5, r10}
        add r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
        ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10
        pld [r1]
        NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12
        subs r3, r3, #1
        stmia r0, {r4-r5}
        add r0, r0, r2
        bne 2b
        ldmfd sp!, {r4-r10,pc}
        .align 5
3:
        @ alignment 2: average shift-2 with shift-3
        ldmia r1, {r4-r5, r10}
        add r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10
        ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10
        pld [r1]
        NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12
        subs r3, r3, #1
        stmia r0, {r4-r5}
        add r0, r0, r2
        bne 3b
        ldmfd sp!, {r4-r10,pc}
        .align 5
4:
        @ alignment 3: average shift-3 with shift-4 (= next word, {r5,r10})
        ldmia r1, {r4-r5, r10}
        add r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10
        pld [r1]
        NO_RND_AVG32 r8, r9, r6, r7, r5, r10, r12
        subs r3, r3, #1
        stmia r0, {r8-r9}
        add r0, r0, r2
        bne 4b
        ldmfd sp!, {r4-r10,pc} @@ update PC with LR content.
        .endfunc
326
327
328 @ ----------------------------------------------------------------
        .align 5
function put_pixels8_y2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        @ Vertical half-pel with rounding: dst[y] = avg(src[y], src[y+1]).
        @ Each loop iteration produces TWO output lines (hence h >>= 1),
        @ reusing the just-loaded line as the "previous" line of the next pair.
        pld [r1]
        stmfd sp!, {r4-r11,lr} @ R14 is also called LR
        mov r3, r3, lsr #1     @ two lines per iteration
        ldr r12, =0xfefefefe
        JMP_ALIGN r5, r1, 2f, 3f, 4f
1:
        @ alignment 0
        ldmia r1, {r4-r5}      @ prime: line 0 in {r4,r5}
        add r1, r1, r2
6:      ldmia r1, {r6-r7}      @ next line in {r6,r7}
        add r1, r1, r2
        pld [r1]
        RND_AVG32 r8, r9, r4, r5, r6, r7, r12
        ldmia r1, {r4-r5}      @ line after that; {r6,r7} becomes "previous"
        add r1, r1, r2
        stmia r0, {r8-r9}
        add r0, r0, r2
        pld [r1]
        RND_AVG32 r8, r9, r6, r7, r4, r5, r12
        subs r3, r3, #1
        stmia r0, {r8-r9}
        add r0, r0, r2
        bne 6b
        ldmfd sp!, {r4-r11,pc}
        .align 5
2:
        @ alignment 1: realign every loaded line by 1 byte before averaging
        ldmia r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
6:      ldmia r1, {r7-r9}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r7, r8, r9
        RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stmia r0, {r10-r11}
        add r0, r0, r2
        ldmia r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
        subs r3, r3, #1
        RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stmia r0, {r10-r11}
        add r0, r0, r2
        bne 6b
        ldmfd sp!, {r4-r11,pc}
        .align 5
3:
        @ alignment 2
        ldmia r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
6:      ldmia r1, {r7-r9}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r7, r8, r9
        RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stmia r0, {r10-r11}
        add r0, r0, r2
        ldmia r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
        subs r3, r3, #1
        RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stmia r0, {r10-r11}
        add r0, r0, r2
        bne 6b
        ldmfd sp!, {r4-r11,pc}
        .align 5
4:
        @ alignment 3
        ldmia r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
6:      ldmia r1, {r7-r9}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r7, r8, r9
        RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stmia r0, {r10-r11}
        add r0, r0, r2
        ldmia r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
        subs r3, r3, #1
        RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stmia r0, {r10-r11}
        add r0, r0, r2
        bne 6b
        ldmfd sp!, {r4-r11,pc}
        .endfunc
426
        .align 5
function put_no_rnd_pixels8_y2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        @ Same as put_pixels8_y2_arm but with the truncating (no-rounding)
        @ average.  Two output lines per loop iteration (h >>= 1).
        pld [r1]
        stmfd sp!, {r4-r11,lr} @ R14 is also called LR
        mov r3, r3, lsr #1     @ two lines per iteration
        ldr r12, =0xfefefefe
        JMP_ALIGN r5, r1, 2f, 3f, 4f
1:
        @ alignment 0
        ldmia r1, {r4-r5}      @ prime: line 0
        add r1, r1, r2
6:      ldmia r1, {r6-r7}
        add r1, r1, r2
        pld [r1]
        NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12
        ldmia r1, {r4-r5}
        add r1, r1, r2
        stmia r0, {r8-r9}
        add r0, r0, r2
        pld [r1]
        NO_RND_AVG32 r8, r9, r6, r7, r4, r5, r12
        subs r3, r3, #1
        stmia r0, {r8-r9}
        add r0, r0, r2
        bne 6b
        ldmfd sp!, {r4-r11,pc}
        .align 5
2:
        @ alignment 1
        ldmia r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
6:      ldmia r1, {r7-r9}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r7, r8, r9
        NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stmia r0, {r10-r11}
        add r0, r0, r2
        ldmia r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
        subs r3, r3, #1
        NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stmia r0, {r10-r11}
        add r0, r0, r2
        bne 6b
        ldmfd sp!, {r4-r11,pc}
        .align 5
3:
        @ alignment 2
        ldmia r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
6:      ldmia r1, {r7-r9}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r7, r8, r9
        NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stmia r0, {r10-r11}
        add r0, r0, r2
        ldmia r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
        subs r3, r3, #1
        NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stmia r0, {r10-r11}
        add r0, r0, r2
        bne 6b
        ldmfd sp!, {r4-r11,pc}
        .align 5
4:
        @ alignment 3
        ldmia r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
6:      ldmia r1, {r7-r9}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r7, r8, r9
        NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stmia r0, {r10-r11}
        add r0, r0, r2
        ldmia r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
        subs r3, r3, #1
        NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stmia r0, {r10-r11}
        add r0, r0, r2
        bne 6b
        ldmfd sp!, {r4-r11,pc}
        .endfunc
524
525 .ltorg
526
527 @ ----------------------------------------------------------------
@ One source-line step of the 2x2 (xy half-pel) average.
@ Loads one unaligned 8+1 byte line, builds a = line[x] in {r4,r5} and
@ b = line[x+1] in {r6,r7} for the given byte alignment, then splits the
@ per-byte sum a+b into two partial sums so four pixels can be added
@ without inter-byte carries:
@   l1 = (a & 0x03030303) + (b & 0x03030303) ?(+ rounding constant)
@   h1 = ((a & 0xFCFCFCFC) >> 2) + ((b & 0xFCFCFCFC) >> 2)
@ Results: l1 in {r8,r9}, h1 in {r10,r11}.
@ \rnd selects the constant via "and r14, r14, r14, \rnd #1":
@   lsl -> 0x03030303 & 0x06060606 = 0x02020202  (rounding)
@   lsr -> 0x03030303 & 0x01010101 = 0x01010101  (no-rounding)
@ It is added only when r3 is even; since r3 is decremented once per
@ invocation and RND_XY2_EXPAND sums two consecutive invocations, the
@ constant lands exactly once per output line.
@ Decrements r3 and sets flags (consumed by RND_XY2_EXPAND's bge).
.macro RND_XY2_IT align, rnd
        @ l1= (a & 0x03030303) + (b & 0x03030303) ?(+ 0x02020202)
        @ h1= ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2)
.if \align == 0
        ldmia r1, {r6-r8}
.elseif \align == 3
        ldmia r1, {r5-r7}
.else
        ldmia r1, {r8-r10}
.endif
        add r1, r1, r2
        pld [r1]
.if \align == 0
        ADJ_ALIGN_DOUBLEWORD_D 1, r4, r5, r6, r7, r8
.elseif \align == 1
        ADJ_ALIGN_DOUBLEWORD_D 1, r4, r5, r8, r9, r10
        ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r8, r9, r10
.elseif \align == 2
        ADJ_ALIGN_DOUBLEWORD_D 2, r4, r5, r8, r9, r10
        ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r8, r9, r10
.elseif \align == 3
        ADJ_ALIGN_DOUBLEWORD_D 3, r4, r5, r5, r6, r7
.endif
        ldr r14, =0x03030303
        tst r3, #1
        and r8, r4, r14
        and r9, r5, r14
        and r10, r6, r14
        and r11, r7, r14
        andeq r14, r14, r14, \rnd #1
        add r8, r8, r10
        add r9, r9, r11
        addeq r8, r8, r14
        addeq r9, r9, r14
        ldr r14, =0xfcfcfcfc >> 2
        and r4, r14, r4, lsr #2
        and r5, r14, r5, lsr #2
        and r6, r14, r6, lsr #2
        and r7, r14, r7, lsr #2
        add r10, r4, r6
        add r11, r5, r7
        subs r3, r3, #1
.endm
571
@ Full xy half-pel loop body for one alignment case.  Each iteration
@ combines the partial sums of two vertically adjacent lines from
@ RND_XY2_IT, giving per byte:
@   dst = (a0 + a1 + b0 + b1 + rnd) >> 2
@ computed as (low-2-bit sums >> 2, masked to 4 bits) + high-6-bit sums.
@ The previous line's l1/h1 are carried across iterations on the stack.
@ Loop control: r3 was decremented by each RND_XY2_IT; "bge" keeps going
@ while r3 >= 0, so exactly h output lines are stored.  Ends the function
@ (pops r4-r11 and returns).
.macro RND_XY2_EXPAND align, rnd
        RND_XY2_IT \align, \rnd
6:      stmfd sp!, {r8-r11}     @ save previous line's partial sums
        RND_XY2_IT \align, \rnd
        ldmfd sp!, {r4-r7}      @ r4,r5 = prev l1; r6,r7 = prev h1
        add r4, r4, r8          @ combined l1 (fits in 4 bits per byte)
        add r5, r5, r9
        add r6, r6, r10         @ combined h1
        add r7, r7, r11
        ldr r14, =0x0f0f0f0f
        and r4, r14, r4, lsr #2 @ carry low sums into result lanes
        and r5, r14, r5, lsr #2
        add r4, r4, r6
        add r5, r5, r7
        stmia r0, {r4-r5}
        add r0, r0, r2
        bge 6b
        ldmfd sp!, {r4-r11,pc}
.endm
591
        .align 5
function put_pixels8_xy2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        @ 2x2 half-pel average with rounding:
        @   dst = (p[x,y] + p[x+1,y] + p[x,y+1] + p[x+1,y+1] + 2) >> 2
        @ "lsl" selects the 0x02020202 rounding constant in RND_XY2_IT.
        pld [r1]
        stmfd sp!, {r4-r11,lr} @ R14 is also called LR
        JMP_ALIGN r5, r1, 2f, 3f, 4f
1:
        RND_XY2_EXPAND 0, lsl

        .align 5
2:
        RND_XY2_EXPAND 1, lsl

        .align 5
3:
        RND_XY2_EXPAND 2, lsl

        .align 5
4:
        RND_XY2_EXPAND 3, lsl
        .endfunc
614
        .align 5
function put_no_rnd_pixels8_xy2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        @ 2x2 half-pel average without rounding:
        @   dst = (p[x,y] + p[x+1,y] + p[x,y+1] + p[x+1,y+1] + 1) >> 2
        @ "lsr" selects the 0x01010101 constant in RND_XY2_IT.
        pld [r1]
        stmfd sp!, {r4-r11,lr} @ R14 is also called LR
        JMP_ALIGN r5, r1, 2f, 3f, 4f
1:
        RND_XY2_EXPAND 0, lsr

        .align 5
2:
        RND_XY2_EXPAND 1, lsr

        .align 5
3:
        RND_XY2_EXPAND 2, lsr

        .align 5
4:
        RND_XY2_EXPAND 3, lsr
        .endfunc
637
        .align 5
@ void ff_add_pixels_clamped_ARM(int16_t *block, uint8_t *dest, int stride)
@ Adds an 8x8 block of int16 IDCT coefficients to the 8x8 byte block at
@ dest, clamping each result to [0,255].  Processes 8 rows (r10 counter),
@ 4 bytes of dest per word, 2 coefficients per group.
@ Clamp trick: after r6 = dest_byte + coeff, bit 8 (0x100) is set iff the
@ sum left [0,255] (assumes the sum stays within roughly [-256,511] --
@ NOTE(review): relies on IDCT output range, confirm).  The clamped value
@ is then (~coeff) >> 24: coefficient negative => top byte of ~coeff is
@ 0x00 (clamp low), positive => 0xFF (clamp high).
@ "[A]"..."[F]" mark original positions of loads/adds hoisted into load
@ delay slots for scheduling.
function ff_add_pixels_clamped_ARM, export=1
        push {r4-r10}
        mov r10, #8             /* 8 rows */
1:
        ldr r4, [r1] /* load dest */
        /* block[0] and block[1]*/
        ldrsh r5, [r0]
        ldrsh r7, [r0, #2]
        and r6, r4, #0xFF
        and r8, r4, #0xFF00
        add r6, r5, r6
        add r8, r7, r8, lsr #8
        mvn r5, r5              /* ~coeff, for the clamp trick */
        mvn r7, r7
        tst r6, #0x100          /* out of [0,255]? */
        movne r6, r5, lsr #24   /* clamp: 0x00 or 0xFF */
        tst r8, #0x100
        movne r8, r7, lsr #24
        mov r9, r6
        ldrsh r5, [r0, #4] /* moved from [A] */
        orr r9, r9, r8, lsl #8
        /* block[2] and block[3] */
        /* [A] */
        ldrsh r7, [r0, #6]
        and r6, r4, #0xFF0000
        and r8, r4, #0xFF000000
        add r6, r5, r6, lsr #16
        add r8, r7, r8, lsr #24
        mvn r5, r5
        mvn r7, r7
        tst r6, #0x100
        movne r6, r5, lsr #24
        tst r8, #0x100
        movne r8, r7, lsr #24
        orr r9, r9, r6, lsl #16
        ldr r4, [r1, #4] /* moved from [B] */
        orr r9, r9, r8, lsl #24
        /* store dest */
        ldrsh r5, [r0, #8] /* moved from [C] */
        str r9, [r1]

        /* load dest */
        /* [B] */
        /* block[4] and block[5] */
        /* [C] */
        ldrsh r7, [r0, #10]
        and r6, r4, #0xFF
        and r8, r4, #0xFF00
        add r6, r5, r6
        add r8, r7, r8, lsr #8
        mvn r5, r5
        mvn r7, r7
        tst r6, #0x100
        movne r6, r5, lsr #24
        tst r8, #0x100
        movne r8, r7, lsr #24
        mov r9, r6
        ldrsh r5, [r0, #12] /* moved from [D] */
        orr r9, r9, r8, lsl #8
        /* block[6] and block[7] */
        /* [D] */
        ldrsh r7, [r0, #14]
        and r6, r4, #0xFF0000
        and r8, r4, #0xFF000000
        add r6, r5, r6, lsr #16
        add r8, r7, r8, lsr #24
        mvn r5, r5
        mvn r7, r7
        tst r6, #0x100
        movne r6, r5, lsr #24
        tst r8, #0x100
        movne r8, r7, lsr #24
        orr r9, r9, r6, lsl #16
        add r0, r0, #16 /* moved from [E] */
        orr r9, r9, r8, lsl #24
        subs r10, r10, #1 /* moved from [F] */
        /* store dest */
        str r9, [r1, #4]

        /* [E] */
        /* [F] */
        add r1, r1, r2          /* next dest row */
        bne 1b

        pop {r4-r10}
        bx lr
        .endfunc