9425d33286c77b19848049f3acaac2f6fb18c607
[libav.git] / libavcodec / arm / dsputil_arm_s.S
1 @
2 @ ARMv4 optimized DSP utils
3 @ Copyright (c) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
4 @
5 @ This file is part of FFmpeg.
6 @
7 @ FFmpeg is free software; you can redistribute it and/or
8 @ modify it under the terms of the GNU Lesser General Public
9 @ License as published by the Free Software Foundation; either
10 @ version 2.1 of the License, or (at your option) any later version.
11 @
12 @ FFmpeg is distributed in the hope that it will be useful,
13 @ but WITHOUT ANY WARRANTY; without even the implied warranty of
14 @ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 @ Lesser General Public License for more details.
16 @
17 @ You should have received a copy of the GNU Lesser General Public
18 @ License along with FFmpeg; if not, write to the Free Software
19 @ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 @
21
22 #include "config.h"
23 #include "asm.S"
24
@ preserve8 is a macro supplied by asm.S.
@ NOTE(review): presumably it tags the object as keeping the stack 8-byte
@ aligned -- confirm against asm.S.
        preserve8

@ Fallback for targets whose configuration reports no PLD instruction:
@ pld becomes an empty macro, so every prefetch hint in this file
@ assembles to nothing.
#if !HAVE_PLD
        .macro pld addr
        .endm
#endif

#if HAVE_ARMV5TE
@ ff_prefetch_arm: issue one pld hint per row.
@   r0 = address to prefetch; advanced by r1 after every hint
@   r1 = byte step between rows
@   r2 = number of rows; must be >= 1 because the counter is decremented
@        before it is tested (0 would wrap around)
@ NOTE(review): C prototype is not visible here -- confirm argument types.
function ff_prefetch_arm, export=1
1:
        subs            r2, r2, #1              @ rows left; Z set on the last one
        pld             [r0]
        add             r0, r0, r1              @ next row (flags untouched)
        bne             1b
        bx              lr
        .endfunc
#endif

@ ADJ_ALIGN_QUADWORD_D nbytes, out0..out3, in0..in4
@ Funnel-shift five source words into four destination words:
@   outN = (inN >> 8*nbytes) | (inN+1 << (32 - 8*nbytes))
@ i.e. pick the 16 bytes that start nbytes bytes into the five-word group
@ (low-order bytes are dropped first, so this assumes little-endian data).
@ Used in this file with nbytes = 1, 2, 3.  All four right shifts are done
@ before any merge, so the sources must not overlap the destinations in a
@ way that this order would break.
        .macro ADJ_ALIGN_QUADWORD_D nbytes, out0, out1, out2, out3, in0, in1, in2, in3, in4
        mov             \out0, \in0, lsr #(8 * \nbytes)
        mov             \out1, \in1, lsr #(8 * \nbytes)
        mov             \out2, \in2, lsr #(8 * \nbytes)
        mov             \out3, \in3, lsr #(8 * \nbytes)
        orr             \out0, \out0, \in1, lsl #(32 - 8 * \nbytes)
        orr             \out1, \out1, \in2, lsl #(32 - 8 * \nbytes)
        orr             \out2, \out2, \in3, lsl #(32 - 8 * \nbytes)
        orr             \out3, \out3, \in4, lsl #(32 - 8 * \nbytes)
        .endm
@ ADJ_ALIGN_DOUBLEWORD nbytes, w0, w1, w2
@ In-place variant: on exit w0:w1 hold the 8 bytes that start nbytes bytes
@ into the three-word group w0:w1:w2.  w2 is only read.
@ The order matters: w1 is merged into w0 before w1 itself is shifted.
@ Used in this file with nbytes = 1, 2, 3.
        .macro ADJ_ALIGN_DOUBLEWORD nbytes, w0, w1, w2
        mov             \w0, \w0, lsr #(8 * \nbytes)
        orr             \w0, \w0, \w1, lsl #(32 - 8 * \nbytes)
        mov             \w1, \w1, lsr #(8 * \nbytes)
        orr             \w1, \w1, \w2, lsl #(32 - 8 * \nbytes)
        .endm
@ ADJ_ALIGN_DOUBLEWORD_D nbytes, out0, out1, in0, in1, in2
@ Like ADJ_ALIGN_DOUBLEWORD but writes to separate destination registers:
@   out0 = (in0 >> 8*nbytes) | (in1 << (32 - 8*nbytes))
@   out1 = (in1 >> 8*nbytes) | (in2 << (32 - 8*nbytes))
@ in0 is consumed by the very first instruction, which is what allows
@ out1 to be the same register as in0 (RND_XY2_IT relies on this).
@ Do not reorder the instructions.
        .macro ADJ_ALIGN_DOUBLEWORD_D nbytes, out0, out1, in0, in1, in2
        mov             \out0, \in0, lsr #(8 * \nbytes)
        mov             \out1, \in1, lsr #(8 * \nbytes)
        orr             \out0, \out0, \in1, lsl #(32 - (8 * \nbytes))
        orr             \out1, \out1, \in2, lsl #(32 - (8 * \nbytes))
        .endm

@ RND_AVG32 out0, out1, p0, p1, q0, q1, mask
@ Byte-wise average of two word pairs, halves rounded up:
@   out = (p | q) - (((p ^ q) & mask) >> 1)
@ mask must hold 0xFEFEFEFE; it clears the low bit of every byte so the
@ shift cannot leak a bit into the neighbouring byte.
@ Side effect: p0 and p1 are overwritten with p | q.  q0/q1 are preserved.
        .macro RND_AVG32 out0, out1, p0, p1, q0, q1, mask
        eor             \out0, \p0, \q0
        eor             \out1, \p1, \q1
        orr             \p0, \p0, \q0
        orr             \p1, \p1, \q1
        and             \out0, \out0, \mask
        and             \out1, \out1, \mask
        sub             \out0, \p0, \out0, lsr #1
        sub             \out1, \p1, \out1, lsr #1
        .endm

@ NO_RND_AVG32 out0, out1, p0, p1, q0, q1, mask
@ Byte-wise average of two word pairs, halves rounded down:
@   out = (p & q) + (((p ^ q) & mask) >> 1)
@ (note the addition -- the code below uses add, not sub)
@ mask must hold 0xFEFEFEFE; it clears the low bit of every byte so the
@ shift cannot leak a bit into the neighbouring byte.
@ Side effect: p0 and p1 are overwritten with p & q.  q0/q1 are preserved.
        .macro NO_RND_AVG32 out0, out1, p0, p1, q0, q1, mask
        eor             \out0, \p0, \q0
        eor             \out1, \p1, \q1
        and             \p0, \p0, \q0
        and             \p1, \p1, \q1
        and             \out0, \out0, \mask
        and             \out1, \out1, \mask
        add             \out0, \p0, \out0, lsr #1
        add             \out1, \p1, \out1, lsr #1
        .endm

@ ----------------------------------------------------------------
@ void put_pixels16_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h)
@ Copy h rows of 16 bytes from pixels to block.
@   r0 = block     destination, word aligned; advanced by line_size per row
@   r1 = pixels    source, any byte alignment; advanced by line_size per row
@   r2 = line_size
@   r3 = h         must be >= 1 (decremented before it is tested)
@ The source address is rounded down to a word boundary and one of four
@ loops is chosen from the low two address bits; the misaligned loops read
@ five words per row and funnel-shift them into place.
@ Alignment: on ARM the .align operand is a power of two, so 5 requests
@ 32-byte alignment (8 would pad every loop to a 256-byte boundary).
        .align 5
function put_pixels16_arm, export=1
        pld             [r1]
        stmfd           sp!, {r4-r11, lr}
        adr             r5, 5f                  @ r5 = jump table
        ands            r4, r1, #3              @ r4 = misalignment 0..3; Z if aligned
        bic             r1, r1, #3              @ round source down to a word
        add             r5, r5, r4, lsl #2      @ table slot for this misalignment
        ldrne           pc, [r5]                @ misaligned: go to 2:, 3: or 4:
1:      @ source is word aligned
        ldmia           r1, {r4-r7}
        add             r1, r1, r2
        stmia           r0, {r4-r7}
        pld             [r1]
        subs            r3, r3, #1
        add             r0, r0, r2
        bne             1b
        ldmfd           sp!, {r4-r11, pc}
        .align 5
2:      @ source is 1 byte past a word boundary
        ldmia           r1, {r4-r8}
        add             r1, r1, r2
        ADJ_ALIGN_QUADWORD_D 1, r9, r10, r11, r12, r4, r5, r6, r7, r8
        pld             [r1]
        subs            r3, r3, #1
        stmia           r0, {r9-r12}
        add             r0, r0, r2
        bne             2b
        ldmfd           sp!, {r4-r11, pc}
        .align 5
3:      @ source is 2 bytes past a word boundary
        ldmia           r1, {r4-r8}
        add             r1, r1, r2
        ADJ_ALIGN_QUADWORD_D 2, r9, r10, r11, r12, r4, r5, r6, r7, r8
        pld             [r1]
        subs            r3, r3, #1
        stmia           r0, {r9-r12}
        add             r0, r0, r2
        bne             3b
        ldmfd           sp!, {r4-r11, pc}
        .align 5
4:      @ source is 3 bytes past a word boundary
        ldmia           r1, {r4-r8}
        add             r1, r1, r2
        ADJ_ALIGN_QUADWORD_D 3, r9, r10, r11, r12, r4, r5, r6, r7, r8
        pld             [r1]
        subs            r3, r3, #1
        stmia           r0, {r9-r12}
        add             r0, r0, r2
        bne             4b
        ldmfd           sp!, {r4-r11, pc}
        .align 5
5:      @ jump table indexed by misalignment (slot 0 is never loaded into pc)
        .word           1b
        .word           2b
        .word           3b
        .word           4b
        .endfunc

@ ----------------------------------------------------------------
@ void put_pixels8_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h)
@ Copy h rows of 8 bytes from pixels to block.
@   r0 = block     destination, word aligned; advanced by line_size per row
@   r1 = pixels    source, any byte alignment; advanced by line_size per row
@   r2 = line_size
@   r3 = h         must be >= 1 (decremented before it is tested)
@ The source address is rounded down to a word boundary and one of four
@ loops is chosen from the low two address bits; the misaligned loops read
@ three words per row (r12 is the scratch third word) and shift in place.
@ Alignment: on ARM the .align operand is a power of two, so 5 requests
@ 32-byte alignment (8 would pad every loop to a 256-byte boundary).
        .align 5
function put_pixels8_arm, export=1
        pld             [r1]
        stmfd           sp!, {r4-r5, lr}
        adr             r5, 5f                  @ r5 = jump table
        ands            r4, r1, #3              @ r4 = misalignment 0..3; Z if aligned
        bic             r1, r1, #3              @ round source down to a word
        add             r5, r5, r4, lsl #2      @ table slot for this misalignment
        ldrne           pc, [r5]                @ misaligned: go to 2:, 3: or 4:
1:      @ source is word aligned
        ldmia           r1, {r4-r5}
        add             r1, r1, r2
        subs            r3, r3, #1
        pld             [r1]
        stmia           r0, {r4-r5}
        add             r0, r0, r2
        bne             1b
        ldmfd           sp!, {r4-r5, pc}
        .align 5
2:      @ source is 1 byte past a word boundary
        ldmia           r1, {r4-r5, r12}
        add             r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r12
        pld             [r1]
        subs            r3, r3, #1
        stmia           r0, {r4-r5}
        add             r0, r0, r2
        bne             2b
        ldmfd           sp!, {r4-r5, pc}
        .align 5
3:      @ source is 2 bytes past a word boundary
        ldmia           r1, {r4-r5, r12}
        add             r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r12
        pld             [r1]
        subs            r3, r3, #1
        stmia           r0, {r4-r5}
        add             r0, r0, r2
        bne             3b
        ldmfd           sp!, {r4-r5, pc}
        .align 5
4:      @ source is 3 bytes past a word boundary
        ldmia           r1, {r4-r5, r12}
        add             r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r12
        pld             [r1]
        subs            r3, r3, #1
        stmia           r0, {r4-r5}
        add             r0, r0, r2
        bne             4b
        ldmfd           sp!, {r4-r5, pc}
        .align 5
5:      @ jump table indexed by misalignment (slot 0 is never loaded into pc)
        .word           1b
        .word           2b
        .word           3b
        .word           4b
        .endfunc

@ ----------------------------------------------------------------
@ void put_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h)
@ For each of h rows, store the rounded-up byte average of the 8 source
@ pixels and their right-hand neighbours (pixels[x] and pixels[x+1]).
@   r0 = block     destination, word aligned; advanced by line_size per row
@   r1 = pixels    source, any byte alignment; advanced by line_size per row
@   r2 = line_size
@   r3 = h         must be >= 1 (decremented before it is tested)
@   r12            0xFEFEFEFE, the mask RND_AVG32 expects
@ Three words are read per row from the word-aligned source address and
@ the two 8-byte operands are cut out of them by funnel shifts.
@ Alignment: on ARM the .align operand is a power of two, so 5 requests
@ 32-byte alignment (8 would pad every loop to a 256-byte boundary).
        .align 5
function put_pixels8_x2_arm, export=1
        pld             [r1]
        stmfd           sp!, {r4-r10, lr}
        adr             r5, 5f                  @ r5 = constant / jump table
        ands            r4, r1, #3              @ r4 = misalignment 0..3; Z if aligned
        ldr             r12, [r5]               @ table slot 0 holds the mask
        add             r5, r5, r4, lsl #2
        bic             r1, r1, #3              @ round source down to a word
        ldrne           pc, [r5]                @ misaligned: go to 2:, 3: or 4:
1:      @ misalignment 0: operands at byte offsets 0 and 1
        ldmia           r1, {r4-r5, r10}
        add             r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
        pld             [r1]
        RND_AVG32       r8, r9, r4, r5, r6, r7, r12
        subs            r3, r3, #1
        stmia           r0, {r8-r9}
        add             r0, r0, r2
        bne             1b
        ldmfd           sp!, {r4-r10, pc}
        .align 5
2:      @ misalignment 1: operands at byte offsets 1 and 2
        ldmia           r1, {r4-r5, r10}
        add             r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
        ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10
        pld             [r1]
        RND_AVG32       r4, r5, r6, r7, r8, r9, r12
        subs            r3, r3, #1
        stmia           r0, {r4-r5}
        add             r0, r0, r2
        bne             2b
        ldmfd           sp!, {r4-r10, pc}
        .align 5
3:      @ misalignment 2: operands at byte offsets 2 and 3
        ldmia           r1, {r4-r5, r10}
        add             r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10
        ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10
        pld             [r1]
        RND_AVG32       r4, r5, r6, r7, r8, r9, r12
        subs            r3, r3, #1
        stmia           r0, {r4-r5}
        add             r0, r0, r2
        bne             3b
        ldmfd           sp!, {r4-r10, pc}
        .align 5
4:      @ misalignment 3: operands at byte offsets 3 and 4 (4 = words 2 and 3 as loaded)
        ldmia           r1, {r4-r5, r10}
        add             r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10
        pld             [r1]
        RND_AVG32       r8, r9, r6, r7, r5, r10, r12
        subs            r3, r3, #1
        stmia           r0, {r8-r9}
        add             r0, r0, r2
        bne             4b
        ldmfd           sp!, {r4-r10, pc}       @ return: saved lr is popped into pc
        .align 5
5:      @ slot 0 = averaging mask, slots 1..3 = entry points by misalignment
        .word           0xFEFEFEFE
        .word           2b
        .word           3b
        .word           4b
        .endfunc

@ void put_no_rnd_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h)
@ For each of h rows, store the rounded-down byte average of the 8 source
@ pixels and their right-hand neighbours (pixels[x] and pixels[x+1]).
@   r0 = block     destination, word aligned; advanced by line_size per row
@   r1 = pixels    source, any byte alignment; advanced by line_size per row
@   r2 = line_size
@   r3 = h         must be >= 1 (decremented before it is tested)
@   r12            0xFEFEFEFE, the mask NO_RND_AVG32 expects
@ Three words are read per row from the word-aligned source address and
@ the two 8-byte operands are cut out of them by funnel shifts.
@ Alignment: on ARM the .align operand is a power of two, so 5 requests
@ 32-byte alignment (8 would pad every loop to a 256-byte boundary).
        .align 5
function put_no_rnd_pixels8_x2_arm, export=1
        pld             [r1]
        stmfd           sp!, {r4-r10, lr}
        adr             r5, 5f                  @ r5 = constant / jump table
        ands            r4, r1, #3              @ r4 = misalignment 0..3; Z if aligned
        ldr             r12, [r5]               @ table slot 0 holds the mask
        add             r5, r5, r4, lsl #2
        bic             r1, r1, #3              @ round source down to a word
        ldrne           pc, [r5]                @ misaligned: go to 2:, 3: or 4:
1:      @ misalignment 0: operands at byte offsets 0 and 1
        ldmia           r1, {r4-r5, r10}
        add             r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
        pld             [r1]
        NO_RND_AVG32    r8, r9, r4, r5, r6, r7, r12
        subs            r3, r3, #1
        stmia           r0, {r8-r9}
        add             r0, r0, r2
        bne             1b
        ldmfd           sp!, {r4-r10, pc}
        .align 5
2:      @ misalignment 1: operands at byte offsets 1 and 2
        ldmia           r1, {r4-r5, r10}
        add             r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
        ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10
        pld             [r1]
        NO_RND_AVG32    r4, r5, r6, r7, r8, r9, r12
        subs            r3, r3, #1
        stmia           r0, {r4-r5}
        add             r0, r0, r2
        bne             2b
        ldmfd           sp!, {r4-r10, pc}
        .align 5
3:      @ misalignment 2: operands at byte offsets 2 and 3
        ldmia           r1, {r4-r5, r10}
        add             r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10
        ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10
        pld             [r1]
        NO_RND_AVG32    r4, r5, r6, r7, r8, r9, r12
        subs            r3, r3, #1
        stmia           r0, {r4-r5}
        add             r0, r0, r2
        bne             3b
        ldmfd           sp!, {r4-r10, pc}
        .align 5
4:      @ misalignment 3: operands at byte offsets 3 and 4 (4 = words 2 and 3 as loaded)
        ldmia           r1, {r4-r5, r10}
        add             r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10
        pld             [r1]
        NO_RND_AVG32    r8, r9, r6, r7, r5, r10, r12
        subs            r3, r3, #1
        stmia           r0, {r8-r9}
        add             r0, r0, r2
        bne             4b
        ldmfd           sp!, {r4-r10, pc}       @ return: saved lr is popped into pc
        .align 5
5:      @ slot 0 = averaging mask, slots 1..3 = entry points by misalignment
        .word           0xFEFEFEFE
        .word           2b
        .word           3b
        .word           4b
        .endfunc


@ ----------------------------------------------------------------
@ void put_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h)
@ For each output row, store the rounded-up byte average of the 8 source
@ pixels and the 8 pixels directly below them (next source row).
@   r0 = block     destination, word aligned; advanced by line_size per row
@   r1 = pixels    source, any byte alignment; advanced by line_size per row
@   r2 = line_size
@   r3 = h         halved on entry: every loop pass emits two rows, so h is
@                  expected to be even and >= 2 (an odd row would be dropped,
@                  and a pass count of 0 would wrap around)
@   r12            0xFEFEFEFE, the mask RND_AVG32 expects
@ Each source row is loaded once and reused as the upper operand of the
@ next output row; h + 1 source rows are read in total.
@ Alignment: on ARM the .align operand is a power of two, so 5 requests
@ 32-byte alignment (8 would pad every loop to a 256-byte boundary).
        .align 5
function put_pixels8_y2_arm, export=1
        pld             [r1]
        stmfd           sp!, {r4-r11, lr}
        adr             r5, 5f                  @ r5 = constant / jump table
        ands            r4, r1, #3              @ r4 = misalignment 0..3; Z if aligned
        mov             r3, r3, lsr #1          @ passes = h / 2 (flags untouched)
        ldr             r12, [r5]               @ table slot 0 holds the mask
        add             r5, r5, r4, lsl #2
        bic             r1, r1, #3              @ round source down to a word
        ldrne           pc, [r5]                @ misaligned: go to 2:, 3: or 4:
1:      @ source is word aligned
        ldmia           r1, {r4-r5}             @ row 0
        add             r1, r1, r2
6:
        ldmia           r1, {r6-r7}             @ odd row
        add             r1, r1, r2
        pld             [r1]
        RND_AVG32       r8, r9, r4, r5, r6, r7, r12
        ldmia           r1, {r4-r5}             @ even row, kept for the next pass
        add             r1, r1, r2
        stmia           r0, {r8-r9}
        add             r0, r0, r2
        pld             [r1]
        RND_AVG32       r8, r9, r6, r7, r4, r5, r12
        subs            r3, r3, #1
        stmia           r0, {r8-r9}
        add             r0, r0, r2
        bne             6b
        ldmfd           sp!, {r4-r11, pc}
        .align 5
2:      @ source is 1 byte past a word boundary
        ldmia           r1, {r4-r6}
        add             r1, r1, r2
        pld             [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
6:
        ldmia           r1, {r7-r9}
        add             r1, r1, r2
        pld             [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r7, r8, r9
        RND_AVG32       r10, r11, r4, r5, r7, r8, r12
        stmia           r0, {r10-r11}
        add             r0, r0, r2
        ldmia           r1, {r4-r6}
        add             r1, r1, r2
        pld             [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
        subs            r3, r3, #1              @ flags survive RND_AVG32 (no S suffixes)
        RND_AVG32       r10, r11, r7, r8, r4, r5, r12
        stmia           r0, {r10-r11}
        add             r0, r0, r2
        bne             6b
        ldmfd           sp!, {r4-r11, pc}
        .align 5
3:      @ source is 2 bytes past a word boundary
        ldmia           r1, {r4-r6}
        add             r1, r1, r2
        pld             [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
6:
        ldmia           r1, {r7-r9}
        add             r1, r1, r2
        pld             [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r7, r8, r9
        RND_AVG32       r10, r11, r4, r5, r7, r8, r12
        stmia           r0, {r10-r11}
        add             r0, r0, r2
        ldmia           r1, {r4-r6}
        add             r1, r1, r2
        pld             [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
        subs            r3, r3, #1
        RND_AVG32       r10, r11, r7, r8, r4, r5, r12
        stmia           r0, {r10-r11}
        add             r0, r0, r2
        bne             6b
        ldmfd           sp!, {r4-r11, pc}
        .align 5
4:      @ source is 3 bytes past a word boundary
        ldmia           r1, {r4-r6}
        add             r1, r1, r2
        pld             [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
6:
        ldmia           r1, {r7-r9}
        add             r1, r1, r2
        pld             [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r7, r8, r9
        RND_AVG32       r10, r11, r4, r5, r7, r8, r12
        stmia           r0, {r10-r11}
        add             r0, r0, r2
        ldmia           r1, {r4-r6}
        add             r1, r1, r2
        pld             [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
        subs            r3, r3, #1
        RND_AVG32       r10, r11, r7, r8, r4, r5, r12
        stmia           r0, {r10-r11}
        add             r0, r0, r2
        bne             6b
        ldmfd           sp!, {r4-r11, pc}

        .align 5
5:      @ slot 0 = averaging mask, slots 1..3 = entry points by misalignment
        .word           0xFEFEFEFE
        .word           2b
        .word           3b
        .word           4b
        .endfunc

@ void put_no_rnd_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h)
@ For each output row, store the rounded-down byte average of the 8 source
@ pixels and the 8 pixels directly below them (next source row).
@   r0 = block     destination, word aligned; advanced by line_size per row
@   r1 = pixels    source, any byte alignment; advanced by line_size per row
@   r2 = line_size
@   r3 = h         halved on entry: every loop pass emits two rows, so h is
@                  expected to be even and >= 2 (an odd row would be dropped,
@                  and a pass count of 0 would wrap around)
@   r12            0xFEFEFEFE, the mask NO_RND_AVG32 expects
@ Each source row is loaded once and reused as the upper operand of the
@ next output row; h + 1 source rows are read in total.
@ Alignment: on ARM the .align operand is a power of two, so 5 requests
@ 32-byte alignment (8 would pad every loop to a 256-byte boundary).
        .align 5
function put_no_rnd_pixels8_y2_arm, export=1
        pld             [r1]
        stmfd           sp!, {r4-r11, lr}
        adr             r5, 5f                  @ r5 = constant / jump table
        ands            r4, r1, #3              @ r4 = misalignment 0..3; Z if aligned
        mov             r3, r3, lsr #1          @ passes = h / 2 (flags untouched)
        ldr             r12, [r5]               @ table slot 0 holds the mask
        add             r5, r5, r4, lsl #2
        bic             r1, r1, #3              @ round source down to a word
        ldrne           pc, [r5]                @ misaligned: go to 2:, 3: or 4:
1:      @ source is word aligned
        ldmia           r1, {r4-r5}             @ row 0
        add             r1, r1, r2
6:
        ldmia           r1, {r6-r7}             @ odd row
        add             r1, r1, r2
        pld             [r1]
        NO_RND_AVG32    r8, r9, r4, r5, r6, r7, r12
        ldmia           r1, {r4-r5}             @ even row, kept for the next pass
        add             r1, r1, r2
        stmia           r0, {r8-r9}
        add             r0, r0, r2
        pld             [r1]
        NO_RND_AVG32    r8, r9, r6, r7, r4, r5, r12
        subs            r3, r3, #1
        stmia           r0, {r8-r9}
        add             r0, r0, r2
        bne             6b
        ldmfd           sp!, {r4-r11, pc}
        .align 5
2:      @ source is 1 byte past a word boundary
        ldmia           r1, {r4-r6}
        add             r1, r1, r2
        pld             [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
6:
        ldmia           r1, {r7-r9}
        add             r1, r1, r2
        pld             [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r7, r8, r9
        NO_RND_AVG32    r10, r11, r4, r5, r7, r8, r12
        stmia           r0, {r10-r11}
        add             r0, r0, r2
        ldmia           r1, {r4-r6}
        add             r1, r1, r2
        pld             [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
        subs            r3, r3, #1              @ flags survive NO_RND_AVG32 (no S suffixes)
        NO_RND_AVG32    r10, r11, r7, r8, r4, r5, r12
        stmia           r0, {r10-r11}
        add             r0, r0, r2
        bne             6b
        ldmfd           sp!, {r4-r11, pc}
        .align 5
3:      @ source is 2 bytes past a word boundary
        ldmia           r1, {r4-r6}
        add             r1, r1, r2
        pld             [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
6:
        ldmia           r1, {r7-r9}
        add             r1, r1, r2
        pld             [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r7, r8, r9
        NO_RND_AVG32    r10, r11, r4, r5, r7, r8, r12
        stmia           r0, {r10-r11}
        add             r0, r0, r2
        ldmia           r1, {r4-r6}
        add             r1, r1, r2
        pld             [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
        subs            r3, r3, #1
        NO_RND_AVG32    r10, r11, r7, r8, r4, r5, r12
        stmia           r0, {r10-r11}
        add             r0, r0, r2
        bne             6b
        ldmfd           sp!, {r4-r11, pc}
        .align 5
4:      @ source is 3 bytes past a word boundary
        ldmia           r1, {r4-r6}
        add             r1, r1, r2
        pld             [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
6:
        ldmia           r1, {r7-r9}
        add             r1, r1, r2
        pld             [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r7, r8, r9
        NO_RND_AVG32    r10, r11, r4, r5, r7, r8, r12
        stmia           r0, {r10-r11}
        add             r0, r0, r2
        ldmia           r1, {r4-r6}
        add             r1, r1, r2
        pld             [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
        subs            r3, r3, #1
        NO_RND_AVG32    r10, r11, r7, r8, r4, r5, r12
        stmia           r0, {r10-r11}
        add             r0, r0, r2
        bne             6b
        ldmfd           sp!, {r4-r11, pc}
        .align 5
5:      @ slot 0 = averaging mask, slots 1..3 = entry points by misalignment
        .word           0xFEFEFEFE
        .word           2b
        .word           3b
        .word           4b
        .endfunc

@ ----------------------------------------------------------------
@ RND_XY2_IT offs
@ One source row of the 2x2 average.  offs (0..3) is the byte misalignment
@ of the source; r1 must already be rounded down to a word boundary.
@ With a = pixels[x] and b = pixels[x+1] (8 bytes each, order irrelevant
@ because only their sum is used) the macro leaves, per byte lane:
@   r8:r9   = (a & 0x03) + (b & 0x03), plus the rounding constant from
@             [r12, #16] when the row counter r3 is even on entry
@   r10:r11 = (a >> 2) + (b >> 2)
@ Register contract, as used below:
@   r1  source row pointer, advanced by r2
@   r3  row counter, decremented LAST so the flags of that subs are still
@       live when the macro ends
@   r12 constant table: #0 = 0x03030303, #16 = rounding constant,
@       #20 = 0xFCFCFCFC >> 2
@   r4-r11, r14 are clobbered
        .macro RND_XY2_IT offs
        .if \offs == 0
        ldmia           r1, {r6-r8}
        add             r1, r1, r2
        pld             [r1]
        ADJ_ALIGN_DOUBLEWORD_D 1, r4, r5, r6, r7, r8
        .elseif \offs == 3
        ldmia           r1, {r5-r7}
        add             r1, r1, r2
        pld             [r1]
        @ second destination (r5) is also the first source; see ADJ_ALIGN_DOUBLEWORD_D
        ADJ_ALIGN_DOUBLEWORD_D 3, r4, r5, r5, r6, r7
        .else
        ldmia           r1, {r8-r10}
        add             r1, r1, r2
        pld             [r1]
        .if \offs == 1
        ADJ_ALIGN_DOUBLEWORD_D 1, r4, r5, r8, r9, r10
        ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r8, r9, r10
        .elseif \offs == 2
        ADJ_ALIGN_DOUBLEWORD_D 2, r4, r5, r8, r9, r10
        ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r8, r9, r10
        .endif
        .endif
        @ low two bits of every byte
        ldr             r14, [r12, #0]          @ 0x03030303
        tst             r3, #1                  @ EQ when the row counter is even
        and             r8, r4, r14
        and             r9, r5, r14
        and             r10, r6, r14
        and             r11, r7, r14
        ldreq           r14, [r12, #16]         @ 0x02020202 or 0x01010101
        add             r8, r8, r10
        add             r9, r9, r11
        addeq           r8, r8, r14             @ rounding goes in on even rows only
        addeq           r9, r9, r14
        @ high six bits of every byte, pre-shifted down by two
        ldr             r14, [r12, #20]         @ 0xFCFCFCFC >> 2
        and             r4, r14, r4, lsr #2
        and             r5, r14, r5, lsr #2
        and             r6, r14, r6, lsr #2
        and             r7, r14, r7, lsr #2
        add             r10, r4, r6
        add             r11, r5, r7
        subs            r3, r3, #1
        .endm

@ RND_XY2_EXPAND offs
@ Complete loop body and function exit of the xy2 routines for one source
@ misalignment.  The first RND_XY2_IT primes the pipeline with row 0; each
@ pass of loop 6 then processes one more source row, adds its partial sums
@ to those of the previous row and stores one 8-byte output row:
@   out = ((low_prev + low_cur) >> 2 & 0x0F0F0F0F) + high_prev + high_cur
@ Consecutive rows have counters of opposite parity, so exactly one row of
@ every pair contributes the rounding constant.
@ The loop condition uses the flags left by the subs at the end of
@ RND_XY2_IT; nothing in between sets flags.
@ Expects the caller to have pushed {r4-r11, lr}; the final pop returns.
@   r0  destination pointer, advanced by r2
@   r12 constant table, #24 = 0x0F0F0F0F
        .macro RND_XY2_EXPAND offs
        RND_XY2_IT      \offs
6:
        push            {r8-r11}                @ park the previous row sums
        RND_XY2_IT      \offs
        pop             {r4-r7}                 @ previous row: r4:r5 low, r6:r7 high
        add             r4, r4, r8
        add             r5, r5, r9
        add             r6, r6, r10
        add             r7, r7, r11
        ldr             r14, [r12, #24]         @ 0x0F0F0F0F
        and             r4, r14, r4, lsr #2
        and             r5, r14, r5, lsr #2
        add             r4, r4, r6
        add             r5, r5, r7
        stmia           r0, {r4-r5}
        add             r0, r0, r2
        bge             6b
        pop             {r4-r11, pc}
        .endm

@ void put_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h)
@ For each of h rows, store the byte-wise average of the 2x2 neighbourhood
@ (pixels[x], pixels[x+1] and the same two pixels one row below), with a
@ rounding constant of 2 added before the divide by four.
@   r0 = block     destination, word aligned; advanced by line_size per row
@   r1 = pixels    source, any byte alignment; advanced by line_size per row
@   r2 = line_size
@   r3 = h
@   r12            base of the constant table at 5:, read by RND_XY2_IT and
@                  RND_XY2_EXPAND at offsets 0, 16, 20 and 24
@ All the work, including the function return, is done by RND_XY2_EXPAND,
@ instantiated once per source misalignment.
@ Alignment: on ARM the .align operand is a power of two, so 5 requests
@ 32-byte alignment (8 would pad every variant to a 256-byte boundary).
        .align 5
function put_pixels8_xy2_arm, export=1
        pld             [r1]
        stmfd           sp!, {r4-r11, lr}
        adrl            r12, 5f                 @ r12 = constant / jump table
        ands            r4, r1, #3              @ r4 = misalignment 0..3; Z if aligned
        add             r5, r12, r4, lsl #2
        bic             r1, r1, #3              @ round source down to a word
        ldrne           pc, [r5]                @ misaligned: go to 2:, 3: or 4:
1:
        RND_XY2_EXPAND  0

        .align 5
2:
        RND_XY2_EXPAND  1

        .align 5
3:
        RND_XY2_EXPAND  2

        .align 5
4:
        RND_XY2_EXPAND  3

5:      @ offsets 4..12 double as the jump table for misalignment 1..3
        .word           0x03030303              @ #0  low-bits mask
        .word           2b                      @ #4
        .word           3b                      @ #8
        .word           4b                      @ #12
        .word           0x02020202              @ #16 rounding constant
        .word           0xFCFCFCFC >> 2         @ #20 high-bits mask, pre-shifted
        .word           0x0F0F0F0F              @ #24 mask after the final shift
        .endfunc

@ void put_no_rnd_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h)
@ For each of h rows, store the byte-wise average of the 2x2 neighbourhood
@ (pixels[x], pixels[x+1] and the same two pixels one row below), with a
@ rounding constant of 1 added before the divide by four.
@   r0 = block     destination, word aligned; advanced by line_size per row
@   r1 = pixels    source, any byte alignment; advanced by line_size per row
@   r2 = line_size
@   r3 = h
@   r12            base of the constant table at 5:, read by RND_XY2_IT and
@                  RND_XY2_EXPAND at offsets 0, 16, 20 and 24
@ All the work, including the function return, is done by RND_XY2_EXPAND,
@ instantiated once per source misalignment.
@ Alignment: on ARM the .align operand is a power of two, so 5 requests
@ 32-byte alignment (8 would pad every variant to a 256-byte boundary).
        .align 5
function put_no_rnd_pixels8_xy2_arm, export=1
        pld             [r1]
        stmfd           sp!, {r4-r11, lr}
        adrl            r12, 5f                 @ r12 = constant / jump table
        ands            r4, r1, #3              @ r4 = misalignment 0..3; Z if aligned
        add             r5, r12, r4, lsl #2
        bic             r1, r1, #3              @ round source down to a word
        ldrne           pc, [r5]                @ misaligned: go to 2:, 3: or 4:
1:
        RND_XY2_EXPAND  0

        .align 5
2:
        RND_XY2_EXPAND  1

        .align 5
3:
        RND_XY2_EXPAND  2

        .align 5
4:
        RND_XY2_EXPAND  3

5:      @ offsets 4..12 double as the jump table for misalignment 1..3
        .word           0x03030303              @ #0  low-bits mask
        .word           2b                      @ #4
        .word           3b                      @ #8
        .word           4b                      @ #12
        .word           0x01010101              @ #16 rounding constant
        .word           0xFCFCFCFC >> 2         @ #20 high-bits mask, pre-shifted
        .word           0x0F0F0F0F              @ #24 mask after the final shift
        .endfunc

@ void ff_add_pixels_clamped_ARM(int16_t *block, uint8_t *dest, int stride)
@ Add an 8x8 block of signed 16-bit values to an 8x8 block of bytes,
@ saturating every result to 0..255.
@   r0 = block   64 coefficients, read sequentially (16 bytes per row)
@   r1 = dest    read and written as two 32-bit words per row, advanced
@                by stride after each row
@   r2 = stride
@ Per pixel: sum = dest byte + coefficient.  If bit 8 of sum is set the
@ result is replaced by (~coefficient) >> 24, which is 0xFF for a
@ non-negative coefficient and 0x00 for a negative one.
@ NOTE(review): only bit 8 is tested, so this appears to rely on every sum
@ lying in -256..511 -- confirm that callers guarantee this.
@ NOTE(review): the lowest byte of each word is paired with the first
@ coefficient, i.e. little-endian pixel order is assumed.
@ The loads are hoisted ahead of their use to hide load latency; keep the
@ instruction order as is.
function ff_add_pixels_clamped_ARM, export=1
        stmfd           sp!, {r4-r10}
        mov             r10, #8                 /* rows left */
1:
        ldr             r4, [r1]                /* dest pixels 0..3 */
        /* pixels 0 and 1 */
        ldrsh           r5, [r0]
        ldrsh           r7, [r0, #2]
        and             r6, r4, #0xFF
        and             r8, r4, #0xFF00
        add             r6, r5, r6
        add             r8, r7, r8, lsr #8
        mvn             r5, r5                  /* ~coef: top byte is the clamp value */
        mvn             r7, r7
        tst             r6, #0x100
        movne           r6, r5, lsr #24
        tst             r8, #0x100
        movne           r8, r7, lsr #24
        mov             r9, r6                  /* r9 collects the output word */
        ldrsh           r5, [r0, #4]            /* early load of coefficient 2 */
        orr             r9, r9, r8, lsl #8
        /* pixels 2 and 3 */
        ldrsh           r7, [r0, #6]
        and             r6, r4, #0xFF0000
        and             r8, r4, #0xFF000000
        add             r6, r5, r6, lsr #16
        add             r8, r7, r8, lsr #24
        mvn             r5, r5
        mvn             r7, r7
        tst             r6, #0x100
        movne           r6, r5, lsr #24
        tst             r8, #0x100
        movne           r8, r7, lsr #24
        orr             r9, r9, r6, lsl #16
        ldr             r4, [r1, #4]            /* early load of dest pixels 4..7 */
        orr             r9, r9, r8, lsl #24
        ldrsh           r5, [r0, #8]            /* early load of coefficient 4 */
        str             r9, [r1]                /* store pixels 0..3 */

        /* pixels 4 and 5 */
        ldrsh           r7, [r0, #10]
        and             r6, r4, #0xFF
        and             r8, r4, #0xFF00
        add             r6, r5, r6
        add             r8, r7, r8, lsr #8
        mvn             r5, r5
        mvn             r7, r7
        tst             r6, #0x100
        movne           r6, r5, lsr #24
        tst             r8, #0x100
        movne           r8, r7, lsr #24
        mov             r9, r6
        ldrsh           r5, [r0, #12]           /* early load of coefficient 6 */
        orr             r9, r9, r8, lsl #8
        /* pixels 6 and 7 */
        ldrsh           r7, [r0, #14]
        and             r6, r4, #0xFF0000
        and             r8, r4, #0xFF000000
        add             r6, r5, r6, lsr #16
        add             r8, r7, r8, lsr #24
        mvn             r5, r5
        mvn             r7, r7
        tst             r6, #0x100
        movne           r6, r5, lsr #24
        tst             r8, #0x100
        movne           r8, r7, lsr #24
        orr             r9, r9, r6, lsl #16
        add             r0, r0, #16             /* next row of coefficients */
        orr             r9, r9, r8, lsl #24
        subs            r10, r10, #1            /* flags stay live until the bne */
        str             r9, [r1, #4]            /* store pixels 4..7 */

        add             r1, r1, r2              /* next dest row */
        bne             1b

        ldmfd           sp!, {r4-r10}
        bx              lr
        .endfunc