ARM: set size of asm functions in object files
[libav.git] / libavcodec / arm / dsputil_arm.S
1 @
2 @ ARMv4 optimized DSP utils
3 @ Copyright (c) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
4 @
5 @ This file is part of FFmpeg.
6 @
7 @ FFmpeg is free software; you can redistribute it and/or
8 @ modify it under the terms of the GNU Lesser General Public
9 @ License as published by the Free Software Foundation; either
10 @ version 2.1 of the License, or (at your option) any later version.
11 @
12 @ FFmpeg is distributed in the hope that it will be useful,
13 @ but WITHOUT ANY WARRANTY; without even the implied warranty of
14 @ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 @ Lesser General Public License for more details.
16 @
17 @ You should have received a copy of the GNU Lesser General Public
18 @ License along with FFmpeg; if not, write to the Free Software
19 @ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 @
21
22 #include "config.h"
23 #include "asm.S"
24
25 preserve8
26
#if !HAVE_PLD
@ Core lacks the PLD (cache-preload hint) instruction: define pld as
@ a no-op macro so the code below can use it unconditionally.
.macro pld reg
.endm
#endif
31
#if HAVE_ARMV5TE
@ void ff_prefetch_arm(void *mem, int stride, int h)
@ Issue a cache-preload hint for h lines spaced stride bytes apart,
@ starting at mem.  r0 = mem, r1 = stride, r2 = line count.
function ff_prefetch_arm, export=1
1:
        pld     [r0]            @ hint: pull the current line into cache
        add     r0, r0, r1      @ step to the next line
        subs    r2, r2, #1      @ one line done; Z set when count hits 0
        bne     1b
        bx      lr
endfunc
#endif
41
@ Rd0-Rd3 = the 16 bytes found \shift bytes into the 20-byte window
@ held in Rn0-Rn4, i.e. an unaligned 16-byte read assembled from
@ five aligned word loads by shift-and-merge (little-endian layout).
@ Sources are preserved; destinations must not alias sources.
.macro ALIGN_QWORD_D shift, Rd0, Rd1, Rd2, Rd3, Rn0, Rn1, Rn2, Rn3, Rn4
        mov     \Rd0, \Rn0, lsr #(\shift * 8)
        mov     \Rd1, \Rn1, lsr #(\shift * 8)
        mov     \Rd2, \Rn2, lsr #(\shift * 8)
        mov     \Rd3, \Rn3, lsr #(\shift * 8)
        orr     \Rd0, \Rd0, \Rn1, lsl #(32 - \shift * 8)
        orr     \Rd1, \Rd1, \Rn2, lsl #(32 - \shift * 8)
        orr     \Rd2, \Rd2, \Rn3, lsl #(32 - \shift * 8)
        orr     \Rd3, \Rd3, \Rn4, lsl #(32 - \shift * 8)
.endm
@ In-place variant: shift the 12-byte window R0,R1,R2 down by \shift
@ bytes, leaving the extracted 8 bytes in R0,R1.  R2 is preserved.
.macro ALIGN_DWORD shift, R0, R1, R2
        mov     \R0, \R0, lsr #(\shift * 8)
        orr     \R0, \R0, \R1, lsl #(32 - \shift * 8)
        mov     \R1, \R1, lsr #(\shift * 8)
        orr     \R1, \R1, \R2, lsl #(32 - \shift * 8)
.endm
@ Rdst0,Rdst1 = the 8 bytes found \shift bytes into the 12-byte
@ window Rsrc0-Rsrc2.  Sources are preserved.  Statement order makes
@ it safe for Rdst1 to alias Rsrc0 (Rsrc0 is read first) -- relied
@ upon by RND_XY2_IT's align==3 case.
.macro ALIGN_DWORD_D shift, Rdst0, Rdst1, Rsrc0, Rsrc1, Rsrc2
        mov     \Rdst0, \Rsrc0, lsr #(\shift * 8)
        mov     \Rdst1, \Rsrc1, lsr #(\shift * 8)
        orr     \Rdst0, \Rdst0, \Rsrc1, lsl #(32 - (\shift * 8))
        orr     \Rdst1, \Rdst1, \Rsrc2, lsl #(32 - (\shift * 8))
.endm
64
.macro RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
        @ Per-byte rounded average of the 8 bytes in Rn0,Rn1 and Rm0,Rm1:
        @   Rd = (Rn | Rm) - (((Rn ^ Rm) & ~0x01010101) >> 1)
        @      = (Rn + Rm + 1) >> 1 in each byte lane
        @ Rmask must hold 0xFEFEFEFE (keeps the shift from leaking
        @ bits between lanes).  Rn0/Rn1 are destroyed.
        eor     \Rd0, \Rn0, \Rm0
        eor     \Rd1, \Rn1, \Rm1
        orr     \Rn0, \Rn0, \Rm0
        orr     \Rn1, \Rn1, \Rm1
        and     \Rd0, \Rd0, \Rmask
        and     \Rd1, \Rd1, \Rmask
        sub     \Rd0, \Rn0, \Rd0, lsr #1
        sub     \Rd1, \Rn1, \Rd1, lsr #1
.endm
78
.macro NO_RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
        @ Per-byte truncating average of the 8 bytes in Rn0,Rn1 and
        @ Rm0,Rm1 (note: the code adds -- the old comment's "-" was wrong):
        @   Rd = (Rn & Rm) + (((Rn ^ Rm) & ~0x01010101) >> 1)
        @      = (Rn + Rm) >> 1 in each byte lane
        @ Rmask must hold 0xFEFEFEFE.  Rn0/Rn1 are destroyed.
        eor     \Rd0, \Rn0, \Rm0
        eor     \Rd1, \Rn1, \Rm1
        and     \Rn0, \Rn0, \Rm0
        and     \Rn1, \Rn1, \Rm1
        and     \Rd0, \Rd0, \Rmask
        and     \Rd1, \Rd1, \Rmask
        add     \Rd0, \Rn0, \Rd0, lsr #1
        add     \Rd1, \Rn1, \Rd1, lsr #1
.endm
92
.macro JMP_ALIGN tmp, reg
        @ Dispatch on the byte alignment of \reg: clear its low two
        @ bits (word-align it) and branch forward to label 1/2/3/4
        @ for a byte offset of 0/1/2/3.  \tmp is clobbered.
        ands    \tmp, \reg, #3
        bic     \reg, \reg, #3
        beq     1f
        subs    \tmp, \tmp, #1
        beq     2f
        subs    \tmp, \tmp, #1
        beq     3f
        b       4f
.endm
103
@ ----------------------------------------------------------------
.align 5
@ void ff_put_pixels16_arm(uint8_t *block, const uint8_t *pixels,
@                          int line_size, int h)
@ Copy a 16xh pixel block.  block (r0) is word aligned; pixels (r1)
@ may be unaligned -- JMP_ALIGN word-aligns r1 and branches to the
@ copy loop for its byte offset.  r2 = line_size, r3 = h.
function ff_put_pixels16_arm, export=1
        pld     [r1]
        push    {r4-r11, lr}
        JMP_ALIGN r5, r1
1:
        @ offset 0: straight 16-byte load/store per row
        ldm     r1, {r4-r7}
        add     r1, r1, r2
        stm     r0, {r4-r7}
        pld     [r1]
        subs    r3, r3, #1
        add     r0, r0, r2
        bne     1b
        pop     {r4-r11, pc}
        .align 5
2:
        @ offset 1: load 5 words, shift-merge down by 1 byte
        ldm     r1, {r4-r8}
        add     r1, r1, r2
        ALIGN_QWORD_D 1, r9, r10, r11, r12, r4, r5, r6, r7, r8
        pld     [r1]
        subs    r3, r3, #1
        stm     r0, {r9-r12}
        add     r0, r0, r2
        bne     2b
        pop     {r4-r11, pc}
        .align 5
3:
        @ offset 2
        ldm     r1, {r4-r8}
        add     r1, r1, r2
        ALIGN_QWORD_D 2, r9, r10, r11, r12, r4, r5, r6, r7, r8
        pld     [r1]
        subs    r3, r3, #1
        stm     r0, {r9-r12}
        add     r0, r0, r2
        bne     3b
        pop     {r4-r11, pc}
        .align 5
4:
        @ offset 3
        ldm     r1, {r4-r8}
        add     r1, r1, r2
        ALIGN_QWORD_D 3, r9, r10, r11, r12, r4, r5, r6, r7, r8
        pld     [r1]
        subs    r3, r3, #1
        stm     r0, {r9-r12}
        add     r0, r0, r2
        bne     4b
        pop     {r4-r11,pc}
endfunc
155
@ ----------------------------------------------------------------
.align 5
@ void ff_put_pixels8_arm(uint8_t *block, const uint8_t *pixels,
@                         int line_size, int h)
@ Copy an 8xh pixel block.  block (r0) is word aligned; pixels (r1)
@ may be unaligned -- JMP_ALIGN word-aligns r1 and branches to the
@ loop for its byte offset.  r2 = line_size, r3 = h.
function ff_put_pixels8_arm, export=1
        pld     [r1]
        push    {r4-r5,lr}
        JMP_ALIGN r5, r1
1:
        @ offset 0: plain 8-byte copy per row
        ldm     r1, {r4-r5}
        add     r1, r1, r2
        subs    r3, r3, #1
        pld     [r1]
        stm     r0, {r4-r5}
        add     r0, r0, r2
        bne     1b
        pop     {r4-r5,pc}
        .align 5
2:
        @ offset 1: load 12 bytes, shift-merge down by 1 byte
        ldm     r1, {r4-r5, r12}
        add     r1, r1, r2
        ALIGN_DWORD 1, r4, r5, r12
        pld     [r1]
        subs    r3, r3, #1
        stm     r0, {r4-r5}
        add     r0, r0, r2
        bne     2b
        pop     {r4-r5,pc}
        .align 5
3:
        @ offset 2
        ldm     r1, {r4-r5, r12}
        add     r1, r1, r2
        ALIGN_DWORD 2, r4, r5, r12
        pld     [r1]
        subs    r3, r3, #1
        stm     r0, {r4-r5}
        add     r0, r0, r2
        bne     3b
        pop     {r4-r5,pc}
        .align 5
4:
        @ offset 3
        ldm     r1, {r4-r5, r12}
        add     r1, r1, r2
        ALIGN_DWORD 3, r4, r5, r12
        pld     [r1]
        subs    r3, r3, #1
        stm     r0, {r4-r5}
        add     r0, r0, r2
        bne     4b
        pop     {r4-r5,pc}
endfunc
207
@ ----------------------------------------------------------------
.align 5
@ void ff_put_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels,
@                            int line_size, int h)
@ Horizontal half-pel interpolation with rounding:
@   out[i] = (pixels[i] + pixels[i+1] + 1) >> 1 for 8 bytes per row.
@ block (r0) word aligned; pixels (r1) may be unaligned.
@ ALIGN_DWORD_D extracts the rows at offsets n and n+1 from the
@ same three loaded words; RND_AVG32 averages them.
function ff_put_pixels8_x2_arm, export=1
        pld     [r1]
        push    {r4-r10,lr}
        ldr     r12, =0xfefefefe        @ lane mask for RND_AVG32
        JMP_ALIGN r5, r1
1:
        @ offset 0: r4-r5 = row, r6-r7 = row shifted by one byte
        ldm     r1, {r4-r5, r10}
        add     r1, r1, r2
        ALIGN_DWORD_D 1, r6, r7, r4, r5, r10
        pld     [r1]
        RND_AVG32 r8, r9, r4, r5, r6, r7, r12
        subs    r3, r3, #1
        stm     r0, {r8-r9}
        add     r0, r0, r2
        bne     1b
        pop     {r4-r10,pc}
        .align 5
2:
        @ offset 1: rows extracted at offsets 1 and 2
        ldm     r1, {r4-r5, r10}
        add     r1, r1, r2
        ALIGN_DWORD_D 1, r6, r7, r4, r5, r10
        ALIGN_DWORD_D 2, r8, r9, r4, r5, r10
        pld     [r1]
        RND_AVG32 r4, r5, r6, r7, r8, r9, r12
        subs    r3, r3, #1
        stm     r0, {r4-r5}
        add     r0, r0, r2
        bne     2b
        pop     {r4-r10,pc}
        .align 5
3:
        @ offset 2: rows extracted at offsets 2 and 3
        ldm     r1, {r4-r5, r10}
        add     r1, r1, r2
        ALIGN_DWORD_D 2, r6, r7, r4, r5, r10
        ALIGN_DWORD_D 3, r8, r9, r4, r5, r10
        pld     [r1]
        RND_AVG32 r4, r5, r6, r7, r8, r9, r12
        subs    r3, r3, #1
        stm     r0, {r4-r5}
        add     r0, r0, r2
        bne     3b
        pop     {r4-r10,pc}
        .align 5
4:
        @ offset 3: row at offset 3 via ALIGN_DWORD_D; the row at
        @ offset 4 is simply the word pair r5,r10
        ldm     r1, {r4-r5, r10}
        add     r1, r1, r2
        ALIGN_DWORD_D 3, r6, r7, r4, r5, r10
        pld     [r1]
        RND_AVG32 r8, r9, r6, r7, r5, r10, r12
        subs    r3, r3, #1
        stm     r0, {r8-r9}
        add     r0, r0, r2
        bne     4b
        pop     {r4-r10,pc}
endfunc
267
.align 5
@ void ff_put_no_rnd_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels,
@                                   int line_size, int h)
@ Same as ff_put_pixels8_x2_arm but with the truncating average
@ (NO_RND_AVG32):  out[i] = (pixels[i] + pixels[i+1]) >> 1.
function ff_put_no_rnd_pixels8_x2_arm, export=1
        pld     [r1]
        push    {r4-r10,lr}
        ldr     r12, =0xfefefefe        @ lane mask for NO_RND_AVG32
        JMP_ALIGN r5, r1
1:
        @ offset 0: r4-r5 = row, r6-r7 = row shifted by one byte
        ldm     r1, {r4-r5, r10}
        add     r1, r1, r2
        ALIGN_DWORD_D 1, r6, r7, r4, r5, r10
        pld     [r1]
        NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12
        subs    r3, r3, #1
        stm     r0, {r8-r9}
        add     r0, r0, r2
        bne     1b
        pop     {r4-r10,pc}
        .align 5
2:
        @ offset 1: rows extracted at offsets 1 and 2
        ldm     r1, {r4-r5, r10}
        add     r1, r1, r2
        ALIGN_DWORD_D 1, r6, r7, r4, r5, r10
        ALIGN_DWORD_D 2, r8, r9, r4, r5, r10
        pld     [r1]
        NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12
        subs    r3, r3, #1
        stm     r0, {r4-r5}
        add     r0, r0, r2
        bne     2b
        pop     {r4-r10,pc}
        .align 5
3:
        @ offset 2: rows extracted at offsets 2 and 3
        ldm     r1, {r4-r5, r10}
        add     r1, r1, r2
        ALIGN_DWORD_D 2, r6, r7, r4, r5, r10
        ALIGN_DWORD_D 3, r8, r9, r4, r5, r10
        pld     [r1]
        NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12
        subs    r3, r3, #1
        stm     r0, {r4-r5}
        add     r0, r0, r2
        bne     3b
        pop     {r4-r10,pc}
        .align 5
4:
        @ offset 3: row at offset 4 is simply the word pair r5,r10
        ldm     r1, {r4-r5, r10}
        add     r1, r1, r2
        ALIGN_DWORD_D 3, r6, r7, r4, r5, r10
        pld     [r1]
        NO_RND_AVG32 r8, r9, r6, r7, r5, r10, r12
        subs    r3, r3, #1
        stm     r0, {r8-r9}
        add     r0, r0, r2
        bne     4b
        pop     {r4-r10,pc}
endfunc
326
327
@ ----------------------------------------------------------------
.align 5
@ void ff_put_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels,
@                            int line_size, int h)
@ Vertical half-pel interpolation with rounding: each output row is
@ the per-byte rounded average of two consecutive source rows.
@ The loop is unrolled twice (r3 = h/2 -- assumes h is even; any odd
@ bit of h is dropped) and the newest source row is carried over
@ between iterations so each row is loaded only once.
function ff_put_pixels8_y2_arm, export=1
        pld     [r1]
        push    {r4-r11,lr}
        mov     r3, r3, lsr #1          @ two output rows per pass
        ldr     r12, =0xfefefefe        @ lane mask for RND_AVG32
        JMP_ALIGN r5, r1
1:
        @ offset 0
        ldm     r1, {r4-r5}             @ prime with the first row
        add     r1, r1, r2
6:      ldm     r1, {r6-r7}             @ next row
        add     r1, r1, r2
        pld     [r1]
        RND_AVG32 r8, r9, r4, r5, r6, r7, r12
        ldm     r1, {r4-r5}             @ row after that (carried over)
        add     r1, r1, r2
        stm     r0, {r8-r9}
        add     r0, r0, r2
        pld     [r1]
        RND_AVG32 r8, r9, r6, r7, r4, r5, r12
        subs    r3, r3, #1
        stm     r0, {r8-r9}
        add     r0, r0, r2
        bne     6b
        pop     {r4-r11,pc}
        .align 5
2:
        @ offset 1: realign every loaded row with ALIGN_DWORD
        ldm     r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ALIGN_DWORD 1, r4, r5, r6
6:      ldm     r1, {r7-r9}
        add     r1, r1, r2
        pld     [r1]
        ALIGN_DWORD 1, r7, r8, r9
        RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stm     r0, {r10-r11}
        add     r0, r0, r2
        ldm     r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ALIGN_DWORD 1, r4, r5, r6
        subs    r3, r3, #1
        RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stm     r0, {r10-r11}
        add     r0, r0, r2
        bne     6b
        pop     {r4-r11,pc}
        .align 5
3:
        @ offset 2
        ldm     r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ALIGN_DWORD 2, r4, r5, r6
6:      ldm     r1, {r7-r9}
        add     r1, r1, r2
        pld     [r1]
        ALIGN_DWORD 2, r7, r8, r9
        RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stm     r0, {r10-r11}
        add     r0, r0, r2
        ldm     r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ALIGN_DWORD 2, r4, r5, r6
        subs    r3, r3, #1
        RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stm     r0, {r10-r11}
        add     r0, r0, r2
        bne     6b
        pop     {r4-r11,pc}
        .align 5
4:
        @ offset 3
        ldm     r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ALIGN_DWORD 3, r4, r5, r6
6:      ldm     r1, {r7-r9}
        add     r1, r1, r2
        pld     [r1]
        ALIGN_DWORD 3, r7, r8, r9
        RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stm     r0, {r10-r11}
        add     r0, r0, r2
        ldm     r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ALIGN_DWORD 3, r4, r5, r6
        subs    r3, r3, #1
        RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stm     r0, {r10-r11}
        add     r0, r0, r2
        bne     6b
        pop     {r4-r11,pc}
endfunc
426
.align 5
@ void ff_put_no_rnd_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels,
@                                   int line_size, int h)
@ Same as ff_put_pixels8_y2_arm but with the truncating average
@ (NO_RND_AVG32).  2x unrolled; r3 = h/2, so h is assumed even.
function ff_put_no_rnd_pixels8_y2_arm, export=1
        pld     [r1]
        push    {r4-r11,lr}
        mov     r3, r3, lsr #1          @ two output rows per pass
        ldr     r12, =0xfefefefe        @ lane mask for NO_RND_AVG32
        JMP_ALIGN r5, r1
1:
        @ offset 0
        ldm     r1, {r4-r5}             @ prime with the first row
        add     r1, r1, r2
6:      ldm     r1, {r6-r7}
        add     r1, r1, r2
        pld     [r1]
        NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12
        ldm     r1, {r4-r5}             @ row carried into next pass
        add     r1, r1, r2
        stm     r0, {r8-r9}
        add     r0, r0, r2
        pld     [r1]
        NO_RND_AVG32 r8, r9, r6, r7, r4, r5, r12
        subs    r3, r3, #1
        stm     r0, {r8-r9}
        add     r0, r0, r2
        bne     6b
        pop     {r4-r11,pc}
        .align 5
2:
        @ offset 1: realign every loaded row with ALIGN_DWORD
        ldm     r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ALIGN_DWORD 1, r4, r5, r6
6:      ldm     r1, {r7-r9}
        add     r1, r1, r2
        pld     [r1]
        ALIGN_DWORD 1, r7, r8, r9
        NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stm     r0, {r10-r11}
        add     r0, r0, r2
        ldm     r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ALIGN_DWORD 1, r4, r5, r6
        subs    r3, r3, #1
        NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stm     r0, {r10-r11}
        add     r0, r0, r2
        bne     6b
        pop     {r4-r11,pc}
        .align 5
3:
        @ offset 2
        ldm     r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ALIGN_DWORD 2, r4, r5, r6
6:      ldm     r1, {r7-r9}
        add     r1, r1, r2
        pld     [r1]
        ALIGN_DWORD 2, r7, r8, r9
        NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stm     r0, {r10-r11}
        add     r0, r0, r2
        ldm     r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ALIGN_DWORD 2, r4, r5, r6
        subs    r3, r3, #1
        NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stm     r0, {r10-r11}
        add     r0, r0, r2
        bne     6b
        pop     {r4-r11,pc}
        .align 5
4:
        @ offset 3
        ldm     r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ALIGN_DWORD 3, r4, r5, r6
6:      ldm     r1, {r7-r9}
        add     r1, r1, r2
        pld     [r1]
        ALIGN_DWORD 3, r7, r8, r9
        NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stm     r0, {r10-r11}
        add     r0, r0, r2
        ldm     r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ALIGN_DWORD 3, r4, r5, r6
        subs    r3, r3, #1
        NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stm     r0, {r10-r11}
        add     r0, r0, r2
        bne     6b
        pop     {r4-r11,pc}
endfunc

        .ltorg                          @ dump the literal pool (=0xfefefefe etc.)
526
@ ----------------------------------------------------------------
@ One source row of the 2D (x+y) half-pel filter.  Loads a row,
@ extracts the two 8-byte rows at byte offsets \align and \align+1,
@ and splits their per-byte sum into two partial sums:
@   r8,r9   = low bits:  (a & 0x03030303) + (b & 0x03030303)
@             (plus a rounding bias on passes where r3 is even)
@   r10,r11 = high bits: ((a >> 2) & 0x3F3F3F3F) + ((b >> 2) & 0x3F3F3F3F)
@ \rnd selects the bias constant: lsl -> 0x03030303 & (mask<<1)
@ = 0x02020202 (rounding), lsr -> 0x03030303 & (mask>>1)
@ = 0x01010101 (no rounding).  Decrements r3 (row counter) last, so
@ the caller can branch on the resulting flags.  Clobbers r4-r12, r14.
.macro RND_XY2_IT align, rnd
.if \align == 0
        ldm     r1, {r6-r8}
.elseif \align == 3
        ldm     r1, {r5-r7}
.else
        ldm     r1, {r8-r10}
.endif
        add     r1, r1, r2
        pld     [r1]
.if \align == 0
        ALIGN_DWORD_D 1, r4, r5, r6, r7, r8
.elseif \align == 1
        ALIGN_DWORD_D 1, r4, r5, r8, r9, r10
        ALIGN_DWORD_D 2, r6, r7, r8, r9, r10
.elseif \align == 2
        ALIGN_DWORD_D 2, r4, r5, r8, r9, r10
        ALIGN_DWORD_D 3, r6, r7, r8, r9, r10
.elseif \align == 3
        ALIGN_DWORD_D 3, r4, r5, r5, r6, r7   @ Rdst1 aliases Rsrc0 -- safe
.endif
        ldr     r14, =0x03030303        @ low-2-bit lane mask
        tst     r3, #1
        and     r8, r4, r14
        and     r9, r5, r14
        and     r10, r6, r14
        and     r11, r7, r14
        andeq   r14, r14, r14, \rnd #1  @ even pass: build the bias constant
        add     r8, r8, r10
        add     r9, r9, r11
        ldr     r12, =0xfcfcfcfc >> 2   @ = 0x3f3f3f3f, high-6-bit lane mask
        addeq   r8, r8, r14
        addeq   r9, r9, r14
        and     r4, r12, r4, lsr #2
        and     r5, r12, r5, lsr #2
        and     r6, r12, r6, lsr #2
        and     r7, r12, r7, lsr #2
        add     r10, r4, r6
        add     r11, r5, r7
        subs    r3, r3, #1
.endm
571
.macro RND_XY2_EXPAND align, rnd
        @ Full loop body for one alignment case of the xy2 functions.
        @ Combines the previous row's partial sums with the current
        @ row's (each from RND_XY2_IT):
        @   out = (((l1_prev + l1_cur) >> 2) & 0x0F0F0F0F)
        @         + (h1_prev + h1_cur)
        @ i.e. the per-byte average of four neighbouring pixels.
        @ Loops while r3 (decremented inside RND_XY2_IT) stays >= 0,
        @ then pops the caller's registers and returns.
        RND_XY2_IT \align, \rnd
6:      push    {r8-r11}                @ save this row's l1/h1 partials
        RND_XY2_IT \align, \rnd
        pop     {r4-r7}                 @ r4,r5 = prev l1; r6,r7 = prev h1
        add     r4, r4, r8
        add     r5, r5, r9
        ldr     r14, =0x0f0f0f0f
        add     r6, r6, r10
        add     r7, r7, r11
        and     r4, r14, r4, lsr #2
        and     r5, r14, r5, lsr #2
        add     r4, r4, r6
        add     r5, r5, r7
        stm     r0, {r4-r5}
        add     r0, r0, r2
        bge     6b
        pop     {r4-r11,pc}
.endm
591
.align 5
@ void ff_put_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels,
@                             int line_size, int h)
@ Half-pel interpolation in both directions with rounding: each
@ output byte averages four neighbouring source pixels.
@ RND_XY2_EXPAND is instantiated with lsl, which selects the larger
@ (0x02020202) rounding bias; one expansion per byte alignment.
function ff_put_pixels8_xy2_arm, export=1
        pld     [r1]
        push    {r4-r11,lr}             @ r14 (lr) is scratch inside the macros
        JMP_ALIGN r5, r1
1:      RND_XY2_EXPAND 0, lsl           @ offset 0
        .align 5
2:      RND_XY2_EXPAND 1, lsl           @ offset 1
        .align 5
3:      RND_XY2_EXPAND 2, lsl           @ offset 2
        .align 5
4:      RND_XY2_EXPAND 3, lsl           @ offset 3
endfunc
607
.align 5
@ void ff_put_no_rnd_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels,
@                                    int line_size, int h)
@ As ff_put_pixels8_xy2_arm but without rounding: RND_XY2_EXPAND is
@ instantiated with lsr, selecting the smaller (0x01010101) bias.
function ff_put_no_rnd_pixels8_xy2_arm, export=1
        pld     [r1]
        push    {r4-r11,lr}
        JMP_ALIGN r5, r1
1:      RND_XY2_EXPAND 0, lsr           @ offset 0
        .align 5
2:      RND_XY2_EXPAND 1, lsr           @ offset 1
        .align 5
3:      RND_XY2_EXPAND 2, lsr           @ offset 2
        .align 5
4:      RND_XY2_EXPAND 3, lsr           @ offset 3
endfunc
623
.align 5
@ void ff_add_pixels_clamped_arm(int16_t *block, uint8_t *dest, int stride)
@ Add an 8x8 block of 16-bit coefficients to an 8x8 block of bytes:
@   dest[x] = clamp(dest[x] + block[x], 0, 255)
@ Two coefficients are handled per dest word; several loads are
@ hoisted above their use ([A]-[F] markers) to hide load latency.
@ Clamp trick: bit 8 of (dest_byte + coeff) is set when the sum
@ leaves [0,255]; then ~coeff >> 24 yields 0xFF for positive
@ overflow and 0x00 for negative underflow.
@ NOTE(review): the bit-8 test only detects sums in [-256, 511];
@ assumes dest+coeff stays in that range (IDCT output) -- confirm.
function ff_add_pixels_clamped_arm, export=1
        push    {r4-r10}
        mov     r10, #8                 /* row counter */
1:
        ldr     r4, [r1]                /* load dest */
        /* block[0] and block[1]*/
        ldrsh   r5, [r0]
        ldrsh   r7, [r0, #2]
        and     r6, r4, #0xFF
        and     r8, r4, #0xFF00
        add     r6, r5, r6
        add     r8, r7, r8, lsr #8
        mvn     r5, r5                  /* ~coeff, for the clamp values */
        mvn     r7, r7
        tst     r6, #0x100
        movne   r6, r5, lsr #24         /* clamp to 0x00 or 0xFF */
        tst     r8, #0x100
        movne   r8, r7, lsr #24
        mov     r9, r6                  /* start assembling result word */
        ldrsh   r5, [r0, #4]            /* moved from [A] */
        orr     r9, r9, r8, lsl #8
        /* block[2] and block[3] */
        /* [A] */
        ldrsh   r7, [r0, #6]
        and     r6, r4, #0xFF0000
        and     r8, r4, #0xFF000000
        add     r6, r5, r6, lsr #16
        add     r8, r7, r8, lsr #24
        mvn     r5, r5
        mvn     r7, r7
        tst     r6, #0x100
        movne   r6, r5, lsr #24
        tst     r8, #0x100
        movne   r8, r7, lsr #24
        orr     r9, r9, r6, lsl #16
        ldr     r4, [r1, #4]            /* moved from [B] */
        orr     r9, r9, r8, lsl #24
        /* store dest */
        ldrsh   r5, [r0, #8]            /* moved from [C] */
        str     r9, [r1]

        /* load dest */
        /* [B] */
        /* block[4] and block[5] */
        /* [C] */
        ldrsh   r7, [r0, #10]
        and     r6, r4, #0xFF
        and     r8, r4, #0xFF00
        add     r6, r5, r6
        add     r8, r7, r8, lsr #8
        mvn     r5, r5
        mvn     r7, r7
        tst     r6, #0x100
        movne   r6, r5, lsr #24
        tst     r8, #0x100
        movne   r8, r7, lsr #24
        mov     r9, r6
        ldrsh   r5, [r0, #12]           /* moved from [D] */
        orr     r9, r9, r8, lsl #8
        /* block[6] and block[7] */
        /* [D] */
        ldrsh   r7, [r0, #14]
        and     r6, r4, #0xFF0000
        and     r8, r4, #0xFF000000
        add     r6, r5, r6, lsr #16
        add     r8, r7, r8, lsr #24
        mvn     r5, r5
        mvn     r7, r7
        tst     r6, #0x100
        movne   r6, r5, lsr #24
        tst     r8, #0x100
        movne   r8, r7, lsr #24
        orr     r9, r9, r6, lsl #16
        add     r0, r0, #16             /* moved from [E]: next block row */
        orr     r9, r9, r8, lsl #24
        subs    r10, r10, #1            /* moved from [F]: --rows */
        /* store dest */
        str     r9, [r1, #4]

        /* [E] */
        /* [F] */
        add     r1, r1, r2              /* dest += stride */
        bne     1b

        pop     {r4-r10}
        bx      lr
endfunc