ARMv5TE optimized IDCT
libav.git: libavcodec/armv4l/simple_idct_armv5te.S
/*
 * Simple IDCT
 *
 * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
 * Copyright (c) 2006 Mans Rullgard <mru@inprovide.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#define W1  22725  /* cos(1*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define W2  21407  /* cos(2*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define W3  19266  /* cos(3*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define W4  16383  /* cos(4*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define W5  12873  /* cos(5*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define W6   8867  /* cos(6*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define W7   4520  /* cos(7*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define ROW_SHIFT 11
#define COL_SHIFT 20

#define W13 (W1 | (W3 << 16))
#define W26 (W2 | (W6 << 16))
#define W57 (W5 | (W7 << 16))

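/*
 * The weights above are the 8-point DCT basis cosines in 2.14 fixed point.
 * W13/W26/W57 pack two of them into one word so that the ARMv5TE 16x16
 * multiplies (smulxy/smlaxy) can pick either half with their b/t operand
 * suffixes.  As a rough C sketch (variable names here are illustrative
 * only), the even part of one row pass computes:
 *
 *     a0 = W4*row[0] + (1 << (ROW_SHIFT - 1));
 *     a1 = a2 = a3 = a0;
 *     a0 += W2*row[2];   a1 += W6*row[2];
 *     a2 -= W6*row[2];   a3 -= W2*row[2];
 *
 * which corresponds to v1..v4 in idct_row_armv5te below.
 */
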
        .text
        .align
w13:    .long W13
w26:    .long W26
w57:    .long W57

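/*
 * idct_row_armv5te: one in-place row pass over the 8 int16 coefficients
 * at a1.  APCS register aliases are used throughout this file
 * (a1-a4 = r0-r3, v1-v7 = r4-r10, fp = r11, ip = r12).  Rows whose
 * coefficients 1..7 are all zero take the row_dc_only shortcut.
 */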
        .align
        .func idct_row_armv5te
idct_row_armv5te:
        str    lr, [sp, #-4]!

        ldrd   v1, [a1, #8]
        ldrd   a3, [a1]              /* a3 = row[1:0], a4 = row[3:2] */
        orrs   v1, v1, v2
        cmpeq  v1, a4
        cmpeq  v1, a3, lsr #16
        beq    row_dc_only

        mov    v1, #(1<<(ROW_SHIFT-1))
        mov    ip, #16384
        sub    ip, ip, #1            /* ip = W4 */
        smlabb v1, ip, a3, v1        /* v1 = W4*row[0]+(1<<(RS-1)) */
        ldr    ip, [pc, #(w26-.-8)]  /* ip = W2 | (W6 << 16) */
        smultb a2, ip, a4
        smulbb lr, ip, a4
        add    v2, v1, a2
        sub    v3, v1, a2
        sub    v4, v1, lr
        add    v1, v1, lr

        ldr    ip, [pc, #(w13-.-8)]  /* ip = W1 | (W3 << 16) */
        ldr    lr, [pc, #(w57-.-8)]  /* lr = W5 | (W7 << 16) */
        smulbt v5, ip, a3
        smultt v6, lr, a4
        smlatt v5, ip, a4, v5
        smultt a2, ip, a3
        smulbt v7, lr, a3
        sub    v6, v6, a2
        smulbt a2, ip, a4
        smultt fp, lr, a3
        sub    v7, v7, a2
        smulbt a2, lr, a4
        ldrd   a3, [a1, #8]          /* a3=row[5:4] a4=row[7:6] */
        sub    fp, fp, a2

        orrs   a2, a3, a4
        beq    1f

        smlabt v5, lr, a3, v5
        smlabt v6, ip, a3, v6
        smlatt v5, lr, a4, v5
        smlabt v6, lr, a4, v6
        smlatt v7, lr, a3, v7
        smlatt fp, ip, a3, fp
        smulbt a2, ip, a4
        smlatt v7, ip, a4, v7
        sub    fp, fp, a2

        ldr    ip, [pc, #(w26-.-8)]  /* ip = W2 | (W6 << 16) */
        mov    a2, #16384
        sub    a2, a2, #1            /* a2 = W4 */
        smulbb a2, a2, a3            /* a2 = W4*row[4] */
        smultb lr, ip, a4            /* lr = W6*row[6] */
        add    v1, v1, a2            /* v1 += W4*row[4] */
        add    v1, v1, lr            /* v1 += W6*row[6] */
        add    v4, v4, a2            /* v4 += W4*row[4] */
        sub    v4, v4, lr            /* v4 -= W6*row[6] */
        smulbb lr, ip, a4            /* lr = W2*row[6] */
        sub    v2, v2, a2            /* v2 -= W4*row[4] */
        sub    v2, v2, lr            /* v2 -= W2*row[6] */
        sub    v3, v3, a2            /* v3 -= W4*row[4] */
        add    v3, v3, lr            /* v3 += W2*row[6] */

1:      add    a2, v1, v5
        mov    a3, a2, lsr #11
        bic    a3, a3, #0x1f0000
        sub    a2, v2, v6
        mov    a2, a2, lsr #11
        add    a3, a3, a2, lsl #16
        add    a2, v3, v7
        mov    a4, a2, lsr #11
        bic    a4, a4, #0x1f0000
        add    a2, v4, fp
        mov    a2, a2, lsr #11
        add    a4, a4, a2, lsl #16
        strd   a3, [a1]

        sub    a2, v4, fp
        mov    a3, a2, lsr #11
        bic    a3, a3, #0x1f0000
        sub    a2, v3, v7
        mov    a2, a2, lsr #11
        add    a3, a3, a2, lsl #16
        add    a2, v2, v6
        mov    a4, a2, lsr #11
        bic    a4, a4, #0x1f0000
        sub    a2, v1, v5
        mov    a2, a2, lsr #11
        add    a4, a4, a2, lsl #16
        strd   a3, [a1, #8]

        ldr    pc, [sp], #4

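/*
 * All-DC row: every output reduces to row[0] << 3.  The orr duplicates
 * row[0] into both halfwords, the bic clears the top bits of the low
 * copy so the lsl #3 cannot carry into the upper halfword, and the pair
 * is then stored to all four words of the row.
 */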
row_dc_only:
        orr    a3, a3, a3, lsl #16
        bic    a3, a3, #0xe000
        mov    a3, a3, lsl #3
        mov    a4, a3
        strd   a3, [a1]
        strd   a3, [a1, #8]

        ldr    pc, [sp], #4
        .endfunc

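/*
 * idct_col: column pass for the pair of columns at a1 (each 32-bit load
 * picks up two adjacent 16-bit coefficients).  The even-part accumulators
 * for both columns are spilled to the stack with stmfd; the odd part is
 * left in v1-fp for the calling function to combine.
 */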
        .macro idct_col
        ldr    a4, [a1]              /* a4 = col[1:0] */
        mov    ip, #16384
        sub    ip, ip, #1            /* ip = W4 */
#if 0
        mov    v1, #(1<<(COL_SHIFT-1))
        smlabt v2, ip, a4, v1        /* v2 = W4*col[1] + (1<<(COL_SHIFT-1)) */
        smlabb v1, ip, a4, v1        /* v1 = W4*col[0] + (1<<(COL_SHIFT-1)) */
        ldr    a4, [a1, #(16*4)]
#else
        mov    v1, #((1<<(COL_SHIFT-1))/W4)  /* this matches the C version */
        add    v2, v1, a4, asr #16
        rsb    v2, v2, v2, lsl #14
        mov    a4, a4, lsl #16
        add    v1, v1, a4, asr #16
        ldr    a4, [a1, #(16*4)]
        rsb    v1, v1, v1, lsl #14
#endif

        smulbb lr, ip, a4
        smulbt a3, ip, a4
        sub    v3, v1, lr
        sub    v5, v1, lr
        add    v7, v1, lr
        add    v1, v1, lr
        sub    v4, v2, a3
        sub    v6, v2, a3
        add    fp, v2, a3
        ldr    ip, [pc, #(w26-.-8)]
        ldr    a4, [a1, #(16*2)]
        add    v2, v2, a3

        smulbb lr, ip, a4
        smultb a3, ip, a4
        add    v1, v1, lr
        sub    v7, v7, lr
        add    v3, v3, a3
        sub    v5, v5, a3
        smulbt lr, ip, a4
        smultt a3, ip, a4
        add    v2, v2, lr
        sub    fp, fp, lr
        add    v4, v4, a3
        ldr    a4, [a1, #(16*6)]
        sub    v6, v6, a3

        smultb lr, ip, a4
        smulbb a3, ip, a4
        add    v1, v1, lr
        sub    v7, v7, lr
        sub    v3, v3, a3
        add    v5, v5, a3
        smultt lr, ip, a4
        smulbt a3, ip, a4
        add    v2, v2, lr
        sub    fp, fp, lr
        sub    v4, v4, a3
        add    v6, v6, a3

        stmfd  sp!, {v1, v2, v3, v4, v5, v6, v7, fp}

        ldr    ip, [pc, #(w13-.-8)]
        ldr    a4, [a1, #(16*1)]
        ldr    lr, [pc, #(w57-.-8)]
        smulbb v1, ip, a4
        smultb v3, ip, a4
        smulbb v5, lr, a4
        smultb v7, lr, a4
        smulbt v2, ip, a4
        smultt v4, ip, a4
        smulbt v6, lr, a4
        smultt fp, lr, a4
        rsb    v4, v4, #0
        ldr    a4, [a1, #(16*3)]
        rsb    v3, v3, #0

        smlatb v1, ip, a4, v1
        smlatb v3, lr, a4, v3
        smulbb a3, ip, a4
        smulbb a2, lr, a4
        sub    v5, v5, a3
        sub    v7, v7, a2
        smlatt v2, ip, a4, v2
        smlatt v4, lr, a4, v4
        smulbt a3, ip, a4
        smulbt a2, lr, a4
        sub    v6, v6, a3
        ldr    a4, [a1, #(16*5)]
        sub    fp, fp, a2

        smlabb v1, lr, a4, v1
        smlabb v3, ip, a4, v3
        smlatb v5, lr, a4, v5
        smlatb v7, ip, a4, v7
        smlabt v2, lr, a4, v2
        smlabt v4, ip, a4, v4
        smlatt v6, lr, a4, v6
        ldr    a3, [a1, #(16*7)]
        smlatt fp, ip, a4, fp

        smlatb v1, lr, a3, v1
        smlabb v3, lr, a3, v3
        smlatb v5, ip, a3, v5
        smulbb a4, ip, a3
        smlatt v2, lr, a3, v2
        sub    v7, v7, a4
        smlabt v4, lr, a3, v4
        smulbt a4, ip, a3
        smlatt v6, ip, a3, v6
        sub    fp, fp, a4
        .endm

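/*
 * idct_col_armv5te: writes the 16-bit results back into the block.
 * The "lsr #20 / orrmi #0xf000" pairs produce the low 16 bits of an
 * arithmetic shift right by COL_SHIFT without disturbing the value
 * that gets packed into the upper halfword.
 */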
        .align
        .func idct_col_armv5te
idct_col_armv5te:
        str    lr, [sp, #-4]!

        idct_col

        ldmfd  sp!, {a3, a4}
        adds   a2, a3, v1
        mov    a2, a2, lsr #20
        orrmi  a2, a2, #0xf000
        add    ip, a4, v2
        mov    ip, ip, asr #20
        orr    a2, a2, ip, lsl #16
        str    a2, [a1]
        subs   a3, a3, v1
        mov    a2, a3, lsr #20
        orrmi  a2, a2, #0xf000
        sub    a4, a4, v2
        mov    a4, a4, asr #20
        orr    a2, a2, a4, lsl #16
        ldmfd  sp!, {a3, a4}
        str    a2, [a1, #(16*7)]

        subs   a2, a3, v3
        mov    a2, a2, lsr #20
        orrmi  a2, a2, #0xf000
        sub    ip, a4, v4
        mov    ip, ip, asr #20
        orr    a2, a2, ip, lsl #16
        str    a2, [a1, #(16*1)]
        adds   a3, a3, v3
        mov    a2, a3, lsr #20
        orrmi  a2, a2, #0xf000
        add    a4, a4, v4
        mov    a4, a4, asr #20
        orr    a2, a2, a4, lsl #16
        ldmfd  sp!, {a3, a4}
        str    a2, [a1, #(16*6)]

        adds   a2, a3, v5
        mov    a2, a2, lsr #20
        orrmi  a2, a2, #0xf000
        add    ip, a4, v6
        mov    ip, ip, asr #20
        orr    a2, a2, ip, lsl #16
        str    a2, [a1, #(16*2)]
        subs   a3, a3, v5
        mov    a2, a3, lsr #20
        orrmi  a2, a2, #0xf000
        sub    a4, a4, v6
        mov    a4, a4, asr #20
        orr    a2, a2, a4, lsl #16
        ldmfd  sp!, {a3, a4}
        str    a2, [a1, #(16*5)]

        adds   a2, a3, v7
        mov    a2, a2, lsr #20
        orrmi  a2, a2, #0xf000
        add    ip, a4, fp
        mov    ip, ip, asr #20
        orr    a2, a2, ip, lsl #16
        str    a2, [a1, #(16*3)]
        subs   a3, a3, v7
        mov    a2, a3, lsr #20
        orrmi  a2, a2, #0xf000
        sub    a4, a4, fp
        mov    a4, a4, asr #20
        orr    a2, a2, a4, lsl #16
        str    a2, [a1, #(16*4)]

        ldr    pc, [sp], #4
        .endfunc

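/*
 * idct_col_put_armv5te: same column pass, but the results are written
 * as 8-bit pixels.  Each value is clamped roughly as in
 *
 *     v = col >> COL_SHIFT;
 *     if (v < 0) v = 0;  if (v > 255) v = 255;
 *
 * using the movs/movmi/cmp/movgt sequences below.  The destination
 * pointer and line size are fetched from the stack slots where the
 * simple_idct_put_armv5te wrapper saved them.
 */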
        .align
        .func idct_col_put_armv5te
idct_col_put_armv5te:
        str    lr, [sp, #-4]!

        idct_col

        ldmfd  sp!, {a3, a4}
        ldr    lr, [sp, #32]
        add    a2, a3, v1
        movs   a2, a2, asr #20
        movmi  a2, #0
        cmp    a2, #255
        movgt  a2, #255
        add    ip, a4, v2
        movs   ip, ip, asr #20
        movmi  ip, #0
        cmp    ip, #255
        movgt  ip, #255
        orr    a2, a2, ip, lsl #8
        sub    a3, a3, v1
        movs   a3, a3, asr #20
        movmi  a3, #0
        cmp    a3, #255
        movgt  a3, #255
        sub    a4, a4, v2
        movs   a4, a4, asr #20
        movmi  a4, #0
        cmp    a4, #255
        ldr    v1, [sp, #28]
        movgt  a4, #255
        strh   a2, [v1]
        add    a2, v1, #2
        str    a2, [sp, #28]
        orr    a2, a3, a4, lsl #8
        rsb    v2, lr, lr, lsl #3
        ldmfd  sp!, {a3, a4}
        strh   a2, [v2, v1]!

        sub    a2, a3, v3
        movs   a2, a2, asr #20
        movmi  a2, #0
        cmp    a2, #255
        movgt  a2, #255
        sub    ip, a4, v4
        movs   ip, ip, asr #20
        movmi  ip, #0
        cmp    ip, #255
        movgt  ip, #255
        orr    a2, a2, ip, lsl #8
        strh   a2, [v1, lr]!
        add    a3, a3, v3
        movs   a2, a3, asr #20
        movmi  a2, #0
        cmp    a2, #255
        movgt  a2, #255
        add    a4, a4, v4
        movs   a4, a4, asr #20
        movmi  a4, #0
        cmp    a4, #255
        movgt  a4, #255
        orr    a2, a2, a4, lsl #8
        ldmfd  sp!, {a3, a4}
        strh   a2, [v2, -lr]!

        add    a2, a3, v5
        movs   a2, a2, asr #20
        movmi  a2, #0
        cmp    a2, #255
        movgt  a2, #255
        add    ip, a4, v6
        movs   ip, ip, asr #20
        movmi  ip, #0
        cmp    ip, #255
        movgt  ip, #255
        orr    a2, a2, ip, lsl #8
        strh   a2, [v1, lr]!
        sub    a3, a3, v5
        movs   a2, a3, asr #20
        movmi  a2, #0
        cmp    a2, #255
        movgt  a2, #255
        sub    a4, a4, v6
        movs   a4, a4, asr #20
        movmi  a4, #0
        cmp    a4, #255
        movgt  a4, #255
        orr    a2, a2, a4, lsl #8
        ldmfd  sp!, {a3, a4}
        strh   a2, [v2, -lr]!

        add    a2, a3, v7
        movs   a2, a2, asr #20
        movmi  a2, #0
        cmp    a2, #255
        movgt  a2, #255
        add    ip, a4, fp
        movs   ip, ip, asr #20
        movmi  ip, #0
        cmp    ip, #255
        movgt  ip, #255
        orr    a2, a2, ip, lsl #8
        strh   a2, [v1, lr]
        sub    a3, a3, v7
        movs   a2, a3, asr #20
        movmi  a2, #0
        cmp    a2, #255
        movgt  a2, #255
        sub    a4, a4, fp
        movs   a4, a4, asr #20
        movmi  a4, #0
        cmp    a4, #255
        movgt  a4, #255
        orr    a2, a2, a4, lsl #8
        strh   a2, [v2, -lr]

        ldr    pc, [sp], #4
        .endfunc

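/*
 * idct_col_add_armv5te: as above, but the result is added to the pixels
 * already at the destination, i.e. roughly
 *
 *     v = dst_pixel + (col >> COL_SHIFT);
 *     dst_pixel = v < 0 ? 0 : v > 255 ? 255 : v;
 *
 * with the clamp again done by the movmi/cmp/movgt sequences.
 */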
        .align
        .func idct_col_add_armv5te
idct_col_add_armv5te:
        str    lr, [sp, #-4]!

        idct_col

        ldr    lr, [sp, #36]

        ldmfd  sp!, {a3, a4}
        ldrh   ip, [lr]
        add    a2, a3, v1
        mov    a2, a2, asr #20
        sub    a3, a3, v1
        and    v1, ip, #255
        adds   a2, a2, v1
        movmi  a2, #0
        cmp    a2, #255
        movgt  a2, #255
        add    v1, a4, v2
        mov    v1, v1, asr #20
        adds   v1, v1, ip, lsr #8
        movmi  v1, #0
        cmp    v1, #255
        movgt  v1, #255
        orr    a2, a2, v1, lsl #8
        ldr    v1, [sp, #32]
        sub    a4, a4, v2
        rsb    v2, v1, v1, lsl #3
        ldrh   ip, [v2, lr]!
        strh   a2, [lr]
        mov    a3, a3, asr #20
        and    a2, ip, #255
        adds   a3, a3, a2
        movmi  a3, #0
        cmp    a3, #255
        movgt  a3, #255
        mov    a4, a4, asr #20
        adds   a4, a4, ip, lsr #8
        movmi  a4, #0
        cmp    a4, #255
        movgt  a4, #255
        add    a2, lr, #2
        str    a2, [sp, #28]
        orr    a2, a3, a4, lsl #8
        strh   a2, [v2]

        ldmfd  sp!, {a3, a4}
        ldrh   ip, [lr, v1]!
        sub    a2, a3, v3
        mov    a2, a2, asr #20
        add    a3, a3, v3
        and    v3, ip, #255
        adds   a2, a2, v3
        movmi  a2, #0
        cmp    a2, #255
        movgt  a2, #255
        sub    v3, a4, v4
        mov    v3, v3, asr #20
        adds   v3, v3, ip, lsr #8
        movmi  v3, #0
        cmp    v3, #255
        movgt  v3, #255
        orr    a2, a2, v3, lsl #8
        add    a4, a4, v4
        ldrh   ip, [v2, -v1]!
        strh   a2, [lr]
        mov    a3, a3, asr #20
        and    a2, ip, #255
        adds   a3, a3, a2
        movmi  a3, #0
        cmp    a3, #255
        movgt  a3, #255
        mov    a4, a4, asr #20
        adds   a4, a4, ip, lsr #8
        movmi  a4, #0
        cmp    a4, #255
        movgt  a4, #255
        orr    a2, a3, a4, lsl #8
        strh   a2, [v2]

        ldmfd  sp!, {a3, a4}
        ldrh   ip, [lr, v1]!
        add    a2, a3, v5
        mov    a2, a2, asr #20
        sub    a3, a3, v5
        and    v3, ip, #255
        adds   a2, a2, v3
        movmi  a2, #0
        cmp    a2, #255
        movgt  a2, #255
        add    v3, a4, v6
        mov    v3, v3, asr #20
        adds   v3, v3, ip, lsr #8
        movmi  v3, #0
        cmp    v3, #255
        movgt  v3, #255
        orr    a2, a2, v3, lsl #8
        sub    a4, a4, v6
        ldrh   ip, [v2, -v1]!
        strh   a2, [lr]
        mov    a3, a3, asr #20
        and    a2, ip, #255
        adds   a3, a3, a2
        movmi  a3, #0
        cmp    a3, #255
        movgt  a3, #255
        mov    a4, a4, asr #20
        adds   a4, a4, ip, lsr #8
        movmi  a4, #0
        cmp    a4, #255
        movgt  a4, #255
        orr    a2, a3, a4, lsl #8
        strh   a2, [v2]

        ldmfd  sp!, {a3, a4}
        ldrh   ip, [lr, v1]!
        add    a2, a3, v7
        mov    a2, a2, asr #20
        sub    a3, a3, v7
        and    v3, ip, #255
        adds   a2, a2, v3
        movmi  a2, #0
        cmp    a2, #255
        movgt  a2, #255
        add    v3, a4, fp
        mov    v3, v3, asr #20
        adds   v3, v3, ip, lsr #8
        movmi  v3, #0
        cmp    v3, #255
        movgt  v3, #255
        orr    a2, a2, v3, lsl #8
        sub    a4, a4, fp
        ldrh   ip, [v2, -v1]!
        strh   a2, [lr]
        mov    a3, a3, asr #20
        and    a2, ip, #255
        adds   a3, a3, a2
        movmi  a3, #0
        cmp    a3, #255
        movgt  a3, #255
        mov    a4, a4, asr #20
        adds   a4, a4, ip, lsr #8
        movmi  a4, #0
        cmp    a4, #255
        movgt  a4, #255
        orr    a2, a3, a4, lsl #8
        strh   a2, [v2]

        ldr    pc, [sp], #4
        .endfunc

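/*
 * Entry points.  The C-side declarations are expected to be roughly
 * (DCTELEM being the usual int16_t block element type):
 *
 *     void simple_idct_armv5te(DCTELEM *data);
 *     void simple_idct_put_armv5te(uint8_t *dest, int line_size, DCTELEM *data);
 *     void simple_idct_add_armv5te(uint8_t *dest, int line_size, DCTELEM *data);
 *
 * Each runs 8 row passes over the 8x8 block, then 4 column passes,
 * with every column call handling two adjacent columns.
 */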
        .align
        .global simple_idct_armv5te
        .func simple_idct_armv5te
simple_idct_armv5te:
        stmfd  sp!, {v1, v2, v3, v4, v5, v6, v7, fp, lr}

        bl     idct_row_armv5te
        add    a1, a1, #16
        bl     idct_row_armv5te
        add    a1, a1, #16
        bl     idct_row_armv5te
        add    a1, a1, #16
        bl     idct_row_armv5te
        add    a1, a1, #16
        bl     idct_row_armv5te
        add    a1, a1, #16
        bl     idct_row_armv5te
        add    a1, a1, #16
        bl     idct_row_armv5te
        add    a1, a1, #16
        bl     idct_row_armv5te

        sub    a1, a1, #(16*7)

        bl     idct_col_armv5te
        add    a1, a1, #4
        bl     idct_col_armv5te
        add    a1, a1, #4
        bl     idct_col_armv5te
        add    a1, a1, #4
        bl     idct_col_armv5te

        ldmfd  sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc}
        .endfunc

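/*
 * The add/put variants keep dest (a1) and line_size (a2) on the stack so
 * the column helpers can reach them at fixed offsets, then repoint a1 at
 * the coefficient block passed in a3.  The helpers also advance the saved
 * dest by 2 so each call lands on the next pair of columns.
 */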
        .align
        .global simple_idct_add_armv5te
        .func simple_idct_add_armv5te
simple_idct_add_armv5te:
        stmfd  sp!, {a1, a2, v1, v2, v3, v4, v5, v6, v7, fp, lr}

        mov    a1, a3

        bl     idct_row_armv5te
        add    a1, a1, #16
        bl     idct_row_armv5te
        add    a1, a1, #16
        bl     idct_row_armv5te
        add    a1, a1, #16
        bl     idct_row_armv5te
        add    a1, a1, #16
        bl     idct_row_armv5te
        add    a1, a1, #16
        bl     idct_row_armv5te
        add    a1, a1, #16
        bl     idct_row_armv5te
        add    a1, a1, #16
        bl     idct_row_armv5te

        sub    a1, a1, #(16*7)

        bl     idct_col_add_armv5te
        add    a1, a1, #4
        bl     idct_col_add_armv5te
        add    a1, a1, #4
        bl     idct_col_add_armv5te
        add    a1, a1, #4
        bl     idct_col_add_armv5te

        add    sp, sp, #8
        ldmfd  sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc}
        .endfunc

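/*
 * simple_idct_put_armv5te: identical layout to the add variant above,
 * except that the column stage overwrites the destination pixels
 * instead of adding to them.
 */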
        .align
        .global simple_idct_put_armv5te
        .func simple_idct_put_armv5te
simple_idct_put_armv5te:
        stmfd  sp!, {a1, a2, v1, v2, v3, v4, v5, v6, v7, fp, lr}

        mov    a1, a3

        bl     idct_row_armv5te
        add    a1, a1, #16
        bl     idct_row_armv5te
        add    a1, a1, #16
        bl     idct_row_armv5te
        add    a1, a1, #16
        bl     idct_row_armv5te
        add    a1, a1, #16
        bl     idct_row_armv5te
        add    a1, a1, #16
        bl     idct_row_armv5te
        add    a1, a1, #16
        bl     idct_row_armv5te
        add    a1, a1, #16
        bl     idct_row_armv5te

        sub    a1, a1, #(16*7)

        bl     idct_col_put_armv5te
        add    a1, a1, #4
        bl     idct_col_put_armv5te
        add    a1, a1, #4
        bl     idct_col_put_armv5te
        add    a1, a1, #4
        bl     idct_col_put_armv5te

        add    sp, sp, #8
        ldmfd  sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc}
        .endfunc