/*
 * ARMv4L optimized DSP utils
 * Copyright (c) 2001 Lionel Ulmer.
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
22 #include "../dsputil.h"
27 extern void dsputil_init_iwmmxt(DSPContext
* c
, AVCodecContext
*avctx
);
29 extern void j_rev_dct_ARM(DCTELEM
*data
);
30 extern void simple_idct_ARM(DCTELEM
*data
);
32 extern void simple_idct_armv5te(DCTELEM
*data
);
/*
 * ARMv5TE IDCT entry points that take a destination pointer and a line size;
 * judging by their names they store ("put") or accumulate ("add") the result.
 * NOTE(review): both parameter lists are cut off after "line_size," in this
 * listing. They are assigned to c->idct_put / c->idct_add in
 * dsputil_init_armv4l(), so the trailing parameter is presumably a DCTELEM
 * pointer -- confirm against the complete source.
 */
33 extern void simple_idct_put_armv5te(uint8_t *dest
, int line_size
,
35 extern void simple_idct_add_armv5te(uint8_t *dest
, int line_size
,
39 static void (*ff_put_pixels_clamped
)(const DCTELEM
*block
, uint8_t *pixels
, int line_size
);
40 static void (*ff_add_pixels_clamped
)(const DCTELEM
*block
, uint8_t *pixels
, int line_size
);
/*
 * Pixel copy / interpolation routines implemented outside this file.
 * All share one signature: destination block, source pixels, line size and
 * row count h.
 */

/* 8-pixel-wide variants. */
void put_pixels8_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
void put_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
void put_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
void put_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);

/* 8-pixel-wide "no_rnd" variants. */
void put_no_rnd_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
void put_no_rnd_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
void put_no_rnd_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);

/* 16-pixel-wide plain copy. */
void put_pixels16_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
53 CALL_2X_PIXELS(put_pixels16_x2_arm
, put_pixels8_x2_arm
, 8)
54 CALL_2X_PIXELS(put_pixels16_y2_arm
, put_pixels8_y2_arm
, 8)
55 CALL_2X_PIXELS(put_pixels16_xy2_arm
, put_pixels8_xy2_arm
, 8)
56 CALL_2X_PIXELS(put_no_rnd_pixels16_x2_arm
, put_no_rnd_pixels8_x2_arm
, 8)
57 CALL_2X_PIXELS(put_no_rnd_pixels16_y2_arm
, put_no_rnd_pixels8_y2_arm
, 8)
58 CALL_2X_PIXELS(put_no_rnd_pixels16_xy2_arm
, put_no_rnd_pixels8_xy2_arm
, 8)
/*
 * add_pixels_clamped_ARM: inline-assembly routine taking a pointer to 16-bit
 * values (block), a destination byte pointer (dest) and a line size.
 *
 * What the visible instructions show, per pass:
 *   - signed halfwords are loaded from %0 (ldrsh at offsets 2 through 14);
 *   - individual bytes of the word held in r4 are masked out and added to
 *     those halfwords;
 *   - bit 8 (0x100) of a sum is tested and, when set, the sum is replaced by
 *     the top byte of r5 / r7 (movne ..., lsr #24);
 *   - four results are packed into r9 with shifts of 8, 16 and 24;
 *   - the second word is read from and written back to [%1, #4];
 *   - %0 advances by 16 bytes, %1 advances by %2, and r10 is decremented.
 *
 * NOTE(review): presumably %0 = block, %1 = dest, %2 = line_size, but the
 * operand lists are not visible in this listing. The embedded line numbers
 * also skip entries (e.g. 70, 74, 76-78, 140-149), so some instructions, the
 * loop label/branch and the first-word store are missing from view -- confirm
 * against the complete source before relying on this description.
 */
60 static void add_pixels_clamped_ARM(short *block
, unsigned char *dest
, int line_size
)
69 /* block[0] and block[1] */
71 "ldrsh r7, [%0, #2] \n\t"
72 "and r6, r4, #0xFF \n\t"
73 "and r8, r4, #0xFF00 \n\t"
75 "add r8, r7, r8, lsr #8 \n\t"
79 "movne r6, r5, lsr #24 \n\t"
81 "movne r8, r7, lsr #24 \n\t"
83 "ldrsh r5, [%0, #4] \n\t" /* moved from [A] */
84 "orr r9, r9, r8, lsl #8 \n\t"
85 /* block[2] and block[3] */
87 "ldrsh r7, [%0, #6] \n\t"
88 "and r6, r4, #0xFF0000 \n\t"
89 "and r8, r4, #0xFF000000 \n\t"
90 "add r6, r5, r6, lsr #16 \n\t"
91 "add r8, r7, r8, lsr #24 \n\t"
95 "movne r6, r5, lsr #24 \n\t"
97 "movne r8, r7, lsr #24 \n\t"
98 "orr r9, r9, r6, lsl #16 \n\t"
99 "ldr r4, [%1, #4] \n\t" /* moved from [B] */
100 "orr r9, r9, r8, lsl #24 \n\t"
102 "ldrsh r5, [%0, #8] \n\t" /* moved from [C] */
107 /* block[4] and block[5] */
109 "ldrsh r7, [%0, #10] \n\t"
110 "and r6, r4, #0xFF \n\t"
111 "and r8, r4, #0xFF00 \n\t"
112 "add r6, r5, r6 \n\t"
113 "add r8, r7, r8, lsr #8 \n\t"
116 "tst r6, #0x100 \n\t"
117 "movne r6, r5, lsr #24 \n\t"
118 "tst r8, #0x100 \n\t"
119 "movne r8, r7, lsr #24 \n\t"
121 "ldrsh r5, [%0, #12] \n\t" /* moved from [D] */
122 "orr r9, r9, r8, lsl #8 \n\t"
123 /* block[6] and block[7] */
125 "ldrsh r7, [%0, #14] \n\t"
126 "and r6, r4, #0xFF0000 \n\t"
127 "and r8, r4, #0xFF000000 \n\t"
128 "add r6, r5, r6, lsr #16 \n\t"
129 "add r8, r7, r8, lsr #24 \n\t"
132 "tst r6, #0x100 \n\t"
133 "movne r6, r5, lsr #24 \n\t"
134 "tst r8, #0x100 \n\t"
135 "movne r8, r7, lsr #24 \n\t"
136 "orr r9, r9, r6, lsl #16 \n\t"
137 "add %0, %0, #16 \n\t" /* moved from [E] */
138 "orr r9, r9, r8, lsl #24 \n\t"
139 "subs r10, r10, #1 \n\t" /* moved from [F] */
141 "str r9, [%1, #4] \n\t"
145 "add %1, %1, %2 \n\t"
/* Clobber list: scratch registers r4-r10, the condition flags, and memory. */
150 : "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc", "memory" );
/* XXX: these functions should be removed as soon as all IDCTs are converted. */
155 static void j_rev_dct_ARM_put(uint8_t *dest
, int line_size
, DCTELEM
*block
)
157 j_rev_dct_ARM (block
);
158 ff_put_pixels_clamped(block
, dest
, line_size
);
160 static void j_rev_dct_ARM_add(uint8_t *dest
, int line_size
, DCTELEM
*block
)
162 j_rev_dct_ARM (block
);
163 ff_add_pixels_clamped(block
, dest
, line_size
);
165 static void simple_idct_ARM_put(uint8_t *dest
, int line_size
, DCTELEM
*block
)
167 simple_idct_ARM (block
);
168 ff_put_pixels_clamped(block
, dest
, line_size
);
170 static void simple_idct_ARM_add(uint8_t *dest
, int line_size
, DCTELEM
*block
)
172 simple_idct_ARM (block
);
173 ff_add_pixels_clamped(block
, dest
, line_size
);
177 static void simple_idct_ipp(DCTELEM
*block
)
179 ippiDCT8x8Inv_Video_16s_C1I(block
);
181 static void simple_idct_ipp_put(uint8_t *dest
, int line_size
, DCTELEM
*block
)
183 ippiDCT8x8Inv_Video_16s8u_C1R(block
, dest
, line_size
);
186 void add_pixels_clamped_iwmmxt(const DCTELEM
*block
, uint8_t *pixels
, int line_size
);
/*
 * idct_add hook for FF_IDCT_IPP: runs ippiDCT8x8Inv_Video_16s_C1I() on block,
 * then hands block, dest and line_size to an add_pixels_clamped routine.
 *
 * NOTE(review): calls to both add_pixels_clamped_iwmmxt() and
 * add_pixels_clamped_ARM() are visible, and the listing's embedded line
 * numbers skip the entries around them (191, 193, 195). They are presumably
 * the two branches of a preprocessor conditional rather than consecutive
 * calls -- confirm against the complete source.
 */
188 static void simple_idct_ipp_add(uint8_t *dest
, int line_size
, DCTELEM
*block
)
190 ippiDCT8x8Inv_Video_16s_C1I(block
);
192 add_pixels_clamped_iwmmxt(block
, dest
, line_size
);
194 add_pixels_clamped_ARM(block
, dest
, line_size
);
/*
 * dsputil_init_armv4l: installs the ARM routines of this file into a
 * DSPContext.
 *
 *   c     - context whose idct*, put_pixels_tab and put_no_rnd_pixels_tab
 *           members are written, and whose put/add_pixels_clamped members
 *           are read.
 *   avctx - codec context; avctx->idct_algo is read, and avctx is passed on
 *           to dsputil_init_iwmmxt().
 *
 * NOTE(review): the listing's embedded line numbers skip entries inside this
 * function (e.g. 211, 213-215, 226, 232-233, 239-241, 258-259, 261), so the
 * #else / #endif lines, any further preprocessor guards and the closing
 * braces are not visible here -- confirm against the complete source.
 */
199 void dsputil_init_armv4l(DSPContext
* c
, AVCodecContext
*avctx
)
201 int idct_algo
= avctx
->idct_algo
;
/* Remember the context's current clamped-pixel routines; the *_put / *_add
 * IDCT wrappers in this file call through these file-local pointers. */
203 ff_put_pixels_clamped
= c
->put_pixels_clamped
;
204 ff_add_pixels_clamped
= c
->add_pixels_clamped
;
/* FF_IDCT_AUTO: choose an algorithm from the build-time symbols --
 * FF_IDCT_IPP when HAVE_IPP is defined, FF_IDCT_SIMPLEARMV5TE when
 * HAVE_ARMV5TE is defined; the FF_IDCT_ARM assignment is presumably the
 * fallback branch (its #else is not visible in this listing). */
206 if(idct_algo
== FF_IDCT_AUTO
){
207 #if defined(HAVE_IPP)
208 idct_algo
= FF_IDCT_IPP
;
209 #elif defined(HAVE_ARMV5TE)
210 idct_algo
= FF_IDCT_SIMPLEARMV5TE
;
212 idct_algo
= FF_IDCT_ARM
;
/* Install the idct_put / idct_add / idct triple and the coefficient
 * permutation type that belong to the selected algorithm. */
216 if(idct_algo
==FF_IDCT_ARM
){
217 c
->idct_put
= j_rev_dct_ARM_put
;
218 c
->idct_add
= j_rev_dct_ARM_add
;
219 c
->idct
= j_rev_dct_ARM
;
220 c
->idct_permutation_type
= FF_LIBMPEG2_IDCT_PERM
;/* FF_NO_IDCT_PERM */
221 } else if (idct_algo
==FF_IDCT_SIMPLEARM
){
222 c
->idct_put
= simple_idct_ARM_put
;
223 c
->idct_add
= simple_idct_ARM_add
;
224 c
->idct
= simple_idct_ARM
;
225 c
->idct_permutation_type
= FF_NO_IDCT_PERM
;
227 } else if (idct_algo
==FF_IDCT_SIMPLEARMV5TE
){
228 c
->idct_put
= simple_idct_put_armv5te
;
229 c
->idct_add
= simple_idct_add_armv5te
;
230 c
->idct
= simple_idct_armv5te
;
231 c
->idct_permutation_type
= FF_NO_IDCT_PERM
;
234 } else if (idct_algo
==FF_IDCT_IPP
){
235 c
->idct_put
= simple_idct_ipp_put
;
236 c
->idct_add
= simple_idct_ipp_add
;
237 c
->idct
= simple_idct_ipp
;
238 c
->idct_permutation_type
= FF_NO_IDCT_PERM
;
/* Pixel routine tables: only the entries the original author marked "OK" are
 * installed; those marked "NG" stay commented out. Row [0] receives the
 * 16-wide routines and row [1] the 8-wide ones. */
242 /* c->put_pixels_tab[0][0] = put_pixels16_arm; */ // NG!
243 c
->put_pixels_tab
[0][1] = put_pixels16_x2_arm
; //OK!
244 c
->put_pixels_tab
[0][2] = put_pixels16_y2_arm
; //OK!
245 /* c->put_pixels_tab[0][3] = put_pixels16_xy2_arm; /\* NG *\/ */
246 /* c->put_no_rnd_pixels_tab[0][0] = put_pixels16_arm; */
247 c
->put_no_rnd_pixels_tab
[0][1] = put_no_rnd_pixels16_x2_arm
; // OK
248 c
->put_no_rnd_pixels_tab
[0][2] = put_no_rnd_pixels16_y2_arm
; //OK
249 /* c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_arm; //NG */
250 c
->put_pixels_tab
[1][0] = put_pixels8_arm
; //OK
251 c
->put_pixels_tab
[1][1] = put_pixels8_x2_arm
; //OK
252 /* c->put_pixels_tab[1][2] = put_pixels8_y2_arm; //NG */
253 /* c->put_pixels_tab[1][3] = put_pixels8_xy2_arm; //NG */
/* The no_rnd table's [1][0] slot reuses the plain put_pixels8_arm routine. */
254 c
->put_no_rnd_pixels_tab
[1][0] = put_pixels8_arm
;//OK
255 c
->put_no_rnd_pixels_tab
[1][1] = put_no_rnd_pixels8_x2_arm
; //OK
256 c
->put_no_rnd_pixels_tab
[1][2] = put_no_rnd_pixels8_y2_arm
; //OK
257 /* c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_arm;//NG */
/* Finally hand the context to the iwMMXt initialiser. */
260 dsputil_init_iwmmxt(c
, avctx
);