Better ARM support for mplayer/ffmpeg, ported from atty fork
[libav.git] / libavcodec / armv4l / dsputil_arm.c
Content-type: text/html git.libav.org Git - libav.git/blame - libavcodec/armv4l/dsputil_arm.c


500 - Internal Server Error

Malformed UTF-8 character (fatal) at (eval 5) line 1, <$fd> line 580.
CommitLineData
92651f67
FB
1/*
2 * ARMv4L optimized DSP utils
3 * Copyright (c) 2001 Lionel Ulmer.
4 *
ff4ec49e
FB
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
92651f67 9 *
ff4ec49e 10 * This library is distributed in the hope that it will be useful,
92651f67 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
ff4ec49e
FB
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
92651f67 14 *
ff4ec49e
FB
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
92651f67
FB
18 */
19
20#include "../dsputil.h"
6ad1fa5a
BR
21#ifdef HAVE_IPP
22#include "ipp.h"
23#endif
24
/* iWMMXt-specific DSP initializer, defined outside this file. It is only
 * declared when the build defines HAVE_IWMMXT; its call site is not visible
 * in this part of the file. */
25#ifdef HAVE_IWMMXT
26extern void dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx);
27#endif
92651f67
FB
28
/* Inverse DCT routines implemented outside this file. The put/add wrappers
 * further down call them on a coefficient block and then read the result
 * back from that same buffer. */
29extern void j_rev_dct_ARM(DCTELEM *data);
bd7d1ea7 30extern void simple_idct_ARM(DCTELEM *data);
92651f67 31
b0368839
MN
32/* XXX: local hack: file-local copies of the put/add "pixels clamped" routines found in the DSPContext, captured by dsputil_init_armv4l() so that the IDCT put/add wrappers in this file can call them */
33static void (*ff_put_pixels_clamped)(const DCTELEM *block, uint8_t *pixels, int line_size);
34static void (*ff_add_pixels_clamped)(const DCTELEM *block, uint8_t *pixels, int line_size);
35
6ad1fa5a
BR
/* 8-pixel-wide primitives; only their prototypes appear here, the bodies
 * live outside this file. The 16-wide wrappers below call each one twice.
 * The _x2/_y2/_xy2 suffixes presumably denote horizontal / vertical /
 * diagonal half-pel interpolation -- confirm against the implementation. */
36void put_pixels8_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
37void put_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
38void put_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
39void put_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
40
/* "no_rnd" variants of the interpolating primitives (presumably the
 * non-rounding versions -- confirm against the implementation). */
41void put_no_rnd_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
42void put_no_rnd_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
43void put_no_rnd_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
44
/* 16-wide routine declared here; its installation in the init function is
 * currently commented out there and marked "NG". */
45void put_pixels16_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
/**
 * 16-pixel-wide counterpart of put_pixels8_x2_arm().
 *
 * The 8-wide routine is run on the left half and then on the right half
 * (8 bytes further along in both block and pixels), with line_size and h
 * passed through unchanged.
 */
static void put_pixels16_x2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    int half;

    for (half = 0; half < 2; half++)
        put_pixels8_x2_arm(block + 8 * half, pixels + 8 * half, line_size, h);
}
51
/**
 * 16-pixel-wide counterpart of put_pixels8_y2_arm().
 *
 * Processes the left 8 columns first, then the right 8 columns, by calling
 * the 8-wide routine twice with identical line_size and h.
 */
static void put_pixels16_y2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    uint8_t *const dst_right       = block + 8;
    const uint8_t *const src_right = pixels + 8;

    put_pixels8_y2_arm(block, pixels, line_size, h);
    put_pixels8_y2_arm(dst_right, src_right, line_size, h);
}
57
/**
 * 16-pixel-wide counterpart of put_pixels8_xy2_arm().
 *
 * Calls the 8-wide routine at byte offsets 0 and 8 of both block and
 * pixels, in that order, forwarding line_size and h untouched.
 */
static void put_pixels16_xy2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    int x;

    for (x = 0; x < 16; x += 8)
        put_pixels8_xy2_arm(block + x, pixels + x, line_size, h);
}
63
/**
 * 16-pixel-wide counterpart of put_no_rnd_pixels8_x2_arm().
 *
 * Two passes of the 8-wide routine: the first on the left half, the second
 * after advancing both pointers by 8 bytes to the right half.
 */
static void put_no_rnd_pixels16_x2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    int passes = 2;

    while (passes--) {
        put_no_rnd_pixels8_x2_arm(block, pixels, line_size, h);
        block  += 8;
        pixels += 8;
    }
}
69
/**
 * 16-pixel-wide counterpart of put_no_rnd_pixels8_y2_arm().
 *
 * Handles the left 8 columns and then the right 8 columns with one call of
 * the 8-wide routine each; line_size and h are the same for both calls.
 */
static void put_no_rnd_pixels16_y2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    uint8_t *const right_block        = block + 8;
    const uint8_t *const right_pixels = pixels + 8;

    put_no_rnd_pixels8_y2_arm(block, pixels, line_size, h);
    put_no_rnd_pixels8_y2_arm(right_block, right_pixels, line_size, h);
}
75
/**
 * 16-pixel-wide counterpart of put_no_rnd_pixels8_xy2_arm().
 *
 * Runs the 8-wide routine once per 8-byte half, left half first.
 */
static void put_no_rnd_pixels16_xy2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    int offset = 0;

    do {
        put_no_rnd_pixels8_xy2_arm(block + offset, pixels + offset, line_size, h);
        offset += 8;
    } while (offset < 16);
}
81
82static void add_pixels_clamped_ARM(short *block, unsigned char *dest, int line_size)
83{
84 asm volatile (
85 "mov r10, #8 \n\t"
86
87 "1: \n\t"
88
89 /* load dest */
90 "ldr r4, [%1] \n\t"
91 /* block[0] and block[1]*/
92 "ldrsh r5, [%0] \n\t"
93 "ldrsh r7, [%0, #2] \n\t"
94 "and r6, r4, #0xFF \n\t"
95 "and r8, r4, #0xFF00 \n\t"
96 "add r6, r5, r6 \n\t"
97 "add r8, r7, r8, lsr #8 \n\t"
98 "mvn r5, r5 \n\t"
99 "mvn r7, r7 \n\t"
100 "tst r6, #0x100 \n\t"
101 "movne r6, r5, lsr #24 \n\t"
102 "tst r8, #0x100 \n\t"
103 "movne r8, r7, lsr #24 \n\t"
104 "mov r9, r6 \n\t"
105 "ldrsh r5, [%0, #4] \n\t" /* moved form [A] */
106 "orr r9, r9, r8, lsl #8 \n\t"
107 /* block[2] and block[3] */
108 /* [A] */
109 "ldrsh r7, [%0, #6] \n\t"
110 "and r6, r4, #0xFF0000 \n\t"
111 "and r8, r4, #0xFF000000 \n\t"
112 "add r6, r5, r6, lsr #16 \n\t"
113 "add r8, r7, r8, lsr #24 \n\t"
114 "mvn r5, r5 \n\t"
115 "mvn r7, r7 \n\t"
116 "tst r6, #0x100 \n\t"
117 "movne r6, r5, lsr #24 \n\t"
118 "tst r8, #0x100 \n\t"
119 "movne r8, r7, lsr #24 \n\t"
120 "orr r9, r9, r6, lsl #16 \n\t"
121 "ldr r4, [%1, #4] \n\t" /* moved form [B] */
122 "orr r9, r9, r8, lsl #24 \n\t"
123 /* store dest */
124 "ldrsh r5, [%0, #8] \n\t" /* moved form [C] */
125 "str r9, [%1] \n\t"
126
127 /* load dest */
128 /* [B] */
129 /* block[4] and block[5] */
130 /* [C] */
131 "ldrsh r7, [%0, #10] \n\t"
132 "and r6, r4, #0xFF \n\t"
133 "and r8, r4, #0xFF00 \n\t"
134 "add r6, r5, r6 \n\t"
135 "add r8, r7, r8, lsr #8 \n\t"
136 "mvn r5, r5 \n\t"
137 "mvn r7, r7 \n\t"
138 "tst r6, #0x100 \n\t"
139 "movne r6, r5, lsr #24 \n\t"
140 "tst r8, #0x100 \n\t"
141 "movne r8, r7, lsr #24 \n\t"
142 "mov r9, r6 \n\t"
143 "ldrsh r5, [%0, #12] \n\t" /* moved from [D] */
144 "orr r9, r9, r8, lsl #8 \n\t"
145 /* block[6] and block[7] */
146 /* [D] */
147 "ldrsh r7, [%0, #14] \n\t"
148 "and r6, r4, #0xFF0000 \n\t"
149 "and r8, r4, #0xFF000000 \n\t"
150 "add r6, r5, r6, lsr #16 \n\t"
151 "add r8, r7, r8, lsr #24 \n\t"
152 "mvn r5, r5 \n\t"
153 "mvn r7, r7 \n\t"
154 "tst r6, #0x100 \n\t"
155 "movne r6, r5, lsr #24 \n\t"
156 "tst r8, #0x100 \n\t"
157 "movne r8, r7, lsr #24 \n\t"
158 "orr r9, r9, r6, lsl #16 \n\t"
159 "add %0, %0, #16 \n\t" /* moved from [E] */
160 "orr r9, r9, r8, lsl #24 \n\t"
161 "subs r10, r10, #1 \n\t" /* moved from [F] */
162 /* store dest */
163 "str r9, [%1, #4] \n\t"
164
165 /* [E] */
166 /* [F] */
167 "add %1, %1, %2 \n\t"
168 "bne 1b \n\t"
169 :
170 : "r"(block),
171 "r"(dest),
172 "r"(line_size)
173 : "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc", "memory" );
174}
175
b0368839
MN
176/* XXX: these functions should be removed as soon as all IDCTs are
177 converted */
bd7d1ea7 178static void j_rev_dct_ARM_put(uint8_t *dest, int line_size, DCTELEM *block)
b0368839
MN
179{
180 j_rev_dct_ARM (block);
181 ff_put_pixels_clamped(block, dest, line_size);
182}
bd7d1ea7 183static void j_rev_dct_ARM_add(uint8_t *dest, int line_size, DCTELEM *block)
b0368839
MN
184{
185 j_rev_dct_ARM (block);
186 ff_add_pixels_clamped(block, dest, line_size);
187}
bd7d1ea7
AB
188static void simple_idct_ARM_put(uint8_t *dest, int line_size, DCTELEM *block)
189{
190 simple_idct_ARM (block);
191 ff_put_pixels_clamped(block, dest, line_size);
192}
193static void simple_idct_ARM_add(uint8_t *dest, int line_size, DCTELEM *block)
194{
195 simple_idct_ARM (block);
196 ff_add_pixels_clamped(block, dest, line_size);
197}
6ad1fa5a
BR
198static void simple_idct_ipp(DCTELEM *block)
199{
200#ifdef HAVE_IPP
201 ippiDCT8x8Inv_Video_16s_C1I(block);
202#endif
203}
204static void simple_idct_ipp_put(uint8_t *dest, int line_size, DCTELEM *block)
205{
206#ifdef HAVE_IPP
207 ippiDCT8x8Inv_Video_16s8u_C1R(block, dest, line_size);
208#endif
209}
210
/* iWMMXt implementation of the "add pixels clamped" step, defined outside
 * this file; only declared when HAVE_IWMMXT is set. simple_idct_ipp_add()
 * uses it in place of add_pixels_clamped_ARM() in that configuration. */
211#ifdef HAVE_IWMMXT
212void add_pixels_clamped_iwmmxt(const DCTELEM *block, uint8_t *pixels, int line_size);
213#endif
214
215static void simple_idct_ipp_add(uint8_t *dest, int line_size, DCTELEM *block)
216{
217#ifdef HAVE_IPP
218 ippiDCT8x8Inv_Video_16s_C1I(block);
219#ifdef HAVE_IWMMXT
220 add_pixels_clamped_iwmmxt(block, dest, line_size);
221#else
222 add_pixels_clamped_ARM(block, dest, line_size);
223#endif
224#endif
225}
b0368839
MN
226
227void dsputil_init_armv4l(DSPContext* c, AVCodecContext *avctx)
92651f67 228{
b0368839
MN
229 const int idct_algo= avctx->idct_algo;
230
231 ff_put_pixels_clamped = c->put_pixels_clamped;
232 ff_add_pixels_clamped = c->add_pixels_clamped;
233
6ad1fa5a
BR
234#ifdef HAVE_IPP
235 if(idct_algo==FF_IDCT_ARM){
236#else
b0368839 237 if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_ARM){
6ad1fa5a 238#endif
bd7d1ea7
AB
239 c->idct_put= j_rev_dct_ARM_put;
240 c->idct_add= j_rev_dct_ARM_add;
241 c->idct = j_rev_dct_ARM;
b0368839 242 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;/* FF_NO_IDCT_PERM */
bd7d1ea7
AB
243 } else if (idct_algo==FF_IDCT_SIMPLEARM){
244 c->idct_put= simple_idct_ARM_put;
245 c->idct_add= simple_idct_ARM_add;
246 c->idct = simple_idct_ARM;
247 c->idct_permutation_type= FF_NO_IDCT_PERM;
6ad1fa5a
BR
248#ifdef HAVE_IPP
249 } else if (idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_IPP){
250#else
251 } else if (idct_algo==FF_IDCT_IPP){
252#endif
253 c->idct_put= simple_idct_ipp_put;
254 c->idct_add= simple_idct_ipp_add;
255 c->idct = simple_idct_ipp;
256 c->idct_permutation_type= FF_NO_IDCT_PERM;
b0368839 257 }
6ad1fa5a
BR
258
259/* c->put_pixels_tab[0][0] = put_pixels16_arm; */ // NG!
260 c->put_pixels_tab[0][1] = put_pixels16_x2_arm; //OK!
261 c->put_pixels_tab[0][2] = put_pixels16_y2_arm; //OK!
262/* c->put_pixels_tab[0][3] = put_pixels16_xy2_arm; /\* NG *\/ */
263