9736d569f09457c8d3d3a28123bb3da11339771d
[libav.git] / libavcodec / bfin / dsputil_bfin.c
1 /*
2 * BlackFin DSPUTILS
3 *
4 * Copyright (C) 2007 Marc Hoffman <marc.hoffman@analog.com>
5 * Copyright (c) 2006 Michael Benjamin <michael.benjamin@analog.com>
6 *
7 * This file is part of FFmpeg.
8 *
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24 #include <unistd.h>
25 #include <bits/bfin_sram.h>
26 #include "avcodec.h"
27 #include "dsputil.h"
28
/* Build-time switch: when defined, L1CODE tags declarations with l1_text. */
#define USE_L1CODE

#if defined(USE_L1CODE)
#  define L1CODE __attribute__ ((l1_text))
#else
#  define L1CODE
#endif

/*
 * NOTE(review): not referenced in the visible part of this file; it has
 * external linkage, so it may be used from another translation unit --
 * confirm before removing.
 */
int off;
37
38
39 extern void ff_bfin_idct (DCTELEM *block) L1CODE;
40 extern void ff_bfin_fdct (DCTELEM *block) L1CODE;
41 extern void ff_bfin_add_pixels_clamped (DCTELEM *block, uint8_t *dest, int line_size) L1CODE;
42 extern void ff_bfin_put_pixels_clamped (DCTELEM *block, uint8_t *dest, int line_size) L1CODE;
43 extern void ff_bfin_diff_pixels (DCTELEM *block, uint8_t *s1, uint8_t *s2, int stride) L1CODE;
44 extern void ff_bfin_get_pixels (DCTELEM *restrict block, const uint8_t *pixels, int line_size) L1CODE;
45 extern int ff_bfin_pix_norm1 (uint8_t * pix, int line_size) L1CODE;
46 extern int ff_bfin_z_sad8x8 (uint8_t *blk1, uint8_t *blk2, int dsz, int line_size, int h) L1CODE;
47 extern int ff_bfin_z_sad16x16 (uint8_t *blk1, uint8_t *blk2, int dsz, int line_size, int h) L1CODE;
48
49 extern void ff_bfin_z_put_pixels16_xy2 (uint8_t *block, const uint8_t *s0, int dest_size, int line_size, int h) L1CODE;
50 extern void ff_bfin_z_put_pixels8_xy2 (uint8_t *block, const uint8_t *s0, int dest_size, int line_size, int h) L1CODE;
51 extern void ff_bfin_put_pixels16_xy2_nornd (uint8_t *block, const uint8_t *s0, int line_size, int h) L1CODE;
52 extern void ff_bfin_put_pixels8_xy2_nornd (uint8_t *block, const uint8_t *s0, int line_size, int h) L1CODE;
53
54
55 extern int ff_bfin_pix_sum (uint8_t *p, int stride) L1CODE;
56
57 extern void ff_bfin_put_pixels8uc (uint8_t *block, const uint8_t *s0, const uint8_t *s1, int dest_size, int line_size, int h) L1CODE;
58 extern void ff_bfin_put_pixels16uc (uint8_t *block, const uint8_t *s0, const uint8_t *s1, int dest_size, int line_size, int h) L1CODE;
59 extern void ff_bfin_put_pixels8uc_nornd (uint8_t *block, const uint8_t *s0, const uint8_t *s1, int line_size, int h) L1CODE;
60 extern void ff_bfin_put_pixels16uc_nornd (uint8_t *block, const uint8_t *s0, const uint8_t *s1, int line_size, int h) L1CODE;
61
62 extern int ff_bfin_sse4 (void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) L1CODE;
63 extern int ff_bfin_sse8 (void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) L1CODE;
64 extern int ff_bfin_sse16 (void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) L1CODE;
65
66
67 static void bfin_idct_add (uint8_t *dest, int line_size, DCTELEM *block)
68 {
69 ff_bfin_idct (block);
70 ff_bfin_add_pixels_clamped (block, dest, line_size);
71 }
72
73 static void bfin_idct_put (uint8_t *dest, int line_size, DCTELEM *block)
74 {
75 ff_bfin_idct (block);
76 ff_bfin_put_pixels_clamped (block, dest, line_size);
77 }
78
79
80 static void bfin_clear_blocks (DCTELEM *blocks)
81 {
82 // This is just a simple memset.
83 //
84 asm("P0=192; "
85 "I0=%0; "
86 "R0=0; "
87 "LSETUP(clear_blocks_blkfn_lab,clear_blocks_blkfn_lab)LC0=P0;"
88 "clear_blocks_blkfn_lab:"
89 "[I0++]=R0;"
90 ::"a" (blocks):"P0","I0","R0");
91 }
92
93
94
/**
 * Full-pel 8-wide put: forwards to ff_bfin_put_pixels8uc() with the same
 * source given for both inputs and line_size used as both strides.
 *
 * @param block     destination
 * @param pixels    source
 * @param line_size stride used for destination and source
 * @param h         number of rows
 */
static void bfin_put_pixels8(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    const uint8_t *src = pixels;

    ff_bfin_put_pixels8uc(block, src, src, line_size, line_size, h);
}
99
/**
 * Horizontal half-pel 8-wide put: forwards to ff_bfin_put_pixels8uc() with
 * the source and the source shifted by one byte as the two inputs.
 *
 * @param block     destination
 * @param pixels    source
 * @param line_size stride used for destination and source
 * @param h         number of rows
 */
static void bfin_put_pixels8_x2(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    const uint8_t *right = pixels + 1;

    ff_bfin_put_pixels8uc(block, pixels, right, line_size, line_size, h);
}
104
/**
 * Vertical half-pel 8-wide put: forwards to ff_bfin_put_pixels8uc() with
 * the source and the source advanced by one line as the two inputs.
 *
 * @param block     destination
 * @param pixels    source
 * @param line_size stride used for destination and source
 * @param h         number of rows
 */
static void bfin_put_pixels8_y2(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    const uint8_t *below = pixels + line_size;

    ff_bfin_put_pixels8uc(block, pixels, below, line_size, line_size, h);
}
109
/**
 * 8-wide xy2 put: forwards to ff_bfin_z_put_pixels8_xy2(), using line_size
 * for both the destination stride and the source stride.
 *
 * @param block     destination
 * @param s0        source
 * @param line_size stride used for destination and source
 * @param h         number of rows
 */
static void bfin_put_pixels8_xy2(uint8_t *block, const uint8_t *s0, int line_size, int h)
{
    const int dest_stride = line_size;

    ff_bfin_z_put_pixels8_xy2(block, s0, dest_stride, line_size, h);
}
114
/**
 * Full-pel 16-wide put: forwards to ff_bfin_put_pixels16uc() with the same
 * source given for both inputs and line_size used as both strides.
 *
 * @param block     destination
 * @param pixels    source
 * @param line_size stride used for destination and source
 * @param h         number of rows
 */
static void bfin_put_pixels16(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    const uint8_t *src = pixels;

    ff_bfin_put_pixels16uc(block, src, src, line_size, line_size, h);
}
119
/**
 * Horizontal half-pel 16-wide put: forwards to ff_bfin_put_pixels16uc() with
 * the source and the source shifted by one byte as the two inputs.
 *
 * @param block     destination
 * @param pixels    source
 * @param line_size stride used for destination and source
 * @param h         number of rows
 */
static void bfin_put_pixels16_x2(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    const uint8_t *right = pixels + 1;

    ff_bfin_put_pixels16uc(block, pixels, right, line_size, line_size, h);
}
124
/**
 * Vertical half-pel 16-wide put: forwards to ff_bfin_put_pixels16uc() with
 * the source and the source advanced by one line as the two inputs.
 *
 * @param block     destination
 * @param pixels    source
 * @param line_size stride used for destination and source
 * @param h         number of rows
 */
static void bfin_put_pixels16_y2(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    const uint8_t *below = pixels + line_size;

    ff_bfin_put_pixels16uc(block, pixels, below, line_size, line_size, h);
}
129
/**
 * 16-wide xy2 put: forwards to ff_bfin_z_put_pixels16_xy2(), using line_size
 * for both the destination stride and the source stride.
 *
 * @param block     destination
 * @param s0        source
 * @param line_size stride used for destination and source
 * @param h         number of rows
 */
static void bfin_put_pixels16_xy2(uint8_t *block, const uint8_t *s0, int line_size, int h)
{
    const int dest_stride = line_size;

    ff_bfin_z_put_pixels16_xy2(block, s0, dest_stride, line_size, h);
}
134
/**
 * Full-pel 8-wide put, no-rounding flavour: forwards to
 * ff_bfin_put_pixels8uc_nornd() with the same source given for both inputs.
 *
 * NOTE(review): unlike its siblings this function has external linkage;
 * kept that way in case another file refers to it.
 *
 * @param block     destination
 * @param pixels    source
 * @param line_size stride
 * @param h         number of rows
 */
void bfin_put_pixels8_nornd(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    const uint8_t *src = pixels;

    ff_bfin_put_pixels8uc_nornd(block, src, src, line_size, h);
}
139
/**
 * Horizontal half-pel 8-wide put, no-rounding flavour: forwards to
 * ff_bfin_put_pixels8uc_nornd() with the source and the source shifted by
 * one byte as the two inputs.
 *
 * @param block     destination
 * @param pixels    source
 * @param line_size stride
 * @param h         number of rows
 */
static void bfin_put_pixels8_x2_nornd(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    const uint8_t *right = pixels + 1;

    ff_bfin_put_pixels8uc_nornd(block, pixels, right, line_size, h);
}
144
/**
 * Vertical half-pel 8-wide put, no-rounding flavour: forwards to
 * ff_bfin_put_pixels8uc_nornd() with the source and the source advanced by
 * one line as the two inputs.
 *
 * @param block     destination
 * @param pixels    source
 * @param line_size stride
 * @param h         number of rows
 */
static void bfin_put_pixels8_y2_nornd(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    const uint8_t *below = pixels + line_size;

    ff_bfin_put_pixels8uc_nornd(block, pixels, below, line_size, h);
}
149
150
/**
 * Full-pel 16-wide put, no-rounding flavour: forwards to
 * ff_bfin_put_pixels16uc_nornd() with the same source given for both inputs.
 *
 * NOTE(review): unlike its siblings this function has external linkage;
 * kept that way in case another file refers to it.
 *
 * @param block     destination
 * @param pixels    source
 * @param line_size stride
 * @param h         number of rows
 */
void bfin_put_pixels16_nornd(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    const uint8_t *src = pixels;

    ff_bfin_put_pixels16uc_nornd(block, src, src, line_size, h);
}
155
/**
 * Horizontal half-pel 16-wide put, no-rounding flavour: forwards to
 * ff_bfin_put_pixels16uc_nornd() with the source and the source shifted by
 * one byte as the two inputs.
 *
 * @param block     destination
 * @param pixels    source
 * @param line_size stride
 * @param h         number of rows
 */
static void bfin_put_pixels16_x2_nornd(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    const uint8_t *right = pixels + 1;

    ff_bfin_put_pixels16uc_nornd(block, pixels, right, line_size, h);
}
160
/**
 * Vertical half-pel 16-wide put, no-rounding flavour: forwards to
 * ff_bfin_put_pixels16uc_nornd() with the source and the source advanced by
 * one line as the two inputs.
 *
 * @param block     destination
 * @param pixels    source
 * @param line_size stride
 * @param h         number of rows
 */
static void bfin_put_pixels16_y2_nornd(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    const uint8_t *below = pixels + line_size;

    ff_bfin_put_pixels16uc_nornd(block, pixels, below, line_size, h);
}
165
/**
 * 16-wide block comparison at full-pel position: returns the result of
 * ff_bfin_z_sad16x16() with line_size used as the stride of both blocks.
 *
 * @param c         unused
 * @param blk1      first block
 * @param blk2      second block
 * @param line_size stride of both blocks
 * @param h         number of rows
 * @return value returned by ff_bfin_z_sad16x16()
 */
static int bfin_pix_abs16(void *c, uint8_t *blk1, uint8_t *blk2, int line_size, int h)
{
    const int stride = line_size;

    return ff_bfin_z_sad16x16(blk1, blk2, stride, stride, h);
}
170
/*
 * Scratch block shared by the bfin_pix_abs*_x2/_y2/_xy2 helpers: they write
 * an intermediate block here with a stride of 16 (or 8) and then compare
 * against it.  256 bytes, placed via the l1_data_B attribute.
 * NOTE(review): a single shared buffer, so those helpers are presumably not
 * safe to run concurrently -- confirm against callers.
 */
static uint8_t vtmp_blk[16 * 16] __attribute__((l1_data_B));
172
173 static int bfin_pix_abs16_x2 (void *c, uint8_t *blk1, uint8_t *blk2, int line_size, int h)
174 {
175 ff_bfin_put_pixels16uc (vtmp_blk, blk2, blk2+1, 16, line_size, h);
176 return ff_bfin_z_sad16x16 (blk1, vtmp_blk, line_size, 16, h);
177 }
178
179 static int bfin_pix_abs16_y2 (void *c, uint8_t *blk1, uint8_t *blk2, int line_size, int h)
180 {
181 ff_bfin_put_pixels16uc (vtmp_blk, blk2, blk2+line_size, 16, line_size, h);
182 return ff_bfin_z_sad16x16 (blk1, vtmp_blk, line_size, 16, h);
183 }
184
185 static int bfin_pix_abs16_xy2 (void *c, uint8_t *blk1, uint8_t *blk2, int line_size, int h)
186 {
187 ff_bfin_z_put_pixels16_xy2 (vtmp_blk, blk2, 16, line_size, h);
188 return ff_bfin_z_sad16x16 (blk1, vtmp_blk, line_size, 16, h);
189 }
190
/**
 * 8-wide block comparison at full-pel position: returns the result of
 * ff_bfin_z_sad8x8() with line_size used as the stride of both blocks.
 *
 * @param c         unused
 * @param blk1      first block
 * @param blk2      second block
 * @param line_size stride of both blocks
 * @param h         number of rows
 * @return value returned by ff_bfin_z_sad8x8()
 */
static int bfin_pix_abs8(void *c, uint8_t *blk1, uint8_t *blk2, int line_size, int h)
{
    const int stride = line_size;

    return ff_bfin_z_sad8x8(blk1, blk2, stride, stride, h);
}
195
196 static int bfin_pix_abs8_x2 (void *c, uint8_t *blk1, uint8_t *blk2, int line_size, int h)
197 {
198 ff_bfin_put_pixels8uc (vtmp_blk, blk2, blk2+1, 8, line_size, h);
199 return ff_bfin_z_sad8x8 (blk1, vtmp_blk, line_size, 8, h);
200 }
201
202 static int bfin_pix_abs8_y2 (void *c, uint8_t *blk1, uint8_t *blk2, int line_size, int h)
203 {
204 ff_bfin_put_pixels8uc (vtmp_blk, blk2, blk2+line_size, 8, line_size, h);
205 return ff_bfin_z_sad8x8 (blk1, vtmp_blk, line_size, 8, h);
206 }
207
208 static int bfin_pix_abs8_xy2 (void *c, uint8_t *blk1, uint8_t *blk2, int line_size, int h)
209 {
210 ff_bfin_z_put_pixels8_xy2 (vtmp_blk, blk2, 8, line_size, h);
211 return ff_bfin_z_sad8x8 (blk1, vtmp_blk, line_size, 8, h);
212 }
213
214
215 /*
216 decoder optimization
217 start on 2/11 100 frames of 352x240@25 compiled with no optimization -g debugging
218 9.824s ~ 2.44x off
219 6.360s ~ 1.58x off with -O2
220 5.740s ~ 1.43x off with idcts
221
222 2.64s 2/20 same sman.mp4 decode only
223
224 */
225
226 void dsputil_init_bfin( DSPContext* c, AVCodecContext *avctx )
227 {
228 c->get_pixels = ff_bfin_get_pixels;
229 c->diff_pixels = ff_bfin_diff_pixels;
230 c->put_pixels_clamped = ff_bfin_put_pixels_clamped;
231 c->add_pixels_clamped = ff_bfin_add_pixels_clamped;
232
233 c->clear_blocks = bfin_clear_blocks;
234 c->pix_sum = ff_bfin_pix_sum;
235 c->pix_norm1 = ff_bfin_pix_norm1;
236
237 c->sad[0] = bfin_pix_abs16;
238 c->sad[1] = bfin_pix_abs8;
239
240 /* TODO [0] 16 [1] 8 */
241 c->pix_abs[0][0] = bfin_pix_abs16;
242 c->pix_abs[0][1] = bfin_pix_abs16_x2;
243 c->pix_abs[0][2] = bfin_pix_abs16_y2;
244 c->pix_abs[0][3] = bfin_pix_abs16_xy2;
245
246 c->pix_abs[1][0] = bfin_pix_abs8;
247 c->pix_abs[1][1] = bfin_pix_abs8_x2;
248 c->pix_abs[1][2] = bfin_pix_abs8_y2;
249 c->pix_abs[1][3] = bfin_pix_abs8_xy2;
250
251
252 c->sse[0] = ff_bfin_sse16;
253 c->sse[1] = ff_bfin_sse8;
254 c->sse[2] = ff_bfin_sse4;
255
256
257 /**
258 * Halfpel motion compensation with rounding (a+b+1)>>1.
259 * This is an array[4][4] of motion compensation functions for 4
260 * horizontal blocksizes (8,16) and the 4 halfpel positions
261 * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
262 * @param block destination where the result is stored
263 * @param pixels source
264 * @param line_size number of bytes in a horizontal line of block
265 * @param h height
266 */
267
268 c->put_pixels_tab[0][0] = bfin_put_pixels16;
269 c->put_pixels_tab[0][1] = bfin_put_pixels16_x2;
270 c->put_pixels_tab[0][2] = bfin_put_pixels16_y2;
271 c->put_pixels_tab[0][3] = bfin_put_pixels16_xy2;
272
273 c->put_pixels_tab[1][0] = bfin_put_pixels8;
274 c->put_pixels_tab[1][1] = bfin_put_pixels8_x2;
275 c->put_pixels_tab[1][2] = bfin_put_pixels8_y2;
276 c->put_pixels_tab[1][3] = bfin_put_pixels8_xy2;
277
278 c->put_no_rnd_pixels_tab[1][0] = bfin_put_pixels8_nornd;
279 c->put_no_rnd_pixels_tab[1][1] = bfin_put_pixels8_x2_nornd;
280 c->put_no_rnd_pixels_tab[1][2] = bfin_put_pixels8_y2_nornd;
281 c->put_no_rnd_pixels_tab[1][3] = ff_bfin_put_pixels8_xy2_nornd;
282
283 c->put_no_rnd_pixels_tab[0][0] = bfin_put_pixels16_nornd;
284 c->put_no_rnd_pixels_tab[0][1] = bfin_put_pixels16_x2_nornd;
285 c->put_no_rnd_pixels_tab[0][2] = bfin_put_pixels16_y2_nornd;
286 c->put_no_rnd_pixels_tab[0][3] = ff_bfin_put_pixels16_xy2_nornd;
287
288 c->idct_permutation_type = FF_NO_IDCT_PERM;
289 c->fdct = ff_bfin_fdct;
290 c->idct = ff_bfin_idct;
291 c->idct_add = bfin_idct_add;
292 c->idct_put = bfin_idct_put;
293 }
294
295
296