renaming L1CODE to attribute_l1_text, which is defined in dsputil_bfin.h
[libav.git] / libavcodec / bfin / dsputil_bfin.c
1 /*
2 * BlackFin DSPUTILS
3 *
4 * Copyright (C) 2007 Marc Hoffman <marc.hoffman@analog.com>
5 * Copyright (c) 2006 Michael Benjamin <michael.benjamin@analog.com>
6 *
7 * This file is part of FFmpeg.
8 *
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24 #include <unistd.h>
25 #include <bits/bfin_sram.h>
26 #include "avcodec.h"
27 #include "dsputil.h"
28 #include "dsputil_bfin.h"
29
30 int off;
31
32
33 extern void ff_bfin_idct (DCTELEM *block) attribute_l1_text;
34 extern void ff_bfin_fdct (DCTELEM *block) attribute_l1_text;
35 extern void ff_bfin_add_pixels_clamped (DCTELEM *block, uint8_t *dest, int line_size) attribute_l1_text;
36 extern void ff_bfin_put_pixels_clamped (DCTELEM *block, uint8_t *dest, int line_size) attribute_l1_text;
37 extern void ff_bfin_diff_pixels (DCTELEM *block, uint8_t *s1, uint8_t *s2, int stride) attribute_l1_text;
38 extern void ff_bfin_get_pixels (DCTELEM *restrict block, const uint8_t *pixels, int line_size) attribute_l1_text;
39 extern int ff_bfin_pix_norm1 (uint8_t * pix, int line_size) attribute_l1_text;
40 extern int ff_bfin_z_sad8x8 (uint8_t *blk1, uint8_t *blk2, int dsz, int line_size, int h) attribute_l1_text;
41 extern int ff_bfin_z_sad16x16 (uint8_t *blk1, uint8_t *blk2, int dsz, int line_size, int h) attribute_l1_text;
42
43 extern void ff_bfin_z_put_pixels16_xy2 (uint8_t *block, const uint8_t *s0, int dest_size, int line_size, int h) attribute_l1_text;
44 extern void ff_bfin_z_put_pixels8_xy2 (uint8_t *block, const uint8_t *s0, int dest_size, int line_size, int h) attribute_l1_text;
45 extern void ff_bfin_put_pixels16_xy2_nornd (uint8_t *block, const uint8_t *s0, int line_size, int h) attribute_l1_text;
46 extern void ff_bfin_put_pixels8_xy2_nornd (uint8_t *block, const uint8_t *s0, int line_size, int h) attribute_l1_text;
47
48
49 extern int ff_bfin_pix_sum (uint8_t *p, int stride) attribute_l1_text;
50
51 extern void ff_bfin_put_pixels8uc (uint8_t *block, const uint8_t *s0, const uint8_t *s1, int dest_size, int line_size, int h) attribute_l1_text;
52 extern void ff_bfin_put_pixels16uc (uint8_t *block, const uint8_t *s0, const uint8_t *s1, int dest_size, int line_size, int h) attribute_l1_text;
53 extern void ff_bfin_put_pixels8uc_nornd (uint8_t *block, const uint8_t *s0, const uint8_t *s1, int line_size, int h) attribute_l1_text;
54 extern void ff_bfin_put_pixels16uc_nornd (uint8_t *block, const uint8_t *s0, const uint8_t *s1, int line_size, int h) attribute_l1_text;
55
56 extern int ff_bfin_sse4 (void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) attribute_l1_text;
57 extern int ff_bfin_sse8 (void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) attribute_l1_text;
58 extern int ff_bfin_sse16 (void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) attribute_l1_text;
59
60
61 static void bfin_idct_add (uint8_t *dest, int line_size, DCTELEM *block)
62 {
63 ff_bfin_idct (block);
64 ff_bfin_add_pixels_clamped (block, dest, line_size);
65 }
66
67 static void bfin_idct_put (uint8_t *dest, int line_size, DCTELEM *block)
68 {
69 ff_bfin_idct (block);
70 ff_bfin_put_pixels_clamped (block, dest, line_size);
71 }
72
73
74 static void bfin_clear_blocks (DCTELEM *blocks)
75 {
76 // This is just a simple memset.
77 //
78 asm("P0=192; "
79 "I0=%0; "
80 "R0=0; "
81 "LSETUP(clear_blocks_blkfn_lab,clear_blocks_blkfn_lab)LC0=P0;"
82 "clear_blocks_blkfn_lab:"
83 "[I0++]=R0;"
84 ::"a" (blocks):"P0","I0","R0");
85 }
86
87
88
/**
 * put_pixels_tab entry for 8-wide blocks, position 0 (no halfpel offset).
 *
 * Gives ff_bfin_put_pixels8uc() the same pointer for both source operands
 * and uses line_size as the stride of destination and source alike.
 */
static void bfin_put_pixels8(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    const uint8_t *const src = pixels;

    ff_bfin_put_pixels8uc(block, src, src, line_size, line_size, h);
}
93
/**
 * put_pixels_tab entry for 8-wide blocks, horizontal halfpel position.
 *
 * The two sources given to ff_bfin_put_pixels8uc() are the source row and
 * the same row shifted right by one byte; line_size is used as the stride
 * of destination and source alike.
 */
static void bfin_put_pixels8_x2(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    const uint8_t *const right = pixels + 1;

    ff_bfin_put_pixels8uc(block, pixels, right, line_size, line_size, h);
}
98
/**
 * put_pixels_tab entry for 8-wide blocks, vertical halfpel position.
 *
 * The two sources given to ff_bfin_put_pixels8uc() are the source row and
 * the row one line_size further on; line_size is used as the stride of
 * destination and source alike.
 */
static void bfin_put_pixels8_y2(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    const uint8_t *const below = pixels + line_size;

    ff_bfin_put_pixels8uc(block, pixels, below, line_size, line_size, h);
}
103
/**
 * put_pixels_tab entry for 8-wide blocks, diagonal halfpel position.
 *
 * Forwards to ff_bfin_z_put_pixels8_xy2(), using line_size both as the
 * destination size argument and as the source stride.
 */
static void bfin_put_pixels8_xy2(uint8_t *block, const uint8_t *s0, int line_size, int h)
{
    const int dest_stride = line_size;

    ff_bfin_z_put_pixels8_xy2(block, s0, dest_stride, line_size, h);
}
108
/**
 * put_pixels_tab entry for 16-wide blocks, position 0 (no halfpel offset).
 *
 * Gives ff_bfin_put_pixels16uc() the same pointer for both source operands
 * and uses line_size as the stride of destination and source alike.
 */
static void bfin_put_pixels16(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    const uint8_t *const src = pixels;

    ff_bfin_put_pixels16uc(block, src, src, line_size, line_size, h);
}
113
/**
 * put_pixels_tab entry for 16-wide blocks, horizontal halfpel position.
 *
 * The two sources given to ff_bfin_put_pixels16uc() are the source row and
 * the same row shifted right by one byte; line_size is used as the stride
 * of destination and source alike.
 */
static void bfin_put_pixels16_x2(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    const uint8_t *const right = pixels + 1;

    ff_bfin_put_pixels16uc(block, pixels, right, line_size, line_size, h);
}
118
/**
 * put_pixels_tab entry for 16-wide blocks, vertical halfpel position.
 *
 * The two sources given to ff_bfin_put_pixels16uc() are the source row and
 * the row one line_size further on; line_size is used as the stride of
 * destination and source alike.
 */
static void bfin_put_pixels16_y2(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    const uint8_t *const below = pixels + line_size;

    ff_bfin_put_pixels16uc(block, pixels, below, line_size, line_size, h);
}
123
/**
 * put_pixels_tab entry for 16-wide blocks, diagonal halfpel position.
 *
 * Forwards to ff_bfin_z_put_pixels16_xy2(), using line_size both as the
 * destination size argument and as the source stride.
 */
static void bfin_put_pixels16_xy2(uint8_t *block, const uint8_t *s0, int line_size, int h)
{
    const int dest_stride = line_size;

    ff_bfin_z_put_pixels16_xy2(block, s0, dest_stride, line_size, h);
}
128
/**
 * put_no_rnd_pixels_tab entry for 8-wide blocks, position 0 (no halfpel
 * offset).
 *
 * Gives ff_bfin_put_pixels8uc_nornd() the same pointer for both source
 * operands.  File-local like the other wrappers here: it is only reached
 * through the function pointer installed by dsputil_init_bfin().
 */
static void bfin_put_pixels8_nornd(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    ff_bfin_put_pixels8uc_nornd(block, pixels, pixels, line_size, h);
}
133
/**
 * put_no_rnd_pixels_tab entry for 8-wide blocks, horizontal halfpel
 * position.
 *
 * The two sources given to ff_bfin_put_pixels8uc_nornd() are the source row
 * and the same row shifted right by one byte.
 */
static void bfin_put_pixels8_x2_nornd(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    const uint8_t *const right = pixels + 1;

    ff_bfin_put_pixels8uc_nornd(block, pixels, right, line_size, h);
}
138
/**
 * put_no_rnd_pixels_tab entry for 8-wide blocks, vertical halfpel position.
 *
 * The two sources given to ff_bfin_put_pixels8uc_nornd() are the source row
 * and the row one line_size further on.
 */
static void bfin_put_pixels8_y2_nornd(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    const uint8_t *const below = pixels + line_size;

    ff_bfin_put_pixels8uc_nornd(block, pixels, below, line_size, h);
}
143
144
/**
 * put_no_rnd_pixels_tab entry for 16-wide blocks, position 0 (no halfpel
 * offset).
 *
 * Gives ff_bfin_put_pixels16uc_nornd() the same pointer for both source
 * operands.  File-local like the other wrappers here: it is only reached
 * through the function pointer installed by dsputil_init_bfin().
 */
static void bfin_put_pixels16_nornd(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    ff_bfin_put_pixels16uc_nornd(block, pixels, pixels, line_size, h);
}
149
/**
 * put_no_rnd_pixels_tab entry for 16-wide blocks, horizontal halfpel
 * position.
 *
 * The two sources given to ff_bfin_put_pixels16uc_nornd() are the source
 * row and the same row shifted right by one byte.
 */
static void bfin_put_pixels16_x2_nornd(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    const uint8_t *const right = pixels + 1;

    ff_bfin_put_pixels16uc_nornd(block, pixels, right, line_size, h);
}
154
/**
 * put_no_rnd_pixels_tab entry for 16-wide blocks, vertical halfpel
 * position.
 *
 * The two sources given to ff_bfin_put_pixels16uc_nornd() are the source
 * row and the row one line_size further on.
 */
static void bfin_put_pixels16_y2_nornd(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    const uint8_t *const below = pixels + line_size;

    ff_bfin_put_pixels16uc_nornd(block, pixels, below, line_size, h);
}
159
/**
 * sad[0] / pix_abs[0][0] entry: compare two 16-wide blocks directly.
 *
 * @param c         unused
 * @param blk1      first block
 * @param blk2      second block
 * @param line_size passed to ff_bfin_z_sad16x16() for both of its size
 *                  arguments
 * @param h         number of rows
 * @return whatever ff_bfin_z_sad16x16() returns
 */
static int bfin_pix_abs16(void *c, uint8_t *blk1, uint8_t *blk2, int line_size, int h)
{
    const int result = ff_bfin_z_sad16x16(blk1, blk2, line_size, line_size, h);

    return result;
}
164
/*
 * 256-byte scratch block shared by every halfpel pix_abs helper in this
 * file.  Because it is one static buffer, those helpers must not run
 * concurrently.  NOTE(review): 256 bytes holds 16 rows of 16 bytes, so the
 * helpers presumably rely on h <= 16 -- confirm against the callers.
 */
static uint8_t vtmp_blk[256] __attribute__((l1_data_B));

/**
 * pix_abs[0][1] entry: 16-wide comparison at the horizontal halfpel
 * position.
 *
 * Builds the reference block in vtmp_blk from blk2 and blk2 + 1 with a
 * 16-byte destination row, then compares blk1 against it.
 *
 * @param c unused
 * @return whatever ff_bfin_z_sad16x16() returns
 */
static int bfin_pix_abs16_x2(void *c, uint8_t *blk1, uint8_t *blk2, int line_size, int h)
{
    uint8_t *const ref = vtmp_blk;

    ff_bfin_put_pixels16uc(ref, blk2, blk2 + 1, 16, line_size, h);

    return ff_bfin_z_sad16x16(blk1, ref, line_size, 16, h);
}
172
173 static int bfin_pix_abs16_y2 (void *c, uint8_t *blk1, uint8_t *blk2, int line_size, int h)
174 {
175 ff_bfin_put_pixels16uc (vtmp_blk, blk2, blk2+line_size, 16, line_size, h);
176 return ff_bfin_z_sad16x16 (blk1, vtmp_blk, line_size, 16, h);
177 }
178
179 static int bfin_pix_abs16_xy2 (void *c, uint8_t *blk1, uint8_t *blk2, int line_size, int h)
180 {
181 ff_bfin_z_put_pixels16_xy2 (vtmp_blk, blk2, 16, line_size, h);
182 return ff_bfin_z_sad16x16 (blk1, vtmp_blk, line_size, 16, h);
183 }
184
/**
 * sad[1] / pix_abs[1][0] entry: compare two 8-wide blocks directly.
 *
 * @param c         unused
 * @param blk1      first block
 * @param blk2      second block
 * @param line_size passed to ff_bfin_z_sad8x8() for both of its size
 *                  arguments
 * @param h         number of rows
 * @return whatever ff_bfin_z_sad8x8() returns
 */
static int bfin_pix_abs8(void *c, uint8_t *blk1, uint8_t *blk2, int line_size, int h)
{
    const int result = ff_bfin_z_sad8x8(blk1, blk2, line_size, line_size, h);

    return result;
}
189
190 static int bfin_pix_abs8_x2 (void *c, uint8_t *blk1, uint8_t *blk2, int line_size, int h)
191 {
192 ff_bfin_put_pixels8uc (vtmp_blk, blk2, blk2+1, 8, line_size, h);
193 return ff_bfin_z_sad8x8 (blk1, vtmp_blk, line_size, 8, h);
194 }
195
196 static int bfin_pix_abs8_y2 (void *c, uint8_t *blk1, uint8_t *blk2, int line_size, int h)
197 {
198 ff_bfin_put_pixels8uc (vtmp_blk, blk2, blk2+line_size, 8, line_size, h);
199 return ff_bfin_z_sad8x8 (blk1, vtmp_blk, line_size, 8, h);
200 }
201
202 static int bfin_pix_abs8_xy2 (void *c, uint8_t *blk1, uint8_t *blk2, int line_size, int h)
203 {
204 ff_bfin_z_put_pixels8_xy2 (vtmp_blk, blk2, 8, line_size, h);
205 return ff_bfin_z_sad8x8 (blk1, vtmp_blk, line_size, 8, h);
206 }
207
208
209 /*
210 decoder optimization
211 start on 2/11 100 frames of 352x240@25 compiled with no optimization -g debugging
212 9.824s ~ 2.44x off
213 6.360s ~ 1.58x off with -O2
214 5.740s ~ 1.43x off with idcts
215
216 2.64s 2/20 same sman.mp4 decode only
217
218 */
219
220 void dsputil_init_bfin( DSPContext* c, AVCodecContext *avctx )
221 {
222 c->get_pixels = ff_bfin_get_pixels;
223 c->diff_pixels = ff_bfin_diff_pixels;
224 c->put_pixels_clamped = ff_bfin_put_pixels_clamped;
225 c->add_pixels_clamped = ff_bfin_add_pixels_clamped;
226
227 c->clear_blocks = bfin_clear_blocks;
228 c->pix_sum = ff_bfin_pix_sum;
229 c->pix_norm1 = ff_bfin_pix_norm1;
230
231 c->sad[0] = bfin_pix_abs16;
232 c->sad[1] = bfin_pix_abs8;
233
234 /* TODO [0] 16 [1] 8 */
235 c->pix_abs[0][0] = bfin_pix_abs16;
236 c->pix_abs[0][1] = bfin_pix_abs16_x2;
237 c->pix_abs[0][2] = bfin_pix_abs16_y2;
238 c->pix_abs[0][3] = bfin_pix_abs16_xy2;
239
240 c->pix_abs[1][0] = bfin_pix_abs8;
241 c->pix_abs[1][1] = bfin_pix_abs8_x2;
242 c->pix_abs[1][2] = bfin_pix_abs8_y2;
243 c->pix_abs[1][3] = bfin_pix_abs8_xy2;
244
245
246 c->sse[0] = ff_bfin_sse16;
247 c->sse[1] = ff_bfin_sse8;
248 c->sse[2] = ff_bfin_sse4;
249
250
251 /**
252 * Halfpel motion compensation with rounding (a+b+1)>>1.
253 * This is an array[4][4] of motion compensation functions for 4
254 * horizontal blocksizes (8,16) and the 4 halfpel positions
255 * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
256 * @param block destination where the result is stored
257 * @param pixels source
258 * @param line_size number of bytes in a horizontal line of block
259 * @param h height
260 */
261
262 c->put_pixels_tab[0][0] = bfin_put_pixels16;
263 c->put_pixels_tab[0][1] = bfin_put_pixels16_x2;
264 c->put_pixels_tab[0][2] = bfin_put_pixels16_y2;
265 c->put_pixels_tab[0][3] = bfin_put_pixels16_xy2;
266
267 c->put_pixels_tab[1][0] = bfin_put_pixels8;
268 c->put_pixels_tab[1][1] = bfin_put_pixels8_x2;
269 c->put_pixels_tab[1][2] = bfin_put_pixels8_y2;
270 c->put_pixels_tab[1][3] = bfin_put_pixels8_xy2;
271
272 c->put_no_rnd_pixels_tab[1][0] = bfin_put_pixels8_nornd;
273 c->put_no_rnd_pixels_tab[1][1] = bfin_put_pixels8_x2_nornd;
274 c->put_no_rnd_pixels_tab[1][2] = bfin_put_pixels8_y2_nornd;
275 c->put_no_rnd_pixels_tab[1][3] = ff_bfin_put_pixels8_xy2_nornd;
276
277 c->put_no_rnd_pixels_tab[0][0] = bfin_put_pixels16_nornd;
278 c->put_no_rnd_pixels_tab[0][1] = bfin_put_pixels16_x2_nornd;
279 c->put_no_rnd_pixels_tab[0][2] = bfin_put_pixels16_y2_nornd;
280 c->put_no_rnd_pixels_tab[0][3] = ff_bfin_put_pixels16_xy2_nornd;
281
282 c->idct_permutation_type = FF_NO_IDCT_PERM;
283 c->fdct = ff_bfin_fdct;
284 c->idct = ff_bfin_idct;
285 c->idct_add = bfin_idct_add;
286 c->idct_put = bfin_idct_put;
287 }
288
289
290