374309e37834a087290f8544777df7052efde991
[libav.git] / libavcodec / ppc / dsputil_ppc.c
1 /*
2 * Copyright (c) 2002 Brian Foley
3 * Copyright (c) 2002 Dieter Shirley
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19
20 #include "../dsputil.h"
21
22 #include "dsputil_ppc.h"
23
24 #ifdef HAVE_ALTIVEC
25 #include "dsputil_altivec.h"
26 #endif
27
/* AltiVec IDCT entry points; they are declared here and defined outside this file. */
void idct_put_altivec(uint8_t *dest, int line_size, int16_t *block);
void idct_add_altivec(uint8_t *dest, int line_size, int16_t *block);

/* Global CPU capability flags; dsputil_init_ppc() ORs MM_ALTIVEC into it. */
int mm_flags = 0;
32
/**
 * Report the SIMD capabilities available at run time.
 *
 * @return a bit mask of MM_* flags; MM_ALTIVEC is set when the build has
 *         AltiVec support compiled in and has_altivec() reports it as
 *         usable, otherwise 0.
 */
int mm_support(void)
{
    int result = 0;
    /* Same guard as the dsputil_altivec.h include that provides
       has_altivec() and MM_ALTIVEC. */
#ifdef HAVE_ALTIVEC
    if (has_altivec()) {
        result |= MM_ALTIVEC;
    }
#endif /* HAVE_ALTIVEC */
    return result;
}
43
#ifdef POWERPC_TBL_PERFORMANCE_REPORT
#include <stdio.h>

/* Accumulated statistics: one row per instrumented function
   (powerpc_perf_total rows), one column per statistic
   (powerpc_data_total columns). */
unsigned long long perfdata[powerpc_perf_total][powerpc_data_total];
/* list below must match enum in dsputil_ppc.h */
/* Plain const char pointers: the entries are string literals that are
   only ever read and printed with "%s". */
static const char *const perfname[] = {
    "fft_calc_altivec",
    "gmc1_altivec",
    "dct_unquantize_h263_altivec",
    "idct_add_altivec",
    "idct_put_altivec",
    "put_pixels16_altivec",
    "avg_pixels16_altivec",
    "avg_pixels8_altivec",
    "put_pixels8_xy2_altivec",
    "put_no_rnd_pixels8_xy2_altivec",
    "put_pixels16_xy2_altivec",
    "put_no_rnd_pixels16_xy2_altivec",
    "clear_blocks_dcbz32_ppc"
};
#ifdef POWERPC_PERF_USE_PMC
/* Second set of counters, reported as "pmc2" by the performance report. */
unsigned long long perfdata_miss[powerpc_perf_total][powerpc_data_total];
#endif /* POWERPC_PERF_USE_PMC */
#endif /* POWERPC_TBL_PERFORMANCE_REPORT */
67
#ifdef POWERPC_TBL_PERFORMANCE_REPORT
/*
 * Print the collected performance counters to stderr.
 *
 * For every instrumented function with a non-zero sample count this
 * prints the minimum, the maximum, the average (sum / count) and the
 * sample count. When POWERPC_PERF_USE_PMC is defined the second counter
 * table (perfdata_miss) is printed the same way under the "pmc2" label.
 */
void powerpc_display_perf_report(void)
{
    int fn;

#ifdef POWERPC_PERF_USE_PMC
    fprintf(stderr, "PowerPC performance report\n Values are from the PMC registers, and represent whatever the registers are set to record.\n");
#else /* !POWERPC_PERF_USE_PMC */
    fprintf(stderr, "PowerPC performance report\n Values are from the Time Base register, and represent 4 bus cycles.\n");
#endif /* POWERPC_PERF_USE_PMC */

    for (fn = 0; fn < powerpc_perf_total; fn++) {
        const unsigned long long *stats = perfdata[fn];

        /* Skip functions that were never sampled (also avoids 0/0). */
        if (stats[powerpc_data_num] != 0ULL) {
            const double mean = (double)stats[powerpc_data_sum] /
                                (double)stats[powerpc_data_num];

            fprintf(stderr, " Function \"%s\" (pmc1):\n\tmin: %llu\n\tmax: %llu\n\tavg: %1.2lf (%llu)\n",
                    perfname[fn],
                    stats[powerpc_data_min],
                    stats[powerpc_data_max],
                    mean,
                    stats[powerpc_data_num]);
        }
#ifdef POWERPC_PERF_USE_PMC
        {
            const unsigned long long *miss = perfdata_miss[fn];

            if (miss[powerpc_data_num] != 0ULL) {
                const double miss_mean = (double)miss[powerpc_data_sum] /
                                         (double)miss[powerpc_data_num];

                fprintf(stderr, " Function \"%s\" (pmc2):\n\tmin: %llu\n\tmax: %llu\n\tavg: %1.2lf (%llu)\n",
                        perfname[fn],
                        miss[powerpc_data_min],
                        miss[powerpc_data_max],
                        miss_mean,
                        miss[powerpc_data_num]);
            }
        }
#endif /* POWERPC_PERF_USE_PMC */
    }
}
#endif /* POWERPC_TBL_PERFORMANCE_REPORT */
100
101 /* ***** WARNING ***** WARNING ***** WARNING ***** */
102 /*
103 clear_blocks_dcbz32_ppc will not work properly
104 on PowerPC processors with a cache line size
105 not equal to 32 bytes.
106 Fortunately all processor used by Apple up to
107 at least the 7450 (aka second generation G4)
108 use 32 bytes cache line.
109 This is due to the use of the 'dcbz' instruction.
110 It simply clear to zero a single cache line,
111 so you need to know the cache line size to use it !
112 It's absurd, but it's fast...
113 */
114 void clear_blocks_dcbz32_ppc(DCTELEM *blocks)
115 {
116 POWERPC_TBL_DECLARE(powerpc_clear_blocks_dcbz32, 1);
117 register int misal = ((unsigned long)blocks & 0x00000010);
118 register int i = 0;
119 POWERPC_TBL_START_COUNT(powerpc_clear_blocks_dcbz32, 1);
120 #if 1
121 if (misal) {
122 ((unsigned long*)blocks)[0] = 0L;
123 ((unsigned long*)blocks)[1] = 0L;
124 ((unsigned long*)blocks)[2] = 0L;
125 ((unsigned long*)blocks)[3] = 0L;
126 i += 16;
127 }
128 for ( ; i < sizeof(DCTELEM)*6*64 ; i += 32) {
129 asm volatile("dcbz %0,%1" : : "r" (blocks), "r" (i) : "memory");
130 }
131 if (misal) {
132 ((unsigned long*)blocks)[188] = 0L;
133 ((unsigned long*)blocks)[189] = 0L;
134 ((unsigned long*)blocks)[190] = 0L;
135 ((unsigned long*)blocks)[191] = 0L;
136 i += 16;
137 }
138 #else
139 memset(blocks, 0, sizeof(DCTELEM)*6*64);
140 #endif
141 POWERPC_TBL_STOP_COUNT(powerpc_clear_blocks_dcbz32, 1);
142 }
143
144 /* check dcbz report how many bytes are set to 0 by dcbz */
145 long check_dcbz_effect(void)
146 {
147 register char *fakedata = (char*)av_malloc(1024);
148 register char *fakedata_middle;
149 register long zero = 0;
150 register long i = 0;
151 long count = 0;
152
153 if (!fakedata)
154 {
155 return 0L;
156 }
157
158 fakedata_middle = (fakedata + 512);
159
160 memset(fakedata, 0xFF, 1024);
161
162 asm volatile("dcbz %0, %1" : : "r" (fakedata_middle), "r" (zero));
163
164 for (i = 0; i < 1024 ; i ++)
165 {
166 if (fakedata[i] == (char)0)
167 count++;
168 }
169
170 av_free(fakedata);
171
172 return count;
173 }
174
175 void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
176 {
177 // Common optimisations whether Altivec or not
178
179 switch (check_dcbz_effect()) {
180 case 32:
181 c->clear_blocks = clear_blocks_dcbz32_ppc;
182 break;
183 default:
184 break;
185 }
186
187 #if HAVE_ALTIVEC
188 if (has_altivec()) {
189 mm_flags |= MM_ALTIVEC;
190
191 // Altivec specific optimisations
192 c->pix_abs16x16_x2 = pix_abs16x16_x2_altivec;
193 c->pix_abs16x16_y2 = pix_abs16x16_y2_altivec;
194 c->pix_abs16x16_xy2 = pix_abs16x16_xy2_altivec;
195 c->pix_abs16x16 = pix_abs16x16_altivec;
196 c->pix_abs8x8 = pix_abs8x8_altivec;
197 c->sad[0]= sad16x16_altivec;
198 c->sad[1]= sad8x8_altivec;
199 c->pix_norm1 = pix_norm1_altivec;
200 c->sse[1]= sse8_altivec;
201 c->sse[0]= sse16_altivec;
202 c->pix_sum = pix_sum_altivec;
203 c->diff_pixels = diff_pixels_altivec;
204 c->get_pixels = get_pixels_altivec;
205 // next one disabled as it's untested.
206 #if 0
207 c->add_bytes= add_bytes_altivec;
208 #endif /* 0 */
209 c->put_pixels_tab[0][0] = put_pixels16_altivec;
210 c->avg_pixels_tab[0][0] = avg_pixels16_altivec;
211 // next one disabled as it's untested.
212 #if 0
213 c->avg_pixels_tab[1][0] = avg_pixels8_altivec;
214 #endif /* 0 */
215 c->put_pixels_tab[1][3] = put_pixels8_xy2_altivec;
216 c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_altivec;
217 c->put_pixels_tab[0][3] = put_pixels16_xy2_altivec;
218 c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_altivec;
219
220 c->gmc1 = gmc1_altivec;
221
222 if ((avctx->idct_algo == FF_IDCT_AUTO) ||
223 (avctx->idct_algo == FF_IDCT_ALTIVEC))
224 {
225 c->idct_put = idct_put_altivec;
226 c->idct_add = idct_add_altivec;
227 #ifndef ALTIVEC_USE_REFERENCE_C_CODE
228 c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
229 #else /* ALTIVEC_USE_REFERENCE_C_CODE */
230 c->idct_permutation_type = FF_NO_IDCT_PERM;
231 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */
232 }
233
234 #ifdef POWERPC_TBL_PERFORMANCE_REPORT
235 {
236 int i;
237 for (i = 0 ; i < powerpc_perf_total ; i++)
238 {
239 perfdata[i][powerpc_data_min] = 0xFFFFFFFFFFFFFFFF;
240 perfdata[i][powerpc_data_max] = 0x0000000000000000;
241 perfdata[i][powerpc_data_sum] = 0x0000000000000000;
242 perfdata[i][powerpc_data_num] = 0x0000000000000000;
243 #ifdef POWERPC_PERF_USE_PMC
244 perfdata_miss[i][powerpc_data_min] = 0xFFFFFFFFFFFFFFFF;
245 perfdata_miss[i][powerpc_data_max] = 0x0000000000000000;
246 perfdata_miss[i][powerpc_data_sum] = 0x0000000000000000;
247 perfdata_miss[i][powerpc_data_num] = 0x0000000000000000;
248 #endif /* POWERPC_PERF_USE_PMC */
249 }
250 }
251 #endif /* POWERPC_TBL_PERFORMANCE_REPORT */
252 } else
253 #endif /* HAVE_ALTIVEC */
254 {
255 // Non-AltiVec PPC optimisations
256
257 // ... pending ...
258 }
259 }