we really shouldnt use M$* as default codec -> use MPEG4 as default
[libav.git] / libavcodec / libpostproc / postprocess.c
CommitLineData
3057fa66 1/*
9858f773 2 Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
3057fa66
A
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17*/
18
b304569a
MN
19/**
20 * @file postprocess.c
21 * postprocessing.
22 */
23
3057fa66 24/*
3b58b885 25 C MMX MMX2 3DNow
3057fa66
A
26isVertDC Ec Ec
27isVertMinMaxOk Ec Ec
3b58b885 28doVertLowPass E e e
7f16f6e6 29doVertDefFilter Ec Ec e e
3057fa66 30isHorizDC Ec Ec
4e4dcbc5
MN
31isHorizMinMaxOk a E
32doHorizLowPass E e e
7f16f6e6 33doHorizDefFilter Ec Ec e e
2e212618 34deRing E e e*
3b58b885 35Vertical RKAlgo1 E a a
e5c30e06 36Horizontal RKAlgo1 a a
117e45b0
MN
37Vertical X1# a E E
38Horizontal X1# a E E
acced553
MN
39LinIpolDeinterlace e E E*
40CubicIpolDeinterlace a e e*
41LinBlendDeinterlace e E E*
9b1663fc 42MedianDeinterlace# E Ec Ec
be44a4d7 43TempDeNoiser# E e e
d5a1a995 44
117e45b0
MN
45* I don't have a 3DNow CPU, so these versions are untested; nobody has reported them broken, so they appear to work
46# more or less self-invented filters, so exactness is not very meaningful
3057fa66 47E = Exact implementation
acced553 48e = almost exact implementation (slightly different rounding,...)
3057fa66
A
49a = alternative / approximate impl
50c = checked against the other implementations (-vo md5)
51*/
52
53/*
54TODO:
3057fa66 55reduce the time wasted on the mem transfer
3057fa66 56unroll stuff if instructions depend too much on the prior one
3057fa66 57move YScale thing to the end instead of fixing QP
13e00528 58write a faster and higher quality deblocking filter :)
d5a1a995
MN
59make the mainloop more flexible (variable number of blocks at once
60 (the if/else stuff per block is slowing things down)
9f45d04d 61compare the quality & speed of all filters
9f45d04d 62split this huge file
8405b3fd 63optimize c versions
117e45b0 64try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
3057fa66 65...
13e00528
A
66*/
67
a6be8111 68//Changelog: use the CVS log
3057fa66 69
9858f773 70#include "config.h"
3057fa66
A
71#include <inttypes.h>
72#include <stdio.h>
d5a1a995 73#include <stdlib.h>
911879d1 74#include <string.h>
dda87e9f
PL
75#ifdef HAVE_MALLOC_H
76#include <malloc.h>
77#endif
3057fa66 78//#undef HAVE_MMX2
13e00528 79//#define HAVE_3DNOW
3057fa66 80//#undef HAVE_MMX
cc9b0679 81//#undef ARCH_X86
7f16f6e6 82//#define DEBUG_BRIGHTNESS
bba9b16c
MN
83#ifdef USE_FASTMEMCPY
84#include "libvo/fastmemcpy.h"
70d4f2da 85#endif
13e00528 86#include "postprocess.h"
c41d972d 87#include "postprocess_internal.h"
bba9b16c
MN
88
89#include "mangle.h" //FIXME should be supressed
3057fa66 90
ca390e72
ZK
91#ifndef HAVE_MEMALIGN
92#define memalign(a,b) malloc(b)
93#endif
94
e939e1c3
A
95#define MIN(a,b) ((a) > (b) ? (b) : (a))
96#define MAX(a,b) ((a) < (b) ? (b) : (a))
97#define ABS(a) ((a) > 0 ? (a) : (-(a)))
98#define SIGN(a) ((a) > 0 ? 1 : -1)
99
911879d1
MN
100#define GET_MODE_BUFFER_SIZE 500
101#define OPTIONS_ARRAY_SIZE 10
9c9e467d
MN
102#define BLOCK_SIZE 8
103#define TEMP_STRIDE 8
104//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
911879d1 105
cc9b0679 106#ifdef ARCH_X86
b28daef8
MN
107static uint64_t __attribute__((aligned(8))) w05= 0x0005000500050005LL;
108static uint64_t __attribute__((aligned(8))) w20= 0x0020002000200020LL;
b28daef8
MN
109static uint64_t __attribute__((aligned(8))) b00= 0x0000000000000000LL;
110static uint64_t __attribute__((aligned(8))) b01= 0x0101010101010101LL;
111static uint64_t __attribute__((aligned(8))) b02= 0x0202020202020202LL;
b28daef8 112static uint64_t __attribute__((aligned(8))) b08= 0x0808080808080808LL;
b28daef8 113static uint64_t __attribute__((aligned(8))) b80= 0x8080808080808080LL;
b28daef8 114#endif
3057fa66 115
134eb1e5
MN
116
117static uint8_t clip_table[3*256];
118static uint8_t * const clip_tab= clip_table + 256;
119
df8d4d0e 120static int verbose= 0;
45b4f285 121
df8d4d0e 122static const int deringThreshold= 20;
3057fa66 123
9c9e467d 124
911879d1
MN
125static struct PPFilter filters[]=
126{
127 {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK},
128 {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK},
9c9e467d
MN
129/* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER},
130 {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/
911879d1
MN
131 {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER},
132 {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER},
133 {"dr", "dering", 1, 5, 6, DERING},
134 {"al", "autolevels", 0, 1, 2, LEVEL_FIX},
43d52f76
MN
135 {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
136 {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
137 {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
138 {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER},
9c9e467d 139 {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER},
134eb1e5 140 {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER},
117e45b0 141 {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER},
8aaac435 142 {"fq", "forcequant", 1, 0, 0, FORCE_QUANT},
911879d1
MN
143 {NULL, NULL,0,0,0,0} //End Marker
144};
145
146static char *replaceTable[]=
147{
117e45b0
MN
148 "default", "hdeblock:a,vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",
149 "de", "hdeblock:a,vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",
150 "fast", "x1hdeblock:a,x1vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",
151 "fa", "x1hdeblock:a,x1vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",
911879d1
MN
152 NULL //End Marker
153};
154
9c9e467d 155#ifdef ARCH_X86
e5c30e06
MN
156static inline void unusedVariableWarningFixer()
157{
9c9e467d 158 if(w05 + w20 + b00 + b01 + b02 + b08 + b80 == 0) b00=0;
e5c30e06 159}
b28daef8 160#endif
e5c30e06 161
3057fa66 162
9c9e467d 163#ifdef ARCH_X86
3057fa66
A
/** Execute the x86 `prefetchnta` instruction on the address p. */
static inline void prefetchnta(void *p)
{
    __asm__ __volatile__ ("prefetchnta (%0)\n\t" : : "r" (p));
}
170
/** Execute the x86 `prefetcht0` instruction on the address p. */
static inline void prefetcht0(void *p)
{
    __asm__ __volatile__ ("prefetcht0 (%0)\n\t" : : "r" (p));
}
177
/** Execute the x86 `prefetcht1` instruction on the address p. */
static inline void prefetcht1(void *p)
{
    __asm__ __volatile__ ("prefetcht1 (%0)\n\t" : : "r" (p));
}
184
/** Execute the x86 `prefetcht2` instruction on the address p. */
static inline void prefetcht2(void *p)
{
    __asm__ __volatile__ ("prefetcht2 (%0)\n\t" : : "r" (p));
}
9a722af7 191#endif
3057fa66 192
cc9b0679 193// The horizontal functions exist only in C because the MMX code is faster with vertical filters and transposing
3057fa66 194
cf5ec61d
MN
195/**
196 * Check if the given 8x8 Block is mostly "flat"
197 */
9c9e467d 198static inline int isHorizDC(uint8_t src[], int stride, PPContext *c)
cf5ec61d
MN
199{
200 int numEq= 0;
201 int y;
ec487e5d
MN
202 const int dcOffset= ((c->QP*c->ppMode.baseDcDiff)>>8) + 1;
203 const int dcThreshold= dcOffset*2 + 1;
cf5ec61d
MN
204 for(y=0; y<BLOCK_SIZE; y++)
205 {
9c9e467d
MN
206 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
207 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
208 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
209 if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
210 if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
211 if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
212 if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
213 src+= stride;
214 }
215 return numEq > c->ppMode.flatnessThreshold;
216}
217
218/**
219 * Check if the middle 8x8 Block in the given 8x16 block is flat
220 */
221static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){
222 int numEq= 0;
223 int y;
ec487e5d
MN
224 const int dcOffset= ((c->QP*c->ppMode.baseDcDiff)>>8) + 1;
225 const int dcThreshold= dcOffset*2 + 1;
9c9e467d
MN
226 src+= stride*4; // src points to begin of the 8x8 Block
227 for(y=0; y<BLOCK_SIZE-1; y++)
228 {
229 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
230 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
231 if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
232 if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
233 if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
234 if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
235 if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
236 if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
cf5ec61d
MN
237 src+= stride;
238 }
9c9e467d 239 return numEq > c->ppMode.flatnessThreshold;
cf5ec61d
MN
240}
241
/**
 * Cheap range check for one row: compares only the first and the last of the
 * 8 pixels.
 *
 * @param src    first pixel of the row
 * @param stride unused
 * @param QP     quantizer; the allowed difference is 2*QP
 * @return 1 if |src[0] - src[7]| <= 2*QP, 0 otherwise
 */
static inline int isHorizMinMaxOk(uint8_t src[], int stride, int QP)
{
    const int span = abs(src[0] - src[7]);

    return span <= 2 * QP;
}
248
249static inline void doHorizDefFilter(uint8_t dst[], int stride, int QP)
250{
251 int y;
252 for(y=0; y<BLOCK_SIZE; y++)
253 {
254 const int middleEnergy= 5*(dst[4] - dst[5]) + 2*(dst[2] - dst[5]);
255
256 if(ABS(middleEnergy) < 8*QP)
257 {
258 const int q=(dst[3] - dst[4])/2;
259 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
260 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
261
262 int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) );
263 d= MAX(d, 0);
264
265 d= (5*d + 32) >> 6;
266 d*= SIGN(-middleEnergy);
267
268 if(q>0)
269 {
270 d= d<0 ? 0 : d;
271 d= d>q ? q : d;
272 }
273 else
274 {
275 d= d>0 ? 0 : d;
276 d= d<q ? q : d;
277 }
278
279 dst[3]-= d;
280 dst[4]+= d;
281 }
282 dst+= stride;
283 }
284}
285
286/**
287 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
288 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
289 */
290static inline void doHorizLowPass(uint8_t dst[], int stride, int QP)
291{
292
293 int y;
294 for(y=0; y<BLOCK_SIZE; y++)
295 {
296 const int first= ABS(dst[-1] - dst[0]) < QP ? dst[-1] : dst[0];
297 const int last= ABS(dst[8] - dst[7]) < QP ? dst[8] : dst[7];
298
299 int sums[9];
300 sums[0] = first + dst[0];
301 sums[1] = dst[0] + dst[1];
302 sums[2] = dst[1] + dst[2];
303 sums[3] = dst[2] + dst[3];
304 sums[4] = dst[3] + dst[4];
305 sums[5] = dst[4] + dst[5];
306 sums[6] = dst[5] + dst[6];
307 sums[7] = dst[6] + dst[7];
308 sums[8] = dst[7] + last;
309
310 dst[0]= ((sums[0]<<2) + ((first + sums[2])<<1) + sums[4] + 8)>>4;
311 dst[1]= ((dst[1]<<2) + ((first + sums[0] + sums[3])<<1) + sums[5] + 8)>>4;
312 dst[2]= ((dst[2]<<2) + ((first + sums[1] + sums[4])<<1) + sums[6] + 8)>>4;
313 dst[3]= ((dst[3]<<2) + ((sums[2] + sums[5])<<1) + sums[0] + sums[7] + 8)>>4;
314 dst[4]= ((dst[4]<<2) + ((sums[3] + sums[6])<<1) + sums[1] + sums[8] + 8)>>4;
315 dst[5]= ((dst[5]<<2) + ((last + sums[7] + sums[4])<<1) + sums[2] + 8)>>4;
316 dst[6]= (((last + dst[6])<<2) + ((dst[7] + sums[5])<<1) + sums[3] + 8)>>4;
317 dst[7]= ((sums[8]<<2) + ((last + sums[6])<<1) + sums[4] + 8)>>4;
318
319 dst+= stride;
320 }
321}
322
4e4dcbc5 323/**
cc9b0679
MN
324 * Experimental Filter 1 (Horizontal)
325 * will not damage linear gradients
326 * Flat blocks should look like they where passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
327 * can only smooth blocks at the expected locations (it cant smooth them if they did move)
328 * MMX2 version does correct clipping C version doesnt
329 * not identical with the vertical one
4e4dcbc5 330 */
cc9b0679
MN
331static inline void horizX1Filter(uint8_t *src, int stride, int QP)
332{
117e45b0 333 int y;
cc9b0679
MN
334 static uint64_t *lut= NULL;
335 if(lut==NULL)
117e45b0 336 {
cc9b0679
MN
337 int i;
338 lut= (uint64_t*)memalign(8, 256*8);
339 for(i=0; i<256; i++)
117e45b0 340 {
cc9b0679 341 int v= i < 128 ? 2*i : 2*(i-256);
117e45b0 342/*
cc9b0679
MN
343//Simulate 112242211 9-Tap filter
344 uint64_t a= (v/16) & 0xFF;
345 uint64_t b= (v/8) & 0xFF;
346 uint64_t c= (v/4) & 0xFF;
347 uint64_t d= (3*v/8) & 0xFF;
117e45b0 348*/
cc9b0679
MN
349//Simulate piecewise linear interpolation
350 uint64_t a= (v/16) & 0xFF;
351 uint64_t b= (v*3/16) & 0xFF;
352 uint64_t c= (v*5/16) & 0xFF;
353 uint64_t d= (7*v/16) & 0xFF;
354 uint64_t A= (0x100 - a)&0xFF;
355 uint64_t B= (0x100 - b)&0xFF;
356 uint64_t C= (0x100 - c)&0xFF;
357 uint64_t D= (0x100 - c)&0xFF;
358
359 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
360 (D<<24) | (C<<16) | (B<<8) | (A);
361 //lut[i] = (v<<32) | (v<<24);
117e45b0
MN
362 }
363 }
cc9b0679
MN
364
365 for(y=0; y<BLOCK_SIZE; y++)
117e45b0 366 {
cc9b0679
MN
367 int a= src[1] - src[2];
368 int b= src[3] - src[4];
369 int c= src[5] - src[6];
370
371 int d= MAX(ABS(b) - (ABS(a) + ABS(c))/2, 0);
372
373 if(d < QP)
117e45b0 374 {
cc9b0679
MN
375 int v = d * SIGN(-b);
376
377 src[1] +=v/8;
378 src[2] +=v/4;
379 src[3] +=3*v/8;
380 src[4] -=3*v/8;
381 src[5] -=v/4;
382 src[6] -=v/8;
383
117e45b0 384 }
cc9b0679 385 src+=stride;
117e45b0 386 }
cc9b0679
MN
387}
388
389
e89952aa 390//Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
cc9b0679 391//Plain C versions
e89952aa
MN
392#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
393#define COMPILE_C
394#endif
395
9c9e467d 396#ifdef ARCH_X86
e89952aa
MN
397
398#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
399#define COMPILE_MMX
400#endif
401
402#if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
403#define COMPILE_MMX2
404#endif
405
406#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
407#define COMPILE_3DNOW
408#endif
9c9e467d 409#endif //ARCH_X86
e89952aa
MN
410
411#undef HAVE_MMX
412#undef HAVE_MMX2
413#undef HAVE_3DNOW
414#undef ARCH_X86
415
416#ifdef COMPILE_C
cc9b0679
MN
417#undef HAVE_MMX
418#undef HAVE_MMX2
419#undef HAVE_3DNOW
420#undef ARCH_X86
421#define RENAME(a) a ## _C
422#include "postprocess_template.c"
e89952aa 423#endif
cc9b0679
MN
424
425//MMX versions
e89952aa 426#ifdef COMPILE_MMX
cc9b0679
MN
427#undef RENAME
428#define HAVE_MMX
429#undef HAVE_MMX2
430#undef HAVE_3DNOW
431#define ARCH_X86
432#define RENAME(a) a ## _MMX
433#include "postprocess_template.c"
e89952aa 434#endif
cc9b0679
MN
435
436//MMX2 versions
e89952aa 437#ifdef COMPILE_MMX2
cc9b0679
MN
438#undef RENAME
439#define HAVE_MMX
440#define HAVE_MMX2
441#undef HAVE_3DNOW
442#define ARCH_X86
443#define RENAME(a) a ## _MMX2
444#include "postprocess_template.c"
e89952aa 445#endif
cc9b0679
MN
446
447//3DNOW versions
e89952aa 448#ifdef COMPILE_3DNOW
cc9b0679
MN
449#undef RENAME
450#define HAVE_MMX
451#undef HAVE_MMX2
452#define HAVE_3DNOW
453#define ARCH_X86
454#define RENAME(a) a ## _3DNow
455#include "postprocess_template.c"
e89952aa 456#endif
cc9b0679
MN
457
458// minor note: the HAVE_xyz is messed up after that line so dont use it
459
460static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
c41d972d 461 QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc)
cc9b0679 462{
9c9e467d 463 PPContext *c= (PPContext *)vc;
c41d972d 464 PPMode *ppMode= (PPMode *)vm;
9c9e467d
MN
465 c->ppMode= *ppMode; //FIXME
466
cc9b0679
MN
467 // useing ifs here as they are faster than function pointers allthough the
468 // difference wouldnt be messureable here but its much better because
469 // someone might exchange the cpu whithout restarting mplayer ;)
e89952aa 470#ifdef RUNTIME_CPUDETECT
9c9e467d 471#ifdef ARCH_X86
cc9b0679 472 // ordered per speed fasterst first
fa6ea14e 473 if(c->cpuCaps & PP_CPU_CAPS_MMX2)
9c9e467d 474 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
fa6ea14e 475 else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
9c9e467d 476 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
fa6ea14e 477 else if(c->cpuCaps & PP_CPU_CAPS_MMX)
9c9e467d 478 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
cc9b0679 479 else
9c9e467d 480 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
cc9b0679 481#else
9c9e467d 482 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
be44a4d7 483#endif
e89952aa
MN
484#else //RUNTIME_CPUDETECT
485#ifdef HAVE_MMX2
9c9e467d 486 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 487#elif defined (HAVE_3DNOW)
9c9e467d 488 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 489#elif defined (HAVE_MMX)
9c9e467d 490 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 491#else
9c9e467d 492 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa
MN
493#endif
494#endif //!RUNTIME_CPUDETECT
117e45b0
MN
495}
496
cc9b0679
MN
497//static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
498// QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
13e00528 499
911879d1 500/* -pp Command line Help
911879d1 501*/
4407a3c4 502char *pp_help=
b01be121 503"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
4b001a13 504"long form example:\n"
b01be121 505"vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
4b001a13 506"short form example:\n"
b01be121 507"vb:a/hb:a/lb de,-vb\n"
4b001a13 508"more examples:\n"
1d9324fd 509"tn:64:128:256\n"
4b001a13
MN
510"Filters Options\n"
511"short long name short long option Description\n"
512"* * a autoq cpu power dependant enabler\n"
513" c chrom chrominance filtring enabled\n"
514" y nochrom chrominance filtring disabled\n"
515"hb hdeblock (2 Threshold) horizontal deblocking filter\n"
b01be121
MN
516" 1. difference factor: default=64, higher -> more deblocking\n"
517" 2. flatness threshold: default=40, lower -> more deblocking\n"
4b001a13
MN
518" the h & v deblocking filters share these\n"
519" so u cant set different thresholds for h / v\n"
520"vb vdeblock (2 Threshold) vertical deblocking filter\n"
4b001a13
MN
521"h1 x1hdeblock Experimental h deblock filter 1\n"
522"v1 x1vdeblock Experimental v deblock filter 1\n"
523"dr dering Deringing filter\n"
524"al autolevels automatic brightness / contrast\n"
525" f fullyrange stretch luminance to (0..255)\n"
526"lb linblenddeint linear blend deinterlacer\n"
527"li linipoldeint linear interpolating deinterlace\n"
528"ci cubicipoldeint cubic interpolating deinterlacer\n"
529"md mediandeint median deinterlacer\n"
9c9e467d 530"fd ffmpegdeint ffmpeg deinterlacer\n"
4b001a13
MN
531"de default hb:a,vb:a,dr:a,al\n"
532"fa fast h1:a,v1:a,dr:a,al\n"
533"tn tmpnoise (3 Thresholds) Temporal Noise Reducer\n"
534" 1. <= 2. <= 3. larger -> stronger filtering\n"
535"fq forceQuant <quantizer> Force quantizer\n"
536;
911879d1 537
c41d972d 538pp_mode_t *pp_get_mode_by_name_and_quality(char *name, int quality)
911879d1
MN
539{
540 char temp[GET_MODE_BUFFER_SIZE];
541 char *p= temp;
9c9e467d 542 char *filterDelimiters= ",/";
911879d1 543 char *optionDelimiters= ":";
c41d972d 544 struct PPMode *ppMode;
911879d1
MN
545 char *filterToken;
546
c41d972d
MN
547 ppMode= memalign(8, sizeof(PPMode));
548
549 ppMode->lumMode= 0;
550 ppMode->chromMode= 0;
551 ppMode->maxTmpNoise[0]= 700;
552 ppMode->maxTmpNoise[1]= 1500;
553 ppMode->maxTmpNoise[2]= 3000;
554 ppMode->maxAllowedY= 234;
555 ppMode->minAllowedY= 16;
556 ppMode->baseDcDiff= 256/4;
c41d972d
MN
557 ppMode->flatnessThreshold= 56-16;
558 ppMode->maxClippedThreshold= 0.01;
559 ppMode->error=0;
df8d4d0e 560
911879d1
MN
561 strncpy(temp, name, GET_MODE_BUFFER_SIZE);
562
162c9c2e 563 if(verbose>1) printf("pp: %s\n", name);
117e45b0 564
911879d1 565 for(;;){
911879d1 566 char *filterName;
326d40af 567 int q= 1000000; //PP_QUALITY_MAX;
911879d1
MN
568 int chrom=-1;
569 char *option;
570 char *options[OPTIONS_ARRAY_SIZE];
571 int i;
572 int filterNameOk=0;
573 int numOfUnknownOptions=0;
574 int enable=1; //does the user want us to enabled or disabled the filter
575
576 filterToken= strtok(p, filterDelimiters);
577 if(filterToken == NULL) break;
117e45b0 578 p+= strlen(filterToken) + 1; // p points to next filterToken
911879d1 579 filterName= strtok(filterToken, optionDelimiters);
162c9c2e 580 if(verbose>1) printf("pp: %s::%s\n", filterToken, filterName);
911879d1
MN
581
582 if(*filterName == '-')
583 {
584 enable=0;
585 filterName++;
586 }
117e45b0 587
911879d1
MN
588 for(;;){ //for all options
589 option= strtok(NULL, optionDelimiters);
590 if(option == NULL) break;
591
162c9c2e 592 if(verbose>1) printf("pp: option: %s\n", option);
911879d1
MN
593 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
594 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
595 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
596 else
597 {
598 options[numOfUnknownOptions] = option;
599 numOfUnknownOptions++;
911879d1
MN
600 }
601 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
602 }
117e45b0 603 options[numOfUnknownOptions] = NULL;
911879d1
MN
604
605 /* replace stuff from the replace Table */
606 for(i=0; replaceTable[2*i]!=NULL; i++)
607 {
608 if(!strcmp(replaceTable[2*i], filterName))
609 {
610 int newlen= strlen(replaceTable[2*i + 1]);
611 int plen;
612 int spaceLeft;
613
614 if(p==NULL) p= temp, *p=0; //last filter
615 else p--, *p=','; //not last filter
616
617 plen= strlen(p);
8cd91a44 618 spaceLeft= p - temp + plen;
911879d1
MN
619 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE)
620 {
c41d972d 621 ppMode->error++;
911879d1
MN
622 break;
623 }
624 memmove(p + newlen, p, plen+1);
625 memcpy(p, replaceTable[2*i + 1], newlen);
626 filterNameOk=1;
627 }
628 }
629
630 for(i=0; filters[i].shortName!=NULL; i++)
631 {
117e45b0 632// printf("Compareing %s, %s, %s\n", filters[i].shortName,filters[i].longName, filterName);
911879d1
MN
633 if( !strcmp(filters[i].longName, filterName)
634 || !strcmp(filters[i].shortName, filterName))
635 {
c41d972d
MN
636 ppMode->lumMode &= ~filters[i].mask;
637 ppMode->chromMode &= ~filters[i].mask;
911879d1
MN
638
639 filterNameOk=1;
640 if(!enable) break; // user wants to disable it
641
642 if(q >= filters[i].minLumQuality)
c41d972d 643 ppMode->lumMode|= filters[i].mask;
911879d1
MN
644 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
645 if(q >= filters[i].minChromQuality)
c41d972d 646 ppMode->chromMode|= filters[i].mask;
911879d1
MN
647
648 if(filters[i].mask == LEVEL_FIX)
649 {
650 int o;
c41d972d
MN
651 ppMode->minAllowedY= 16;
652 ppMode->maxAllowedY= 234;
911879d1 653 for(o=0; options[o]!=NULL; o++)
07f8991b 654 {
911879d1
MN
655 if( !strcmp(options[o],"fullyrange")
656 ||!strcmp(options[o],"f"))
657 {
c41d972d
MN
658 ppMode->minAllowedY= 0;
659 ppMode->maxAllowedY= 255;
911879d1
MN
660 numOfUnknownOptions--;
661 }
07f8991b 662 }
911879d1 663 }
117e45b0
MN
664 else if(filters[i].mask == TEMP_NOISE_FILTER)
665 {
666 int o;
667 int numOfNoises=0;
117e45b0
MN
668
669 for(o=0; options[o]!=NULL; o++)
670 {
671 char *tail;
c41d972d 672 ppMode->maxTmpNoise[numOfNoises]=
117e45b0
MN
673 strtol(options[o], &tail, 0);
674 if(tail!=options[o])
675 {
676 numOfNoises++;
677 numOfUnknownOptions--;
678 if(numOfNoises >= 3) break;
679 }
680 }
681 }
43d52f76
MN
682 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK)
683 {
684 int o;
685
686 for(o=0; options[o]!=NULL && o<2; o++)
687 {
688 char *tail;
689 int val= strtol(options[o], &tail, 0);
690 if(tail==options[o]) break;
691
692 numOfUnknownOptions--;
c41d972d
MN
693 if(o==0) ppMode->baseDcDiff= val;
694 else ppMode->flatnessThreshold= val;
43d52f76
MN
695 }
696 }
8aaac435
MN
697 else if(filters[i].mask == FORCE_QUANT)
698 {
699 int o;
c41d972d 700 ppMode->forcedQuant= 15;
8aaac435
MN
701
702 for(o=0; options[o]!=NULL && o<1; o++)
703 {
704 char *tail;
705 int val= strtol(options[o], &tail, 0);
706 if(tail==options[o]) break;
707
708 numOfUnknownOptions--;
c41d972d 709 ppMode->forcedQuant= val;
8aaac435
MN
710 }
711 }
911879d1
MN
712 }
713 }
c41d972d
MN
714 if(!filterNameOk) ppMode->error++;
715 ppMode->error += numOfUnknownOptions;
911879d1
MN
716 }
717
c41d972d
MN
718 if(verbose>1) printf("pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
719 if(ppMode->error)
720 {
721 fprintf(stderr, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
722 free(ppMode);
723 return NULL;
724 }
911879d1
MN
725 return ppMode;
726}
727
c41d972d
MN
728void pp_free_mode(pp_mode_t *mode){
729 if(mode) free(mode);
730}
731
88c0bc7e
MN
/**
 * Replace the buffer *p by a freshly allocated, zero-filled one.
 *
 * The old contents are NOT preserved: the previous buffer (if any) is freed
 * first.
 *
 * @param p         address of the buffer pointer; *p may be NULL on entry and
 *                  is NULL on return if the allocation failed
 * @param alignment alignment passed to memalign()
 * @param size      size of the new buffer in bytes
 */
static void reallocAlign(void **p, int alignment, int size){
    free(*p);
    *p= memalign(alignment, size);
    /* do not write through a failed allocation */
    if(*p) memset(*p, 0, size);
}
737
738static void reallocBuffers(PPContext *c, int width, int height, int stride){
ec487e5d
MN
739 int mbWidth = (width+15)>>4;
740 int mbHeight= (height+15)>>4;
88c0bc7e
MN
741 int i;
742
743 c->stride= stride;
9c9e467d 744
88c0bc7e
MN
745 reallocAlign((void **)&c->tempDst, 8, stride*24);
746 reallocAlign((void **)&c->tempSrc, 8, stride*24);
747 reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
748 reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
9c9e467d
MN
749 for(i=0; i<256; i++)
750 c->yHistogram[i]= width*height/64*15/256;
751
752 for(i=0; i<3; i++)
211c4920 753 {
9c9e467d 754 //Note:the +17*1024 is just there so i dont have to worry about r/w over te end
88c0bc7e
MN
755 reallocAlign((void **)&c->tempBlured[i], 8, stride*mbHeight*16 + 17*1024);
756 reallocAlign((void **)&c->tempBluredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
211c4920 757 }
45b4f285 758
134eb1e5 759 reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
88c0bc7e
MN
760 reallocAlign((void **)&c->nonBQPTable, 8, mbWidth*mbHeight*sizeof(QP_STORE_T));
761 reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
762}
763
134eb1e5
MN
764static void global_init(){
765 int i;
766 memset(clip_table, 0, 256);
767 for(i=256; i<512; i++)
768 clip_table[i]= i;
769 memset(clip_table+512, 0, 256);
770}
771
88c0bc7e
MN
772pp_context_t *pp_get_context(int width, int height, int cpuCaps){
773 PPContext *c= memalign(32, sizeof(PPContext));
88c0bc7e
MN
774 int stride= (width+15)&(~15); //assumed / will realloc if needed
775
134eb1e5
MN
776 global_init();
777
88c0bc7e
MN
778 memset(c, 0, sizeof(PPContext));
779 c->cpuCaps= cpuCaps;
e9effafd
MN
780 if(cpuCaps&PP_FORMAT){
781 c->hChromaSubSample= cpuCaps&0x3;
782 c->vChromaSubSample= (cpuCaps>>4)&0x3;
783 }else{
784 c->hChromaSubSample= 1;
785 c->vChromaSubSample= 1;
786 }
88c0bc7e
MN
787
788 reallocBuffers(c, width, height, stride);
789
9c9e467d 790 c->frameNum=-1;
45b4f285 791
9c9e467d 792 return c;
45b4f285
MN
793}
794
9cb54f43 795void pp_free_context(void *vc){
9c9e467d
MN
796 PPContext *c = (PPContext*)vc;
797 int i;
798
799 for(i=0; i<3; i++) free(c->tempBlured[i]);
800 for(i=0; i<3; i++) free(c->tempBluredPast[i]);
801
802 free(c->tempBlocks);
803 free(c->yHistogram);
804 free(c->tempDst);
805 free(c->tempSrc);
9c9e467d 806 free(c->deintTemp);
ec487e5d 807 free(c->nonBQPTable);
88c0bc7e
MN
808 free(c->forcedQPTable);
809
810 memset(c, 0, sizeof(PPContext));
811
9c9e467d
MN
812 free(c);
813}
814
9cb54f43 815void pp_postprocess(uint8_t * src[3], int srcStride[3],
9c9e467d 816 uint8_t * dst[3], int dstStride[3],
ec487e5d 817 int width, int height,
9c9e467d 818 QP_STORE_T *QP_store, int QPStride,
c41d972d 819 pp_mode_t *vm, void *vc, int pict_type)
911879d1 820{
ec487e5d
MN
821 int mbWidth = (width+15)>>4;
822 int mbHeight= (height+15)>>4;
c41d972d 823 PPMode *mode = (PPMode*)vm;
ec487e5d 824 PPContext *c = (PPContext*)vc;
88c0bc7e
MN
825 int minStride= MAX(srcStride[0], dstStride[0]);
826
827 if(c->stride < minStride)
828 reallocBuffers(c, width, height, minStride);
9c9e467d 829
8aaac435 830 if(QP_store==NULL || (mode->lumMode & FORCE_QUANT))
815cbfe7 831 {
8aaac435 832 int i;
88c0bc7e 833 QP_store= c->forcedQPTable;
9c9e467d 834 QPStride= 0;
8aaac435 835 if(mode->lumMode & FORCE_QUANT)
88c0bc7e 836 for(i=0; i<mbWidth; i++) QP_store[i]= mode->forcedQuant;
8aaac435 837 else
88c0bc7e 838 for(i=0; i<mbWidth; i++) QP_store[i]= 1;
815cbfe7 839 }
ec487e5d
MN
840if(0){
841int x,y;
842for(y=0; y<mbHeight; y++){
843 for(x=0; x<mbWidth; x++){
844 printf("%2d ", QP_store[x + y*QPStride]);
845 }
846 printf("\n");
847}
848 printf("\n");
849}
850//printf("pict_type:%d\n", pict_type);
51e19dcc 851
ec487e5d
MN
852 if(pict_type!=3)
853 {
854 int x,y;
855 for(y=0; y<mbHeight; y++){
856 for(x=0; x<mbWidth; x++){
857 int qscale= QP_store[x + y*QPStride];
858 if(qscale&~31)
859 qscale=31;
860 c->nonBQPTable[y*mbWidth + x]= qscale;
861 }
862 }
863 }
815cbfe7 864
df8d4d0e 865 if(verbose>2)
162c9c2e
MN
866 {
867 printf("using npp filters 0x%X/0x%X\n", mode->lumMode, mode->chromMode);
162c9c2e
MN
868 }
869
9c9e467d 870 postProcess(src[0], srcStride[0], dst[0], dstStride[0],
b2a3fcb7 871 width, height, QP_store, QPStride, 0, mode, c);
911879d1 872
e9effafd
MN
873 width = (width )>>c->hChromaSubSample;
874 height = (height)>>c->vChromaSubSample;
911879d1 875
4e1349d4
MN
876 if(mode->chromMode)
877 {
9c9e467d 878 postProcess(src[1], srcStride[1], dst[1], dstStride[1],
b2a3fcb7 879 width, height, QP_store, QPStride, 1, mode, c);
9c9e467d 880 postProcess(src[2], srcStride[2], dst[2], dstStride[2],
b2a3fcb7 881 width, height, QP_store, QPStride, 2, mode, c);
4e1349d4 882 }
9c9e467d 883 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2])
4e1349d4 884 {
ec487e5d
MN
885 memcpy(dst[1], src[1], srcStride[1]*height);
886 memcpy(dst[2], src[2], srcStride[2]*height);
4e1349d4
MN
887 }
888 else
889 {
890 int y;
ec487e5d 891 for(y=0; y<height; y++)
4e1349d4 892 {
ec487e5d
MN
893 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
894 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
4e1349d4
MN
895 }
896 }
911879d1
MN
897}
898