tv standard selection support for dv1394 and grab (v4l)
[libav.git] / libavcodec / libpostproc / postprocess.c
CommitLineData
3057fa66 1/*
9858f773 2 Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
3057fa66
A
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17*/
18
b304569a
MN
19/**
20 * @file postprocess.c
21 * postprocessing.
22 */
23
3057fa66 24/*
3b58b885 25 C MMX MMX2 3DNow
3057fa66
A
26isVertDC Ec Ec
27isVertMinMaxOk Ec Ec
3b58b885 28doVertLowPass E e e
7f16f6e6 29doVertDefFilter Ec Ec e e
3057fa66 30isHorizDC Ec Ec
4e4dcbc5
MN
31isHorizMinMaxOk a E
32doHorizLowPass E e e
7f16f6e6 33doHorizDefFilter Ec Ec e e
2e212618 34deRing E e e*
3b58b885 35Vertical RKAlgo1 E a a
e5c30e06 36Horizontal RKAlgo1 a a
117e45b0
MN
37Vertical X1# a E E
38Horizontal X1# a E E
acced553
MN
39LinIpolDeinterlace e E E*
40CubicIpolDeinterlace a e e*
41LinBlendDeinterlace e E E*
9b1663fc 42MedianDeinterlace# E Ec Ec
be44a4d7 43TempDeNoiser# E e e
d5a1a995 44
117e45b0
MN
45* i dont have a 3dnow CPU -> its untested, but noone said it doesnt work so it seems to work
46# more or less selfinvented filters so the exactness isnt too meaningfull
3057fa66 47E = Exact implementation
acced553 48e = allmost exact implementation (slightly different rounding,...)
3057fa66
A
49a = alternative / approximate impl
50c = checked against the other implementations (-vo md5)
51*/
52
53/*
54TODO:
3057fa66 55reduce the time wasted on the mem transfer
3057fa66 56unroll stuff if instructions depend too much on the prior one
3057fa66 57move YScale thing to the end instead of fixing QP
13e00528 58write a faster and higher quality deblocking filter :)
d5a1a995
MN
59make the mainloop more flexible (variable number of blocks at once
60 (the if/else stuff per block is slowing things down)
9f45d04d 61compare the quality & speed of all filters
9f45d04d 62split this huge file
8405b3fd 63optimize c versions
117e45b0 64try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
3057fa66 65...
13e00528
A
66*/
67
a6be8111 68//Changelog: use the CVS log
3057fa66 69
9858f773 70#include "config.h"
3057fa66
A
71#include <inttypes.h>
72#include <stdio.h>
d5a1a995 73#include <stdlib.h>
911879d1 74#include <string.h>
dda87e9f
PL
75#ifdef HAVE_MALLOC_H
76#include <malloc.h>
77#endif
3057fa66 78//#undef HAVE_MMX2
13e00528 79//#define HAVE_3DNOW
3057fa66 80//#undef HAVE_MMX
cc9b0679 81//#undef ARCH_X86
7f16f6e6 82//#define DEBUG_BRIGHTNESS
bba9b16c 83#ifdef USE_FASTMEMCPY
96163551 84#include "../fastmemcpy.h"
70d4f2da 85#endif
13e00528 86#include "postprocess.h"
c41d972d 87#include "postprocess_internal.h"
bba9b16c
MN
88
89#include "mangle.h" //FIXME should be supressed
3057fa66 90
ca390e72
ZK
91#ifndef HAVE_MEMALIGN
92#define memalign(a,b) malloc(b)
93#endif
94
e939e1c3
A
/*
 * Small arithmetic helpers.
 * All of them may evaluate their arguments more than once, so never pass
 * expressions with side effects.
 */
#define MIN(a,b) ((b) < (a) ? (b) : (a))
#define MAX(a,b) ((b) > (a) ? (b) : (a))
#define ABS(a) (0 < (a) ? (a) : (-(a)))
#define SIGN(a) (0 < (a) ? 1 : -1) /* note: SIGN(0) is -1 */

/* size of the scratch copy of the mode string in pp_get_mode_by_name_and_quality() */
#define GET_MODE_BUFFER_SIZE 500
/* capacity of the per-filter option pointer array, NULL end marker included */
#define OPTIONS_ARRAY_SIZE 10
/* edge length of the square pixel blocks the filters work on */
#define BLOCK_SIZE 8
#define TEMP_STRIDE 8
//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
911879d1 105
cc9b0679 106#ifdef ARCH_X86
b28daef8
MN
/*
 * 8-byte aligned 64-bit constants.
 * wXX holds four 16-bit words with the value 0xXX, bXX holds eight bytes
 * with the value 0xXX.
 * NOTE(review): presumably consumed by the inline asm of the template file - confirm.
 */
static uint64_t __attribute__((aligned(8))) w05= 0x0005000500050005ULL;
static uint64_t __attribute__((aligned(8))) w20= 0x0020002000200020ULL;
static uint64_t __attribute__((aligned(8))) b00= 0x0000000000000000ULL;
static uint64_t __attribute__((aligned(8))) b01= 0x0101010101010101ULL;
static uint64_t __attribute__((aligned(8))) b02= 0x0202020202020202ULL;
static uint64_t __attribute__((aligned(8))) b08= 0x0808080808080808ULL;
static uint64_t __attribute__((aligned(8))) b80= 0x8080808080808080ULL;
b28daef8 114#endif
3057fa66 115
134eb1e5
MN
116
/* lookup table filled in by global_init(); 3 consecutive ranges of 256 entries */
static uint8_t clip_table[3*256];
/* points at the middle range so that indices from -256 to 511 are valid */
static uint8_t * const clip_tab= clip_table + 256;

/* verbosity of the diagnostic printf()s in this file (0 = quiet) */
static int verbose;

static const int deringThreshold= 20;
3057fa66 123
9c9e467d 124
911879d1
MN
125static struct PPFilter filters[]=
126{
127 {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK},
128 {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK},
9c9e467d
MN
129/* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER},
130 {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/
911879d1
MN
131 {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER},
132 {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER},
133 {"dr", "dering", 1, 5, 6, DERING},
134 {"al", "autolevels", 0, 1, 2, LEVEL_FIX},
43d52f76
MN
135 {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
136 {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
137 {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
138 {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER},
9c9e467d 139 {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER},
134eb1e5 140 {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER},
117e45b0 141 {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER},
8aaac435 142 {"fq", "forcequant", 1, 0, 0, FORCE_QUANT},
911879d1
MN
143 {NULL, NULL,0,0,0,0} //End Marker
144};
145
/* filter chain shared by every preset, appended after the deblocking pair */
#define PP_PRESET_TAIL    ",dering:a,autolevels,tmpnoise:a:150:200:400"
#define PP_PRESET_DEFAULT "hdeblock:a,vdeblock:a" PP_PRESET_TAIL
#define PP_PRESET_FAST    "x1hdeblock:a,x1vdeblock:a" PP_PRESET_TAIL

/**
 * Preset names and their expansions, stored as (name, replacement) pairs.
 * The list ends with a single NULL.
 */
static char *replaceTable[]=
{
	"default", PP_PRESET_DEFAULT,
	"de",      PP_PRESET_DEFAULT,
	"fast",    PP_PRESET_FAST,
	"fa",      PP_PRESET_FAST,
	NULL //End Marker
};
154
9c9e467d 155#ifdef ARCH_X86
e5c30e06
MN
156static inline void unusedVariableWarningFixer()
157{
9c9e467d 158 if(w05 + w20 + b00 + b01 + b02 + b08 + b80 == 0) b00=0;
e5c30e06 159}
b28daef8 160#endif
e5c30e06 161
3057fa66 162
9c9e467d 163#ifdef ARCH_X86
3057fa66
A
/**
 * Emits the x86 "prefetchnta" instruction for the address p.
 */
static inline void prefetchnta(void *p)
{
	__asm__ __volatile__(
		"prefetchnta (%0)\n\t"
		: /* no outputs */
		: "r" (p)
	);
}
170
/**
 * Emits the x86 "prefetcht0" instruction for the address p.
 */
static inline void prefetcht0(void *p)
{
	__asm__ __volatile__(
		"prefetcht0 (%0)\n\t"
		: /* no outputs */
		: "r" (p)
	);
}
177
/**
 * Emits the x86 "prefetcht1" instruction for the address p.
 */
static inline void prefetcht1(void *p)
{
	__asm__ __volatile__(
		"prefetcht1 (%0)\n\t"
		: /* no outputs */
		: "r" (p)
	);
}
184
/**
 * Emits the x86 "prefetcht2" instruction for the address p.
 */
static inline void prefetcht2(void *p)
{
	__asm__ __volatile__(
		"prefetcht2 (%0)\n\t"
		: /* no outputs */
		: "r" (p)
	);
}
9a722af7 191#endif
3057fa66 192
cc9b0679 193// The horizontal Functions exist only in C cuz the MMX code is faster with vertical filters and transposing
3057fa66 194
cf5ec61d
MN
195/**
196 * Check if the given 8x8 Block is mostly "flat"
197 */
9c9e467d 198static inline int isHorizDC(uint8_t src[], int stride, PPContext *c)
cf5ec61d
MN
199{
200 int numEq= 0;
201 int y;
0426af31 202 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
ec487e5d 203 const int dcThreshold= dcOffset*2 + 1;
0426af31 204
cf5ec61d
MN
205 for(y=0; y<BLOCK_SIZE; y++)
206 {
9c9e467d
MN
207 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
208 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
209 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
210 if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
211 if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
212 if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
213 if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
214 src+= stride;
215 }
216 return numEq > c->ppMode.flatnessThreshold;
217}
218
219/**
220 * Check if the middle 8x8 Block in the given 8x16 block is flat
221 */
222static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){
223 int numEq= 0;
224 int y;
0426af31 225 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
ec487e5d 226 const int dcThreshold= dcOffset*2 + 1;
0426af31 227
9c9e467d
MN
228 src+= stride*4; // src points to begin of the 8x8 Block
229 for(y=0; y<BLOCK_SIZE-1; y++)
230 {
231 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
232 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
233 if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
234 if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
235 if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
236 if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
237 if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
238 if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
cf5ec61d
MN
239 src+= stride;
240 }
9c9e467d 241 return numEq > c->ppMode.flatnessThreshold;
cf5ec61d
MN
242}
243
/**
 * Check that the first and the last pixel of the first row differ by at
 * most 2*QP. Only src[0] and src[7] are read; stride is not used.
 * @return 1 if |src[0] - src[7]| <= 2*QP, else 0
 */
static inline int isHorizMinMaxOk(uint8_t src[], int stride, int QP)
{
	const int edgeDiff= src[0] - src[7];

	return abs(edgeDiff) <= 2*QP;
}
250
251static inline void doHorizDefFilter(uint8_t dst[], int stride, int QP)
252{
253 int y;
254 for(y=0; y<BLOCK_SIZE; y++)
255 {
256 const int middleEnergy= 5*(dst[4] - dst[5]) + 2*(dst[2] - dst[5]);
257
258 if(ABS(middleEnergy) < 8*QP)
259 {
260 const int q=(dst[3] - dst[4])/2;
261 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
262 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
263
264 int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) );
265 d= MAX(d, 0);
266
267 d= (5*d + 32) >> 6;
268 d*= SIGN(-middleEnergy);
269
270 if(q>0)
271 {
272 d= d<0 ? 0 : d;
273 d= d>q ? q : d;
274 }
275 else
276 {
277 d= d>0 ? 0 : d;
278 d= d<q ? q : d;
279 }
280
281 dst[3]-= d;
282 dst[4]+= d;
283 }
284 dst+= stride;
285 }
286}
287
288/**
289 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
290 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
291 */
292static inline void doHorizLowPass(uint8_t dst[], int stride, int QP)
293{
294
295 int y;
296 for(y=0; y<BLOCK_SIZE; y++)
297 {
298 const int first= ABS(dst[-1] - dst[0]) < QP ? dst[-1] : dst[0];
299 const int last= ABS(dst[8] - dst[7]) < QP ? dst[8] : dst[7];
300
301 int sums[9];
302 sums[0] = first + dst[0];
303 sums[1] = dst[0] + dst[1];
304 sums[2] = dst[1] + dst[2];
305 sums[3] = dst[2] + dst[3];
306 sums[4] = dst[3] + dst[4];
307 sums[5] = dst[4] + dst[5];
308 sums[6] = dst[5] + dst[6];
309 sums[7] = dst[6] + dst[7];
310 sums[8] = dst[7] + last;
311
312 dst[0]= ((sums[0]<<2) + ((first + sums[2])<<1) + sums[4] + 8)>>4;
313 dst[1]= ((dst[1]<<2) + ((first + sums[0] + sums[3])<<1) + sums[5] + 8)>>4;
314 dst[2]= ((dst[2]<<2) + ((first + sums[1] + sums[4])<<1) + sums[6] + 8)>>4;
315 dst[3]= ((dst[3]<<2) + ((sums[2] + sums[5])<<1) + sums[0] + sums[7] + 8)>>4;
316 dst[4]= ((dst[4]<<2) + ((sums[3] + sums[6])<<1) + sums[1] + sums[8] + 8)>>4;
317 dst[5]= ((dst[5]<<2) + ((last + sums[7] + sums[4])<<1) + sums[2] + 8)>>4;
318 dst[6]= (((last + dst[6])<<2) + ((dst[7] + sums[5])<<1) + sums[3] + 8)>>4;
319 dst[7]= ((sums[8]<<2) + ((last + sums[6])<<1) + sums[4] + 8)>>4;
320
321 dst+= stride;
322 }
323}
324
4e4dcbc5 325/**
cc9b0679
MN
326 * Experimental Filter 1 (Horizontal)
327 * will not damage linear gradients
328 * Flat blocks should look like they where passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
329 * can only smooth blocks at the expected locations (it cant smooth them if they did move)
330 * MMX2 version does correct clipping C version doesnt
331 * not identical with the vertical one
4e4dcbc5 332 */
cc9b0679
MN
333static inline void horizX1Filter(uint8_t *src, int stride, int QP)
334{
117e45b0 335 int y;
cc9b0679
MN
336 static uint64_t *lut= NULL;
337 if(lut==NULL)
117e45b0 338 {
cc9b0679
MN
339 int i;
340 lut= (uint64_t*)memalign(8, 256*8);
341 for(i=0; i<256; i++)
117e45b0 342 {
cc9b0679 343 int v= i < 128 ? 2*i : 2*(i-256);
117e45b0 344/*
cc9b0679
MN
345//Simulate 112242211 9-Tap filter
346 uint64_t a= (v/16) & 0xFF;
347 uint64_t b= (v/8) & 0xFF;
348 uint64_t c= (v/4) & 0xFF;
349 uint64_t d= (3*v/8) & 0xFF;
117e45b0 350*/
cc9b0679
MN
351//Simulate piecewise linear interpolation
352 uint64_t a= (v/16) & 0xFF;
353 uint64_t b= (v*3/16) & 0xFF;
354 uint64_t c= (v*5/16) & 0xFF;
355 uint64_t d= (7*v/16) & 0xFF;
356 uint64_t A= (0x100 - a)&0xFF;
357 uint64_t B= (0x100 - b)&0xFF;
358 uint64_t C= (0x100 - c)&0xFF;
359 uint64_t D= (0x100 - c)&0xFF;
360
361 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
362 (D<<24) | (C<<16) | (B<<8) | (A);
363 //lut[i] = (v<<32) | (v<<24);
117e45b0
MN
364 }
365 }
cc9b0679
MN
366
367 for(y=0; y<BLOCK_SIZE; y++)
117e45b0 368 {
cc9b0679
MN
369 int a= src[1] - src[2];
370 int b= src[3] - src[4];
371 int c= src[5] - src[6];
372
373 int d= MAX(ABS(b) - (ABS(a) + ABS(c))/2, 0);
374
375 if(d < QP)
117e45b0 376 {
cc9b0679
MN
377 int v = d * SIGN(-b);
378
379 src[1] +=v/8;
380 src[2] +=v/4;
381 src[3] +=3*v/8;
382 src[4] -=3*v/8;
383 src[5] -=v/4;
384 src[6] -=v/8;
385
117e45b0 386 }
cc9b0679 387 src+=stride;
117e45b0 388 }
cc9b0679
MN
389}
390
391
e89952aa 392//Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
cc9b0679 393//Plain C versions
e89952aa
MN
394#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
395#define COMPILE_C
396#endif
397
9c9e467d 398#ifdef ARCH_X86
e89952aa
MN
399
400#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
401#define COMPILE_MMX
402#endif
403
404#if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
405#define COMPILE_MMX2
406#endif
407
408#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
409#define COMPILE_3DNOW
410#endif
9c9e467d 411#endif //ARCH_X86
e89952aa
MN
412
413#undef HAVE_MMX
414#undef HAVE_MMX2
415#undef HAVE_3DNOW
416#undef ARCH_X86
417
418#ifdef COMPILE_C
cc9b0679
MN
419#undef HAVE_MMX
420#undef HAVE_MMX2
421#undef HAVE_3DNOW
422#undef ARCH_X86
423#define RENAME(a) a ## _C
424#include "postprocess_template.c"
e89952aa 425#endif
cc9b0679
MN
426
427//MMX versions
e89952aa 428#ifdef COMPILE_MMX
cc9b0679
MN
429#undef RENAME
430#define HAVE_MMX
431#undef HAVE_MMX2
432#undef HAVE_3DNOW
433#define ARCH_X86
434#define RENAME(a) a ## _MMX
435#include "postprocess_template.c"
e89952aa 436#endif
cc9b0679
MN
437
438//MMX2 versions
e89952aa 439#ifdef COMPILE_MMX2
cc9b0679
MN
440#undef RENAME
441#define HAVE_MMX
442#define HAVE_MMX2
443#undef HAVE_3DNOW
444#define ARCH_X86
445#define RENAME(a) a ## _MMX2
446#include "postprocess_template.c"
e89952aa 447#endif
cc9b0679
MN
448
449//3DNOW versions
e89952aa 450#ifdef COMPILE_3DNOW
cc9b0679
MN
451#undef RENAME
452#define HAVE_MMX
453#undef HAVE_MMX2
454#define HAVE_3DNOW
455#define ARCH_X86
456#define RENAME(a) a ## _3DNow
457#include "postprocess_template.c"
e89952aa 458#endif
cc9b0679
MN
459
460// minor note: the HAVE_xyz is messed up after that line so dont use it
461
462static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
c41d972d 463 QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc)
cc9b0679 464{
9c9e467d 465 PPContext *c= (PPContext *)vc;
c41d972d 466 PPMode *ppMode= (PPMode *)vm;
9c9e467d
MN
467 c->ppMode= *ppMode; //FIXME
468
cc9b0679
MN
469 // useing ifs here as they are faster than function pointers allthough the
470 // difference wouldnt be messureable here but its much better because
471 // someone might exchange the cpu whithout restarting mplayer ;)
e89952aa 472#ifdef RUNTIME_CPUDETECT
9c9e467d 473#ifdef ARCH_X86
cc9b0679 474 // ordered per speed fasterst first
fa6ea14e 475 if(c->cpuCaps & PP_CPU_CAPS_MMX2)
9c9e467d 476 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
fa6ea14e 477 else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
9c9e467d 478 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
fa6ea14e 479 else if(c->cpuCaps & PP_CPU_CAPS_MMX)
9c9e467d 480 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
cc9b0679 481 else
9c9e467d 482 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
cc9b0679 483#else
9c9e467d 484 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
be44a4d7 485#endif
e89952aa
MN
486#else //RUNTIME_CPUDETECT
487#ifdef HAVE_MMX2
9c9e467d 488 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 489#elif defined (HAVE_3DNOW)
9c9e467d 490 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 491#elif defined (HAVE_MMX)
9c9e467d 492 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 493#else
9c9e467d 494 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa
MN
495#endif
496#endif //!RUNTIME_CPUDETECT
117e45b0
MN
497}
498
cc9b0679
MN
499//static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
500// QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
13e00528 501
/* -pp Command line Help
 * Keep the names in sync with filters[] and replaceTable[]: the parser
 * compares them case sensitively.
 */
char *pp_help=
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"Filters Options\n"
"short long name short long option Description\n"
"* * a autoq cpu power dependant enabler\n"
" c chrom chrominance filtring enabled\n"
" y nochrom chrominance filtring disabled\n"
"hb hdeblock (2 Threshold) horizontal deblocking filter\n"
" 1. difference factor: default=32, higher -> more deblocking\n"
" 2. flatness threshold: default=39, lower -> more deblocking\n"
" the h & v deblocking filters share these\n"
" so u cant set different thresholds for h / v\n"
"vb vdeblock (2 Threshold) vertical deblocking filter\n"
"h1 x1hdeblock Experimental h deblock filter 1\n"
"v1 x1vdeblock Experimental v deblock filter 1\n"
"dr dering Deringing filter\n"
"al autolevels automatic brightness / contrast\n"
" f fullyrange stretch luminance to (0..255)\n"
"lb linblenddeint linear blend deinterlacer\n"
"li linipoldeint linear interpolating deinterlace\n"
"ci cubicipoldeint cubic interpolating deinterlacer\n"
"md mediandeint median deinterlacer\n"
"fd ffmpegdeint ffmpeg deinterlacer\n"
"l5 lowpass5 lowpass5 deinterlacer\n"
"de default hb:a,vb:a,dr:a,al,tn:a:150:200:400\n"
"fa fast h1:a,v1:a,dr:a,al,tn:a:150:200:400\n"
"tn tmpnoise (3 Thresholds) Temporal Noise Reducer\n"
" 1. <= 2. <= 3. larger -> stronger filtering\n"
"fq forcequant <quantizer> Force quantizer\n"
;
911879d1 539
c41d972d 540pp_mode_t *pp_get_mode_by_name_and_quality(char *name, int quality)
911879d1
MN
541{
542 char temp[GET_MODE_BUFFER_SIZE];
543 char *p= temp;
9c9e467d 544 char *filterDelimiters= ",/";
911879d1 545 char *optionDelimiters= ":";
c41d972d 546 struct PPMode *ppMode;
911879d1
MN
547 char *filterToken;
548
c41d972d
MN
549 ppMode= memalign(8, sizeof(PPMode));
550
551 ppMode->lumMode= 0;
552 ppMode->chromMode= 0;
553 ppMode->maxTmpNoise[0]= 700;
554 ppMode->maxTmpNoise[1]= 1500;
555 ppMode->maxTmpNoise[2]= 3000;
556 ppMode->maxAllowedY= 234;
557 ppMode->minAllowedY= 16;
68bf295e
MN
558 ppMode->baseDcDiff= 256/8;
559 ppMode->flatnessThreshold= 56-16-1;
c41d972d
MN
560 ppMode->maxClippedThreshold= 0.01;
561 ppMode->error=0;
df8d4d0e 562
911879d1
MN
563 strncpy(temp, name, GET_MODE_BUFFER_SIZE);
564
162c9c2e 565 if(verbose>1) printf("pp: %s\n", name);
117e45b0 566
911879d1 567 for(;;){
911879d1 568 char *filterName;
326d40af 569 int q= 1000000; //PP_QUALITY_MAX;
911879d1
MN
570 int chrom=-1;
571 char *option;
572 char *options[OPTIONS_ARRAY_SIZE];
573 int i;
574 int filterNameOk=0;
575 int numOfUnknownOptions=0;
576 int enable=1; //does the user want us to enabled or disabled the filter
577
578 filterToken= strtok(p, filterDelimiters);
579 if(filterToken == NULL) break;
117e45b0 580 p+= strlen(filterToken) + 1; // p points to next filterToken
911879d1 581 filterName= strtok(filterToken, optionDelimiters);
162c9c2e 582 if(verbose>1) printf("pp: %s::%s\n", filterToken, filterName);
911879d1
MN
583
584 if(*filterName == '-')
585 {
586 enable=0;
587 filterName++;
588 }
117e45b0 589
911879d1
MN
590 for(;;){ //for all options
591 option= strtok(NULL, optionDelimiters);
592 if(option == NULL) break;
593
162c9c2e 594 if(verbose>1) printf("pp: option: %s\n", option);
911879d1
MN
595 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
596 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
597 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
598 else
599 {
600 options[numOfUnknownOptions] = option;
601 numOfUnknownOptions++;
911879d1
MN
602 }
603 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
604 }
117e45b0 605 options[numOfUnknownOptions] = NULL;
911879d1
MN
606
607 /* replace stuff from the replace Table */
608 for(i=0; replaceTable[2*i]!=NULL; i++)
609 {
610 if(!strcmp(replaceTable[2*i], filterName))
611 {
612 int newlen= strlen(replaceTable[2*i + 1]);
613 int plen;
614 int spaceLeft;
615
616 if(p==NULL) p= temp, *p=0; //last filter
617 else p--, *p=','; //not last filter
618
619 plen= strlen(p);
8cd91a44 620 spaceLeft= p - temp + plen;
911879d1
MN
621 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE)
622 {
c41d972d 623 ppMode->error++;
911879d1
MN
624 break;
625 }
626 memmove(p + newlen, p, plen+1);
627 memcpy(p, replaceTable[2*i + 1], newlen);
628 filterNameOk=1;
629 }
630 }
631
632 for(i=0; filters[i].shortName!=NULL; i++)
633 {
117e45b0 634// printf("Compareing %s, %s, %s\n", filters[i].shortName,filters[i].longName, filterName);
911879d1
MN
635 if( !strcmp(filters[i].longName, filterName)
636 || !strcmp(filters[i].shortName, filterName))
637 {
c41d972d
MN
638 ppMode->lumMode &= ~filters[i].mask;
639 ppMode->chromMode &= ~filters[i].mask;
911879d1
MN
640
641 filterNameOk=1;
642 if(!enable) break; // user wants to disable it
643
644 if(q >= filters[i].minLumQuality)
c41d972d 645 ppMode->lumMode|= filters[i].mask;
911879d1
MN
646 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
647 if(q >= filters[i].minChromQuality)
c41d972d 648 ppMode->chromMode|= filters[i].mask;
911879d1
MN
649
650 if(filters[i].mask == LEVEL_FIX)
651 {
652 int o;
c41d972d
MN
653 ppMode->minAllowedY= 16;
654 ppMode->maxAllowedY= 234;
911879d1 655 for(o=0; options[o]!=NULL; o++)
07f8991b 656 {
911879d1
MN
657 if( !strcmp(options[o],"fullyrange")
658 ||!strcmp(options[o],"f"))
659 {
c41d972d
MN
660 ppMode->minAllowedY= 0;
661 ppMode->maxAllowedY= 255;
911879d1
MN
662 numOfUnknownOptions--;
663 }
07f8991b 664 }
911879d1 665 }
117e45b0
MN
666 else if(filters[i].mask == TEMP_NOISE_FILTER)
667 {
668 int o;
669 int numOfNoises=0;
117e45b0
MN
670
671 for(o=0; options[o]!=NULL; o++)
672 {
673 char *tail;
c41d972d 674 ppMode->maxTmpNoise[numOfNoises]=
117e45b0
MN
675 strtol(options[o], &tail, 0);
676 if(tail!=options[o])
677 {
678 numOfNoises++;
679 numOfUnknownOptions--;
680 if(numOfNoises >= 3) break;
681 }
682 }
683 }
43d52f76
MN
684 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK)
685 {
686 int o;
687
688 for(o=0; options[o]!=NULL && o<2; o++)
689 {
690 char *tail;
691 int val= strtol(options[o], &tail, 0);
692 if(tail==options[o]) break;
693
694 numOfUnknownOptions--;
c41d972d
MN
695 if(o==0) ppMode->baseDcDiff= val;
696 else ppMode->flatnessThreshold= val;
43d52f76
MN
697 }
698 }
8aaac435
MN
699 else if(filters[i].mask == FORCE_QUANT)
700 {
701 int o;
c41d972d 702 ppMode->forcedQuant= 15;
8aaac435
MN
703
704 for(o=0; options[o]!=NULL && o<1; o++)
705 {
706 char *tail;
707 int val= strtol(options[o], &tail, 0);
708 if(tail==options[o]) break;
709
710 numOfUnknownOptions--;
c41d972d 711 ppMode->forcedQuant= val;
8aaac435
MN
712 }
713 }
911879d1
MN
714 }
715 }
c41d972d
MN
716 if(!filterNameOk) ppMode->error++;
717 ppMode->error += numOfUnknownOptions;
911879d1
MN
718 }
719
c41d972d
MN
720 if(verbose>1) printf("pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
721 if(ppMode->error)
722 {
723 fprintf(stderr, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
724 free(ppMode);
725 return NULL;
726 }
911879d1
MN
727 return ppMode;
728}
729
c41d972d
MN
730void pp_free_mode(pp_mode_t *mode){
731 if(mode) free(mode);
732}
733
88c0bc7e
MN
/**
 * Replace the buffer *p by a new, zero filled buffer of size bytes.
 * The previous content is not preserved.
 * @param p         address of the buffer pointer; *p has to be NULL or a
 *                  pointer that may be passed to free()
 * @param alignment requested alignment in bytes, handed to memalign()
 * @param size      size of the new buffer in bytes
 * On allocation failure *p is set to NULL.
 */
static void reallocAlign(void **p, int alignment, int size){
	free(*p);
	*p= memalign(alignment, size);
	if(*p) memset(*p, 0, size);
}
739
0426af31 740static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
ec487e5d
MN
741 int mbWidth = (width+15)>>4;
742 int mbHeight= (height+15)>>4;
88c0bc7e
MN
743 int i;
744
745 c->stride= stride;
0426af31 746 c->qpStride= qpStride;
9c9e467d 747
88c0bc7e
MN
748 reallocAlign((void **)&c->tempDst, 8, stride*24);
749 reallocAlign((void **)&c->tempSrc, 8, stride*24);
750 reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
751 reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
9c9e467d
MN
752 for(i=0; i<256; i++)
753 c->yHistogram[i]= width*height/64*15/256;
754
755 for(i=0; i<3; i++)
211c4920 756 {
9c9e467d 757 //Note:the +17*1024 is just there so i dont have to worry about r/w over te end
88c0bc7e
MN
758 reallocAlign((void **)&c->tempBlured[i], 8, stride*mbHeight*16 + 17*1024);
759 reallocAlign((void **)&c->tempBluredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
211c4920 760 }
45b4f285 761
134eb1e5 762 reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
0426af31
MN
763 reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
764 reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
88c0bc7e
MN
765 reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
766}
767
4cfbf61b 768static void global_init(void){
134eb1e5
MN
769 int i;
770 memset(clip_table, 0, 256);
771 for(i=256; i<512; i++)
772 clip_table[i]= i;
773 memset(clip_table+512, 0, 256);
774}
775
88c0bc7e
MN
776pp_context_t *pp_get_context(int width, int height, int cpuCaps){
777 PPContext *c= memalign(32, sizeof(PPContext));
88c0bc7e 778 int stride= (width+15)&(~15); //assumed / will realloc if needed
0426af31 779 int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
88c0bc7e 780
134eb1e5
MN
781 global_init();
782
88c0bc7e
MN
783 memset(c, 0, sizeof(PPContext));
784 c->cpuCaps= cpuCaps;
e9effafd
MN
785 if(cpuCaps&PP_FORMAT){
786 c->hChromaSubSample= cpuCaps&0x3;
787 c->vChromaSubSample= (cpuCaps>>4)&0x3;
788 }else{
789 c->hChromaSubSample= 1;
790 c->vChromaSubSample= 1;
791 }
88c0bc7e 792
0426af31 793 reallocBuffers(c, width, height, stride, qpStride);
88c0bc7e 794
9c9e467d 795 c->frameNum=-1;
45b4f285 796
9c9e467d 797 return c;
45b4f285
MN
798}
799
9cb54f43 800void pp_free_context(void *vc){
9c9e467d
MN
801 PPContext *c = (PPContext*)vc;
802 int i;
803
804 for(i=0; i<3; i++) free(c->tempBlured[i]);
805 for(i=0; i<3; i++) free(c->tempBluredPast[i]);
806
807 free(c->tempBlocks);
808 free(c->yHistogram);
809 free(c->tempDst);
810 free(c->tempSrc);
9c9e467d 811 free(c->deintTemp);
0426af31 812 free(c->stdQPTable);
ec487e5d 813 free(c->nonBQPTable);
88c0bc7e
MN
814 free(c->forcedQPTable);
815
816 memset(c, 0, sizeof(PPContext));
817
9c9e467d
MN
818 free(c);
819}
820
9cb54f43 821void pp_postprocess(uint8_t * src[3], int srcStride[3],
9c9e467d 822 uint8_t * dst[3], int dstStride[3],
ec487e5d 823 int width, int height,
9c9e467d 824 QP_STORE_T *QP_store, int QPStride,
c41d972d 825 pp_mode_t *vm, void *vc, int pict_type)
911879d1 826{
ec487e5d
MN
827 int mbWidth = (width+15)>>4;
828 int mbHeight= (height+15)>>4;
c41d972d 829 PPMode *mode = (PPMode*)vm;
ec487e5d 830 PPContext *c = (PPContext*)vc;
88c0bc7e 831 int minStride= MAX(srcStride[0], dstStride[0]);
0426af31
MN
832
833 if(c->stride < minStride || c->qpStride < QPStride)
834 reallocBuffers(c, width, height,
835 MAX(minStride, c->stride),
836 MAX(c->qpStride, QPStride));
9c9e467d 837
8aaac435 838 if(QP_store==NULL || (mode->lumMode & FORCE_QUANT))
815cbfe7 839 {
8aaac435 840 int i;
88c0bc7e 841 QP_store= c->forcedQPTable;
9c9e467d 842 QPStride= 0;
8aaac435 843 if(mode->lumMode & FORCE_QUANT)
88c0bc7e 844 for(i=0; i<mbWidth; i++) QP_store[i]= mode->forcedQuant;
8aaac435 845 else
88c0bc7e 846 for(i=0; i<mbWidth; i++) QP_store[i]= 1;
815cbfe7 847 }
0426af31
MN
848//printf("pict_type:%d\n", pict_type);
849
850 if(pict_type & PP_PICT_TYPE_QP2){
851 int i;
852 const int count= mbHeight * QPStride;
853 for(i=0; i<(count>>2); i++){
854 ((uint32_t*)c->stdQPTable)[i] = (((uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
855 }
856 for(i<<=2; i<count; i++){
857 c->stdQPTable[i] = QP_store[i]>>1;
858 }
859 QP_store= c->stdQPTable;
860 }
861
ec487e5d
MN
862if(0){
863int x,y;
864for(y=0; y<mbHeight; y++){
865 for(x=0; x<mbWidth; x++){
866 printf("%2d ", QP_store[x + y*QPStride]);
867 }
868 printf("\n");
869}
870 printf("\n");
871}
51e19dcc 872
0426af31 873 if((pict_type&7)!=3)
ec487e5d 874 {
0426af31
MN
875 int i;
876 const int count= mbHeight * QPStride;
877 for(i=0; i<(count>>2); i++){
878 ((uint32_t*)c->nonBQPTable)[i] = ((uint32_t*)QP_store)[i] & 0x1F1F1F1F;
879 }
880 for(i<<=2; i<count; i++){
881 c->nonBQPTable[i] = QP_store[i] & 0x1F;
ec487e5d
MN
882 }
883 }
815cbfe7 884
df8d4d0e 885 if(verbose>2)
162c9c2e
MN
886 {
887 printf("using npp filters 0x%X/0x%X\n", mode->lumMode, mode->chromMode);
162c9c2e
MN
888 }
889
9c9e467d 890 postProcess(src[0], srcStride[0], dst[0], dstStride[0],
b2a3fcb7 891 width, height, QP_store, QPStride, 0, mode, c);
911879d1 892
e9effafd
MN
893 width = (width )>>c->hChromaSubSample;
894 height = (height)>>c->vChromaSubSample;
911879d1 895
4e1349d4
MN
896 if(mode->chromMode)
897 {
9c9e467d 898 postProcess(src[1], srcStride[1], dst[1], dstStride[1],
b2a3fcb7 899 width, height, QP_store, QPStride, 1, mode, c);
9c9e467d 900 postProcess(src[2], srcStride[2], dst[2], dstStride[2],
b2a3fcb7 901 width, height, QP_store, QPStride, 2, mode, c);
4e1349d4 902 }
9c9e467d 903 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2])
4e1349d4 904 {
ec487e5d
MN
905 memcpy(dst[1], src[1], srcStride[1]*height);
906 memcpy(dst[2], src[2], srcStride[2]*height);
4e1349d4
MN
907 }
908 else
909 {
910 int y;
ec487e5d 911 for(y=0; y<height; y++)
4e1349d4 912 {
ec487e5d
MN
913 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
914 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
4e1349d4
MN
915 }
916 }
911879d1
MN
917}
918