FOURCC removed, using ff_get_fourcc instead (should be big-endian safe), workarounded...
[libav.git] / postproc / postprocess.c
CommitLineData
3057fa66 1/*
8aaac435 2 Copyright (C) 2001-2002 Michael Niedermayer (michaelni@gmx.at)
3057fa66
A
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17*/
18
19/*
3b58b885 20 C MMX MMX2 3DNow
3057fa66
A
21isVertDC Ec Ec
22isVertMinMaxOk Ec Ec
3b58b885 23doVertLowPass E e e
7f16f6e6 24doVertDefFilter Ec Ec e e
3057fa66 25isHorizDC Ec Ec
4e4dcbc5
MN
26isHorizMinMaxOk a E
27doHorizLowPass E e e
7f16f6e6 28doHorizDefFilter Ec Ec e e
2e212618 29deRing E e e*
3b58b885 30Vertical RKAlgo1 E a a
e5c30e06 31Horizontal RKAlgo1 a a
117e45b0
MN
32Vertical X1# a E E
33Horizontal X1# a E E
acced553
MN
34LinIpolDeinterlace e E E*
35CubicIpolDeinterlace a e e*
36LinBlendDeinterlace e E E*
117e45b0 37MedianDeinterlace# Ec Ec
be44a4d7 38TempDeNoiser# E e e
d5a1a995 39
117e45b0
MN
40* I don't have a 3DNow CPU -> it's untested, but no one said it doesn't work so it seems to work
41# more or less self-invented filters so the exactness isn't too meaningful
3057fa66 42E = Exact implementation
acced553 43e = almost exact implementation (slightly different rounding,...)
3057fa66
A
44a = alternative / approximate impl
45c = checked against the other implementations (-vo md5)
46*/
47
48/*
49TODO:
07f8991b 50remove global/static vars
3057fa66 51reduce the time wasted on the mem transfer
13e00528 52implement everything in C at least (done at the moment but ...)
3057fa66
A
53unroll stuff if instructions depend too much on the prior one
54we use 8x8 blocks for the horizontal filters, opendivx seems to use 8x4?
55move YScale thing to the end instead of fixing QP
13e00528 56write a faster and higher quality deblocking filter :)
d5a1a995
MN
57make the mainloop more flexible (variable number of blocks at once
58 (the if/else stuff per block is slowing things down)
9f45d04d 59compare the quality & speed of all filters
9f45d04d 60split this huge file
e5c30e06 61border remover
8405b3fd 62optimize c versions
117e45b0 63try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
be44a4d7 64smart blur
cd38e322 65commandline option for the deblock / dering thresholds
cc9b0679
MN
66put fastmemcpy back
67dont use #ifdef ARCH_X86 for the asm stuff ... cross compilers? (note cpudetect uses ARCH_X86)
3057fa66 68...
13e00528
A
69*/
70
a6be8111 71//Changelog: use the CVS log
3057fa66 72
6c426cff 73#include "../config.h"
3057fa66
A
74#include <inttypes.h>
75#include <stdio.h>
d5a1a995 76#include <stdlib.h>
911879d1 77#include <string.h>
cbf5fa71 78#include <errno.h>
dda87e9f
PL
79#ifdef HAVE_MALLOC_H
80#include <malloc.h>
81#endif
3057fa66 82//#undef HAVE_MMX2
13e00528 83//#define HAVE_3DNOW
3057fa66 84//#undef HAVE_MMX
cc9b0679 85//#undef ARCH_X86
7f16f6e6 86//#define DEBUG_BRIGHTNESS
cc9b0679 87//#include "../libvo/fastmemcpy.h"
13e00528 88#include "postprocess.h"
cc9b0679 89#include "../cpudetect.h"
9b464428 90#include "../mangle.h"
3057fa66 91
e939e1c3
A
/*
 * Small arithmetic helper macros.
 * Every macro expands its argument(s) more than once, so arguments must not
 * have side effects (no i++, no function calls with side effects).
 * ABS() and SIGN() compare against 0 with '>', so SIGN(0) yields -1.
 */
92#define MIN(a,b) ((a) > (b) ? (b) : (a))
93#define MAX(a,b) ((a) < (b) ? (b) : (a))
94#define ABS(a) ((a) > 0 ? (a) : (-(a)))
95#define SIGN(a) ((a) > 0 ? 1 : -1)
96
911879d1
MN
97#define GET_MODE_BUFFER_SIZE 500
98#define OPTIONS_ARRAY_SIZE 10
99
cc9b0679
MN
100#ifdef ARCH_X86
101#define CAN_COMPILE_X86_ASM
102#endif
103
104#ifdef CAN_COMPILE_X86_ASM
3fe8e8f0
MN
105static volatile uint64_t __attribute__((aligned(8))) packedYOffset= 0x0000000000000000LL;
106static volatile uint64_t __attribute__((aligned(8))) packedYScale= 0x0100010001000100LL;
b28daef8
MN
107static uint64_t __attribute__((aligned(8))) w05= 0x0005000500050005LL;
108static uint64_t __attribute__((aligned(8))) w20= 0x0020002000200020LL;
109static uint64_t __attribute__((aligned(8))) w1400= 0x1400140014001400LL;
110static uint64_t __attribute__((aligned(8))) bm00000001= 0x00000000000000FFLL;
111static uint64_t __attribute__((aligned(8))) bm00010000= 0x000000FF00000000LL;
112static uint64_t __attribute__((aligned(8))) bm00001000= 0x00000000FF000000LL;
113static uint64_t __attribute__((aligned(8))) bm10000000= 0xFF00000000000000LL;
114static uint64_t __attribute__((aligned(8))) bm10000001= 0xFF000000000000FFLL;
115static uint64_t __attribute__((aligned(8))) bm11000011= 0xFFFF00000000FFFFLL;
116static uint64_t __attribute__((aligned(8))) bm00000011= 0x000000000000FFFFLL;
117static uint64_t __attribute__((aligned(8))) bm11111110= 0xFFFFFFFFFFFFFF00LL;
118static uint64_t __attribute__((aligned(8))) bm11000000= 0xFFFF000000000000LL;
119static uint64_t __attribute__((aligned(8))) bm00011000= 0x000000FFFF000000LL;
120static uint64_t __attribute__((aligned(8))) bm00110011= 0x0000FFFF0000FFFFLL;
121static uint64_t __attribute__((aligned(8))) bm11001100= 0xFFFF0000FFFF0000LL;
122static uint64_t __attribute__((aligned(8))) b00= 0x0000000000000000LL;
123static uint64_t __attribute__((aligned(8))) b01= 0x0101010101010101LL;
124static uint64_t __attribute__((aligned(8))) b02= 0x0202020202020202LL;
125static uint64_t __attribute__((aligned(8))) b0F= 0x0F0F0F0F0F0F0F0FLL;
126static uint64_t __attribute__((aligned(8))) b04= 0x0404040404040404LL;
127static uint64_t __attribute__((aligned(8))) b08= 0x0808080808080808LL;
128static uint64_t __attribute__((aligned(8))) bFF= 0xFFFFFFFFFFFFFFFFLL;
129static uint64_t __attribute__((aligned(8))) b20= 0x2020202020202020LL;
130static uint64_t __attribute__((aligned(8))) b80= 0x8080808080808080LL;
43d52f76
MN
131static uint64_t __attribute__((aligned(8))) mmxDCOffset= 0x7E7E7E7E7E7E7E7ELL;
132static uint64_t __attribute__((aligned(8))) mmxDCThreshold= 0x7C7C7C7C7C7C7C7CLL;
b28daef8
MN
133static uint64_t __attribute__((aligned(8))) b3F= 0x3F3F3F3F3F3F3F3FLL;
134static uint64_t __attribute__((aligned(8))) temp0=0;
135static uint64_t __attribute__((aligned(8))) temp1=0;
136static uint64_t __attribute__((aligned(8))) temp2=0;
137static uint64_t __attribute__((aligned(8))) temp3=0;
138static uint64_t __attribute__((aligned(8))) temp4=0;
139static uint64_t __attribute__((aligned(8))) temp5=0;
140static uint64_t __attribute__((aligned(8))) pQPb=0;
141static uint64_t __attribute__((aligned(8))) pQPb2=0;
142static uint8_t __attribute__((aligned(8))) tempBlocks[8*16*2]; //used for the horizontal code
a9c77978 143static uint32_t __attribute__((aligned(4))) maxTmpNoise[4];
b28daef8 144#else
3057fa66
A
145static uint64_t packedYOffset= 0x0000000000000000LL;
146static uint64_t packedYScale= 0x0100010001000100LL;
b28daef8 147#endif
3057fa66 148
45b4f285
MN
149extern int divx_quality;
150int newPPFlag=0; //is set if -npp is used
151struct PPMode gPPMode[GET_PP_QUALITY_MAX+1];
cbf5fa71 152static int firstTime = 0, firstTime2 = 0;
45b4f285
MN
153
154extern int verbose;
155
3057fa66
A
156int hFlatnessThreshold= 56 - 16;
157int vFlatnessThreshold= 56 - 16;
cd38e322 158int deringThreshold= 20;
3057fa66 159
07f8991b
MN
160static int dcOffset;
161static int dcThreshold;
43d52f76 162
3057fa66
A
163//amount of "black" you are willing to lose to get a brightness corrected picture
164double maxClippedThreshold= 0.01;
165
911879d1
MN
166static struct PPFilter filters[]=
167{
168 {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK},
169 {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK},
6ec6e3c9
MN
170 {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER},
171 {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},
911879d1
MN
172 {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER},
173 {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER},
174 {"dr", "dering", 1, 5, 6, DERING},
175 {"al", "autolevels", 0, 1, 2, LEVEL_FIX},
43d52f76
MN
176 {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
177 {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
178 {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
179 {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER},
117e45b0 180 {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER},
8aaac435 181 {"fq", "forcequant", 1, 0, 0, FORCE_QUANT},
911879d1
MN
182 {NULL, NULL,0,0,0,0} //End Marker
183};
184
/**
 * Alias table used by getPPModeByNameAndQuality().
 * Entries come in pairs: even index = alias name, odd index = the filter
 * string the alias is replaced with. A NULL alias name terminates the table.
 * The strings are literals and are never modified, so both the pointers and
 * the characters are const.
 */
static const char * const replaceTable[]=
{
	"default", 	"hdeblock:a,vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",
	"de", 		"hdeblock:a,vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",
	"fast", 	"x1hdeblock:a,x1vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",
	"fa", 		"x1hdeblock:a,x1vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",
	NULL //End Marker
};
193
cc9b0679 194#ifdef CAN_COMPILE_X86_ASM
e5c30e06
MN
195static inline void unusedVariableWarningFixer()
196{
197if(
198 packedYOffset + packedYScale + w05 + w20 + w1400 + bm00000001 + bm00010000
199 + bm00001000 + bm10000000 + bm10000001 + bm11000011 + bm00000011 + bm11111110
200 + bm11000000 + bm00011000 + bm00110011 + bm11001100 + b00 + b01 + b02 + b0F
43d52f76 201 + bFF + b20 + b04+ b08 + pQPb2 + b80 + mmxDCOffset + mmxDCThreshold + b3F + temp0 + temp1 + temp2 + temp3 + temp4
e5c30e06
MN
202 + temp5 + pQPb== 0) b00=0;
203}
b28daef8 204#endif
e5c30e06 205
a6be8111 206#ifdef TIMING
3057fa66
A
/**
 * Reads the x86 time stamp counter (RDTSC instruction).
 *
 * RDTSC delivers its result in EDX:EAX. The "=A" constraint only names that
 * register pair on 32 bit x86; on x86-64 it means "rax or rdx", which drops
 * half of the value. Reading both halves explicitly is correct on both.
 *
 * @return the 64 bit counter value
 */
static inline long long rdtsc()
{
	uint32_t lo, hi;
	__asm__ __volatile__( "rdtsc\n\t"
		: "=a" (lo), "=d" (hi)
	);
	return (long long)(((uint64_t)hi << 32) | lo);
}
9a722af7 216#endif
3057fa66 217
cc9b0679 218#ifdef CAN_COMPILE_X86_ASM
3057fa66
A
/**
 * Executes the x86 PREFETCHNTA instruction on the address p.
 */
static inline void prefetchnta(void *p)
{
	__asm__ __volatile__("prefetchnta (%0)\n\t" : : "r" (p));
}
225
/**
 * Executes the x86 PREFETCHT0 instruction on the address p.
 */
static inline void prefetcht0(void *p)
{
	__asm__ __volatile__("prefetcht0 (%0)\n\t" : : "r" (p));
}
232
/**
 * Executes the x86 PREFETCHT1 instruction on the address p.
 */
static inline void prefetcht1(void *p)
{
	__asm__ __volatile__("prefetcht1 (%0)\n\t" : : "r" (p));
}
239
/**
 * Executes the x86 PREFETCHT2 instruction on the address p.
 */
static inline void prefetcht2(void *p)
{
	__asm__ __volatile__("prefetcht2 (%0)\n\t" : : "r" (p));
}
9a722af7 246#endif
3057fa66 247
cc9b0679 248// The horizontal functions exist only in C because the MMX code is faster with vertical filters and transposing
3057fa66 249
cf5ec61d
MN
250/**
251 * Check if the given 8x8 Block is mostly "flat"
252 */
253static inline int isHorizDC(uint8_t src[], int stride)
254{
255 int numEq= 0;
256 int y;
257 for(y=0; y<BLOCK_SIZE; y++)
258 {
43d52f76
MN
259 if(((src[0] - src[1] + dcOffset) & 0xFFFF) < dcThreshold) numEq++;
260 if(((src[1] - src[2] + dcOffset) & 0xFFFF) < dcThreshold) numEq++;
261 if(((src[2] - src[3] + dcOffset) & 0xFFFF) < dcThreshold) numEq++;
262 if(((src[3] - src[4] + dcOffset) & 0xFFFF) < dcThreshold) numEq++;
263 if(((src[4] - src[5] + dcOffset) & 0xFFFF) < dcThreshold) numEq++;
264 if(((src[5] - src[6] + dcOffset) & 0xFFFF) < dcThreshold) numEq++;
265 if(((src[6] - src[7] + dcOffset) & 0xFFFF) < dcThreshold) numEq++;
cf5ec61d
MN
266 src+= stride;
267 }
268 return numEq > hFlatnessThreshold;
269}
270
/**
 * Compares the first and the eighth pixel of the row at src.
 * @param src    first pixel of the row
 * @param stride unused
 * @param QP     quantizer; the allowed difference is 2*QP
 * @return 1 if |src[0] - src[7]| <= 2*QP, otherwise 0
 */
static inline int isHorizMinMaxOk(uint8_t src[], int stride, int QP)
{
	const int edgeDiff= src[0] - src[7];
	(void)stride;
	return abs(edgeDiff) <= 2*QP;
}
277
278static inline void doHorizDefFilter(uint8_t dst[], int stride, int QP)
279{
280 int y;
281 for(y=0; y<BLOCK_SIZE; y++)
282 {
283 const int middleEnergy= 5*(dst[4] - dst[5]) + 2*(dst[2] - dst[5]);
284
285 if(ABS(middleEnergy) < 8*QP)
286 {
287 const int q=(dst[3] - dst[4])/2;
288 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
289 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
290
291 int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) );
292 d= MAX(d, 0);
293
294 d= (5*d + 32) >> 6;
295 d*= SIGN(-middleEnergy);
296
297 if(q>0)
298 {
299 d= d<0 ? 0 : d;
300 d= d>q ? q : d;
301 }
302 else
303 {
304 d= d>0 ? 0 : d;
305 d= d<q ? q : d;
306 }
307
308 dst[3]-= d;
309 dst[4]+= d;
310 }
311 dst+= stride;
312 }
313}
314
315/**
316 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
317 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
318 */
319static inline void doHorizLowPass(uint8_t dst[], int stride, int QP)
320{
321
322 int y;
323 for(y=0; y<BLOCK_SIZE; y++)
324 {
325 const int first= ABS(dst[-1] - dst[0]) < QP ? dst[-1] : dst[0];
326 const int last= ABS(dst[8] - dst[7]) < QP ? dst[8] : dst[7];
327
328 int sums[9];
329 sums[0] = first + dst[0];
330 sums[1] = dst[0] + dst[1];
331 sums[2] = dst[1] + dst[2];
332 sums[3] = dst[2] + dst[3];
333 sums[4] = dst[3] + dst[4];
334 sums[5] = dst[4] + dst[5];
335 sums[6] = dst[5] + dst[6];
336 sums[7] = dst[6] + dst[7];
337 sums[8] = dst[7] + last;
338
339 dst[0]= ((sums[0]<<2) + ((first + sums[2])<<1) + sums[4] + 8)>>4;
340 dst[1]= ((dst[1]<<2) + ((first + sums[0] + sums[3])<<1) + sums[5] + 8)>>4;
341 dst[2]= ((dst[2]<<2) + ((first + sums[1] + sums[4])<<1) + sums[6] + 8)>>4;
342 dst[3]= ((dst[3]<<2) + ((sums[2] + sums[5])<<1) + sums[0] + sums[7] + 8)>>4;
343 dst[4]= ((dst[4]<<2) + ((sums[3] + sums[6])<<1) + sums[1] + sums[8] + 8)>>4;
344 dst[5]= ((dst[5]<<2) + ((last + sums[7] + sums[4])<<1) + sums[2] + 8)>>4;
345 dst[6]= (((last + dst[6])<<2) + ((dst[7] + sums[5])<<1) + sums[3] + 8)>>4;
346 dst[7]= ((sums[8]<<2) + ((last + sums[6])<<1) + sums[4] + 8)>>4;
347
348 dst+= stride;
349 }
350}
351
4e4dcbc5 352/**
cc9b0679
MN
353 * Experimental Filter 1 (Horizontal)
354 * will not damage linear gradients
355 * Flat blocks should look like they where passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
356 * can only smooth blocks at the expected locations (it cant smooth them if they did move)
357 * MMX2 version does correct clipping C version doesnt
358 * not identical with the vertical one
4e4dcbc5 359 */
cc9b0679
MN
360static inline void horizX1Filter(uint8_t *src, int stride, int QP)
361{
117e45b0 362 int y;
cc9b0679
MN
363 static uint64_t *lut= NULL;
364 if(lut==NULL)
117e45b0 365 {
cc9b0679
MN
366 int i;
367 lut= (uint64_t*)memalign(8, 256*8);
368 for(i=0; i<256; i++)
117e45b0 369 {
cc9b0679 370 int v= i < 128 ? 2*i : 2*(i-256);
117e45b0 371/*
cc9b0679
MN
372//Simulate 112242211 9-Tap filter
373 uint64_t a= (v/16) & 0xFF;
374 uint64_t b= (v/8) & 0xFF;
375 uint64_t c= (v/4) & 0xFF;
376 uint64_t d= (3*v/8) & 0xFF;
117e45b0 377*/
cc9b0679
MN
378//Simulate piecewise linear interpolation
379 uint64_t a= (v/16) & 0xFF;
380 uint64_t b= (v*3/16) & 0xFF;
381 uint64_t c= (v*5/16) & 0xFF;
382 uint64_t d= (7*v/16) & 0xFF;
383 uint64_t A= (0x100 - a)&0xFF;
384 uint64_t B= (0x100 - b)&0xFF;
385 uint64_t C= (0x100 - c)&0xFF;
386 uint64_t D= (0x100 - c)&0xFF;
387
388 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
389 (D<<24) | (C<<16) | (B<<8) | (A);
390 //lut[i] = (v<<32) | (v<<24);
117e45b0
MN
391 }
392 }
cc9b0679
MN
393
394 for(y=0; y<BLOCK_SIZE; y++)
117e45b0 395 {
cc9b0679
MN
396 int a= src[1] - src[2];
397 int b= src[3] - src[4];
398 int c= src[5] - src[6];
399
400 int d= MAX(ABS(b) - (ABS(a) + ABS(c))/2, 0);
401
402 if(d < QP)
117e45b0 403 {
cc9b0679
MN
404 int v = d * SIGN(-b);
405
406 src[1] +=v/8;
407 src[2] +=v/4;
408 src[3] +=3*v/8;
409 src[4] -=3*v/8;
410 src[5] -=v/4;
411 src[6] -=v/8;
412
117e45b0 413 }
cc9b0679 414 src+=stride;
117e45b0 415 }
cc9b0679
MN
416}
417
418
e89952aa 419//Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
cc9b0679 420//Plain C versions
e89952aa
MN
421#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
422#define COMPILE_C
423#endif
424
425#ifdef CAN_COMPILE_X86_ASM
426
427#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
428#define COMPILE_MMX
429#endif
430
431#if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
432#define COMPILE_MMX2
433#endif
434
435#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
436#define COMPILE_3DNOW
437#endif
438#endif //CAN_COMPILE_X86_ASM
439
440#undef HAVE_MMX
441#undef HAVE_MMX2
442#undef HAVE_3DNOW
443#undef ARCH_X86
444
445#ifdef COMPILE_C
cc9b0679
MN
446#undef HAVE_MMX
447#undef HAVE_MMX2
448#undef HAVE_3DNOW
449#undef ARCH_X86
450#define RENAME(a) a ## _C
451#include "postprocess_template.c"
e89952aa 452#endif
cc9b0679
MN
453
454//MMX versions
e89952aa 455#ifdef COMPILE_MMX
cc9b0679
MN
456#undef RENAME
457#define HAVE_MMX
458#undef HAVE_MMX2
459#undef HAVE_3DNOW
460#define ARCH_X86
461#define RENAME(a) a ## _MMX
462#include "postprocess_template.c"
e89952aa 463#endif
cc9b0679
MN
464
465//MMX2 versions
e89952aa 466#ifdef COMPILE_MMX2
cc9b0679
MN
467#undef RENAME
468#define HAVE_MMX
469#define HAVE_MMX2
470#undef HAVE_3DNOW
471#define ARCH_X86
472#define RENAME(a) a ## _MMX2
473#include "postprocess_template.c"
e89952aa 474#endif
cc9b0679
MN
475
476//3DNOW versions
e89952aa 477#ifdef COMPILE_3DNOW
cc9b0679
MN
478#undef RENAME
479#define HAVE_MMX
480#undef HAVE_MMX2
481#define HAVE_3DNOW
482#define ARCH_X86
483#define RENAME(a) a ## _3DNow
484#include "postprocess_template.c"
e89952aa 485#endif
cc9b0679
MN
486
487// minor note: the HAVE_xyz macros are messed up after this point, so don't rely on them
488
489static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
490 QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode)
491{
492 // useing ifs here as they are faster than function pointers allthough the
493 // difference wouldnt be messureable here but its much better because
494 // someone might exchange the cpu whithout restarting mplayer ;)
e89952aa 495#ifdef RUNTIME_CPUDETECT
cc9b0679
MN
496#ifdef CAN_COMPILE_X86_ASM
497 // ordered per speed fasterst first
498 if(gCpuCaps.hasMMX2)
499 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, ppMode);
500 else if(gCpuCaps.has3DNow)
501 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, ppMode);
502 else if(gCpuCaps.hasMMX)
503 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, ppMode);
504 else
505 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, ppMode);
506#else
507 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, ppMode);
be44a4d7 508#endif
e89952aa
MN
509#else //RUNTIME_CPUDETECT
510#ifdef HAVE_MMX2
511 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, ppMode);
512#elif defined (HAVE_3DNOW)
513 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, ppMode);
514#elif defined (HAVE_MMX)
515 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, ppMode);
516#else
517 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, ppMode);
518#endif
519#endif //!RUNTIME_CPUDETECT
117e45b0
MN
520}
521
9a722af7
A
522#ifdef HAVE_ODIVX_POSTPROCESS
523#include "../opendivx/postprocess.h"
524int use_old_pp=0;
525#endif
13e00528 526
cc9b0679
MN
527//static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
528// QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
13e00528 529
911879d1
MN
/* -npp command line help, printed by readNPPOpt() for "-npp help"
NOTE/FIXME: put this at an appropriate place (--help, html docs, man mplayer)?
The names listed here must match the filters[] and replaceTable[] entries:
filter names are compared with strcmp(), so the force quantizer filter is
"forcequant" (all lower case), and the "default" / "fast" aliases expand to
strings that include the tmpnoise filter.
*/
char *help=
"-npp <filterName>[:<option>[:<option>...]][,[-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"-npp vdeblock:autoq,hdeblock:autoq,linblenddeint -npp default,-vdeblock\n"
"short form example:\n"
"-npp vb:a,hb:a,lb -npp de,-vb\n"
"more examples:\n"
"-npp tn:64:128:256\n"
"Filters Options\n"
"short long name short long option Description\n"
"* * a autoq cpu power dependant enabler\n"
" c chrom chrominance filtring enabled\n"
" y nochrom chrominance filtring disabled\n"
"hb hdeblock (2 Threshold) horizontal deblocking filter\n"
" 1. Threshold: default=1, higher -> more deblocking\n"
" 2. Threshold: default=40, lower -> more deblocking\n"
" the h & v deblocking filters share these\n"
" so u cant set different thresholds for h / v\n"
"vb vdeblock (2 Threshold) vertical deblocking filter\n"
"hr rkhdeblock\n"
"vr rkvdeblock\n"
"h1 x1hdeblock Experimental h deblock filter 1\n"
"v1 x1vdeblock Experimental v deblock filter 1\n"
"dr dering Deringing filter\n"
"al autolevels automatic brightness / contrast\n"
" f fullyrange stretch luminance to (0..255)\n"
"lb linblenddeint linear blend deinterlacer\n"
"li linipoldeint linear interpolating deinterlace\n"
"ci cubicipoldeint cubic interpolating deinterlacer\n"
"md mediandeint median deinterlacer\n"
"de default hb:a,vb:a,dr:a,al,tn:a:150:200:400\n"
"fa fast h1:a,v1:a,dr:a,al,tn:a:150:200:400\n"
"tn tmpnoise (3 Thresholds) Temporal Noise Reducer\n"
" 1. <= 2. <= 3. larger -> stronger filtering\n"
"fq forcequant <quantizer> Force quantizer\n"
;
911879d1
MN
569
570/**
571 * returns a PPMode struct which will have a non 0 error variable if an error occured
572 * name is the string after "-pp" on the command line
573 * quality is a number from 0 to GET_PP_QUALITY_MAX
574 */
575struct PPMode getPPModeByNameAndQuality(char *name, int quality)
576{
577 char temp[GET_MODE_BUFFER_SIZE];
578 char *p= temp;
579 char *filterDelimiters= ",";
580 char *optionDelimiters= ":";
117e45b0 581 struct PPMode ppMode= {0,0,0,0,0,0,{150,200,400}};
911879d1
MN
582 char *filterToken;
583
584 strncpy(temp, name, GET_MODE_BUFFER_SIZE);
585
162c9c2e 586 if(verbose>1) printf("pp: %s\n", name);
117e45b0 587
911879d1 588 for(;;){
911879d1 589 char *filterName;
117e45b0 590 int q= 1000000; //GET_PP_QUALITY_MAX;
911879d1
MN
591 int chrom=-1;
592 char *option;
593 char *options[OPTIONS_ARRAY_SIZE];
594 int i;
595 int filterNameOk=0;
596 int numOfUnknownOptions=0;
597 int enable=1; //does the user want us to enabled or disabled the filter
598
599 filterToken= strtok(p, filterDelimiters);
600 if(filterToken == NULL) break;
117e45b0 601 p+= strlen(filterToken) + 1; // p points to next filterToken
911879d1 602 filterName= strtok(filterToken, optionDelimiters);
162c9c2e 603 if(verbose>1) printf("pp: %s::%s\n", filterToken, filterName);
911879d1
MN
604
605 if(*filterName == '-')
606 {
607 enable=0;
608 filterName++;
609 }
117e45b0 610
911879d1
MN
611 for(;;){ //for all options
612 option= strtok(NULL, optionDelimiters);
613 if(option == NULL) break;
614
162c9c2e 615 if(verbose>1) printf("pp: option: %s\n", option);
911879d1
MN
616 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
617 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
618 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
619 else
620 {
621 options[numOfUnknownOptions] = option;
622 numOfUnknownOptions++;
911879d1
MN
623 }
624 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
625 }
117e45b0 626 options[numOfUnknownOptions] = NULL;
911879d1
MN
627
628 /* replace stuff from the replace Table */
629 for(i=0; replaceTable[2*i]!=NULL; i++)
630 {
631 if(!strcmp(replaceTable[2*i], filterName))
632 {
633 int newlen= strlen(replaceTable[2*i + 1]);
634 int plen;
635 int spaceLeft;
636
637 if(p==NULL) p= temp, *p=0; //last filter
638 else p--, *p=','; //not last filter
639
640 plen= strlen(p);
8cd91a44 641 spaceLeft= p - temp + plen;
911879d1
MN
642 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE)
643 {
644 ppMode.error++;
645 break;
646 }
647 memmove(p + newlen, p, plen+1);
648 memcpy(p, replaceTable[2*i + 1], newlen);
649 filterNameOk=1;
650 }
651 }
652
653 for(i=0; filters[i].shortName!=NULL; i++)
654 {
117e45b0 655// printf("Compareing %s, %s, %s\n", filters[i].shortName,filters[i].longName, filterName);
911879d1
MN
656 if( !strcmp(filters[i].longName, filterName)
657 || !strcmp(filters[i].shortName, filterName))
658 {
659 ppMode.lumMode &= ~filters[i].mask;
660 ppMode.chromMode &= ~filters[i].mask;
661
662 filterNameOk=1;
663 if(!enable) break; // user wants to disable it
664
665 if(q >= filters[i].minLumQuality)
666 ppMode.lumMode|= filters[i].mask;
667 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
668 if(q >= filters[i].minChromQuality)
669 ppMode.chromMode|= filters[i].mask;
670
671 if(filters[i].mask == LEVEL_FIX)
672 {
673 int o;
674 ppMode.minAllowedY= 16;
675 ppMode.maxAllowedY= 234;
676 for(o=0; options[o]!=NULL; o++)
07f8991b 677 {
911879d1
MN
678 if( !strcmp(options[o],"fullyrange")
679 ||!strcmp(options[o],"f"))
680 {
681 ppMode.minAllowedY= 0;
682 ppMode.maxAllowedY= 255;
683 numOfUnknownOptions--;
684 }
07f8991b 685 }
911879d1 686 }
117e45b0
MN
687 else if(filters[i].mask == TEMP_NOISE_FILTER)
688 {
689 int o;
690 int numOfNoises=0;
691 ppMode.maxTmpNoise[0]= 150;
692 ppMode.maxTmpNoise[1]= 200;
693 ppMode.maxTmpNoise[2]= 400;
694
695 for(o=0; options[o]!=NULL; o++)
696 {
697 char *tail;
698 ppMode.maxTmpNoise[numOfNoises]=
699 strtol(options[o], &tail, 0);
700 if(tail!=options[o])
701 {
702 numOfNoises++;
703 numOfUnknownOptions--;
704 if(numOfNoises >= 3) break;
705 }
706 }
707 }
43d52f76
MN
708 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK)
709 {
710 int o;
07f8991b
MN
711 ppMode.maxDcDiff=1;
712// hFlatnessThreshold= 40;
713// vFlatnessThreshold= 40;
43d52f76
MN
714
715 for(o=0; options[o]!=NULL && o<2; o++)
716 {
717 char *tail;
718 int val= strtol(options[o], &tail, 0);
719 if(tail==options[o]) break;
720
721 numOfUnknownOptions--;
07f8991b 722 if(o==0) ppMode.maxDcDiff= val;
43d52f76
MN
723 else hFlatnessThreshold=
724 vFlatnessThreshold= val;
725 }
726 }
8aaac435
MN
727 else if(filters[i].mask == FORCE_QUANT)
728 {
729 int o;
730 ppMode.forcedQuant= 15;
731
732 for(o=0; options[o]!=NULL && o<1; o++)
733 {
734 char *tail;
735 int val= strtol(options[o], &tail, 0);
736 if(tail==options[o]) break;
737
738 numOfUnknownOptions--;
739 ppMode.forcedQuant= val;
740 }
741 }
911879d1
MN
742 }
743 }
744 if(!filterNameOk) ppMode.error++;
745 ppMode.error += numOfUnknownOptions;
746 }
747
815cbfe7 748#ifdef HAVE_ODIVX_POSTPROCESS
911879d1
MN
749 if(ppMode.lumMode & H_DEBLOCK) ppMode.oldMode |= PP_DEBLOCK_Y_H;
750 if(ppMode.lumMode & V_DEBLOCK) ppMode.oldMode |= PP_DEBLOCK_Y_V;
751 if(ppMode.chromMode & H_DEBLOCK) ppMode.oldMode |= PP_DEBLOCK_C_H;
752 if(ppMode.chromMode & V_DEBLOCK) ppMode.oldMode |= PP_DEBLOCK_C_V;
753 if(ppMode.lumMode & DERING) ppMode.oldMode |= PP_DERING_Y;
754 if(ppMode.chromMode & DERING) ppMode.oldMode |= PP_DERING_C;
815cbfe7 755#endif
911879d1 756
162c9c2e 757 if(verbose>1) printf("pp: lumMode=%X, chromMode=%X\n", ppMode.lumMode, ppMode.chromMode);
911879d1
MN
758 return ppMode;
759}
760
3057fa66 761/**
45b4f285
MN
762 * Check and load the -npp part of the cmd line
763 */
cbf5fa71 764int readNPPOpt(void *conf, char *arg)
45b4f285
MN
765{
766 int quality;
211c4920
MN
767
768 if(!strcmp("help", arg))
769 {
770 printf("%s", help);
771 exit(1);
772 }
773
45b4f285
MN
774 for(quality=0; quality<GET_PP_QUALITY_MAX+1; quality++)
775 {
776 gPPMode[quality]= getPPModeByNameAndQuality(arg, quality);
777
778 if(gPPMode[quality].error) return -1;
779 }
780 newPPFlag=1;
781
6ec6e3c9 782//divx_quality is passed to postprocess if autoq if off
cbf5fa71
AB
783 divx_quality= GET_PP_QUALITY_MAX;
784 firstTime = firstTime2 = 1;
45b4f285
MN
785 return 1;
786}
787
cbf5fa71
AB
788int readPPOpt(void *conf, char *arg)
789{
790 int val;
791
792 if(arg == NULL)
793 return -2; // ERR_MISSING_PARAM
794 errno = 0;
795 val = (int)strtol(arg,NULL,0);
796 if(errno != 0)
797 return -4; // What about include cfgparser.h and use ERR_* defines */
798 if(val < 0)
799 return -3; // ERR_OUT_OF_RANGE
800
801 divx_quality = val;
802 firstTime = firstTime2 = 1;
803
804 return 1;
805}
806
807void revertPPOpt(void *conf, char* opt)
808{
809 newPPFlag=0;
810 divx_quality=0;
811}
812
813
45b4f285 814/**
117e45b0 815 * Obsolete, dont use it, use postprocess2() instead
45b4f285
MN
816 * this will check newPPFlag automatically and use postprocess2 if it is set
817 * mode = quality if newPPFlag
3057fa66 818 */
3057fa66
A
819void postprocess(unsigned char * src[], int src_stride,
820 unsigned char * dst[], int dst_stride,
821 int horizontal_size, int vertical_size,
822 QP_STORE_T *QP_store, int QP_stride,
823 int mode)
824{
117e45b0
MN
825 struct PPMode ppMode;
826 static QP_STORE_T zeroArray[2048/8];
45b4f285
MN
827
828 if(newPPFlag)
829 {
830 ppMode= gPPMode[mode];
8aaac435 831// printf("%d \n",QP_store[5]);
45b4f285
MN
832 postprocess2(src, src_stride, dst, dst_stride,
833 horizontal_size, vertical_size, QP_store, QP_stride, &ppMode);
834
835 return;
836 }
162c9c2e
MN
837
838 if(firstTime && verbose)
839 {
840 printf("using pp filters 0x%X\n", mode);
841 firstTime=0;
842 }
843
815cbfe7
MN
844 if(QP_store==NULL)
845 {
846 QP_store= zeroArray;
847 QP_stride= 0;
848 }
13e00528 849
117e45b0
MN
850 ppMode.lumMode= mode;
851 mode= ((mode&0xFF)>>4) | (mode&0xFFFFFF00);
852 ppMode.chromMode= mode;
be44a4d7
MN
853 ppMode.maxTmpNoise[0]= 700;
854 ppMode.maxTmpNoise[1]= 1500;
855 ppMode.maxTmpNoise[2]= 3000;
07f8991b
MN
856 ppMode.maxAllowedY= 234;
857 ppMode.minAllowedY= 16;
858 ppMode.maxDcDiff= 1;
117e45b0 859
9a722af7
A
860#ifdef HAVE_ODIVX_POSTPROCESS
861// Note: I could make this shit outside of this file, but it would mean one
862// more function call...
863 if(use_old_pp){
864 odivx_postprocess(src,src_stride,dst,dst_stride,horizontal_size,vertical_size,QP_store,QP_stride,mode);
865 return;
866 }
867#endif
868
13e00528 869 postProcess(src[0], src_stride, dst[0], dst_stride,
117e45b0 870 horizontal_size, vertical_size, QP_store, QP_stride, 0, &ppMode);
3057fa66
A
871
872 horizontal_size >>= 1;
873 vertical_size >>= 1;
874 src_stride >>= 1;
875 dst_stride >>= 1;
876
4e1349d4 877 if(ppMode.chromMode)
3057fa66 878 {
13e00528 879 postProcess(src[1], src_stride, dst[1], dst_stride,
117e45b0 880 horizontal_size, vertical_size, QP_store, QP_stride, 1, &ppMode);
13e00528 881 postProcess(src[2], src_stride, dst[2], dst_stride,
117e45b0 882 horizontal_size, vertical_size, QP_store, QP_stride, 2, &ppMode);
3057fa66 883 }
4e1349d4
MN
884 else if(src_stride == dst_stride)
885 {
886 memcpy(dst[1], src[1], src_stride*vertical_size);
887 memcpy(dst[2], src[2], src_stride*vertical_size);
888 }
3057fa66
A
889 else
890 {
4e1349d4
MN
891 int y;
892 for(y=0; y<vertical_size; y++)
893 {
894 memcpy(&(dst[1][y*dst_stride]), &(src[1][y*src_stride]), horizontal_size);
895 memcpy(&(dst[2][y*dst_stride]), &(src[2][y*src_stride]), horizontal_size);
896 }
897 }
898
899#if 0
117e45b0
MN
900 memset(dst[1], 128, dst_stride*vertical_size);
901 memset(dst[2], 128, dst_stride*vertical_size);
4e1349d4 902#endif
3057fa66 903}
9a722af7 904
911879d1
MN
905void postprocess2(unsigned char * src[], int src_stride,
906 unsigned char * dst[], int dst_stride,
907 int horizontal_size, int vertical_size,
908 QP_STORE_T *QP_store, int QP_stride,
909 struct PPMode *mode)
910{
911
8aaac435 912 QP_STORE_T quantArray[2048/8];
162c9c2e 913
8aaac435 914 if(QP_store==NULL || (mode->lumMode & FORCE_QUANT))
815cbfe7 915 {
8aaac435
MN
916 int i;
917 QP_store= quantArray;
815cbfe7 918 QP_stride= 0;
8aaac435
MN
919 if(mode->lumMode & FORCE_QUANT)
920 for(i=0; i<2048/8; i++) quantArray[i]= mode->forcedQuant;
921 else
922 for(i=0; i<2048/8; i++) quantArray[i]= 1;
815cbfe7
MN
923 }
924
cbf5fa71 925 if(firstTime2 && verbose)
162c9c2e
MN
926 {
927 printf("using npp filters 0x%X/0x%X\n", mode->lumMode, mode->chromMode);
cbf5fa71 928 firstTime2=0;
162c9c2e
MN
929 }
930
911879d1
MN
931#ifdef HAVE_ODIVX_POSTPROCESS
932// Note: I could make this shit outside of this file, but it would mean one
933// more function call...
934 if(use_old_pp){
935 odivx_postprocess(src,src_stride,dst,dst_stride,horizontal_size,vertical_size,QP_store,QP_stride,
936 mode->oldMode);
937 return;
938 }
939#endif
940
941 postProcess(src[0], src_stride, dst[0], dst_stride,
117e45b0 942 horizontal_size, vertical_size, QP_store, QP_stride, 0, mode);
911879d1
MN
943
944 horizontal_size >>= 1;
945 vertical_size >>= 1;
946 src_stride >>= 1;
947 dst_stride >>= 1;
948
4e1349d4
MN
949 if(mode->chromMode)
950 {
951 postProcess(src[1], src_stride, dst[1], dst_stride,
952 horizontal_size, vertical_size, QP_store, QP_stride, 1, mode);
953 postProcess(src[2], src_stride, dst[2], dst_stride,
954 horizontal_size, vertical_size, QP_store, QP_stride, 2, mode);
955 }
956 else if(src_stride == dst_stride)
957 {
958 memcpy(dst[1], src[1], src_stride*vertical_size);
959 memcpy(dst[2], src[2], src_stride*vertical_size);
960 }
961 else
962 {
963 int y;
964 for(y=0; y<vertical_size; y++)
965 {
966 memcpy(&(dst[1][y*dst_stride]), &(src[1][y*src_stride]), horizontal_size);
967 memcpy(&(dst[2][y*dst_stride]), &(src[2][y*src_stride]), horizontal_size);
968 }
969 }
911879d1
MN
970}
971
972
13e00528
A
973/**
974 * gets the mode flags for a given quality (larger values mean slower but better postprocessing)
45b4f285 975 * with -npp it simply returns quality
9a722af7 976 * 0 <= quality <= 6
13e00528 977 */
9a722af7
A
978int getPpModeForQuality(int quality){
979 int modes[1+GET_PP_QUALITY_MAX]= {
980 0,
981#if 1
982 // horizontal filters first
983 LUM_H_DEBLOCK,
984 LUM_H_DEBLOCK | LUM_V_DEBLOCK,
985 LUM_H_DEBLOCK | LUM_V_DEBLOCK | CHROM_H_DEBLOCK,
986 LUM_H_DEBLOCK | LUM_V_DEBLOCK | CHROM_H_DEBLOCK | CHROM_V_DEBLOCK,
987 LUM_H_DEBLOCK | LUM_V_DEBLOCK | CHROM_H_DEBLOCK | CHROM_V_DEBLOCK | LUM_DERING,
988 LUM_H_DEBLOCK | LUM_V_DEBLOCK | CHROM_H_DEBLOCK | CHROM_V_DEBLOCK | LUM_DERING | CHROM_DERING
989#else
990 // vertical filters first
13e00528
A
991 LUM_V_DEBLOCK,
992 LUM_V_DEBLOCK | LUM_H_DEBLOCK,
993 LUM_V_DEBLOCK | LUM_H_DEBLOCK | CHROM_V_DEBLOCK,
994 LUM_V_DEBLOCK | LUM_H_DEBLOCK | CHROM_V_DEBLOCK | CHROM_H_DEBLOCK,
995 LUM_V_DEBLOCK | LUM_H_DEBLOCK | CHROM_V_DEBLOCK | CHROM_H_DEBLOCK | LUM_DERING,
996 LUM_V_DEBLOCK | LUM_H_DEBLOCK | CHROM_V_DEBLOCK | CHROM_H_DEBLOCK | LUM_DERING | CHROM_DERING
9a722af7
A
997#endif
998 };
999
1000#ifdef HAVE_ODIVX_POSTPROCESS
1001 int odivx_modes[1+GET_PP_QUALITY_MAX]= {
1002 0,
1003 PP_DEBLOCK_Y_H,
1004 PP_DEBLOCK_Y_H|PP_DEBLOCK_Y_V,
1005 PP_DEBLOCK_Y_H|PP_DEBLOCK_Y_V|PP_DEBLOCK_C_H,
1006 PP_DEBLOCK_Y_H|PP_DEBLOCK_Y_V|PP_DEBLOCK_C_H|PP_DEBLOCK_C_V,
1007 PP_DEBLOCK_Y_H|PP_DEBLOCK_Y_V|PP_DEBLOCK_C_H|PP_DEBLOCK_C_V|PP_DERING_Y,
1008 PP_DEBLOCK_Y_H|PP_DEBLOCK_Y_V|PP_DEBLOCK_C_H|PP_DEBLOCK_C_V|PP_DERING_Y|PP_DERING_C
1009 };
1010 if(use_old_pp) return odivx_modes[quality];
1011#endif
45b4f285
MN
1012 if(newPPFlag) return quality;
1013 else return modes[quality];
3057fa66
A
1014}
1015
4e4dcbc5 1016