typos & cosmetics
[libav.git] / libavcodec / libpostproc / postprocess.c
CommitLineData
3057fa66 1/*
9858f773 2 Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
3057fa66
A
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17*/
18
19/*
3b58b885 20 C MMX MMX2 3DNow
3057fa66
A
21isVertDC Ec Ec
22isVertMinMaxOk Ec Ec
3b58b885 23doVertLowPass E e e
7f16f6e6 24doVertDefFilter Ec Ec e e
3057fa66 25isHorizDC Ec Ec
4e4dcbc5
MN
26isHorizMinMaxOk a E
27doHorizLowPass E e e
7f16f6e6 28doHorizDefFilter Ec Ec e e
2e212618 29deRing E e e*
3b58b885 30Vertical RKAlgo1 E a a
e5c30e06 31Horizontal RKAlgo1 a a
117e45b0
MN
32Vertical X1# a E E
33Horizontal X1# a E E
acced553
MN
34LinIpolDeinterlace e E E*
35CubicIpolDeinterlace a e e*
36LinBlendDeinterlace e E E*
9b1663fc 37MedianDeinterlace# E Ec Ec
be44a4d7 38TempDeNoiser# E e e
d5a1a995 39
117e45b0
MN
* I don't have a 3DNow! CPU -> it's untested, but no one said it doesn't work, so it seems to work
# more or less self-invented filters, so the exactness isn't too meaningful
3057fa66 42E = Exact implementation
e = almost exact implementation (slightly different rounding,...)
3057fa66
A
44a = alternative / approximate impl
45c = checked against the other implementations (-vo md5)
46*/
47
48/*
49TODO:
3057fa66 50reduce the time wasted on the mem transfer
3057fa66 51unroll stuff if instructions depend too much on the prior one
3057fa66 52move YScale thing to the end instead of fixing QP
13e00528 53write a faster and higher quality deblocking filter :)
d5a1a995
MN
54make the mainloop more flexible (variable number of blocks at once
55 (the if/else stuff per block is slowing things down)
9f45d04d 56compare the quality & speed of all filters
9f45d04d 57split this huge file
8405b3fd 58optimize c versions
117e45b0 59try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
3057fa66 60...
13e00528
A
61*/
62
a6be8111 63//Changelog: use the CVS log
3057fa66 64
9858f773 65#include "config.h"
3057fa66
A
66#include <inttypes.h>
67#include <stdio.h>
d5a1a995 68#include <stdlib.h>
911879d1 69#include <string.h>
dda87e9f
PL
70#ifdef HAVE_MALLOC_H
71#include <malloc.h>
72#endif
3057fa66 73//#undef HAVE_MMX2
13e00528 74//#define HAVE_3DNOW
3057fa66 75//#undef HAVE_MMX
cc9b0679 76//#undef ARCH_X86
7f16f6e6 77//#define DEBUG_BRIGHTNESS
bba9b16c
MN
78#ifdef USE_FASTMEMCPY
79#include "libvo/fastmemcpy.h"
70d4f2da 80#endif
13e00528 81#include "postprocess.h"
c41d972d 82#include "postprocess_internal.h"
bba9b16c
MN
83
84#include "mangle.h" //FIXME should be supressed
3057fa66 85
ca390e72
ZK
86#ifndef HAVE_MEMALIGN
87#define memalign(a,b) malloc(b)
88#endif
89
e939e1c3
A
90#define MIN(a,b) ((a) > (b) ? (b) : (a))
91#define MAX(a,b) ((a) < (b) ? (b) : (a))
92#define ABS(a) ((a) > 0 ? (a) : (-(a)))
93#define SIGN(a) ((a) > 0 ? 1 : -1)
94
911879d1
MN
95#define GET_MODE_BUFFER_SIZE 500
96#define OPTIONS_ARRAY_SIZE 10
9c9e467d
MN
97#define BLOCK_SIZE 8
98#define TEMP_STRIDE 8
99//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
911879d1 100
cc9b0679 101#ifdef ARCH_X86
b28daef8
MN
/*
 * Packed 64-bit constants, 8-byte aligned.
 * wXX = four 16-bit words of value 0xXX, bXX = eight bytes of value 0xXX.
 * NOTE(review): presumably used as memory operands by the inline assembly in
 * postprocess_template.c -- confirm there.
 */
static uint64_t __attribute__((aligned(8))) w05= 0x0005000500050005ULL;
static uint64_t __attribute__((aligned(8))) w20= 0x0020002000200020ULL;
static uint64_t __attribute__((aligned(8))) b00= 0x0000000000000000ULL;
static uint64_t __attribute__((aligned(8))) b01= 0x0101010101010101ULL;
static uint64_t __attribute__((aligned(8))) b02= 0x0202020202020202ULL;
static uint64_t __attribute__((aligned(8))) b08= 0x0808080808080808ULL;
static uint64_t __attribute__((aligned(8))) b80= 0x8080808080808080ULL;
b28daef8 109#endif
3057fa66 110
df8d4d0e 111static int verbose= 0;
45b4f285 112
df8d4d0e 113static const int deringThreshold= 20;
3057fa66 114
9c9e467d 115
911879d1
MN
116static struct PPFilter filters[]=
117{
118 {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK},
119 {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK},
9c9e467d
MN
120/* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER},
121 {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/
911879d1
MN
122 {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER},
123 {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER},
124 {"dr", "dering", 1, 5, 6, DERING},
125 {"al", "autolevels", 0, 1, 2, LEVEL_FIX},
43d52f76
MN
126 {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
127 {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
128 {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
129 {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER},
9c9e467d 130 {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER},
117e45b0 131 {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER},
8aaac435 132 {"fq", "forcequant", 1, 0, 0, FORCE_QUANT},
911879d1
MN
133 {NULL, NULL,0,0,0,0} //End Marker
134};
135
/*
 * Alias table: entries come in pairs {alias, replacement filter list} and the
 * table ends with a single NULL. The long and the short alias of a preset
 * expand to the same list.
 */
static char *replaceTable[]=
{
	/* default preset */
	"default",
	"hdeblock:a,vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",
	"de",
	"hdeblock:a,vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",

	/* fast preset */
	"fast",
	"x1hdeblock:a,x1vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",
	"fa",
	"x1hdeblock:a,x1vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",

	NULL //End Marker
};
144
9c9e467d 145#ifdef ARCH_X86
e5c30e06
MN
146static inline void unusedVariableWarningFixer()
147{
9c9e467d 148 if(w05 + w20 + b00 + b01 + b02 + b08 + b80 == 0) b00=0;
e5c30e06 149}
b28daef8 150#endif
e5c30e06 151
3057fa66 152
9c9e467d 153#ifdef ARCH_X86
3057fa66
A
/** Issue the x86 "prefetchnta" instruction for the address p. */
static inline void prefetchnta(void *p)
{
	__asm__ __volatile__(
		"prefetchnta (%0)\n\t"
		: /* no outputs */
		: "r" (p)
	);
}
160
/** Issue the x86 "prefetcht0" instruction for the address p. */
static inline void prefetcht0(void *p)
{
	__asm__ __volatile__(
		"prefetcht0 (%0)\n\t"
		: /* no outputs */
		: "r" (p)
	);
}
167
/** Issue the x86 "prefetcht1" instruction for the address p. */
static inline void prefetcht1(void *p)
{
	__asm__ __volatile__(
		"prefetcht1 (%0)\n\t"
		: /* no outputs */
		: "r" (p)
	);
}
174
/** Issue the x86 "prefetcht2" instruction for the address p. */
static inline void prefetcht2(void *p)
{
	__asm__ __volatile__(
		"prefetcht2 (%0)\n\t"
		: /* no outputs */
		: "r" (p)
	);
}
9a722af7 181#endif
3057fa66 182
// The horizontal functions exist only in C because the MMX code is faster with vertical filters and transposing
3057fa66 184
cf5ec61d
MN
185/**
186 * Check if the given 8x8 Block is mostly "flat"
187 */
9c9e467d 188static inline int isHorizDC(uint8_t src[], int stride, PPContext *c)
cf5ec61d
MN
189{
190 int numEq= 0;
191 int y;
ec487e5d
MN
192 const int dcOffset= ((c->QP*c->ppMode.baseDcDiff)>>8) + 1;
193 const int dcThreshold= dcOffset*2 + 1;
cf5ec61d
MN
194 for(y=0; y<BLOCK_SIZE; y++)
195 {
9c9e467d
MN
196 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
197 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
198 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
199 if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
200 if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
201 if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
202 if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
203 src+= stride;
204 }
205 return numEq > c->ppMode.flatnessThreshold;
206}
207
208/**
209 * Check if the middle 8x8 Block in the given 8x16 block is flat
210 */
211static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){
212 int numEq= 0;
213 int y;
ec487e5d
MN
214 const int dcOffset= ((c->QP*c->ppMode.baseDcDiff)>>8) + 1;
215 const int dcThreshold= dcOffset*2 + 1;
9c9e467d
MN
216 src+= stride*4; // src points to begin of the 8x8 Block
217 for(y=0; y<BLOCK_SIZE-1; y++)
218 {
219 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
220 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
221 if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
222 if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
223 if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
224 if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
225 if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
226 if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
cf5ec61d
MN
227 src+= stride;
228 }
9c9e467d 229 return numEq > c->ppMode.flatnessThreshold;
cf5ec61d
MN
230}
231
/**
 * Check that the first and the last pixel of the row at src differ by at most 2*QP.
 * @param src    first of 8 horizontally adjacent pixels
 * @param stride unused
 * @param QP     quantizer of the block
 * @return 1 if |src[0] - src[7]| <= 2*QP, else 0
 */
static inline int isHorizMinMaxOk(uint8_t src[], int stride, int QP)
{
	const int span= abs(src[0] - src[7]);

	return span <= 2*QP;
}
238
239static inline void doHorizDefFilter(uint8_t dst[], int stride, int QP)
240{
241 int y;
242 for(y=0; y<BLOCK_SIZE; y++)
243 {
244 const int middleEnergy= 5*(dst[4] - dst[5]) + 2*(dst[2] - dst[5]);
245
246 if(ABS(middleEnergy) < 8*QP)
247 {
248 const int q=(dst[3] - dst[4])/2;
249 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
250 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
251
252 int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) );
253 d= MAX(d, 0);
254
255 d= (5*d + 32) >> 6;
256 d*= SIGN(-middleEnergy);
257
258 if(q>0)
259 {
260 d= d<0 ? 0 : d;
261 d= d>q ? q : d;
262 }
263 else
264 {
265 d= d>0 ? 0 : d;
266 d= d<q ? q : d;
267 }
268
269 dst[3]-= d;
270 dst[4]+= d;
271 }
272 dst+= stride;
273 }
274}
275
276/**
277 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
278 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
279 */
280static inline void doHorizLowPass(uint8_t dst[], int stride, int QP)
281{
282
283 int y;
284 for(y=0; y<BLOCK_SIZE; y++)
285 {
286 const int first= ABS(dst[-1] - dst[0]) < QP ? dst[-1] : dst[0];
287 const int last= ABS(dst[8] - dst[7]) < QP ? dst[8] : dst[7];
288
289 int sums[9];
290 sums[0] = first + dst[0];
291 sums[1] = dst[0] + dst[1];
292 sums[2] = dst[1] + dst[2];
293 sums[3] = dst[2] + dst[3];
294 sums[4] = dst[3] + dst[4];
295 sums[5] = dst[4] + dst[5];
296 sums[6] = dst[5] + dst[6];
297 sums[7] = dst[6] + dst[7];
298 sums[8] = dst[7] + last;
299
300 dst[0]= ((sums[0]<<2) + ((first + sums[2])<<1) + sums[4] + 8)>>4;
301 dst[1]= ((dst[1]<<2) + ((first + sums[0] + sums[3])<<1) + sums[5] + 8)>>4;
302 dst[2]= ((dst[2]<<2) + ((first + sums[1] + sums[4])<<1) + sums[6] + 8)>>4;
303 dst[3]= ((dst[3]<<2) + ((sums[2] + sums[5])<<1) + sums[0] + sums[7] + 8)>>4;
304 dst[4]= ((dst[4]<<2) + ((sums[3] + sums[6])<<1) + sums[1] + sums[8] + 8)>>4;
305 dst[5]= ((dst[5]<<2) + ((last + sums[7] + sums[4])<<1) + sums[2] + 8)>>4;
306 dst[6]= (((last + dst[6])<<2) + ((dst[7] + sums[5])<<1) + sums[3] + 8)>>4;
307 dst[7]= ((sums[8]<<2) + ((last + sums[6])<<1) + sums[4] + 8)>>4;
308
309 dst+= stride;
310 }
311}
312
4e4dcbc5 313/**
cc9b0679
MN
314 * Experimental Filter 1 (Horizontal)
315 * will not damage linear gradients
316 * Flat blocks should look like they where passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
317 * can only smooth blocks at the expected locations (it cant smooth them if they did move)
318 * MMX2 version does correct clipping C version doesnt
319 * not identical with the vertical one
4e4dcbc5 320 */
cc9b0679
MN
321static inline void horizX1Filter(uint8_t *src, int stride, int QP)
322{
117e45b0 323 int y;
cc9b0679
MN
324 static uint64_t *lut= NULL;
325 if(lut==NULL)
117e45b0 326 {
cc9b0679
MN
327 int i;
328 lut= (uint64_t*)memalign(8, 256*8);
329 for(i=0; i<256; i++)
117e45b0 330 {
cc9b0679 331 int v= i < 128 ? 2*i : 2*(i-256);
117e45b0 332/*
cc9b0679
MN
333//Simulate 112242211 9-Tap filter
334 uint64_t a= (v/16) & 0xFF;
335 uint64_t b= (v/8) & 0xFF;
336 uint64_t c= (v/4) & 0xFF;
337 uint64_t d= (3*v/8) & 0xFF;
117e45b0 338*/
cc9b0679
MN
339//Simulate piecewise linear interpolation
340 uint64_t a= (v/16) & 0xFF;
341 uint64_t b= (v*3/16) & 0xFF;
342 uint64_t c= (v*5/16) & 0xFF;
343 uint64_t d= (7*v/16) & 0xFF;
344 uint64_t A= (0x100 - a)&0xFF;
345 uint64_t B= (0x100 - b)&0xFF;
346 uint64_t C= (0x100 - c)&0xFF;
347 uint64_t D= (0x100 - c)&0xFF;
348
349 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
350 (D<<24) | (C<<16) | (B<<8) | (A);
351 //lut[i] = (v<<32) | (v<<24);
117e45b0
MN
352 }
353 }
cc9b0679
MN
354
355 for(y=0; y<BLOCK_SIZE; y++)
117e45b0 356 {
cc9b0679
MN
357 int a= src[1] - src[2];
358 int b= src[3] - src[4];
359 int c= src[5] - src[6];
360
361 int d= MAX(ABS(b) - (ABS(a) + ABS(c))/2, 0);
362
363 if(d < QP)
117e45b0 364 {
cc9b0679
MN
365 int v = d * SIGN(-b);
366
367 src[1] +=v/8;
368 src[2] +=v/4;
369 src[3] +=3*v/8;
370 src[4] -=3*v/8;
371 src[5] -=v/4;
372 src[6] -=v/8;
373
117e45b0 374 }
cc9b0679 375 src+=stride;
117e45b0 376 }
cc9b0679
MN
377}
378
379
//Note: we have C, MMX, MMX2 and 3DNOW versions; there is no 3DNOW+MMX2 one
cc9b0679 381//Plain C versions
e89952aa
MN
382#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
383#define COMPILE_C
384#endif
385
9c9e467d 386#ifdef ARCH_X86
e89952aa
MN
387
388#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
389#define COMPILE_MMX
390#endif
391
392#if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
393#define COMPILE_MMX2
394#endif
395
396#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
397#define COMPILE_3DNOW
398#endif
9c9e467d 399#endif //ARCH_X86
e89952aa
MN
400
401#undef HAVE_MMX
402#undef HAVE_MMX2
403#undef HAVE_3DNOW
404#undef ARCH_X86
405
406#ifdef COMPILE_C
cc9b0679
MN
407#undef HAVE_MMX
408#undef HAVE_MMX2
409#undef HAVE_3DNOW
410#undef ARCH_X86
411#define RENAME(a) a ## _C
412#include "postprocess_template.c"
e89952aa 413#endif
cc9b0679
MN
414
415//MMX versions
e89952aa 416#ifdef COMPILE_MMX
cc9b0679
MN
417#undef RENAME
418#define HAVE_MMX
419#undef HAVE_MMX2
420#undef HAVE_3DNOW
421#define ARCH_X86
422#define RENAME(a) a ## _MMX
423#include "postprocess_template.c"
e89952aa 424#endif
cc9b0679
MN
425
426//MMX2 versions
e89952aa 427#ifdef COMPILE_MMX2
cc9b0679
MN
428#undef RENAME
429#define HAVE_MMX
430#define HAVE_MMX2
431#undef HAVE_3DNOW
432#define ARCH_X86
433#define RENAME(a) a ## _MMX2
434#include "postprocess_template.c"
e89952aa 435#endif
cc9b0679
MN
436
437//3DNOW versions
e89952aa 438#ifdef COMPILE_3DNOW
cc9b0679
MN
439#undef RENAME
440#define HAVE_MMX
441#undef HAVE_MMX2
442#define HAVE_3DNOW
443#define ARCH_X86
444#define RENAME(a) a ## _3DNow
445#include "postprocess_template.c"
e89952aa 446#endif
cc9b0679
MN
447
// minor note: the HAVE_xyz macros are messed up after this point, so don't use them
449
450static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
c41d972d 451 QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc)
cc9b0679 452{
9c9e467d 453 PPContext *c= (PPContext *)vc;
c41d972d 454 PPMode *ppMode= (PPMode *)vm;
9c9e467d
MN
455 c->ppMode= *ppMode; //FIXME
456
cc9b0679
MN
457 // useing ifs here as they are faster than function pointers allthough the
458 // difference wouldnt be messureable here but its much better because
459 // someone might exchange the cpu whithout restarting mplayer ;)
e89952aa 460#ifdef RUNTIME_CPUDETECT
9c9e467d 461#ifdef ARCH_X86
cc9b0679 462 // ordered per speed fasterst first
fa6ea14e 463 if(c->cpuCaps & PP_CPU_CAPS_MMX2)
9c9e467d 464 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
fa6ea14e 465 else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
9c9e467d 466 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
fa6ea14e 467 else if(c->cpuCaps & PP_CPU_CAPS_MMX)
9c9e467d 468 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
cc9b0679 469 else
9c9e467d 470 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
cc9b0679 471#else
9c9e467d 472 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
be44a4d7 473#endif
e89952aa
MN
474#else //RUNTIME_CPUDETECT
475#ifdef HAVE_MMX2
9c9e467d 476 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 477#elif defined (HAVE_3DNOW)
9c9e467d 478 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 479#elif defined (HAVE_MMX)
9c9e467d 480 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 481#else
9c9e467d 482 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa
MN
483#endif
484#endif //!RUNTIME_CPUDETECT
117e45b0
MN
485}
486
cc9b0679
MN
487//static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
488// QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
13e00528 489
911879d1 490/* -pp Command line Help
911879d1 491*/
4407a3c4 492char *pp_help=
b01be121 493"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
4b001a13 494"long form example:\n"
b01be121 495"vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
4b001a13 496"short form example:\n"
b01be121 497"vb:a/hb:a/lb de,-vb\n"
4b001a13 498"more examples:\n"
1d9324fd 499"tn:64:128:256\n"
4b001a13
MN
500"Filters Options\n"
501"short long name short long option Description\n"
502"* * a autoq cpu power dependant enabler\n"
503" c chrom chrominance filtring enabled\n"
504" y nochrom chrominance filtring disabled\n"
505"hb hdeblock (2 Threshold) horizontal deblocking filter\n"
b01be121
MN
506" 1. difference factor: default=64, higher -> more deblocking\n"
507" 2. flatness threshold: default=40, lower -> more deblocking\n"
4b001a13
MN
508" the h & v deblocking filters share these\n"
509" so u cant set different thresholds for h / v\n"
510"vb vdeblock (2 Threshold) vertical deblocking filter\n"
4b001a13
MN
511"h1 x1hdeblock Experimental h deblock filter 1\n"
512"v1 x1vdeblock Experimental v deblock filter 1\n"
513"dr dering Deringing filter\n"
514"al autolevels automatic brightness / contrast\n"
515" f fullyrange stretch luminance to (0..255)\n"
516"lb linblenddeint linear blend deinterlacer\n"
517"li linipoldeint linear interpolating deinterlace\n"
518"ci cubicipoldeint cubic interpolating deinterlacer\n"
519"md mediandeint median deinterlacer\n"
9c9e467d 520"fd ffmpegdeint ffmpeg deinterlacer\n"
4b001a13
MN
521"de default hb:a,vb:a,dr:a,al\n"
522"fa fast h1:a,v1:a,dr:a,al\n"
523"tn tmpnoise (3 Thresholds) Temporal Noise Reducer\n"
524" 1. <= 2. <= 3. larger -> stronger filtering\n"
525"fq forceQuant <quantizer> Force quantizer\n"
526;
911879d1 527
c41d972d 528pp_mode_t *pp_get_mode_by_name_and_quality(char *name, int quality)
911879d1
MN
529{
530 char temp[GET_MODE_BUFFER_SIZE];
531 char *p= temp;
9c9e467d 532 char *filterDelimiters= ",/";
911879d1 533 char *optionDelimiters= ":";
c41d972d 534 struct PPMode *ppMode;
911879d1
MN
535 char *filterToken;
536
c41d972d
MN
537 ppMode= memalign(8, sizeof(PPMode));
538
539 ppMode->lumMode= 0;
540 ppMode->chromMode= 0;
541 ppMode->maxTmpNoise[0]= 700;
542 ppMode->maxTmpNoise[1]= 1500;
543 ppMode->maxTmpNoise[2]= 3000;
544 ppMode->maxAllowedY= 234;
545 ppMode->minAllowedY= 16;
546 ppMode->baseDcDiff= 256/4;
c41d972d
MN
547 ppMode->flatnessThreshold= 56-16;
548 ppMode->maxClippedThreshold= 0.01;
549 ppMode->error=0;
df8d4d0e 550
911879d1
MN
551 strncpy(temp, name, GET_MODE_BUFFER_SIZE);
552
162c9c2e 553 if(verbose>1) printf("pp: %s\n", name);
117e45b0 554
911879d1 555 for(;;){
911879d1 556 char *filterName;
326d40af 557 int q= 1000000; //PP_QUALITY_MAX;
911879d1
MN
558 int chrom=-1;
559 char *option;
560 char *options[OPTIONS_ARRAY_SIZE];
561 int i;
562 int filterNameOk=0;
563 int numOfUnknownOptions=0;
564 int enable=1; //does the user want us to enabled or disabled the filter
565
566 filterToken= strtok(p, filterDelimiters);
567 if(filterToken == NULL) break;
117e45b0 568 p+= strlen(filterToken) + 1; // p points to next filterToken
911879d1 569 filterName= strtok(filterToken, optionDelimiters);
162c9c2e 570 if(verbose>1) printf("pp: %s::%s\n", filterToken, filterName);
911879d1
MN
571
572 if(*filterName == '-')
573 {
574 enable=0;
575 filterName++;
576 }
117e45b0 577
911879d1
MN
578 for(;;){ //for all options
579 option= strtok(NULL, optionDelimiters);
580 if(option == NULL) break;
581
162c9c2e 582 if(verbose>1) printf("pp: option: %s\n", option);
911879d1
MN
583 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
584 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
585 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
586 else
587 {
588 options[numOfUnknownOptions] = option;
589 numOfUnknownOptions++;
911879d1
MN
590 }
591 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
592 }
117e45b0 593 options[numOfUnknownOptions] = NULL;
911879d1
MN
594
595 /* replace stuff from the replace Table */
596 for(i=0; replaceTable[2*i]!=NULL; i++)
597 {
598 if(!strcmp(replaceTable[2*i], filterName))
599 {
600 int newlen= strlen(replaceTable[2*i + 1]);
601 int plen;
602 int spaceLeft;
603
604 if(p==NULL) p= temp, *p=0; //last filter
605 else p--, *p=','; //not last filter
606
607 plen= strlen(p);
8cd91a44 608 spaceLeft= p - temp + plen;
911879d1
MN
609 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE)
610 {
c41d972d 611 ppMode->error++;
911879d1
MN
612 break;
613 }
614 memmove(p + newlen, p, plen+1);
615 memcpy(p, replaceTable[2*i + 1], newlen);
616 filterNameOk=1;
617 }
618 }
619
620 for(i=0; filters[i].shortName!=NULL; i++)
621 {
117e45b0 622// printf("Compareing %s, %s, %s\n", filters[i].shortName,filters[i].longName, filterName);
911879d1
MN
623 if( !strcmp(filters[i].longName, filterName)
624 || !strcmp(filters[i].shortName, filterName))
625 {
c41d972d
MN
626 ppMode->lumMode &= ~filters[i].mask;
627 ppMode->chromMode &= ~filters[i].mask;
911879d1
MN
628
629 filterNameOk=1;
630 if(!enable) break; // user wants to disable it
631
632 if(q >= filters[i].minLumQuality)
c41d972d 633 ppMode->lumMode|= filters[i].mask;
911879d1
MN
634 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
635 if(q >= filters[i].minChromQuality)
c41d972d 636 ppMode->chromMode|= filters[i].mask;
911879d1
MN
637
638 if(filters[i].mask == LEVEL_FIX)
639 {
640 int o;
c41d972d
MN
641 ppMode->minAllowedY= 16;
642 ppMode->maxAllowedY= 234;
911879d1 643 for(o=0; options[o]!=NULL; o++)
07f8991b 644 {
911879d1
MN
645 if( !strcmp(options[o],"fullyrange")
646 ||!strcmp(options[o],"f"))
647 {
c41d972d
MN
648 ppMode->minAllowedY= 0;
649 ppMode->maxAllowedY= 255;
911879d1
MN
650 numOfUnknownOptions--;
651 }
07f8991b 652 }
911879d1 653 }
117e45b0
MN
654 else if(filters[i].mask == TEMP_NOISE_FILTER)
655 {
656 int o;
657 int numOfNoises=0;
117e45b0
MN
658
659 for(o=0; options[o]!=NULL; o++)
660 {
661 char *tail;
c41d972d 662 ppMode->maxTmpNoise[numOfNoises]=
117e45b0
MN
663 strtol(options[o], &tail, 0);
664 if(tail!=options[o])
665 {
666 numOfNoises++;
667 numOfUnknownOptions--;
668 if(numOfNoises >= 3) break;
669 }
670 }
671 }
43d52f76
MN
672 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK)
673 {
674 int o;
675
676 for(o=0; options[o]!=NULL && o<2; o++)
677 {
678 char *tail;
679 int val= strtol(options[o], &tail, 0);
680 if(tail==options[o]) break;
681
682 numOfUnknownOptions--;
c41d972d
MN
683 if(o==0) ppMode->baseDcDiff= val;
684 else ppMode->flatnessThreshold= val;
43d52f76
MN
685 }
686 }
8aaac435
MN
687 else if(filters[i].mask == FORCE_QUANT)
688 {
689 int o;
c41d972d 690 ppMode->forcedQuant= 15;
8aaac435
MN
691
692 for(o=0; options[o]!=NULL && o<1; o++)
693 {
694 char *tail;
695 int val= strtol(options[o], &tail, 0);
696 if(tail==options[o]) break;
697
698 numOfUnknownOptions--;
c41d972d 699 ppMode->forcedQuant= val;
8aaac435
MN
700 }
701 }
911879d1
MN
702 }
703 }
c41d972d
MN
704 if(!filterNameOk) ppMode->error++;
705 ppMode->error += numOfUnknownOptions;
911879d1
MN
706 }
707
c41d972d
MN
708 if(verbose>1) printf("pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
709 if(ppMode->error)
710 {
711 fprintf(stderr, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
712 free(ppMode);
713 return NULL;
714 }
911879d1
MN
715 return ppMode;
716}
717
c41d972d
MN
718void pp_free_mode(pp_mode_t *mode){
719 if(mode) free(mode);
720}
721
88c0bc7e
MN
/**
 * Replace the buffer *p by a new zero-filled one.
 * The old contents are NOT preserved; the old buffer (if any) is freed.
 * @param p         address of the buffer pointer; set to NULL if the
 *                  allocation fails
 * @param alignment requested alignment in bytes, passed to memalign()
 * @param size      size of the new buffer in bytes
 */
static void reallocAlign(void **p, int alignment, int size){
	void *fresh;

	free(*p);
	fresh= memalign(alignment, size);
	if(fresh != NULL) // do not memset() a failed allocation
		memset(fresh, 0, size);
	*p= fresh;
}
727
728static void reallocBuffers(PPContext *c, int width, int height, int stride){
ec487e5d
MN
729 int mbWidth = (width+15)>>4;
730 int mbHeight= (height+15)>>4;
88c0bc7e
MN
731 int i;
732
733 c->stride= stride;
9c9e467d 734
88c0bc7e
MN
735 reallocAlign((void **)&c->tempDst, 8, stride*24);
736 reallocAlign((void **)&c->tempSrc, 8, stride*24);
737 reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
738 reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
9c9e467d
MN
739 for(i=0; i<256; i++)
740 c->yHistogram[i]= width*height/64*15/256;
741
742 for(i=0; i<3; i++)
211c4920 743 {
9c9e467d 744 //Note:the +17*1024 is just there so i dont have to worry about r/w over te end
88c0bc7e
MN
745 reallocAlign((void **)&c->tempBlured[i], 8, stride*mbHeight*16 + 17*1024);
746 reallocAlign((void **)&c->tempBluredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
211c4920 747 }
45b4f285 748
88c0bc7e
MN
749 reallocAlign((void **)&c->deintTemp, 8, width+16);
750 reallocAlign((void **)&c->nonBQPTable, 8, mbWidth*mbHeight*sizeof(QP_STORE_T));
751 reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
752}
753
754pp_context_t *pp_get_context(int width, int height, int cpuCaps){
755 PPContext *c= memalign(32, sizeof(PPContext));
88c0bc7e
MN
756 int stride= (width+15)&(~15); //assumed / will realloc if needed
757
758 memset(c, 0, sizeof(PPContext));
759 c->cpuCaps= cpuCaps;
e9effafd
MN
760 if(cpuCaps&PP_FORMAT){
761 c->hChromaSubSample= cpuCaps&0x3;
762 c->vChromaSubSample= (cpuCaps>>4)&0x3;
763 }else{
764 c->hChromaSubSample= 1;
765 c->vChromaSubSample= 1;
766 }
88c0bc7e
MN
767
768 reallocBuffers(c, width, height, stride);
769
9c9e467d 770 c->frameNum=-1;
45b4f285 771
9c9e467d 772 return c;
45b4f285
MN
773}
774
9cb54f43 775void pp_free_context(void *vc){
9c9e467d
MN
776 PPContext *c = (PPContext*)vc;
777 int i;
778
779 for(i=0; i<3; i++) free(c->tempBlured[i]);
780 for(i=0; i<3; i++) free(c->tempBluredPast[i]);
781
782 free(c->tempBlocks);
783 free(c->yHistogram);
784 free(c->tempDst);
785 free(c->tempSrc);
9c9e467d 786 free(c->deintTemp);
ec487e5d 787 free(c->nonBQPTable);
88c0bc7e
MN
788 free(c->forcedQPTable);
789
790 memset(c, 0, sizeof(PPContext));
791
9c9e467d
MN
792 free(c);
793}
794
9cb54f43 795void pp_postprocess(uint8_t * src[3], int srcStride[3],
9c9e467d 796 uint8_t * dst[3], int dstStride[3],
ec487e5d 797 int width, int height,
9c9e467d 798 QP_STORE_T *QP_store, int QPStride,
c41d972d 799 pp_mode_t *vm, void *vc, int pict_type)
911879d1 800{
ec487e5d
MN
801 int mbWidth = (width+15)>>4;
802 int mbHeight= (height+15)>>4;
c41d972d 803 PPMode *mode = (PPMode*)vm;
ec487e5d 804 PPContext *c = (PPContext*)vc;
88c0bc7e
MN
805 int minStride= MAX(srcStride[0], dstStride[0]);
806
807 if(c->stride < minStride)
808 reallocBuffers(c, width, height, minStride);
9c9e467d 809
8aaac435 810 if(QP_store==NULL || (mode->lumMode & FORCE_QUANT))
815cbfe7 811 {
8aaac435 812 int i;
88c0bc7e 813 QP_store= c->forcedQPTable;
9c9e467d 814 QPStride= 0;
8aaac435 815 if(mode->lumMode & FORCE_QUANT)
88c0bc7e 816 for(i=0; i<mbWidth; i++) QP_store[i]= mode->forcedQuant;
8aaac435 817 else
88c0bc7e 818 for(i=0; i<mbWidth; i++) QP_store[i]= 1;
815cbfe7 819 }
ec487e5d
MN
820if(0){
821int x,y;
822for(y=0; y<mbHeight; y++){
823 for(x=0; x<mbWidth; x++){
824 printf("%2d ", QP_store[x + y*QPStride]);
825 }
826 printf("\n");
827}
828 printf("\n");
829}
830//printf("pict_type:%d\n", pict_type);
51e19dcc 831
ec487e5d
MN
832 if(pict_type!=3)
833 {
834 int x,y;
835 for(y=0; y<mbHeight; y++){
836 for(x=0; x<mbWidth; x++){
837 int qscale= QP_store[x + y*QPStride];
838 if(qscale&~31)
839 qscale=31;
840 c->nonBQPTable[y*mbWidth + x]= qscale;
841 }
842 }
843 }
815cbfe7 844
df8d4d0e 845 if(verbose>2)
162c9c2e
MN
846 {
847 printf("using npp filters 0x%X/0x%X\n", mode->lumMode, mode->chromMode);
162c9c2e
MN
848 }
849
9c9e467d 850 postProcess(src[0], srcStride[0], dst[0], dstStride[0],
b2a3fcb7 851 width, height, QP_store, QPStride, 0, mode, c);
911879d1 852
e9effafd
MN
853 width = (width )>>c->hChromaSubSample;
854 height = (height)>>c->vChromaSubSample;
911879d1 855
4e1349d4
MN
856 if(mode->chromMode)
857 {
9c9e467d 858 postProcess(src[1], srcStride[1], dst[1], dstStride[1],
b2a3fcb7 859 width, height, QP_store, QPStride, 1, mode, c);
9c9e467d 860 postProcess(src[2], srcStride[2], dst[2], dstStride[2],
b2a3fcb7 861 width, height, QP_store, QPStride, 2, mode, c);
4e1349d4 862 }
9c9e467d 863 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2])
4e1349d4 864 {
ec487e5d
MN
865 memcpy(dst[1], src[1], srcStride[1]*height);
866 memcpy(dst[2], src[2], srcStride[2]*height);
4e1349d4
MN
867 }
868 else
869 {
870 int y;
ec487e5d 871 for(y=0; y<height; y++)
4e1349d4 872 {
ec487e5d
MN
873 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
874 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
4e1349d4
MN
875 }
876 }
911879d1
MN
877}
878