update
[libav.git] / postproc / postprocess.c
CommitLineData
3057fa66 1/*
8aaac435 2 Copyright (C) 2001-2002 Michael Niedermayer (michaelni@gmx.at)
3057fa66
A
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17*/
18
19/*
3b58b885 20 C MMX MMX2 3DNow
3057fa66
A
21isVertDC Ec Ec
22isVertMinMaxOk Ec Ec
3b58b885 23doVertLowPass E e e
7f16f6e6 24doVertDefFilter Ec Ec e e
3057fa66 25isHorizDC Ec Ec
4e4dcbc5
MN
26isHorizMinMaxOk a E
27doHorizLowPass E e e
7f16f6e6 28doHorizDefFilter Ec Ec e e
2e212618 29deRing E e e*
3b58b885 30Vertical RKAlgo1 E a a
e5c30e06 31Horizontal RKAlgo1 a a
117e45b0
MN
32Vertical X1# a E E
33Horizontal X1# a E E
acced553
MN
34LinIpolDeinterlace e E E*
35CubicIpolDeinterlace a e e*
36LinBlendDeinterlace e E E*
117e45b0 37MedianDeinterlace# Ec Ec
be44a4d7 38TempDeNoiser# E e e
d5a1a995 39
117e45b0
MN
* I don't have a 3DNow! CPU -> it is untested, but no one said it doesn't work, so it seems to work
# more or less self-invented filters, so the exactness isn't too meaningful
3057fa66 42E = Exact implementation
e = almost exact implementation (slightly different rounding,...)
3057fa66
A
44a = alternative / approximate impl
45c = checked against the other implementations (-vo md5)
46*/
47
48/*
49TODO:
3057fa66 50reduce the time wasted on the mem transfer
3057fa66 51unroll stuff if instructions depend too much on the prior one
3057fa66 52move YScale thing to the end instead of fixing QP
13e00528 53write a faster and higher quality deblocking filter :)
d5a1a995
MN
54make the mainloop more flexible (variable number of blocks at once
55 (the if/else stuff per block is slowing things down)
9f45d04d 56compare the quality & speed of all filters
9f45d04d 57split this huge file
8405b3fd 58optimize c versions
117e45b0 59try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
3057fa66 60...
13e00528
A
61*/
62
a6be8111 63//Changelog: use the CVS log
3057fa66 64
6c426cff 65#include "../config.h"
3057fa66
A
66#include <inttypes.h>
67#include <stdio.h>
d5a1a995 68#include <stdlib.h>
911879d1 69#include <string.h>
dda87e9f
PL
70#ifdef HAVE_MALLOC_H
71#include <malloc.h>
72#endif
3057fa66 73//#undef HAVE_MMX2
13e00528 74//#define HAVE_3DNOW
3057fa66 75//#undef HAVE_MMX
cc9b0679 76//#undef ARCH_X86
7f16f6e6 77//#define DEBUG_BRIGHTNESS
70d4f2da 78#ifndef PIC
9c9e467d 79#include "../libvo/fastmemcpy.h"
70d4f2da 80#endif
13e00528 81#include "postprocess.h"
c41d972d 82#include "postprocess_internal.h"
9b464428 83#include "../mangle.h"
3057fa66 84
e939e1c3
A
85#define MIN(a,b) ((a) > (b) ? (b) : (a))
86#define MAX(a,b) ((a) < (b) ? (b) : (a))
87#define ABS(a) ((a) > 0 ? (a) : (-(a)))
88#define SIGN(a) ((a) > 0 ? 1 : -1)
89
911879d1
MN
90#define GET_MODE_BUFFER_SIZE 500
91#define OPTIONS_ARRAY_SIZE 10
9c9e467d
MN
92#define BLOCK_SIZE 8
93#define TEMP_STRIDE 8
94//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
911879d1 95
cc9b0679 96#ifdef ARCH_X86
b28daef8
MN
/* 64-bit pattern constants, only compiled when ARCH_X86 is defined.
   wXX repeats the hex value XX in each 16-bit word, bXX repeats it in each byte. */
static uint64_t __attribute__((aligned(8))) w05= 0x0005000500050005ULL;
static uint64_t __attribute__((aligned(8))) w20= 0x0020002000200020ULL;
static uint64_t __attribute__((aligned(8))) b00= 0x0000000000000000ULL;
static uint64_t __attribute__((aligned(8))) b01= 0x0101010101010101ULL;
static uint64_t __attribute__((aligned(8))) b02= 0x0202020202020202ULL;
static uint64_t __attribute__((aligned(8))) b08= 0x0808080808080808ULL;
static uint64_t __attribute__((aligned(8))) b80= 0x8080808080808080ULL;
b28daef8 104#endif
3057fa66 105
df8d4d0e 106static int verbose= 0;
45b4f285 107
df8d4d0e 108static const int deringThreshold= 20;
3057fa66 109
9c9e467d 110
911879d1
MN
111static struct PPFilter filters[]=
112{
113 {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK},
114 {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK},
9c9e467d
MN
115/* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER},
116 {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/
911879d1
MN
117 {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER},
118 {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER},
119 {"dr", "dering", 1, 5, 6, DERING},
120 {"al", "autolevels", 0, 1, 2, LEVEL_FIX},
43d52f76
MN
121 {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
122 {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
123 {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
124 {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER},
9c9e467d 125 {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER},
117e45b0 126 {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER},
8aaac435 127 {"fq", "forcequant", 1, 0, 0, FORCE_QUANT},
911879d1
MN
128 {NULL, NULL,0,0,0,0} //End Marker
129};
130
/*
 * Aliases that are expanded into a list of filters before parsing.
 * Entries come in pairs (alias, replacement text); a single NULL ends the table.
 * The strings are literals and are only read, hence fully const.
 */
static const char * const replaceTable[]=
{
	"default", "hdeblock:a,vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",
	"de",      "hdeblock:a,vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",
	"fast",    "x1hdeblock:a,x1vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",
	"fa",      "x1hdeblock:a,x1vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",
	NULL // end marker
};
139
9c9e467d 140#ifdef ARCH_X86
e5c30e06
MN
141static inline void unusedVariableWarningFixer()
142{
9c9e467d 143 if(w05 + w20 + b00 + b01 + b02 + b08 + b80 == 0) b00=0;
e5c30e06 144}
b28daef8 145#endif
e5c30e06 146
3057fa66 147
9c9e467d 148#ifdef ARCH_X86
3057fa66
A
/**
 * Emit a "prefetchnta" instruction for the address p.
 */
static inline void prefetchnta(void *p)
{
	__asm__ __volatile__(
		"prefetchnta (%0)\n\t"
		:
		: "r" (p)
	);
}
155
/**
 * Emit a "prefetcht0" instruction for the address p.
 */
static inline void prefetcht0(void *p)
{
	__asm__ __volatile__(
		"prefetcht0 (%0)\n\t"
		:
		: "r" (p)
	);
}
162
/**
 * Emit a "prefetcht1" instruction for the address p.
 */
static inline void prefetcht1(void *p)
{
	__asm__ __volatile__(
		"prefetcht1 (%0)\n\t"
		:
		: "r" (p)
	);
}
169
/**
 * Emit a "prefetcht2" instruction for the address p.
 */
static inline void prefetcht2(void *p)
{
	__asm__ __volatile__(
		"prefetcht2 (%0)\n\t"
		:
		: "r" (p)
	);
}
9a722af7 176#endif
3057fa66 177
// The horizontal functions exist only in C because the MMX code is faster with vertical filters and transposing
3057fa66 179
cf5ec61d
MN
180/**
181 * Check if the given 8x8 Block is mostly "flat"
182 */
9c9e467d 183static inline int isHorizDC(uint8_t src[], int stride, PPContext *c)
cf5ec61d
MN
184{
185 int numEq= 0;
186 int y;
ec487e5d
MN
187 const int dcOffset= ((c->QP*c->ppMode.baseDcDiff)>>8) + 1;
188 const int dcThreshold= dcOffset*2 + 1;
cf5ec61d
MN
189 for(y=0; y<BLOCK_SIZE; y++)
190 {
9c9e467d
MN
191 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
192 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
193 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
194 if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
195 if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
196 if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
197 if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
198 src+= stride;
199 }
200 return numEq > c->ppMode.flatnessThreshold;
201}
202
203/**
204 * Check if the middle 8x8 Block in the given 8x16 block is flat
205 */
206static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){
207 int numEq= 0;
208 int y;
ec487e5d
MN
209 const int dcOffset= ((c->QP*c->ppMode.baseDcDiff)>>8) + 1;
210 const int dcThreshold= dcOffset*2 + 1;
9c9e467d
MN
211 src+= stride*4; // src points to begin of the 8x8 Block
212 for(y=0; y<BLOCK_SIZE-1; y++)
213 {
214 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
215 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
216 if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
217 if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
218 if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
219 if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
220 if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
221 if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
cf5ec61d
MN
222 src+= stride;
223 }
9c9e467d 224 return numEq > c->ppMode.flatnessThreshold;
cf5ec61d
MN
225}
226
/**
 * Check that the first and the last pixel of the row at src differ by at
 * most 2*QP. Only this single row is looked at; stride is not used.
 * @return 1 if the difference is small enough, 0 otherwise
 */
static inline int isHorizMinMaxOk(uint8_t src[], int stride, int QP)
{
	const int endDiff= abs(src[0] - src[7]);

	return endDiff <= 2*QP;
}
233
/**
 * Default horizontal deblocking filter (C version).
 * Works on BLOCK_SIZE rows of 8 pixels; the block edge lies between dst[3]
 * and dst[4], which are the only pixels that get modified.
 * For each row three "energies" with the same 4-tap pattern
 * 5*(x[k+2] - x[k+1]) + 2*(x[k] - x[k+3]) are computed for k=0 (left),
 * k=2 (middle, across the edge) and k=4 (right). If the middle energy is
 * below 8*QP, the edge pixels are moved towards each other by an amount
 * derived from how much the middle energy exceeds the smaller side energy,
 * limited to half of the step between dst[3] and dst[4].
 * @param dst    leftmost pixel of the first row
 * @param stride distance between two rows in bytes
 * @param QP     quantizer used for the threshold
 */
static inline void doHorizDefFilter(uint8_t dst[], int stride, int QP)
{
	int y;
	for(y=0; y<BLOCK_SIZE; y++)
	{
		// the pattern must straddle the edge: dst[4] - dst[3], not dst[4] - dst[5]
		const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);

		if(abs(middleEnergy) < 8*QP)
		{
			const int q= (dst[3] - dst[4])/2;
			const int leftEnergy=  5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
			const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
			const int absLeft=  abs(leftEnergy);
			const int absRight= abs(rightEnergy);

			int d= abs(middleEnergy) - (absLeft < absRight ? absLeft : absRight);
			if(d < 0) d= 0;

			d= (5*d + 32) >> 6;
			// the correction gets the sign of -middleEnergy (0 counts as negative)
			if(middleEnergy >= 0) d= -d;

			// never overshoot: d stays between 0 and q
			if(q>0)
			{
				if(d < 0) d= 0;
				if(d > q) d= q;
			}
			else
			{
				if(d > 0) d= 0;
				if(d < q) d= q;
			}

			dst[3]-= d;
			dst[4]+= d;
		}
		dst+= stride;
	}
}
270
/**
 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
 * The pixel left of the block (dst[-1]) and the one right of it (dst[8]) are
 * read but never written; each is only used if it differs by less than QP
 * from the adjacent block pixel, otherwise that block pixel is used instead.
 * @param dst    leftmost pixel of the first row of the 8x8 block
 * @param stride distance between two rows in bytes
 * @param QP     threshold for using the outer neighbours
 */
static inline void doHorizLowPass(uint8_t dst[], int stride, int QP)
{
	int row;

	for(row=0; row<BLOCK_SIZE; row++, dst+= stride)
	{
		const int leftPix=  abs(dst[-1] - dst[0]) < QP ? dst[-1] : dst[0];
		const int rightPix= abs(dst[8] - dst[7]) < QP ? dst[8] : dst[7];
		int pair[9]; // sums of neighbouring pixels, taken before anything is overwritten
		int i;

		pair[0]= leftPix + dst[0];
		for(i=1; i<8; i++)
			pair[i]= dst[i-1] + dst[i];
		pair[8]= dst[7] + rightPix;

		// dst[k] on the right hand side is still the unfiltered pixel at that point
		dst[0]= ((pair[0]<<2) + ((leftPix + pair[2])<<1) + pair[4] + 8)>>4;
		dst[1]= ((dst[1]<<2) + ((leftPix + pair[0] + pair[3])<<1) + pair[5] + 8)>>4;
		dst[2]= ((dst[2]<<2) + ((leftPix + pair[1] + pair[4])<<1) + pair[6] + 8)>>4;
		dst[3]= ((dst[3]<<2) + ((pair[2] + pair[5])<<1) + pair[0] + pair[7] + 8)>>4;
		dst[4]= ((dst[4]<<2) + ((pair[3] + pair[6])<<1) + pair[1] + pair[8] + 8)>>4;
		dst[5]= ((dst[5]<<2) + ((rightPix + pair[7] + pair[4])<<1) + pair[2] + 8)>>4;
		dst[6]= (((rightPix + dst[6])<<2) + ((dst[7] + pair[5])<<1) + pair[3] + 8)>>4;
		dst[7]= ((pair[8]<<2) + ((rightPix + pair[6])<<1) + pair[4] + 8)>>4;
	}
}
307
/**
 * Experimental Filter 1 (Horizontal)
 * will not damage linear gradients
 * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
 * can only smooth blocks at the expected locations (it can't smooth them if they did move)
 * MMX2 version does correct clipping, C version doesn't
 * not identical with the vertical one
 *
 * For each of the BLOCK_SIZE rows the step across the block edge
 * (src[3] - src[4]) is compared with the mean of the two neighbouring steps
 * (src[1] - src[2] and src[5] - src[6]). If the excess d is below QP,
 * src[1]..src[6] are shifted by v/8, v/4, 3*v/8 (left half) and the negated
 * amounts (right half), where v is d with the sign of -(src[3] - src[4]).
 * The results are stored without clipping.
 *
 * A lookup table that used to be built here on the first call was never read
 * by this function; it was removed together with its unchecked allocation.
 *
 * @param src    leftmost pixel of the first row
 * @param stride distance between two rows in bytes
 * @param QP     threshold above which a row is left untouched
 */
static inline void horizX1Filter(uint8_t *src, int stride, int QP)
{
	int y;

	for(y=0; y<BLOCK_SIZE; y++)
	{
		const int a= src[1] - src[2];
		const int b= src[3] - src[4];
		const int c= src[5] - src[6];

		int d= abs(b) - (abs(a) + abs(c))/2;
		if(d < 0) d= 0;

		if(d < QP)
		{
			// sign of -b, with b==0 treated as negative direction (d is 0 then anyway)
			const int v= b < 0 ? d : -d;

			src[1] +=v/8;
			src[2] +=v/4;
			src[3] +=3*v/8;
			src[4] -=3*v/8;
			src[5] -=v/4;
			src[6] -=v/8;
		}
		src+=stride;
	}
}
373
374
e89952aa 375//Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
cc9b0679 376//Plain C versions
e89952aa
MN
377#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
378#define COMPILE_C
379#endif
380
9c9e467d 381#ifdef ARCH_X86
e89952aa
MN
382
383#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
384#define COMPILE_MMX
385#endif
386
387#if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
388#define COMPILE_MMX2
389#endif
390
391#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
392#define COMPILE_3DNOW
393#endif
9c9e467d 394#endif //ARCH_X86
e89952aa
MN
395
396#undef HAVE_MMX
397#undef HAVE_MMX2
398#undef HAVE_3DNOW
399#undef ARCH_X86
400
401#ifdef COMPILE_C
cc9b0679
MN
402#undef HAVE_MMX
403#undef HAVE_MMX2
404#undef HAVE_3DNOW
405#undef ARCH_X86
406#define RENAME(a) a ## _C
407#include "postprocess_template.c"
e89952aa 408#endif
cc9b0679
MN
409
410//MMX versions
e89952aa 411#ifdef COMPILE_MMX
cc9b0679
MN
412#undef RENAME
413#define HAVE_MMX
414#undef HAVE_MMX2
415#undef HAVE_3DNOW
416#define ARCH_X86
417#define RENAME(a) a ## _MMX
418#include "postprocess_template.c"
e89952aa 419#endif
cc9b0679
MN
420
421//MMX2 versions
e89952aa 422#ifdef COMPILE_MMX2
cc9b0679
MN
423#undef RENAME
424#define HAVE_MMX
425#define HAVE_MMX2
426#undef HAVE_3DNOW
427#define ARCH_X86
428#define RENAME(a) a ## _MMX2
429#include "postprocess_template.c"
e89952aa 430#endif
cc9b0679
MN
431
432//3DNOW versions
e89952aa 433#ifdef COMPILE_3DNOW
cc9b0679
MN
434#undef RENAME
435#define HAVE_MMX
436#undef HAVE_MMX2
437#define HAVE_3DNOW
438#define ARCH_X86
439#define RENAME(a) a ## _3DNow
440#include "postprocess_template.c"
e89952aa 441#endif
cc9b0679
MN
442
// minor note: the HAVE_xyz macros are messed up after this point, so don't rely on them
444
445static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
c41d972d 446 QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc)
cc9b0679 447{
9c9e467d 448 PPContext *c= (PPContext *)vc;
c41d972d 449 PPMode *ppMode= (PPMode *)vm;
9c9e467d
MN
450 c->ppMode= *ppMode; //FIXME
451
cc9b0679
MN
452 // useing ifs here as they are faster than function pointers allthough the
453 // difference wouldnt be messureable here but its much better because
454 // someone might exchange the cpu whithout restarting mplayer ;)
e89952aa 455#ifdef RUNTIME_CPUDETECT
9c9e467d 456#ifdef ARCH_X86
cc9b0679 457 // ordered per speed fasterst first
fa6ea14e 458 if(c->cpuCaps & PP_CPU_CAPS_MMX2)
9c9e467d 459 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
fa6ea14e 460 else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
9c9e467d 461 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
fa6ea14e 462 else if(c->cpuCaps & PP_CPU_CAPS_MMX)
9c9e467d 463 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
cc9b0679 464 else
9c9e467d 465 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
cc9b0679 466#else
9c9e467d 467 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
be44a4d7 468#endif
e89952aa
MN
469#else //RUNTIME_CPUDETECT
470#ifdef HAVE_MMX2
9c9e467d 471 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 472#elif defined (HAVE_3DNOW)
9c9e467d 473 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 474#elif defined (HAVE_MMX)
9c9e467d 475 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 476#else
9c9e467d 477 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa
MN
478#endif
479#endif //!RUNTIME_CPUDETECT
117e45b0
MN
480}
481
cc9b0679
MN
482//static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
483// QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
13e00528 484
/* -pp Command line Help
 * The filter names listed here must be spelled the way the parser expects them
 * (names are compared case sensitively), and the "de" / "fa" lines show what
 * these aliases get replaced with.
*/
char *pp_help=
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"Filters Options\n"
"short long name short long option Description\n"
"* * a autoq cpu power dependant enabler\n"
" c chrom chrominance filtring enabled\n"
" y nochrom chrominance filtring disabled\n"
"hb hdeblock (2 Threshold) horizontal deblocking filter\n"
" 1. difference factor: default=64, higher -> more deblocking\n"
" 2. flatness threshold: default=40, lower -> more deblocking\n"
" the h & v deblocking filters share these\n"
" so u cant set different thresholds for h / v\n"
"vb vdeblock (2 Threshold) vertical deblocking filter\n"
"h1 x1hdeblock Experimental h deblock filter 1\n"
"v1 x1vdeblock Experimental v deblock filter 1\n"
"dr dering Deringing filter\n"
"al autolevels automatic brightness / contrast\n"
" f fullyrange stretch luminance to (0..255)\n"
"lb linblenddeint linear blend deinterlacer\n"
"li linipoldeint linear interpolating deinterlace\n"
"ci cubicipoldeint cubic interpolating deinterlacer\n"
"md mediandeint median deinterlacer\n"
"fd ffmpegdeint ffmpeg deinterlacer\n"
"de default hb:a,vb:a,dr:a,al,tn:a:150:200:400\n"
"fa fast h1:a,v1:a,dr:a,al,tn:a:150:200:400\n"
"tn tmpnoise (3 Thresholds) Temporal Noise Reducer\n"
" 1. <= 2. <= 3. larger -> stronger filtering\n"
"fq forcequant <quantizer> Force quantizer\n"
;
911879d1 522
c41d972d 523pp_mode_t *pp_get_mode_by_name_and_quality(char *name, int quality)
911879d1
MN
524{
525 char temp[GET_MODE_BUFFER_SIZE];
526 char *p= temp;
9c9e467d 527 char *filterDelimiters= ",/";
911879d1 528 char *optionDelimiters= ":";
c41d972d 529 struct PPMode *ppMode;
911879d1
MN
530 char *filterToken;
531
c41d972d
MN
532 ppMode= memalign(8, sizeof(PPMode));
533
534 ppMode->lumMode= 0;
535 ppMode->chromMode= 0;
536 ppMode->maxTmpNoise[0]= 700;
537 ppMode->maxTmpNoise[1]= 1500;
538 ppMode->maxTmpNoise[2]= 3000;
539 ppMode->maxAllowedY= 234;
540 ppMode->minAllowedY= 16;
541 ppMode->baseDcDiff= 256/4;
c41d972d
MN
542 ppMode->flatnessThreshold= 56-16;
543 ppMode->maxClippedThreshold= 0.01;
544 ppMode->error=0;
df8d4d0e 545
911879d1
MN
546 strncpy(temp, name, GET_MODE_BUFFER_SIZE);
547
162c9c2e 548 if(verbose>1) printf("pp: %s\n", name);
117e45b0 549
911879d1 550 for(;;){
911879d1 551 char *filterName;
326d40af 552 int q= 1000000; //PP_QUALITY_MAX;
911879d1
MN
553 int chrom=-1;
554 char *option;
555 char *options[OPTIONS_ARRAY_SIZE];
556 int i;
557 int filterNameOk=0;
558 int numOfUnknownOptions=0;
559 int enable=1; //does the user want us to enabled or disabled the filter
560
561 filterToken= strtok(p, filterDelimiters);
562 if(filterToken == NULL) break;
117e45b0 563 p+= strlen(filterToken) + 1; // p points to next filterToken
911879d1 564 filterName= strtok(filterToken, optionDelimiters);
162c9c2e 565 if(verbose>1) printf("pp: %s::%s\n", filterToken, filterName);
911879d1
MN
566
567 if(*filterName == '-')
568 {
569 enable=0;
570 filterName++;
571 }
117e45b0 572
911879d1
MN
573 for(;;){ //for all options
574 option= strtok(NULL, optionDelimiters);
575 if(option == NULL) break;
576
162c9c2e 577 if(verbose>1) printf("pp: option: %s\n", option);
911879d1
MN
578 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
579 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
580 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
581 else
582 {
583 options[numOfUnknownOptions] = option;
584 numOfUnknownOptions++;
911879d1
MN
585 }
586 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
587 }
117e45b0 588 options[numOfUnknownOptions] = NULL;
911879d1
MN
589
590 /* replace stuff from the replace Table */
591 for(i=0; replaceTable[2*i]!=NULL; i++)
592 {
593 if(!strcmp(replaceTable[2*i], filterName))
594 {
595 int newlen= strlen(replaceTable[2*i + 1]);
596 int plen;
597 int spaceLeft;
598
599 if(p==NULL) p= temp, *p=0; //last filter
600 else p--, *p=','; //not last filter
601
602 plen= strlen(p);
8cd91a44 603 spaceLeft= p - temp + plen;
911879d1
MN
604 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE)
605 {
c41d972d 606 ppMode->error++;
911879d1
MN
607 break;
608 }
609 memmove(p + newlen, p, plen+1);
610 memcpy(p, replaceTable[2*i + 1], newlen);
611 filterNameOk=1;
612 }
613 }
614
615 for(i=0; filters[i].shortName!=NULL; i++)
616 {
117e45b0 617// printf("Compareing %s, %s, %s\n", filters[i].shortName,filters[i].longName, filterName);
911879d1
MN
618 if( !strcmp(filters[i].longName, filterName)
619 || !strcmp(filters[i].shortName, filterName))
620 {
c41d972d
MN
621 ppMode->lumMode &= ~filters[i].mask;
622 ppMode->chromMode &= ~filters[i].mask;
911879d1
MN
623
624 filterNameOk=1;
625 if(!enable) break; // user wants to disable it
626
627 if(q >= filters[i].minLumQuality)
c41d972d 628 ppMode->lumMode|= filters[i].mask;
911879d1
MN
629 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
630 if(q >= filters[i].minChromQuality)
c41d972d 631 ppMode->chromMode|= filters[i].mask;
911879d1
MN
632
633 if(filters[i].mask == LEVEL_FIX)
634 {
635 int o;
c41d972d
MN
636 ppMode->minAllowedY= 16;
637 ppMode->maxAllowedY= 234;
911879d1 638 for(o=0; options[o]!=NULL; o++)
07f8991b 639 {
911879d1
MN
640 if( !strcmp(options[o],"fullyrange")
641 ||!strcmp(options[o],"f"))
642 {
c41d972d
MN
643 ppMode->minAllowedY= 0;
644 ppMode->maxAllowedY= 255;
911879d1
MN
645 numOfUnknownOptions--;
646 }
07f8991b 647 }
911879d1 648 }
117e45b0
MN
649 else if(filters[i].mask == TEMP_NOISE_FILTER)
650 {
651 int o;
652 int numOfNoises=0;
117e45b0
MN
653
654 for(o=0; options[o]!=NULL; o++)
655 {
656 char *tail;
c41d972d 657 ppMode->maxTmpNoise[numOfNoises]=
117e45b0
MN
658 strtol(options[o], &tail, 0);
659 if(tail!=options[o])
660 {
661 numOfNoises++;
662 numOfUnknownOptions--;
663 if(numOfNoises >= 3) break;
664 }
665 }
666 }
43d52f76
MN
667 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK)
668 {
669 int o;
670
671 for(o=0; options[o]!=NULL && o<2; o++)
672 {
673 char *tail;
674 int val= strtol(options[o], &tail, 0);
675 if(tail==options[o]) break;
676
677 numOfUnknownOptions--;
c41d972d
MN
678 if(o==0) ppMode->baseDcDiff= val;
679 else ppMode->flatnessThreshold= val;
43d52f76
MN
680 }
681 }
8aaac435
MN
682 else if(filters[i].mask == FORCE_QUANT)
683 {
684 int o;
c41d972d 685 ppMode->forcedQuant= 15;
8aaac435
MN
686
687 for(o=0; options[o]!=NULL && o<1; o++)
688 {
689 char *tail;
690 int val= strtol(options[o], &tail, 0);
691 if(tail==options[o]) break;
692
693 numOfUnknownOptions--;
c41d972d 694 ppMode->forcedQuant= val;
8aaac435
MN
695 }
696 }
911879d1
MN
697 }
698 }
c41d972d
MN
699 if(!filterNameOk) ppMode->error++;
700 ppMode->error += numOfUnknownOptions;
911879d1
MN
701 }
702
c41d972d
MN
703 if(verbose>1) printf("pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
704 if(ppMode->error)
705 {
706 fprintf(stderr, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
707 free(ppMode);
708 return NULL;
709 }
911879d1
MN
710 return ppMode;
711}
712
c41d972d
MN
713void pp_free_mode(pp_mode_t *mode){
714 if(mode) free(mode);
715}
716
717pp_context_t *pp_get_context(int width, int height, int cpuCaps){
9c9e467d
MN
718 PPContext *c= memalign(32, sizeof(PPContext));
719 int i;
ec487e5d
MN
720 int mbWidth = (width+15)>>4;
721 int mbHeight= (height+15)>>4;
fa6ea14e
MN
722
723 c->cpuCaps= cpuCaps;
9c9e467d
MN
724
725 c->tempBlocks= (uint8_t*)memalign(8, 2*16*8);
726 c->yHistogram= (uint64_t*)memalign(8, 256*sizeof(uint64_t));
727 for(i=0; i<256; i++)
728 c->yHistogram[i]= width*height/64*15/256;
729
730 for(i=0; i<3; i++)
211c4920 731 {
9c9e467d
MN
732 //Note:the +17*1024 is just there so i dont have to worry about r/w over te end
733 c->tempBlured[i]= (uint8_t*)memalign(8, ((width+7)&(~7))*2*((height+7)&(~7)) + 17*1024); //FIXME dstStride instead of width
734 c->tempBluredPast[i]= (uint32_t*)memalign(8, 256*((height+7)&(~7))/2 + 17*1024);
735
736 memset(c->tempBlured[i], 0, ((width+7)&(~7))*2*((height+7)&(~7)) + 17*1024);
737 memset(c->tempBluredPast[i], 0, 256*((height+7)&(~7))/2 + 17*1024);
211c4920
MN
738 }
739
9c9e467d
MN
740 c->tempDst= (uint8_t*)memalign(8, 1024*24);
741 c->tempSrc= (uint8_t*)memalign(8, 1024*24);
742 c->tempDstBlock= (uint8_t*)memalign(8, 1024*24);
743 c->tempSrcBlock= (uint8_t*)memalign(8, 1024*24);
744 c->deintTemp= (uint8_t*)memalign(8, width+16);
ec487e5d
MN
745 c->nonBQPTable= (QP_STORE_T*)memalign(8, mbWidth*mbHeight*sizeof(QP_STORE_T));
746 memset(c->nonBQPTable, 0, mbWidth*mbHeight*sizeof(QP_STORE_T));
45b4f285 747
9c9e467d 748 c->frameNum=-1;
45b4f285 749
9c9e467d 750 return c;
45b4f285
MN
751}
752
9cb54f43 753void pp_free_context(void *vc){
9c9e467d
MN
754 PPContext *c = (PPContext*)vc;
755 int i;
756
757 for(i=0; i<3; i++) free(c->tempBlured[i]);
758 for(i=0; i<3; i++) free(c->tempBluredPast[i]);
759
760 free(c->tempBlocks);
761 free(c->yHistogram);
762 free(c->tempDst);
763 free(c->tempSrc);
764 free(c->tempDstBlock);
765 free(c->tempSrcBlock);
766 free(c->deintTemp);
ec487e5d 767 free(c->nonBQPTable);
9c9e467d
MN
768
769 free(c);
770}
771
9cb54f43 772void pp_postprocess(uint8_t * src[3], int srcStride[3],
9c9e467d 773 uint8_t * dst[3], int dstStride[3],
ec487e5d 774 int width, int height,
9c9e467d 775 QP_STORE_T *QP_store, int QPStride,
c41d972d 776 pp_mode_t *vm, void *vc, int pict_type)
911879d1 777{
ec487e5d
MN
778 int mbWidth = (width+15)>>4;
779 int mbHeight= (height+15)>>4;
8aaac435 780 QP_STORE_T quantArray[2048/8];
c41d972d 781 PPMode *mode = (PPMode*)vm;
ec487e5d 782 PPContext *c = (PPContext*)vc;
9c9e467d 783
8aaac435 784 if(QP_store==NULL || (mode->lumMode & FORCE_QUANT))
815cbfe7 785 {
8aaac435
MN
786 int i;
787 QP_store= quantArray;
9c9e467d 788 QPStride= 0;
8aaac435
MN
789 if(mode->lumMode & FORCE_QUANT)
790 for(i=0; i<2048/8; i++) quantArray[i]= mode->forcedQuant;
791 else
792 for(i=0; i<2048/8; i++) quantArray[i]= 1;
815cbfe7 793 }
ec487e5d
MN
794if(0){
795int x,y;
796for(y=0; y<mbHeight; y++){
797 for(x=0; x<mbWidth; x++){
798 printf("%2d ", QP_store[x + y*QPStride]);
799 }
800 printf("\n");
801}
802 printf("\n");
803}
804//printf("pict_type:%d\n", pict_type);
51e19dcc 805
ec487e5d
MN
806 if(pict_type!=3)
807 {
808 int x,y;
809 for(y=0; y<mbHeight; y++){
810 for(x=0; x<mbWidth; x++){
811 int qscale= QP_store[x + y*QPStride];
812 if(qscale&~31)
813 qscale=31;
814 c->nonBQPTable[y*mbWidth + x]= qscale;
815 }
816 }
817 }
815cbfe7 818
df8d4d0e 819 if(verbose>2)
162c9c2e
MN
820 {
821 printf("using npp filters 0x%X/0x%X\n", mode->lumMode, mode->chromMode);
162c9c2e
MN
822 }
823
9c9e467d 824 postProcess(src[0], srcStride[0], dst[0], dstStride[0],
ec487e5d 825 width, height, QP_store, QPStride, 0, mode, c);
911879d1 826
ec487e5d
MN
827 width = (width +1)>>1;
828 height = (height+1)>>1;
911879d1 829
4e1349d4
MN
830 if(mode->chromMode)
831 {
9c9e467d 832 postProcess(src[1], srcStride[1], dst[1], dstStride[1],
ec487e5d 833 width, height, QP_store, QPStride, 1, mode, c);
9c9e467d 834 postProcess(src[2], srcStride[2], dst[2], dstStride[2],
ec487e5d 835 width, height, QP_store, QPStride, 2, mode, c);
4e1349d4 836 }
9c9e467d 837 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2])
4e1349d4 838 {
ec487e5d
MN
839 memcpy(dst[1], src[1], srcStride[1]*height);
840 memcpy(dst[2], src[2], srcStride[2]*height);
4e1349d4
MN
841 }
842 else
843 {
844 int y;
ec487e5d 845 for(y=0; y<height; y++)
4e1349d4 846 {
ec487e5d
MN
847 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
848 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
4e1349d4
MN
849 }
850 }
911879d1
MN
851}
852