runtime cpu detection
[libav.git] / postproc / postprocess.c
CommitLineData
3057fa66
A
1/*
2 Copyright (C) 2001 Michael Niedermayer (michaelni@gmx.at)
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17*/
18
19/*
3b58b885 20 C MMX MMX2 3DNow
3057fa66
A
21isVertDC Ec Ec
22isVertMinMaxOk Ec Ec
3b58b885 23doVertLowPass E e e
7f16f6e6 24doVertDefFilter Ec Ec e e
3057fa66 25isHorizDC Ec Ec
4e4dcbc5
MN
26isHorizMinMaxOk a E
27doHorizLowPass E e e
7f16f6e6 28doHorizDefFilter Ec Ec e e
2e212618 29deRing E e e*
3b58b885 30Vertical RKAlgo1 E a a
e5c30e06 31Horizontal RKAlgo1 a a
117e45b0
MN
32Vertical X1# a E E
33Horizontal X1# a E E
acced553
MN
34LinIpolDeinterlace e E E*
35CubicIpolDeinterlace a e e*
36LinBlendDeinterlace e E E*
117e45b0 37MedianDeinterlace# Ec Ec
be44a4d7 38TempDeNoiser# E e e
d5a1a995 39
117e45b0
MN
* I don't have a 3DNow CPU -> it is untested, but no one said it doesn't work, so it seems to work
# more or less self-invented filters, so the exactness isn't too meaningful
3057fa66 42E = Exact implementation
e = almost exact implementation (slightly different rounding,...)
3057fa66
A
44a = alternative / approximate impl
45c = checked against the other implementations (-vo md5)
46*/
47
48/*
49TODO:
3057fa66 50reduce the time wasted on the mem transfer
13e00528 51implement everything in C at least (done at the moment but ...)
3057fa66
A
52unroll stuff if instructions depend too much on the prior one
53we use 8x8 blocks for the horizontal filters, opendivx seems to use 8x4?
54move YScale thing to the end instead of fixing QP
13e00528 55write a faster and higher quality deblocking filter :)
d5a1a995
MN
56make the mainloop more flexible (variable number of blocks at once
57 (the if/else stuff per block is slowing things down)
9f45d04d 58compare the quality & speed of all filters
9f45d04d 59split this huge file
e5c30e06 60border remover
8405b3fd 61optimize c versions
117e45b0 62try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
be44a4d7 63smart blur
cd38e322 64commandline option for the deblock / dering thresholds
cc9b0679
MN
65put fastmemcpy back
66dont use #ifdef ARCH_X86 for the asm stuff ... cross compilers? (note cpudetect uses ARCH_X86)
3057fa66 67...
13e00528
A
68*/
69
a6be8111 70//Changelog: use the CVS log
3057fa66 71
6c426cff 72#include "../config.h"
3057fa66
A
73#include <inttypes.h>
74#include <stdio.h>
d5a1a995 75#include <stdlib.h>
911879d1 76#include <string.h>
dda87e9f
PL
77#ifdef HAVE_MALLOC_H
78#include <malloc.h>
79#endif
3057fa66 80//#undef HAVE_MMX2
13e00528 81//#define HAVE_3DNOW
3057fa66 82//#undef HAVE_MMX
cc9b0679 83//#undef ARCH_X86
7f16f6e6 84//#define DEBUG_BRIGHTNESS
cc9b0679 85//#include "../libvo/fastmemcpy.h"
13e00528 86#include "postprocess.h"
cc9b0679 87#include "../cpudetect.h"
3057fa66 88
e939e1c3
A
89#define MIN(a,b) ((a) > (b) ? (b) : (a))
90#define MAX(a,b) ((a) < (b) ? (b) : (a))
91#define ABS(a) ((a) > 0 ? (a) : (-(a)))
92#define SIGN(a) ((a) > 0 ? 1 : -1)
93
911879d1
MN
94#define GET_MODE_BUFFER_SIZE 500
95#define OPTIONS_ARRAY_SIZE 10
96
cc9b0679
MN
97#ifdef ARCH_X86
98#define CAN_COMPILE_X86_ASM
99#endif
100
101#ifdef CAN_COMPILE_X86_ASM
3fe8e8f0
MN
102static volatile uint64_t __attribute__((aligned(8))) packedYOffset= 0x0000000000000000LL;
103static volatile uint64_t __attribute__((aligned(8))) packedYScale= 0x0100010001000100LL;
b28daef8
MN
104static uint64_t __attribute__((aligned(8))) w05= 0x0005000500050005LL;
105static uint64_t __attribute__((aligned(8))) w20= 0x0020002000200020LL;
106static uint64_t __attribute__((aligned(8))) w1400= 0x1400140014001400LL;
107static uint64_t __attribute__((aligned(8))) bm00000001= 0x00000000000000FFLL;
108static uint64_t __attribute__((aligned(8))) bm00010000= 0x000000FF00000000LL;
109static uint64_t __attribute__((aligned(8))) bm00001000= 0x00000000FF000000LL;
110static uint64_t __attribute__((aligned(8))) bm10000000= 0xFF00000000000000LL;
111static uint64_t __attribute__((aligned(8))) bm10000001= 0xFF000000000000FFLL;
112static uint64_t __attribute__((aligned(8))) bm11000011= 0xFFFF00000000FFFFLL;
113static uint64_t __attribute__((aligned(8))) bm00000011= 0x000000000000FFFFLL;
114static uint64_t __attribute__((aligned(8))) bm11111110= 0xFFFFFFFFFFFFFF00LL;
115static uint64_t __attribute__((aligned(8))) bm11000000= 0xFFFF000000000000LL;
116static uint64_t __attribute__((aligned(8))) bm00011000= 0x000000FFFF000000LL;
117static uint64_t __attribute__((aligned(8))) bm00110011= 0x0000FFFF0000FFFFLL;
118static uint64_t __attribute__((aligned(8))) bm11001100= 0xFFFF0000FFFF0000LL;
119static uint64_t __attribute__((aligned(8))) b00= 0x0000000000000000LL;
120static uint64_t __attribute__((aligned(8))) b01= 0x0101010101010101LL;
121static uint64_t __attribute__((aligned(8))) b02= 0x0202020202020202LL;
122static uint64_t __attribute__((aligned(8))) b0F= 0x0F0F0F0F0F0F0F0FLL;
123static uint64_t __attribute__((aligned(8))) b04= 0x0404040404040404LL;
124static uint64_t __attribute__((aligned(8))) b08= 0x0808080808080808LL;
125static uint64_t __attribute__((aligned(8))) bFF= 0xFFFFFFFFFFFFFFFFLL;
126static uint64_t __attribute__((aligned(8))) b20= 0x2020202020202020LL;
127static uint64_t __attribute__((aligned(8))) b80= 0x8080808080808080LL;
128static uint64_t __attribute__((aligned(8))) b7E= 0x7E7E7E7E7E7E7E7ELL;
129static uint64_t __attribute__((aligned(8))) b7C= 0x7C7C7C7C7C7C7C7CLL;
130static uint64_t __attribute__((aligned(8))) b3F= 0x3F3F3F3F3F3F3F3FLL;
131static uint64_t __attribute__((aligned(8))) temp0=0;
132static uint64_t __attribute__((aligned(8))) temp1=0;
133static uint64_t __attribute__((aligned(8))) temp2=0;
134static uint64_t __attribute__((aligned(8))) temp3=0;
135static uint64_t __attribute__((aligned(8))) temp4=0;
136static uint64_t __attribute__((aligned(8))) temp5=0;
137static uint64_t __attribute__((aligned(8))) pQPb=0;
138static uint64_t __attribute__((aligned(8))) pQPb2=0;
139static uint8_t __attribute__((aligned(8))) tempBlocks[8*16*2]; //used for the horizontal code
a9c77978 140static uint32_t __attribute__((aligned(4))) maxTmpNoise[4];
b28daef8 141#else
3057fa66
A
142static uint64_t packedYOffset= 0x0000000000000000LL;
143static uint64_t packedYScale= 0x0100010001000100LL;
b28daef8 144#endif
3057fa66
A
145
146int hFlatnessThreshold= 56 - 16;
147int vFlatnessThreshold= 56 - 16;
cd38e322 148int deringThreshold= 20;
3057fa66
A
149
//amount of "black" you are willing to lose to get a brightness-corrected picture
151double maxClippedThreshold= 0.01;
152
911879d1 153int maxAllowedY=234;
658a85f2 154int minAllowedY=16;
3057fa66 155
911879d1
MN
156static struct PPFilter filters[]=
157{
158 {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK},
159 {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK},
160 {"vr", "rkvdeblock", 1, 2, 4, H_RK1_FILTER},
161 {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER},
162 {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER},
163 {"dr", "dering", 1, 5, 6, DERING},
164 {"al", "autolevels", 0, 1, 2, LEVEL_FIX},
165 {"lb", "linblenddeint", 0, 1, 6, LINEAR_BLEND_DEINT_FILTER},
166 {"li", "linipoldeint", 0, 1, 6, LINEAR_IPOL_DEINT_FILTER},
167 {"ci", "cubicipoldeint", 0, 1, 6, CUBIC_IPOL_DEINT_FILTER},
168 {"md", "mediandeint", 0, 1, 6, MEDIAN_DEINT_FILTER},
117e45b0 169 {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER},
911879d1
MN
170 {NULL, NULL,0,0,0,0} //End Marker
171};
172
173static char *replaceTable[]=
174{
117e45b0
MN
175 "default", "hdeblock:a,vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",
176 "de", "hdeblock:a,vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",
177 "fast", "x1hdeblock:a,x1vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",
178 "fa", "x1hdeblock:a,x1vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",
911879d1
MN
179 NULL //End Marker
180};
181
cc9b0679 182#ifdef CAN_COMPILE_X86_ASM
e5c30e06
MN
183static inline void unusedVariableWarningFixer()
184{
185if(
186 packedYOffset + packedYScale + w05 + w20 + w1400 + bm00000001 + bm00010000
187 + bm00001000 + bm10000000 + bm10000001 + bm11000011 + bm00000011 + bm11111110
188 + bm11000000 + bm00011000 + bm00110011 + bm11001100 + b00 + b01 + b02 + b0F
b28daef8 189 + bFF + b20 + b04+ b08 + pQPb2 + b80 + b7E + b7C + b3F + temp0 + temp1 + temp2 + temp3 + temp4
e5c30e06
MN
190 + temp5 + pQPb== 0) b00=0;
191}
b28daef8 192#endif
e5c30e06 193
a6be8111 194#ifdef TIMING
3057fa66
A
195static inline long long rdtsc()
196{
197 long long l;
198 asm volatile( "rdtsc\n\t"
199 : "=A" (l)
200 );
201// printf("%d\n", int(l/1000));
202 return l;
203}
9a722af7 204#endif
3057fa66 205
cc9b0679 206#ifdef CAN_COMPILE_X86_ASM
3057fa66
A
207static inline void prefetchnta(void *p)
208{
209 asm volatile( "prefetchnta (%0)\n\t"
210 : : "r" (p)
211 );
212}
213
214static inline void prefetcht0(void *p)
215{
216 asm volatile( "prefetcht0 (%0)\n\t"
217 : : "r" (p)
218 );
219}
220
221static inline void prefetcht1(void *p)
222{
223 asm volatile( "prefetcht1 (%0)\n\t"
224 : : "r" (p)
225 );
226}
227
228static inline void prefetcht2(void *p)
229{
230 asm volatile( "prefetcht2 (%0)\n\t"
231 : : "r" (p)
232 );
233}
9a722af7 234#endif
3057fa66 235
// The horizontal functions exist only in C because the MMX code is faster with vertical filters and transposing
3057fa66 237
cf5ec61d
MN
238/**
239 * Check if the given 8x8 Block is mostly "flat"
240 */
241static inline int isHorizDC(uint8_t src[], int stride)
242{
243 int numEq= 0;
244 int y;
245 for(y=0; y<BLOCK_SIZE; y++)
246 {
247 if(((src[0] - src[1] + 1) & 0xFFFF) < 3) numEq++;
248 if(((src[1] - src[2] + 1) & 0xFFFF) < 3) numEq++;
249 if(((src[2] - src[3] + 1) & 0xFFFF) < 3) numEq++;
250 if(((src[3] - src[4] + 1) & 0xFFFF) < 3) numEq++;
251 if(((src[4] - src[5] + 1) & 0xFFFF) < 3) numEq++;
252 if(((src[5] - src[6] + 1) & 0xFFFF) < 3) numEq++;
253 if(((src[6] - src[7] + 1) & 0xFFFF) < 3) numEq++;
254 src+= stride;
255 }
256 return numEq > hFlatnessThreshold;
257}
258
/**
 * Returns 1 if the first and last pixel of the row (src[0] and src[7]) differ
 * by at most 2*QP, 0 otherwise. Only the first row is examined; stride is
 * not used.
 */
static inline int isHorizMinMaxOk(uint8_t src[], int stride, int QP)
{
	const int edgeDiff= src[0] - src[7];

	return abs(edgeDiff) <= 2*QP;
}
265
266static inline void doHorizDefFilter(uint8_t dst[], int stride, int QP)
267{
268 int y;
269 for(y=0; y<BLOCK_SIZE; y++)
270 {
271 const int middleEnergy= 5*(dst[4] - dst[5]) + 2*(dst[2] - dst[5]);
272
273 if(ABS(middleEnergy) < 8*QP)
274 {
275 const int q=(dst[3] - dst[4])/2;
276 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
277 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
278
279 int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) );
280 d= MAX(d, 0);
281
282 d= (5*d + 32) >> 6;
283 d*= SIGN(-middleEnergy);
284
285 if(q>0)
286 {
287 d= d<0 ? 0 : d;
288 d= d>q ? q : d;
289 }
290 else
291 {
292 d= d>0 ? 0 : d;
293 d= d<q ? q : d;
294 }
295
296 dst[3]-= d;
297 dst[4]+= d;
298 }
299 dst+= stride;
300 }
301}
302
303/**
304 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
305 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
306 */
307static inline void doHorizLowPass(uint8_t dst[], int stride, int QP)
308{
309
310 int y;
311 for(y=0; y<BLOCK_SIZE; y++)
312 {
313 const int first= ABS(dst[-1] - dst[0]) < QP ? dst[-1] : dst[0];
314 const int last= ABS(dst[8] - dst[7]) < QP ? dst[8] : dst[7];
315
316 int sums[9];
317 sums[0] = first + dst[0];
318 sums[1] = dst[0] + dst[1];
319 sums[2] = dst[1] + dst[2];
320 sums[3] = dst[2] + dst[3];
321 sums[4] = dst[3] + dst[4];
322 sums[5] = dst[4] + dst[5];
323 sums[6] = dst[5] + dst[6];
324 sums[7] = dst[6] + dst[7];
325 sums[8] = dst[7] + last;
326
327 dst[0]= ((sums[0]<<2) + ((first + sums[2])<<1) + sums[4] + 8)>>4;
328 dst[1]= ((dst[1]<<2) + ((first + sums[0] + sums[3])<<1) + sums[5] + 8)>>4;
329 dst[2]= ((dst[2]<<2) + ((first + sums[1] + sums[4])<<1) + sums[6] + 8)>>4;
330 dst[3]= ((dst[3]<<2) + ((sums[2] + sums[5])<<1) + sums[0] + sums[7] + 8)>>4;
331 dst[4]= ((dst[4]<<2) + ((sums[3] + sums[6])<<1) + sums[1] + sums[8] + 8)>>4;
332 dst[5]= ((dst[5]<<2) + ((last + sums[7] + sums[4])<<1) + sums[2] + 8)>>4;
333 dst[6]= (((last + dst[6])<<2) + ((dst[7] + sums[5])<<1) + sums[3] + 8)>>4;
334 dst[7]= ((sums[8]<<2) + ((last + sums[6])<<1) + sums[4] + 8)>>4;
335
336 dst+= stride;
337 }
338}
339
4e4dcbc5 340/**
cc9b0679
MN
341 * Experimental Filter 1 (Horizontal)
342 * will not damage linear gradients
343 * Flat blocks should look like they where passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
344 * can only smooth blocks at the expected locations (it cant smooth them if they did move)
345 * MMX2 version does correct clipping C version doesnt
346 * not identical with the vertical one
4e4dcbc5 347 */
cc9b0679
MN
348static inline void horizX1Filter(uint8_t *src, int stride, int QP)
349{
117e45b0 350 int y;
cc9b0679
MN
351 static uint64_t *lut= NULL;
352 if(lut==NULL)
117e45b0 353 {
cc9b0679
MN
354 int i;
355 lut= (uint64_t*)memalign(8, 256*8);
356 for(i=0; i<256; i++)
117e45b0 357 {
cc9b0679 358 int v= i < 128 ? 2*i : 2*(i-256);
117e45b0 359/*
cc9b0679
MN
360//Simulate 112242211 9-Tap filter
361 uint64_t a= (v/16) & 0xFF;
362 uint64_t b= (v/8) & 0xFF;
363 uint64_t c= (v/4) & 0xFF;
364 uint64_t d= (3*v/8) & 0xFF;
117e45b0 365*/
cc9b0679
MN
366//Simulate piecewise linear interpolation
367 uint64_t a= (v/16) & 0xFF;
368 uint64_t b= (v*3/16) & 0xFF;
369 uint64_t c= (v*5/16) & 0xFF;
370 uint64_t d= (7*v/16) & 0xFF;
371 uint64_t A= (0x100 - a)&0xFF;
372 uint64_t B= (0x100 - b)&0xFF;
373 uint64_t C= (0x100 - c)&0xFF;
374 uint64_t D= (0x100 - c)&0xFF;
375
376 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
377 (D<<24) | (C<<16) | (B<<8) | (A);
378 //lut[i] = (v<<32) | (v<<24);
117e45b0
MN
379 }
380 }
cc9b0679
MN
381
382 for(y=0; y<BLOCK_SIZE; y++)
117e45b0 383 {
cc9b0679
MN
384 int a= src[1] - src[2];
385 int b= src[3] - src[4];
386 int c= src[5] - src[6];
387
388 int d= MAX(ABS(b) - (ABS(a) + ABS(c))/2, 0);
389
390 if(d < QP)
117e45b0 391 {
cc9b0679
MN
392 int v = d * SIGN(-b);
393
394 src[1] +=v/8;
395 src[2] +=v/4;
396 src[3] +=3*v/8;
397 src[4] -=3*v/8;
398 src[5] -=v/4;
399 src[6] -=v/8;
400
117e45b0 401 }
cc9b0679 402 src+=stride;
117e45b0 403 }
cc9b0679
MN
404}
405
406
//Note: we have C, MMX, MMX2 and 3DNow versions; there is no combined 3DNow+MMX2 one
408//Plain C versions
409#undef HAVE_MMX
410#undef HAVE_MMX2
411#undef HAVE_3DNOW
412#undef ARCH_X86
413#define RENAME(a) a ## _C
414#include "postprocess_template.c"
415
416#ifdef CAN_COMPILE_X86_ASM
417
418//MMX versions
419#undef RENAME
420#define HAVE_MMX
421#undef HAVE_MMX2
422#undef HAVE_3DNOW
423#define ARCH_X86
424#define RENAME(a) a ## _MMX
425#include "postprocess_template.c"
426
427//MMX2 versions
428#undef RENAME
429#define HAVE_MMX
430#define HAVE_MMX2
431#undef HAVE_3DNOW
432#define ARCH_X86
433#define RENAME(a) a ## _MMX2
434#include "postprocess_template.c"
435
436//3DNOW versions
437#undef RENAME
438#define HAVE_MMX
439#undef HAVE_MMX2
440#define HAVE_3DNOW
441#define ARCH_X86
442#define RENAME(a) a ## _3DNow
443#include "postprocess_template.c"
444
445#endif //CAN_COMPILE_X86_ASM
446
// minor note: the HAVE_xyz macros are left in an inconsistent state after this line, so do not use them below
448
449static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
450 QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode)
451{
452 // useing ifs here as they are faster than function pointers allthough the
453 // difference wouldnt be messureable here but its much better because
454 // someone might exchange the cpu whithout restarting mplayer ;)
455
456#ifdef CAN_COMPILE_X86_ASM
457 // ordered per speed fasterst first
458 if(gCpuCaps.hasMMX2)
459 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, ppMode);
460 else if(gCpuCaps.has3DNow)
461 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, ppMode);
462 else if(gCpuCaps.hasMMX)
463 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, ppMode);
464 else
465 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, ppMode);
466#else
467 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, ppMode);
be44a4d7 468#endif
117e45b0
MN
469}
470
9a722af7
A
471#ifdef HAVE_ODIVX_POSTPROCESS
472#include "../opendivx/postprocess.h"
473int use_old_pp=0;
474#endif
13e00528 475
cc9b0679
MN
476//static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
477// QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
13e00528 478
911879d1
MN
479/* -pp Command line Help
480NOTE/FIXME: put this at an appropriate place (--help, html docs, man mplayer)?
481
482-pp <filterName>[:<option>[:<option>...]][,[-]<filterName>[:<option>...]]...
483
484long form example:
485-pp vdeblock:autoq,hdeblock:autoq,linblenddeint -pp default,-vdeblock
486short form example:
487-pp vb:a,hb:a,lb -pp de,-vb
117e45b0
MN
488more examples:
489-pp tn:64:128:256
911879d1
MN
490
491Filters Options
492short long name short long option Description
493* * a autoq cpu power dependant enabler
494 c chrom chrominance filtring enabled
495 y nochrom chrominance filtring disabled
496hb hdeblock horizontal deblocking filter
497vb vdeblock vertical deblocking filter
498vr rkvdeblock
499h1 x1hdeblock Experimental horizontal deblock filter 1
500v1 x1vdeblock Experimental vertical deblock filter 1
dr dering deringing filter
502al autolevels automatic brightness / contrast fixer
503 f fullyrange stretch luminance range to (0..255)
504lb linblenddeint linear blend deinterlacer
505li linipoldeint linear interpolating deinterlacer
506ci cubicipoldeint cubic interpolating deinterlacer
507md mediandeint median deinterlacer
de default hdeblock:a,vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400
fa fast x1hdeblock:a,x1vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400
117e45b0 510tn tmpnoise (3 Thresholds) Temporal Noise Reducer
911879d1
MN
511*/
512
513/**
514 * returns a PPMode struct which will have a non 0 error variable if an error occured
515 * name is the string after "-pp" on the command line
516 * quality is a number from 0 to GET_PP_QUALITY_MAX
517 */
518struct PPMode getPPModeByNameAndQuality(char *name, int quality)
519{
520 char temp[GET_MODE_BUFFER_SIZE];
521 char *p= temp;
522 char *filterDelimiters= ",";
523 char *optionDelimiters= ":";
117e45b0 524 struct PPMode ppMode= {0,0,0,0,0,0,{150,200,400}};
911879d1
MN
525 char *filterToken;
526
527 strncpy(temp, name, GET_MODE_BUFFER_SIZE);
528
117e45b0
MN
529 printf("%s\n", name);
530
911879d1 531 for(;;){
911879d1 532 char *filterName;
117e45b0 533 int q= 1000000; //GET_PP_QUALITY_MAX;
911879d1
MN
534 int chrom=-1;
535 char *option;
536 char *options[OPTIONS_ARRAY_SIZE];
537 int i;
538 int filterNameOk=0;
539 int numOfUnknownOptions=0;
540 int enable=1; //does the user want us to enabled or disabled the filter
541
542 filterToken= strtok(p, filterDelimiters);
543 if(filterToken == NULL) break;
117e45b0 544 p+= strlen(filterToken) + 1; // p points to next filterToken
911879d1
MN
545 filterName= strtok(filterToken, optionDelimiters);
546 printf("%s::%s\n", filterToken, filterName);
547
548 if(*filterName == '-')
549 {
550 enable=0;
551 filterName++;
552 }
117e45b0 553
911879d1
MN
554 for(;;){ //for all options
555 option= strtok(NULL, optionDelimiters);
556 if(option == NULL) break;
557
558 printf("%s\n", option);
559 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
560 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
561 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
562 else
563 {
564 options[numOfUnknownOptions] = option;
565 numOfUnknownOptions++;
911879d1
MN
566 }
567 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
568 }
117e45b0 569 options[numOfUnknownOptions] = NULL;
911879d1
MN
570
571 /* replace stuff from the replace Table */
572 for(i=0; replaceTable[2*i]!=NULL; i++)
573 {
574 if(!strcmp(replaceTable[2*i], filterName))
575 {
576 int newlen= strlen(replaceTable[2*i + 1]);
577 int plen;
578 int spaceLeft;
579
580 if(p==NULL) p= temp, *p=0; //last filter
581 else p--, *p=','; //not last filter
582
583 plen= strlen(p);
584 spaceLeft= (int)p - (int)temp + plen;
585 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE)
586 {
587 ppMode.error++;
588 break;
589 }
590 memmove(p + newlen, p, plen+1);
591 memcpy(p, replaceTable[2*i + 1], newlen);
592 filterNameOk=1;
593 }
594 }
595
596 for(i=0; filters[i].shortName!=NULL; i++)
597 {
117e45b0 598// printf("Compareing %s, %s, %s\n", filters[i].shortName,filters[i].longName, filterName);
911879d1
MN
599 if( !strcmp(filters[i].longName, filterName)
600 || !strcmp(filters[i].shortName, filterName))
601 {
602 ppMode.lumMode &= ~filters[i].mask;
603 ppMode.chromMode &= ~filters[i].mask;
604
605 filterNameOk=1;
606 if(!enable) break; // user wants to disable it
607
608 if(q >= filters[i].minLumQuality)
609 ppMode.lumMode|= filters[i].mask;
610 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
611 if(q >= filters[i].minChromQuality)
612 ppMode.chromMode|= filters[i].mask;
613
614 if(filters[i].mask == LEVEL_FIX)
615 {
616 int o;
617 ppMode.minAllowedY= 16;
618 ppMode.maxAllowedY= 234;
619 for(o=0; options[o]!=NULL; o++)
620 if( !strcmp(options[o],"fullyrange")
621 ||!strcmp(options[o],"f"))
622 {
623 ppMode.minAllowedY= 0;
624 ppMode.maxAllowedY= 255;
625 numOfUnknownOptions--;
626 }
627 }
117e45b0
MN
628 else if(filters[i].mask == TEMP_NOISE_FILTER)
629 {
630 int o;
631 int numOfNoises=0;
632 ppMode.maxTmpNoise[0]= 150;
633 ppMode.maxTmpNoise[1]= 200;
634 ppMode.maxTmpNoise[2]= 400;
635
636 for(o=0; options[o]!=NULL; o++)
637 {
638 char *tail;
639 ppMode.maxTmpNoise[numOfNoises]=
640 strtol(options[o], &tail, 0);
641 if(tail!=options[o])
642 {
643 numOfNoises++;
644 numOfUnknownOptions--;
645 if(numOfNoises >= 3) break;
646 }
647 }
648 }
911879d1
MN
649 }
650 }
651 if(!filterNameOk) ppMode.error++;
652 ppMode.error += numOfUnknownOptions;
653 }
654
815cbfe7 655#ifdef HAVE_ODIVX_POSTPROCESS
911879d1
MN
656 if(ppMode.lumMode & H_DEBLOCK) ppMode.oldMode |= PP_DEBLOCK_Y_H;
657 if(ppMode.lumMode & V_DEBLOCK) ppMode.oldMode |= PP_DEBLOCK_Y_V;
658 if(ppMode.chromMode & H_DEBLOCK) ppMode.oldMode |= PP_DEBLOCK_C_H;
659 if(ppMode.chromMode & V_DEBLOCK) ppMode.oldMode |= PP_DEBLOCK_C_V;
660 if(ppMode.lumMode & DERING) ppMode.oldMode |= PP_DERING_Y;
661 if(ppMode.chromMode & DERING) ppMode.oldMode |= PP_DERING_C;
815cbfe7 662#endif
911879d1
MN
663
664 return ppMode;
665}
666
3057fa66 667/**
117e45b0 668 * Obsolete, dont use it, use postprocess2() instead
3057fa66 669 */
3057fa66
A
670void postprocess(unsigned char * src[], int src_stride,
671 unsigned char * dst[], int dst_stride,
672 int horizontal_size, int vertical_size,
673 QP_STORE_T *QP_store, int QP_stride,
674 int mode)
675{
117e45b0
MN
676 struct PPMode ppMode;
677 static QP_STORE_T zeroArray[2048/8];
911879d1
MN
678/*
679 static int qual=0;
680
117e45b0
MN
681 ppMode= getPPModeByNameAndQuality("fast,default,-hdeblock,-vdeblock,tmpnoise:150:200:300", qual);
682 printf("OK\n");
911879d1
MN
683 qual++;
684 qual%=7;
117e45b0
MN
685 printf("\n%X %X %X %X :%d: %d %d %d\n", ppMode.lumMode, ppMode.chromMode, ppMode.oldMode, ppMode.error,
686 qual, ppMode.maxTmpNoise[0], ppMode.maxTmpNoise[1], ppMode.maxTmpNoise[2]);
911879d1
MN
687 postprocess2(src, src_stride, dst, dst_stride,
688 horizontal_size, vertical_size, QP_store, QP_stride, &ppMode);
689
690 return;
691*/
815cbfe7
MN
692 if(QP_store==NULL)
693 {
694 QP_store= zeroArray;
695 QP_stride= 0;
696 }
13e00528 697
117e45b0
MN
698 ppMode.lumMode= mode;
699 mode= ((mode&0xFF)>>4) | (mode&0xFFFFFF00);
700 ppMode.chromMode= mode;
be44a4d7
MN
701 ppMode.maxTmpNoise[0]= 700;
702 ppMode.maxTmpNoise[1]= 1500;
703 ppMode.maxTmpNoise[2]= 3000;
117e45b0 704
9a722af7
A
705#ifdef HAVE_ODIVX_POSTPROCESS
706// Note: I could make this shit outside of this file, but it would mean one
707// more function call...
708 if(use_old_pp){
709 odivx_postprocess(src,src_stride,dst,dst_stride,horizontal_size,vertical_size,QP_store,QP_stride,mode);
710 return;
711 }
712#endif
713
13e00528 714 postProcess(src[0], src_stride, dst[0], dst_stride,
117e45b0 715 horizontal_size, vertical_size, QP_store, QP_stride, 0, &ppMode);
3057fa66
A
716
717 horizontal_size >>= 1;
718 vertical_size >>= 1;
719 src_stride >>= 1;
720 dst_stride >>= 1;
721
4e1349d4 722 if(ppMode.chromMode)
3057fa66 723 {
13e00528 724 postProcess(src[1], src_stride, dst[1], dst_stride,
117e45b0 725 horizontal_size, vertical_size, QP_store, QP_stride, 1, &ppMode);
13e00528 726 postProcess(src[2], src_stride, dst[2], dst_stride,
117e45b0 727 horizontal_size, vertical_size, QP_store, QP_stride, 2, &ppMode);
3057fa66 728 }
4e1349d4
MN
729 else if(src_stride == dst_stride)
730 {
731 memcpy(dst[1], src[1], src_stride*vertical_size);
732 memcpy(dst[2], src[2], src_stride*vertical_size);
733 }
3057fa66
A
734 else
735 {
4e1349d4
MN
736 int y;
737 for(y=0; y<vertical_size; y++)
738 {
739 memcpy(&(dst[1][y*dst_stride]), &(src[1][y*src_stride]), horizontal_size);
740 memcpy(&(dst[2][y*dst_stride]), &(src[2][y*src_stride]), horizontal_size);
741 }
742 }
743
744#if 0
117e45b0
MN
745 memset(dst[1], 128, dst_stride*vertical_size);
746 memset(dst[2], 128, dst_stride*vertical_size);
4e1349d4 747#endif
3057fa66 748}
9a722af7 749
911879d1
MN
750void postprocess2(unsigned char * src[], int src_stride,
751 unsigned char * dst[], int dst_stride,
752 int horizontal_size, int vertical_size,
753 QP_STORE_T *QP_store, int QP_stride,
754 struct PPMode *mode)
755{
756
815cbfe7
MN
757 static QP_STORE_T zeroArray[2048/8];
758 if(QP_store==NULL)
759 {
760 QP_store= zeroArray;
761 QP_stride= 0;
762 }
763
911879d1
MN
764#ifdef HAVE_ODIVX_POSTPROCESS
765// Note: I could make this shit outside of this file, but it would mean one
766// more function call...
767 if(use_old_pp){
768 odivx_postprocess(src,src_stride,dst,dst_stride,horizontal_size,vertical_size,QP_store,QP_stride,
769 mode->oldMode);
770 return;
771 }
772#endif
773
774 postProcess(src[0], src_stride, dst[0], dst_stride,
117e45b0 775 horizontal_size, vertical_size, QP_store, QP_stride, 0, mode);
911879d1
MN
776
777 horizontal_size >>= 1;
778 vertical_size >>= 1;
779 src_stride >>= 1;
780 dst_stride >>= 1;
781
4e1349d4
MN
782 if(mode->chromMode)
783 {
784 postProcess(src[1], src_stride, dst[1], dst_stride,
785 horizontal_size, vertical_size, QP_store, QP_stride, 1, mode);
786 postProcess(src[2], src_stride, dst[2], dst_stride,
787 horizontal_size, vertical_size, QP_store, QP_stride, 2, mode);
788 }
789 else if(src_stride == dst_stride)
790 {
791 memcpy(dst[1], src[1], src_stride*vertical_size);
792 memcpy(dst[2], src[2], src_stride*vertical_size);
793 }
794 else
795 {
796 int y;
797 for(y=0; y<vertical_size; y++)
798 {
799 memcpy(&(dst[1][y*dst_stride]), &(src[1][y*src_stride]), horizontal_size);
800 memcpy(&(dst[2][y*dst_stride]), &(src[2][y*src_stride]), horizontal_size);
801 }
802 }
911879d1
MN
803}
804
805
13e00528
A
806/**
807 * gets the mode flags for a given quality (larger values mean slower but better postprocessing)
9a722af7 808 * 0 <= quality <= 6
13e00528 809 */
9a722af7
A
810int getPpModeForQuality(int quality){
811 int modes[1+GET_PP_QUALITY_MAX]= {
812 0,
813#if 1
814 // horizontal filters first
815 LUM_H_DEBLOCK,
816 LUM_H_DEBLOCK | LUM_V_DEBLOCK,
817 LUM_H_DEBLOCK | LUM_V_DEBLOCK | CHROM_H_DEBLOCK,
818 LUM_H_DEBLOCK | LUM_V_DEBLOCK | CHROM_H_DEBLOCK | CHROM_V_DEBLOCK,
819 LUM_H_DEBLOCK | LUM_V_DEBLOCK | CHROM_H_DEBLOCK | CHROM_V_DEBLOCK | LUM_DERING,
820 LUM_H_DEBLOCK | LUM_V_DEBLOCK | CHROM_H_DEBLOCK | CHROM_V_DEBLOCK | LUM_DERING | CHROM_DERING
821#else
822 // vertical filters first
13e00528
A
823 LUM_V_DEBLOCK,
824 LUM_V_DEBLOCK | LUM_H_DEBLOCK,
825 LUM_V_DEBLOCK | LUM_H_DEBLOCK | CHROM_V_DEBLOCK,
826 LUM_V_DEBLOCK | LUM_H_DEBLOCK | CHROM_V_DEBLOCK | CHROM_H_DEBLOCK,
827 LUM_V_DEBLOCK | LUM_H_DEBLOCK | CHROM_V_DEBLOCK | CHROM_H_DEBLOCK | LUM_DERING,
828 LUM_V_DEBLOCK | LUM_H_DEBLOCK | CHROM_V_DEBLOCK | CHROM_H_DEBLOCK | LUM_DERING | CHROM_DERING
9a722af7
A
829#endif
830 };
831
832#ifdef HAVE_ODIVX_POSTPROCESS
833 int odivx_modes[1+GET_PP_QUALITY_MAX]= {
834 0,
835 PP_DEBLOCK_Y_H,
836 PP_DEBLOCK_Y_H|PP_DEBLOCK_Y_V,
837 PP_DEBLOCK_Y_H|PP_DEBLOCK_Y_V|PP_DEBLOCK_C_H,
838 PP_DEBLOCK_Y_H|PP_DEBLOCK_Y_V|PP_DEBLOCK_C_H|PP_DEBLOCK_C_V,
839 PP_DEBLOCK_Y_H|PP_DEBLOCK_Y_V|PP_DEBLOCK_C_H|PP_DEBLOCK_C_V|PP_DERING_Y,
840 PP_DEBLOCK_Y_H|PP_DEBLOCK_Y_V|PP_DEBLOCK_C_H|PP_DEBLOCK_C_V|PP_DERING_Y|PP_DERING_C
841 };
842 if(use_old_pp) return odivx_modes[quality];
843#endif
844 return modes[quality];
3057fa66
A
845}
846
4e4dcbc5 847