2 * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
4 * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
6 * This file is part of FFmpeg.
8 * FFmpeg is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
29 C MMX MMX2 3DNow AltiVec
31 isVertMinMaxOk Ec Ec Ec
32 doVertLowPass E e e Ec
33 doVertDefFilter Ec Ec e e Ec
35 isHorizMinMaxOk a E Ec
36 doHorizLowPass E e e Ec
37 doHorizDefFilter Ec Ec e e Ec
38 do_a_deblock Ec E Ec E
40 Vertical RKAlgo1 E a a
41 Horizontal RKAlgo1 a a
44 LinIpolDeinterlace e E E*
45 CubicIpolDeinterlace a e e*
46 LinBlendDeinterlace e E E*
47 MedianDeinterlace# E Ec Ec
48 TempDeNoiser# E e e Ec
50 * i do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
51 # more or less self-invented filters, so the exactness is not too meaningful
52 E = Exact implementation
53 e = almost exact implementation (slightly different rounding,...)
54 a = alternative / approximate impl
55 c = checked against the other implementations (-vo md5)
56 p = partially optimized, still some work to do
61 reduce the time wasted on the mem transfer
62 unroll stuff if instructions depend too much on the prior one
63 move YScale thing to the end instead of fixing QP
64 write a faster and higher quality deblocking filter :)
65 make the mainloop more flexible (variable number of blocks at once
66 (the if/else stuff per block is slowing things down)
67 compare the quality & speed of all filters
70 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
74 //Changelog: use the Subversion log
89 //#define DEBUG_BRIGHTNESS
90 #include "postprocess.h"
91 #include "postprocess_internal.h"
97 #define GET_MODE_BUFFER_SIZE 500
98 #define OPTIONS_ARRAY_SIZE 10
100 #define TEMP_STRIDE 8
101 //#define NUM_BLOCKS_AT_ONCE 16 //not used yet
/*
 * File-scope 64-bit constants, declared 8-byte aligned, followed by the
 * dering threshold.
 *   w05 / w04 / w20 : the 16-bit values 5, 4 and 0x20 repeated four times.
 *   b00 / b01 / b02 / b08 / b80 : one byte value repeated eight times.
 * NOTE(review): these are presumably operands for the MMX inline assembly in
 * the included templates, and attribute_used presumably stops the compiler
 * from discarding them as unreferenced — confirm. The #endif that closes this
 * ARCH_X86 section is not visible in this listing.
 */
103 #if defined(ARCH_X86)
104 static DECLARE_ALIGNED(8, uint64_t attribute_used
, w05
)= 0x0005000500050005LL
;
105 static DECLARE_ALIGNED(8, uint64_t attribute_used
, w04
)= 0x0004000400040004LL
;
106 static DECLARE_ALIGNED(8, uint64_t attribute_used
, w20
)= 0x0020002000200020LL
;
107 static DECLARE_ALIGNED(8, uint64_t attribute_used
, b00
)= 0x0000000000000000LL
;
108 static DECLARE_ALIGNED(8, uint64_t attribute_used
, b01
)= 0x0101010101010101LL
;
109 static DECLARE_ALIGNED(8, uint64_t attribute_used
, b02
)= 0x0202020202020202LL
;
110 static DECLARE_ALIGNED(8, uint64_t attribute_used
, b08
)= 0x0808080808080808LL
;
111 static DECLARE_ALIGNED(8, uint64_t attribute_used
, b80
)= 0x8080808080808080LL
;
114 static const int attribute_used deringThreshold
= 20;
/*
 * Table of the filters that can be named in a postprocessing mode string.
 * Each row holds a short name, a long name, three integers and the mode-mask
 * bit of the filter; the table ends with an all-NULL/zero row.
 * pp_get_mode_by_name_and_quality() matches a filter name against both name
 * columns with strcmp() and reads the fields chromDefault, minLumQuality,
 * minChromQuality and mask from the matching row.
 * NOTE(review): the three integers are presumably chromDefault,
 * minLumQuality and minChromQuality in that order — the struct declaration is
 * not in this file, confirm against postprocess_internal.h.
 */
117 static struct PPFilter filters
[]=
119 {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK
},
120 {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK
},
121 /* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER},
122 {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/
123 {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER
},
124 {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER
},
125 {"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK
},
126 {"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK
},
127 {"dr", "dering", 1, 5, 6, DERING
},
128 {"al", "autolevels", 0, 1, 2, LEVEL_FIX
},
129 {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER
},
130 {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER
},
131 {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER
},
132 {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER
},
133 {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER
},
134 {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER
},
135 {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER
},
136 {"fq", "forcequant", 1, 0, 0, FORCE_QUANT
},
137 {NULL
, NULL
,0,0,0,0} //End Marker
/*
 * Alias table stored as consecutive (name, expansion) string pairs.
 * pp_get_mode_by_name_and_quality() compares a filter name with entry 2*i
 * and, on a match, splices entry 2*i+1 into the mode string in its place.
 * That loop stops at a NULL name entry; the terminating entry itself is not
 * visible in this listing.
 */
140 static const char *replaceTable
[]=
142 "default", "hb:a,vb:a,dr:a",
143 "de", "hb:a,vb:a,dr:a",
144 "fast", "h1:a,v1:a,dr:a",
145 "fa", "h1:a,v1:a,dr:a",
146 "ac", "ha:a:128:7,va:a,dr:a",
151 #if defined(ARCH_X86)
/* Issues the x86 "prefetchnta" instruction on the address held in p.
 * The asm operand and clobber lines are not visible in this listing. */
152 static inline void prefetchnta(void *p
)
154 asm volatile( "prefetchnta (%0)\n\t"
/* Issues the x86 "prefetcht0" instruction on the address held in p.
 * The asm operand and clobber lines are not visible in this listing. */
159 static inline void prefetcht0(void *p
)
161 asm volatile( "prefetcht0 (%0)\n\t"
/* Issues the x86 "prefetcht1" instruction on the address held in p.
 * The asm operand and clobber lines are not visible in this listing. */
166 static inline void prefetcht1(void *p
)
168 asm volatile( "prefetcht1 (%0)\n\t"
/* Issues the x86 "prefetcht2" instruction on the address held in p.
 * The asm operand and clobber lines are not visible in this listing. */
173 static inline void prefetcht2(void *p
)
175 asm volatile( "prefetcht2 (%0)\n\t"
181 // The horizontal functions exist only in C because the MMX code is faster with vertical filters and transposing
184 * Check if the given 8x8 Block is mostly "flat"
/*
 * Flatness test in the horizontal direction.
 *
 * Inside a loop over BLOCK_SIZE rows, each of the seven neighbouring pairs
 * src[k], src[k+1] (k = 0..6) increments numEq when their difference lies in
 * [-dcOffset, +dcOffset]. The range test is done with a single comparison:
 * after adding dcOffset and casting to unsigned, values below
 * dcThreshold = 2*dcOffset + 1 are exactly the in-range ones.
 *
 * src     first pixel of the block
 * stride  distance between rows (the statement that advances src per row is
 *         not visible in this listing)
 * c       supplies nonBQP, ppMode.baseDcDiff and ppMode.flatnessThreshold
 *
 * Returns non-zero when numEq exceeds c->ppMode.flatnessThreshold.
 */
186 static inline int isHorizDC_C(uint8_t src
[], int stride
, PPContext
*c
)
190 const int dcOffset
= ((c
->nonBQP
*c
->ppMode
.baseDcDiff
)>>8) + 1;
191 const int dcThreshold
= dcOffset
*2 + 1;
193 for(y
=0; y
<BLOCK_SIZE
; y
++)
195 if(((unsigned)(src
[0] - src
[1] + dcOffset
)) < dcThreshold
) numEq
++;
196 if(((unsigned)(src
[1] - src
[2] + dcOffset
)) < dcThreshold
) numEq
++;
197 if(((unsigned)(src
[2] - src
[3] + dcOffset
)) < dcThreshold
) numEq
++;
198 if(((unsigned)(src
[3] - src
[4] + dcOffset
)) < dcThreshold
) numEq
++;
199 if(((unsigned)(src
[4] - src
[5] + dcOffset
)) < dcThreshold
) numEq
++;
200 if(((unsigned)(src
[5] - src
[6] + dcOffset
)) < dcThreshold
) numEq
++;
201 if(((unsigned)(src
[6] - src
[7] + dcOffset
)) < dcThreshold
) numEq
++;
204 return numEq
> c
->ppMode
.flatnessThreshold
;
208 * Check if the middle 8x8 Block in the given 8x16 block is flat
/*
 * Flatness test in the vertical direction.
 *
 * src is first advanced by stride*4. Inside a loop of BLOCK_SIZE-1
 * iterations, each of the eight columns k = 0..7 compares src[k] with the
 * pixel one row below (src[k+stride]) and increments numEq when the
 * difference lies in [-dcOffset, +dcOffset]; the add-offset/cast-to-unsigned
 * comparison against dcThreshold = 2*dcOffset + 1 implements that range test.
 *
 * src     top of the 8x16 area (see the pointer adjustment below)
 * stride  distance between rows (the per-iteration advance of src is not
 *         visible in this listing)
 * c       supplies nonBQP, ppMode.baseDcDiff and ppMode.flatnessThreshold
 *
 * Returns non-zero when numEq exceeds c->ppMode.flatnessThreshold.
 */
210 static inline int isVertDC_C(uint8_t src
[], int stride
, PPContext
*c
){
213 const int dcOffset
= ((c
->nonBQP
*c
->ppMode
.baseDcDiff
)>>8) + 1;
214 const int dcThreshold
= dcOffset
*2 + 1;
216 src
+= stride
*4; // src points to begin of the 8x8 Block
217 for(y
=0; y
<BLOCK_SIZE
-1; y
++)
219 if(((unsigned)(src
[0] - src
[0+stride
] + dcOffset
)) < dcThreshold
) numEq
++;
220 if(((unsigned)(src
[1] - src
[1+stride
] + dcOffset
)) < dcThreshold
) numEq
++;
221 if(((unsigned)(src
[2] - src
[2+stride
] + dcOffset
)) < dcThreshold
) numEq
++;
222 if(((unsigned)(src
[3] - src
[3+stride
] + dcOffset
)) < dcThreshold
) numEq
++;
223 if(((unsigned)(src
[4] - src
[4+stride
] + dcOffset
)) < dcThreshold
) numEq
++;
224 if(((unsigned)(src
[5] - src
[5+stride
] + dcOffset
)) < dcThreshold
) numEq
++;
225 if(((unsigned)(src
[6] - src
[6+stride
] + dcOffset
)) < dcThreshold
) numEq
++;
226 if(((unsigned)(src
[7] - src
[7+stride
] + dcOffset
)) < dcThreshold
) numEq
++;
229 return numEq
> c
->ppMode
.flatnessThreshold
;
/*
 * Range check on sampled pixel pairs of a row.
 *
 * Every visible test has the form (unsigned)(src[a] - src[b] + 2*QP) > 4*QP
 * and returns 0 when it holds, i.e. when the two pixels differ by more than
 * 2*QP in either direction. Pairs tested: (0,5), (2,7), (4,1), (6,3) and
 * (0,7).
 * NOTE(review): the loop over rows, the advance by stride, any preprocessor
 * branches separating the (0,7) test from the other four, and the final
 * return are not visible in this listing — confirm against the full source.
 */
232 static inline int isHorizMinMaxOk_C(uint8_t src
[], int stride
, int QP
)
237 if((unsigned)(src
[0] - src
[5] + 2*QP
) > 4*QP
) return 0;
239 if((unsigned)(src
[2] - src
[7] + 2*QP
) > 4*QP
) return 0;
241 if((unsigned)(src
[4] - src
[1] + 2*QP
) > 4*QP
) return 0;
243 if((unsigned)(src
[6] - src
[3] + 2*QP
) > 4*QP
) return 0;
248 if((unsigned)(src
[0] - src
[7] + 2*QP
) > 4*QP
) return 0;
/*
 * Range check on sampled pixel pairs of a block, vertical direction.
 *
 * Three variants are visible:
 *  1. x steps by 4; for the four columns x..x+3 one pair of rows per column
 *     is tested (rows 0/5, 2/7, 4/1, 6/3), returning 0 when the pair differs
 *     by more than 2*QP in either direction.
 *  2. for every column x, row 1 is tested against row 8 (stride<<3) the same
 *     way.
 *  3. pixel values v = src[x + y*stride] are read and 0 is returned when
 *     max-min > 2*QP.
 * NOTE(review): these are presumably alternative preprocessor branches; the
 * #if/#else lines, the min/max bookkeeping and the final return are not
 * visible in this listing — confirm.
 */
255 static inline int isVertMinMaxOk_C(uint8_t src
[], int stride
, int QP
)
261 for(x
=0; x
<BLOCK_SIZE
; x
+=4)
263 if((unsigned)(src
[ x
+ 0*stride
] - src
[ x
+ 5*stride
] + 2*QP
) > 4*QP
) return 0;
264 if((unsigned)(src
[1+x
+ 2*stride
] - src
[1+x
+ 7*stride
] + 2*QP
) > 4*QP
) return 0;
265 if((unsigned)(src
[2+x
+ 4*stride
] - src
[2+x
+ 1*stride
] + 2*QP
) > 4*QP
) return 0;
266 if((unsigned)(src
[3+x
+ 6*stride
] - src
[3+x
+ 3*stride
] + 2*QP
) > 4*QP
) return 0;
271 for(x
=0; x
<BLOCK_SIZE
; x
++)
273 if((unsigned)(src
[x
+ stride
] - src
[x
+ (stride
<<3)] + 2*QP
) > 4*QP
) return 0;
280 for(x
=0; x
<BLOCK_SIZE
; x
++)
286 int v
= src
[x
+ y
*stride
];
290 if(max
-min
> 2*QP
) return 0;
/*
 * Classifies a block for the horizontal deblocking filters: first asks
 * isHorizDC_C() whether the block is flat and, if so, asks
 * isHorizMinMaxOk_C() with c->QP.
 * NOTE(review): the values returned for each outcome are not visible in this
 * listing.
 */
296 static inline int horizClassify_C(uint8_t src
[], int stride
, PPContext
*c
){
297 if( isHorizDC_C(src
, stride
, c
) ){
298 if( isHorizMinMaxOk_C(src
, stride
, c
->QP
) )
/*
 * Classifies a block for the vertical deblocking filters: first asks
 * isVertDC_C() whether the block is flat and, if so, asks
 * isVertMinMaxOk_C() with c->QP.
 * NOTE(review): the values returned for each outcome are not visible in this
 * listing.
 */
307 static inline int vertClassify_C(uint8_t src
[], int stride
, PPContext
*c
){
308 if( isVertDC_C(src
, stride
, c
) ){
309 if( isVertMinMaxOk_C(src
, stride
, c
->QP
) )
/*
 * Default horizontal deblocking filter for the edge between dst[3] and dst[4].
 *
 * Inside a loop over BLOCK_SIZE rows:
 *   middleEnergy = 5*(dst[4]-dst[3]) + 2*(dst[2]-dst[5])
 * and only when |middleEnergy| < 8*c->QP a correction is derived:
 *   q           = (dst[3]-dst[4])/2
 *   leftEnergy  = 5*(dst[2]-dst[1]) + 2*(dst[0]-dst[3])
 *   rightEnergy = 5*(dst[6]-dst[5]) + 2*(dst[4]-dst[7])
 *   d           = |middleEnergy| - min(|leftEnergy|, |rightEnergy|)
 * d is then given the sign of -middleEnergy.
 * NOTE(review): the clamping/scaling of d between these steps, the limiting
 * against q, the write-back to dst[3]/dst[4] and the row advance are not
 * visible in this listing.
 */
318 static inline void doHorizDefFilter_C(uint8_t dst
[], int stride
, PPContext
*c
)
321 for(y
=0; y
<BLOCK_SIZE
; y
++)
323 const int middleEnergy
= 5*(dst
[4] - dst
[3]) + 2*(dst
[2] - dst
[5]);
325 if(FFABS(middleEnergy
) < 8*c
->QP
)
327 const int q
=(dst
[3] - dst
[4])/2;
328 const int leftEnergy
= 5*(dst
[2] - dst
[1]) + 2*(dst
[0] - dst
[3]);
329 const int rightEnergy
= 5*(dst
[6] - dst
[5]) + 2*(dst
[4] - dst
[7]);
331 int d
= FFABS(middleEnergy
) - FFMIN( FFABS(leftEnergy
), FFABS(rightEnergy
) );
335 d
*= FFSIGN(-middleEnergy
);
356 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
357 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
/*
 * Horizontal low-pass over the eight pixels dst[0..7] of each row, inside a
 * loop over BLOCK_SIZE rows.
 *
 * first / last: the outer neighbour (dst[-1] / dst[8]) is used as the
 * padding value only when it differs from the edge pixel (dst[0] / dst[7])
 * by less than c->QP; otherwise the edge pixel itself is used.
 *
 * sums[0] starts as 4*first + dst[0] + dst[1] + dst[2] + 4; each further
 * sums[k] is derived from sums[k-1] by subtracting one sample and adding the
 * next, so the ten running sums are built incrementally.
 *
 * Each output is (sums[i] + sums[i+2] + 2*dst[i]) >> 4. sums[] is filled
 * completely before any dst[] is overwritten, so all reads see the original
 * pixels.
 * NOTE(review): the declaration of sums[] and the row advance are not
 * visible in this listing.
 */
359 static inline void doHorizLowPass_C(uint8_t dst
[], int stride
, PPContext
*c
)
362 for(y
=0; y
<BLOCK_SIZE
; y
++)
364 const int first
= FFABS(dst
[-1] - dst
[0]) < c
->QP ? dst
[-1] : dst
[0];
365 const int last
= FFABS(dst
[8] - dst
[7]) < c
->QP ? dst
[8] : dst
[7];
368 sums
[0] = 4*first
+ dst
[0] + dst
[1] + dst
[2] + 4;
369 sums
[1] = sums
[0] - first
+ dst
[3];
370 sums
[2] = sums
[1] - first
+ dst
[4];
371 sums
[3] = sums
[2] - first
+ dst
[5];
372 sums
[4] = sums
[3] - first
+ dst
[6];
373 sums
[5] = sums
[4] - dst
[0] + dst
[7];
374 sums
[6] = sums
[5] - dst
[1] + last
;
375 sums
[7] = sums
[6] - dst
[2] + last
;
376 sums
[8] = sums
[7] - dst
[3] + last
;
377 sums
[9] = sums
[8] - dst
[4] + last
;
379 dst
[0]= (sums
[0] + sums
[2] + 2*dst
[0])>>4;
380 dst
[1]= (sums
[1] + sums
[3] + 2*dst
[1])>>4;
381 dst
[2]= (sums
[2] + sums
[4] + 2*dst
[2])>>4;
382 dst
[3]= (sums
[3] + sums
[5] + 2*dst
[3])>>4;
383 dst
[4]= (sums
[4] + sums
[6] + 2*dst
[4])>>4;
384 dst
[5]= (sums
[5] + sums
[7] + 2*dst
[5])>>4;
385 dst
[6]= (sums
[6] + sums
[8] + 2*dst
[6])>>4;
386 dst
[7]= (sums
[7] + sums
[9] + 2*dst
[7])>>4;
393 * Experimental Filter 1 (Horizontal)
394 * will not damage linear gradients
395 * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
396 * can only smooth blocks at the expected locations (it cannot smooth them if they did move)
397 * MMX2 version does correct clipping, C version does not
398 * not identical with the vertical one
/*
 * Experimental horizontal deblocking filter (C version).
 *
 * Part 1 fills a function-static table `lut` (allocated once with
 * av_malloc(256*8)): for each i, v is 2*i or 2*(i-256), and eight bytes
 * derived from v (a, b, c, d and the complements A, B, C, D) are packed into
 * lut[i].
 * Part 2, in a loop over BLOCK_SIZE rows, takes the three pixel steps
 *   a = src[1]-src[2], b = src[3]-src[4], c = src[5]-src[6]
 * computes d = max(|b| - (|a|+|c|)/2, 0) and v = d with the sign of -b.
 *
 * NOTE(review): the guard around the allocation, the loop over i, the test
 * on d, the updates of src[] from v and the row advance are not visible in
 * this listing. No read of lut is visible either — confirm whether the C path
 * uses the table at all. The av_malloc() result is written through without a
 * visible NULL check.
 */
400 static inline void horizX1Filter(uint8_t *src
, int stride
, int QP
)
403 static uint64_t *lut
= NULL
;
407 lut
= av_malloc(256*8);
410 int v
= i
< 128 ?
2*i
: 2*(i
-256);
412 //Simulate 112242211 9-Tap filter
413 uint64_t a= (v/16) & 0xFF;
414 uint64_t b= (v/8) & 0xFF;
415 uint64_t c= (v/4) & 0xFF;
416 uint64_t d= (3*v/8) & 0xFF;
418 //Simulate piecewise linear interpolation
419 uint64_t a
= (v
/16) & 0xFF;
420 uint64_t b
= (v
*3/16) & 0xFF;
421 uint64_t c
= (v
*5/16) & 0xFF;
422 uint64_t d
= (7*v
/16) & 0xFF;
423 uint64_t A
= (0x100 - a
)&0xFF;
424 uint64_t B
= (0x100 - b
)&0xFF;
425 uint64_t C
= (0x100 - c
)&0xFF;
/* NOTE(review): D is computed from c, exactly like C above, whereas A and B
 * are computed from a and b. This looks like a copy-paste slip for
 * (0x100 - d) — confirm before changing. */
426 uint64_t D
= (0x100 - c
)&0xFF;
428 lut
[i
] = (a
<<56) | (b
<<48) | (c
<<40) | (d
<<32) |
429 (D
<<24) | (C
<<16) | (B
<<8) | (A
);
430 //lut[i] = (v<<32) | (v<<24);
434 for(y
=0; y
<BLOCK_SIZE
; y
++)
436 int a
= src
[1] - src
[2];
437 int b
= src
[3] - src
[4];
438 int c
= src
[5] - src
[6];
440 int d
= FFMAX(FFABS(b
) - (FFABS(a
) + FFABS(c
))/2, 0);
444 int v
= d
* FFSIGN(-b
);
459 * accurate deblock filter
/*
 * "Accurate" deblocking filter working on samples spaced `step` apart.
 *
 * After src is advanced by step*4, the nine neighbouring pairs
 * src[(k-1)*step], src[k*step] for k = 0..8 are tested, and numEq counts the
 * pairs whose difference lies in [-dcOffset, +dcOffset] (same
 * add-offset/unsigned-compare test as isHorizDC_C()).
 *
 * numEq > c->ppMode.flatnessThreshold selects the flat path:
 *   - a scan that tracks min and max over the samples,
 *   - then the running-sum low-pass: first/last padding values chosen by
 *     comparing the outer neighbour against QP, ten incremental sums[], and
 *     outputs (sums[i] + sums[i+2] + 2*src[i*step]) >> 4.
 * The remaining visible code is the default filter: middleEnergy,
 * leftEnergy, rightEnergy, q and d computed as in doHorizDefFilter_C() but
 * indexed by step.
 *
 * NOTE(review): the outer loop, the declarations of QP, min, max, x and
 * sums[], the condition linking the min/max scan to the low-pass, the else
 * lines, the final write-back of d and the per-line advance (presumably by
 * `stride`) are not visible in this listing — confirm against the full
 * source.
 */
461 static av_always_inline
void do_a_deblock_C(uint8_t *src
, int step
, int stride
, PPContext
*c
){
464 const int dcOffset
= ((c
->nonBQP
*c
->ppMode
.baseDcDiff
)>>8) + 1;
465 const int dcThreshold
= dcOffset
*2 + 1;
467 src
+= step
*4; // src points to begin of the 8x8 Block
471 if(((unsigned)(src
[-1*step
] - src
[0*step
] + dcOffset
)) < dcThreshold
) numEq
++;
472 if(((unsigned)(src
[ 0*step
] - src
[1*step
] + dcOffset
)) < dcThreshold
) numEq
++;
473 if(((unsigned)(src
[ 1*step
] - src
[2*step
] + dcOffset
)) < dcThreshold
) numEq
++;
474 if(((unsigned)(src
[ 2*step
] - src
[3*step
] + dcOffset
)) < dcThreshold
) numEq
++;
475 if(((unsigned)(src
[ 3*step
] - src
[4*step
] + dcOffset
)) < dcThreshold
) numEq
++;
476 if(((unsigned)(src
[ 4*step
] - src
[5*step
] + dcOffset
)) < dcThreshold
) numEq
++;
477 if(((unsigned)(src
[ 5*step
] - src
[6*step
] + dcOffset
)) < dcThreshold
) numEq
++;
478 if(((unsigned)(src
[ 6*step
] - src
[7*step
] + dcOffset
)) < dcThreshold
) numEq
++;
479 if(((unsigned)(src
[ 7*step
] - src
[8*step
] + dcOffset
)) < dcThreshold
) numEq
++;
480 if(numEq
> c
->ppMode
.flatnessThreshold
){
483 if(src
[0] > src
[step
]){
491 if(src
[x
*step
] > src
[(x
+1)*step
]){
492 if(src
[x
*step
] > max
) max
= src
[ x
*step
];
493 if(src
[(x
+1)*step
] < min
) min
= src
[(x
+1)*step
];
495 if(src
[(x
+1)*step
] > max
) max
= src
[(x
+1)*step
];
496 if(src
[ x
*step
] < min
) min
= src
[ x
*step
];
500 const int first
= FFABS(src
[-1*step
] - src
[0]) < QP ? src
[-1*step
] : src
[0];
501 const int last
= FFABS(src
[8*step
] - src
[7*step
]) < QP ? src
[8*step
] : src
[7*step
];
504 sums
[0] = 4*first
+ src
[0*step
] + src
[1*step
] + src
[2*step
] + 4;
505 sums
[1] = sums
[0] - first
+ src
[3*step
];
506 sums
[2] = sums
[1] - first
+ src
[4*step
];
507 sums
[3] = sums
[2] - first
+ src
[5*step
];
508 sums
[4] = sums
[3] - first
+ src
[6*step
];
509 sums
[5] = sums
[4] - src
[0*step
] + src
[7*step
];
510 sums
[6] = sums
[5] - src
[1*step
] + last
;
511 sums
[7] = sums
[6] - src
[2*step
] + last
;
512 sums
[8] = sums
[7] - src
[3*step
] + last
;
513 sums
[9] = sums
[8] - src
[4*step
] + last
;
515 src
[0*step
]= (sums
[0] + sums
[2] + 2*src
[0*step
])>>4;
516 src
[1*step
]= (sums
[1] + sums
[3] + 2*src
[1*step
])>>4;
517 src
[2*step
]= (sums
[2] + sums
[4] + 2*src
[2*step
])>>4;
518 src
[3*step
]= (sums
[3] + sums
[5] + 2*src
[3*step
])>>4;
519 src
[4*step
]= (sums
[4] + sums
[6] + 2*src
[4*step
])>>4;
520 src
[5*step
]= (sums
[5] + sums
[7] + 2*src
[5*step
])>>4;
521 src
[6*step
]= (sums
[6] + sums
[8] + 2*src
[6*step
])>>4;
522 src
[7*step
]= (sums
[7] + sums
[9] + 2*src
[7*step
])>>4;
525 const int middleEnergy
= 5*(src
[4*step
] - src
[3*step
]) + 2*(src
[2*step
] - src
[5*step
]);
527 if(FFABS(middleEnergy
) < 8*QP
)
529 const int q
=(src
[3*step
] - src
[4*step
])/2;
530 const int leftEnergy
= 5*(src
[2*step
] - src
[1*step
]) + 2*(src
[0*step
] - src
[3*step
]);
531 const int rightEnergy
= 5*(src
[6*step
] - src
[5*step
]) + 2*(src
[4*step
] - src
[7*step
]);
533 int d
= FFABS(middleEnergy
) - FFMIN( FFABS(leftEnergy
), FFABS(rightEnergy
) );
537 d
*= FFSIGN(-middleEnergy
);
564 //Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
566 #if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
572 #define COMPILE_ALTIVEC
573 #endif //HAVE_ALTIVEC
574 #endif //ARCH_POWERPC
576 #if defined(ARCH_X86)
578 #if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
582 #if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
586 #if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
587 #define COMPILE_3DNOW
589 #endif /* defined(ARCH_X86) */
600 #define RENAME(a) a ## _C
601 #include "postprocess_template.c"
605 #ifdef COMPILE_ALTIVEC
608 #define RENAME(a) a ## _altivec
609 #include "postprocess_altivec_template.c"
610 #include "postprocess_template.c"
612 #endif //ARCH_POWERPC
620 #define RENAME(a) a ## _MMX
621 #include "postprocess_template.c"
630 #define RENAME(a) a ## _MMX2
631 #include "postprocess_template.c"
640 #define RENAME(a) a ## _3DNow
641 #include "postprocess_template.c"
644 // minor note: the HAVE_xyz is messed up after that line so do not use it.
/*
 * Dispatches one plane to the CPU-specific postProcess_* implementation.
 *
 * vc and vm are cast to PPContext* and PPMode*, and the mode is copied by
 * value into c->ppMode before dispatch. All variants receive the same
 * arguments (src, srcStride, dst, dstStride, width, height, QPs, QPStride,
 * isColor, c).
 *
 * With RUNTIME_CPUDETECT the variant is chosen from c->cpuCaps: on x86 the
 * tests run in the order MMX2, 3DNow, MMX, with postProcess_C as the last
 * call; an AltiVec test followed by postProcess_C is also visible. Without
 * RUNTIME_CPUDETECT the choice is made by the HAVE_* preprocessor symbols.
 * NOTE(review): several #if/#else/#endif and `else` lines are not visible in
 * this listing, so the exact nesting of the branches cannot be confirmed
 * from here.
 */
646 static inline void postProcess(uint8_t src
[], int srcStride
, uint8_t dst
[], int dstStride
, int width
, int height
,
647 QP_STORE_T QPs
[], int QPStride
, int isColor
, pp_mode_t
*vm
, pp_context_t
*vc
)
649 PPContext
*c
= (PPContext
*)vc
;
650 PPMode
*ppMode
= (PPMode
*)vm
;
651 c
->ppMode
= *ppMode
; //FIXME
653 // Using ifs here as they are faster than function pointers although the
654 // difference would not be measurable here but it is much better because
655 // someone might exchange the CPU without restarting MPlayer ;)
656 #ifdef RUNTIME_CPUDETECT
657 #if defined(ARCH_X86)
658 // ordered by speed, fastest first
659 if(c
->cpuCaps
& PP_CPU_CAPS_MMX2
)
660 postProcess_MMX2(src
, srcStride
, dst
, dstStride
, width
, height
, QPs
, QPStride
, isColor
, c
);
661 else if(c
->cpuCaps
& PP_CPU_CAPS_3DNOW
)
662 postProcess_3DNow(src
, srcStride
, dst
, dstStride
, width
, height
, QPs
, QPStride
, isColor
, c
);
663 else if(c
->cpuCaps
& PP_CPU_CAPS_MMX
)
664 postProcess_MMX(src
, srcStride
, dst
, dstStride
, width
, height
, QPs
, QPStride
, isColor
, c
);
666 postProcess_C(src
, srcStride
, dst
, dstStride
, width
, height
, QPs
, QPStride
, isColor
, c
);
670 if(c
->cpuCaps
& PP_CPU_CAPS_ALTIVEC
)
671 postProcess_altivec(src
, srcStride
, dst
, dstStride
, width
, height
, QPs
, QPStride
, isColor
, c
);
675 postProcess_C(src
, srcStride
, dst
, dstStride
, width
, height
, QPs
, QPStride
, isColor
, c
);
677 #else //RUNTIME_CPUDETECT
679 postProcess_MMX2(src
, srcStride
, dst
, dstStride
, width
, height
, QPs
, QPStride
, isColor
, c
);
680 #elif defined (HAVE_3DNOW)
681 postProcess_3DNow(src
, srcStride
, dst
, dstStride
, width
, height
, QPs
, QPStride
, isColor
, c
);
682 #elif defined (HAVE_MMX)
683 postProcess_MMX(src
, srcStride
, dst
, dstStride
, width
, height
, QPs
, QPStride
, isColor
, c
);
684 #elif defined (HAVE_ALTIVEC)
685 postProcess_altivec(src
, srcStride
, dst
, dstStride
, width
, height
, QPs
, QPStride
, isColor
, c
);
687 postProcess_C(src
, srcStride
, dst
, dstStride
, width
, height
, QPs
, QPStride
, isColor
, c
);
689 #endif //!RUNTIME_CPUDETECT
692 //static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
693 // QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
695 /* -pp Command line Help
/*
 * User-facing help text describing the filter names, their options and the
 * syntax of a postprocessing mode string.
 *
 * NOTE(review): three long names printed here differ from the names the
 * parser matches with strcmp() in the `filters` table: this text says
 * "hadeblock", "vadeblock" and "forceQuant", while the table holds
 * "ahdeblock", "avdeblock" and "forcequant". A user typing the long names
 * from this help would presumably get an unknown-filter error — confirm and
 * align one side with the other.
 */
697 const char *const pp_help
=
698 "Available postprocessing filters:\n"
700 "short long name short long option Description\n"
701 "* * a autoq CPU power dependent enabler\n"
702 " c chrom chrominance filtering enabled\n"
703 " y nochrom chrominance filtering disabled\n"
704 " n noluma luma filtering disabled\n"
705 "hb hdeblock (2 threshold) horizontal deblocking filter\n"
706 " 1. difference factor: default=32, higher -> more deblocking\n"
707 " 2. flatness threshold: default=39, lower -> more deblocking\n"
708 " the h & v deblocking filters share these\n"
709 " so you can't set different thresholds for h / v\n"
710 "vb vdeblock (2 threshold) vertical deblocking filter\n"
711 "ha hadeblock (2 threshold) horizontal deblocking filter\n"
712 "va vadeblock (2 threshold) vertical deblocking filter\n"
713 "h1 x1hdeblock experimental h deblock filter 1\n"
714 "v1 x1vdeblock experimental v deblock filter 1\n"
715 "dr dering deringing filter\n"
716 "al autolevels automatic brightness / contrast\n"
717 " f fullyrange stretch luminance to (0..255)\n"
718 "lb linblenddeint linear blend deinterlacer\n"
719 "li linipoldeint linear interpolating deinterlace\n"
720 "ci cubicipoldeint cubic interpolating deinterlacer\n"
721 "md mediandeint median deinterlacer\n"
722 "fd ffmpegdeint ffmpeg deinterlacer\n"
723 "l5 lowpass5 FIR lowpass deinterlacer\n"
724 "de default hb:a,vb:a,dr:a\n"
725 "fa fast h1:a,v1:a,dr:a\n"
726 "ac ha:a:128:7,va:a,dr:a\n"
727 "tn tmpnoise (3 threshold) temporal noise reducer\n"
728 " 1. <= 2. <= 3. larger -> stronger filtering\n"
729 "fq forceQuant <quantizer> force quantizer\n"
731 "<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
732 "long form example:\n"
733 "vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
734 "short form example:\n"
735 "vb:a/hb:a/lb de,-vb\n"
/*
 * Parses a postprocessing mode string into a newly allocated PPMode.
 *
 * name     filter list; filters are separated by ',' or '/', options by ':'
 * quality  value assigned to q for a filter that carries the "autoq"/"a"
 *          option (q otherwise stays at 1000000); q is compared against the
 *          filter's minLumQuality / minChromQuality to decide whether its
 *          mask bit is set
 *
 * Visible steps:
 *  - allocate the PPMode with av_malloc() and fill in defaults
 *    (maxTmpNoise 700/1500/3000, Y range 16..234, baseDcDiff 256/8,
 *    flatnessThreshold 56-16-1, maxClippedThreshold 0.01);
 *  - copy name into the local buffer temp and tokenize it with strtok();
 *  - per filter token: split off the options, recognise
 *    autoq/nochrom/chrom/noluma, and collect every other option in options[];
 *  - if the filter name matches an alias in replaceTable, splice the
 *    expansion into temp (guarded by a GET_MODE_BUFFER_SIZE check);
 *  - look the name up in filters[], clear and then conditionally set its
 *    mask bit in lumMode/chromMode, and consume filter-specific options
 *    (fullyrange for LEVEL_FIX, up to three noise thresholds for
 *    TEMP_NOISE_FILTER, difference factor and flatness threshold for the
 *    deblock filters, the quantizer for FORCE_QUANT);
 *  - add unknown filter names and leftover options to ppMode->error, which
 *    is reported through av_log().
 *
 * NOTE(review): many lines (declarations, braces, the outer loop, the
 * handling after the error message, the return) are not visible in this
 * listing. No NULL check of the av_malloc() result is visible before ppMode
 * is written.
 */
741 pp_mode_t
*pp_get_mode_by_name_and_quality(const char *name
, int quality
)
743 char temp
[GET_MODE_BUFFER_SIZE
];
745 static const char filterDelimiters
[] = ",/";
746 static const char optionDelimiters
[] = ":";
747 struct PPMode
*ppMode
;
750 ppMode
= av_malloc(sizeof(PPMode
));
753 ppMode
->chromMode
= 0;
754 ppMode
->maxTmpNoise
[0]= 700;
755 ppMode
->maxTmpNoise
[1]= 1500;
756 ppMode
->maxTmpNoise
[2]= 3000;
757 ppMode
->maxAllowedY
= 234;
758 ppMode
->minAllowedY
= 16;
759 ppMode
->baseDcDiff
= 256/8;
760 ppMode
->flatnessThreshold
= 56-16-1;
761 ppMode
->maxClippedThreshold
= 0.01;
/* NOTE(review): strncpy() does not write a terminating NUL when name is
 * GET_MODE_BUFFER_SIZE characters or longer, and no explicit terminator is
 * visible in this listing; strtok()/strlen() below would then read past
 * temp — confirm. */
764 strncpy(temp
, name
, GET_MODE_BUFFER_SIZE
);
766 av_log(NULL
, AV_LOG_DEBUG
, "pp: %s\n", name
);
770 int q
= 1000000; //PP_QUALITY_MAX;
774 char *options
[OPTIONS_ARRAY_SIZE
];
777 int numOfUnknownOptions
=0;
778 int enable
=1; //does the user want us to enable or disable the filter
780 filterToken
= strtok(p
, filterDelimiters
);
781 if(filterToken
== NULL
) break;
782 p
+= strlen(filterToken
) + 1; // p points to next filterToken
783 filterName
= strtok(filterToken
, optionDelimiters
);
784 av_log(NULL
, AV_LOG_DEBUG
, "pp: %s::%s\n", filterToken
, filterName
);
786 if(*filterName
== '-')
792 for(;;){ //for all options
793 option
= strtok(NULL
, optionDelimiters
);
794 if(option
== NULL
) break;
796 av_log(NULL
, AV_LOG_DEBUG
, "pp: option: %s\n", option
);
797 if(!strcmp("autoq", option
) || !strcmp("a", option
)) q
= quality
;
798 else if(!strcmp("nochrom", option
) || !strcmp("y", option
)) chrom
=0;
799 else if(!strcmp("chrom", option
) || !strcmp("c", option
)) chrom
=1;
800 else if(!strcmp("noluma", option
) || !strcmp("n", option
)) luma
=0;
803 options
[numOfUnknownOptions
] = option
;
804 numOfUnknownOptions
++;
/* stop one slot early so the NULL terminator written after the loop fits */
806 if(numOfUnknownOptions
>= OPTIONS_ARRAY_SIZE
-1) break;
808 options
[numOfUnknownOptions
] = NULL
;
810 /* replace stuff from the replace Table */
811 for(i
=0; replaceTable
[2*i
]!=NULL
; i
++)
813 if(!strcmp(replaceTable
[2*i
], filterName
))
815 int newlen
= strlen(replaceTable
[2*i
+ 1]);
819 if(p
==NULL
) p
= temp
, *p
=0; //last filter
820 else p
--, *p
=','; //not last filter
823 spaceLeft
= p
- temp
+ plen
;
824 if(spaceLeft
+ newlen
>= GET_MODE_BUFFER_SIZE
)
/* memmove: source and destination overlap inside temp */
829 memmove(p
+ newlen
, p
, plen
+1);
830 memcpy(p
, replaceTable
[2*i
+ 1], newlen
);
835 for(i
=0; filters
[i
].shortName
!=NULL
; i
++)
837 if( !strcmp(filters
[i
].longName
, filterName
)
838 || !strcmp(filters
[i
].shortName
, filterName
))
840 ppMode
->lumMode
&= ~filters
[i
].mask
;
841 ppMode
->chromMode
&= ~filters
[i
].mask
;
844 if(!enable
) break; // user wants to disable it
846 if(q
>= filters
[i
].minLumQuality
&& luma
)
847 ppMode
->lumMode
|= filters
[i
].mask
;
848 if(chrom
==1 || (chrom
==-1 && filters
[i
].chromDefault
))
849 if(q
>= filters
[i
].minChromQuality
)
850 ppMode
->chromMode
|= filters
[i
].mask
;
852 if(filters
[i
].mask
== LEVEL_FIX
)
855 ppMode
->minAllowedY
= 16;
856 ppMode
->maxAllowedY
= 234;
857 for(o
=0; options
[o
]!=NULL
; o
++)
859 if( !strcmp(options
[o
],"fullyrange")
860 ||!strcmp(options
[o
],"f"))
862 ppMode
->minAllowedY
= 0;
863 ppMode
->maxAllowedY
= 255;
864 numOfUnknownOptions
--;
868 else if(filters
[i
].mask
== TEMP_NOISE_FILTER
)
873 for(o
=0; options
[o
]!=NULL
; o
++)
876 ppMode
->maxTmpNoise
[numOfNoises
]=
877 strtol(options
[o
], &tail
, 0);
881 numOfUnknownOptions
--;
882 if(numOfNoises
>= 3) break;
886 else if(filters
[i
].mask
== V_DEBLOCK
|| filters
[i
].mask
== H_DEBLOCK
887 || filters
[i
].mask
== V_A_DEBLOCK
|| filters
[i
].mask
== H_A_DEBLOCK
)
891 for(o
=0; options
[o
]!=NULL
&& o
<2; o
++)
894 int val
= strtol(options
[o
], &tail
, 0);
895 if(tail
==options
[o
]) break;
897 numOfUnknownOptions
--;
898 if(o
==0) ppMode
->baseDcDiff
= val
;
899 else ppMode
->flatnessThreshold
= val
;
902 else if(filters
[i
].mask
== FORCE_QUANT
)
905 ppMode
->forcedQuant
= 15;
907 for(o
=0; options
[o
]!=NULL
&& o
<1; o
++)
910 int val
= strtol(options
[o
], &tail
, 0);
911 if(tail
==options
[o
]) break;
913 numOfUnknownOptions
--;
914 ppMode
->forcedQuant
= val
;
919 if(!filterNameOk
) ppMode
->error
++;
920 ppMode
->error
+= numOfUnknownOptions
;
923 av_log(NULL
, AV_LOG_DEBUG
, "pp: lumMode=%X, chromMode=%X\n", ppMode
->lumMode
, ppMode
->chromMode
);
926 av_log(NULL
, AV_LOG_ERROR
, "%d errors in postprocess string \"%s\"\n", ppMode
->error
, name
);
/* Public entry point taking a mode handle.
 * NOTE(review): the body is not visible in this listing; presumably it
 * releases the PPMode allocated by pp_get_mode_by_name_and_quality() —
 * confirm. */
933 void pp_free_mode(pp_mode_t
*mode
){
/*
 * Stores a fresh zero-filled allocation of `size` bytes (av_mallocz) in *p.
 * NOTE(review): `alignment` is not referenced in the visible lines, and the
 * line between the signature and the allocation is not visible —
 * presumably it frees the previous *p; confirm.
 */
937 static void reallocAlign(void **p
, int alignment
, int size
){
939 *p
= av_mallocz(size
);
/*
 * (Re)allocates the work buffers of a PPContext through reallocAlign().
 *
 * The macroblock counts are the dimensions rounded up to a multiple of 16
 * and divided by 16. Buffers and their sizes in bytes:
 *   tempDst, tempSrc          stride*24
 *   tempBlocks                2*16*8
 *   yHistogram                256 * sizeof(uint64_t); each entry is then set
 *                             to width*height/64*15/256
 *   tempBlured[i]             stride*mbHeight*16 + 17*1024
 *   tempBluredPast[i]         256*((height+7)&~7)/2 + 17*1024
 *   deintTemp                 2*width + 32
 *   nonBQPTable, stdQPTable   qpStride*mbHeight*sizeof(QP_STORE_T)
 *   forcedQPTable             mbWidth*sizeof(QP_STORE_T)
 * c->qpStride is updated to qpStride.
 * NOTE(review): the loops over i and the update of c->stride are not visible
 * in this listing. None of the allocation results is checked in the visible
 * lines.
 */
942 static void reallocBuffers(PPContext
*c
, int width
, int height
, int stride
, int qpStride
){
943 int mbWidth
= (width
+15)>>4;
944 int mbHeight
= (height
+15)>>4;
948 c
->qpStride
= qpStride
;
950 reallocAlign((void **)&c
->tempDst
, 8, stride
*24);
951 reallocAlign((void **)&c
->tempSrc
, 8, stride
*24);
952 reallocAlign((void **)&c
->tempBlocks
, 8, 2*16*8);
953 reallocAlign((void **)&c
->yHistogram
, 8, 256*sizeof(uint64_t));
955 c
->yHistogram
[i
]= width
*height
/64*15/256;
959 //Note: The +17*1024 is just there so i do not have to worry about r/w over the end.
960 reallocAlign((void **)&c
->tempBlured
[i
], 8, stride
*mbHeight
*16 + 17*1024);
961 reallocAlign((void **)&c
->tempBluredPast
[i
], 8, 256*((height
+7)&(~7))/2 + 17*1024);//FIXME size
964 reallocAlign((void **)&c
->deintTemp
, 8, 2*width
+32);
965 reallocAlign((void **)&c
->nonBQPTable
, 8, qpStride
*mbHeight
*sizeof(QP_STORE_T
));
966 reallocAlign((void **)&c
->stdQPTable
, 8, qpStride
*mbHeight
*sizeof(QP_STORE_T
));
967 reallocAlign((void **)&c
->forcedQPTable
, 8, mbWidth
*sizeof(QP_STORE_T
));
/* Name callback stored in av_codec_context_class.
 * NOTE(review): the body is not visible in this listing. */
970 static const char * context_to_name(void * ptr
) {
/* AVClass instance initialised with the string "Postproc", the
 * context_to_name callback and NULL; pp_get_context() stores its address in
 * c->av_class. */
974 static const AVClass av_codec_context_class
= { "Postproc", context_to_name
, NULL
};
/*
 * Allocates and initialises a postprocessing context for the given picture
 * size.
 *
 * width, height  picture dimensions; the initial stride is width rounded up
 *                to a multiple of 16 and the initial qpStride is
 *                (width+15)/16 + 2 (both may be replaced later, see
 *                pp_postprocess())
 * cpuCaps        flag word; when PP_FORMAT is set, the chroma subsampling
 *                shifts are read from bits 0-1 (horizontal) and bits 4-5
 *                (vertical)
 *
 * The context is zeroed, av_class is set, and the buffers are allocated by
 * reallocBuffers().
 * NOTE(review): the `else` that presumably separates the two pairs of
 * subsample assignments (the second pair sets both to 1), the storing of
 * cpuCaps and the return are not visible in this listing.
 */
976 pp_context_t
*pp_get_context(int width
, int height
, int cpuCaps
){
977 PPContext
*c
= av_malloc(sizeof(PPContext
));
978 int stride
= (width
+15)&(~15); //assumed / will realloc if needed
979 int qpStride
= (width
+15)/16 + 2; //assumed / will realloc if needed
/* NOTE(review): c is passed to memset() with no visible NULL check on the
 * av_malloc() result — confirm. */
981 memset(c
, 0, sizeof(PPContext
));
982 c
->av_class
= &av_codec_context_class
;
984 if(cpuCaps
&PP_FORMAT
){
985 c
->hChromaSubSample
= cpuCaps
&0x3;
986 c
->vChromaSubSample
= (cpuCaps
>>4)&0x3;
988 c
->hChromaSubSample
= 1;
989 c
->vChromaSubSample
= 1;
992 reallocBuffers(c
, width
, height
, stride
, qpStride
);
/*
 * Releases the buffers owned by a context: the three tempBlured and
 * tempBluredPast planes, tempBlocks, yHistogram, tempDst, tempSrc,
 * deintTemp and the three QP tables, then zeroes the PPContext structure.
 * NOTE(review): the release of the context structure itself is not visible
 * in this listing (presumably after the memset) — confirm.
 */
999 void pp_free_context(void *vc
){
1000 PPContext
*c
= (PPContext
*)vc
;
1003 for(i
=0; i
<3; i
++) av_free(c
->tempBlured
[i
]);
1004 for(i
=0; i
<3; i
++) av_free(c
->tempBluredPast
[i
]);
1006 av_free(c
->tempBlocks
);
1007 av_free(c
->yHistogram
);
1008 av_free(c
->tempDst
);
1009 av_free(c
->tempSrc
);
1010 av_free(c
->deintTemp
);
1011 av_free(c
->stdQPTable
);
1012 av_free(c
->nonBQPTable
);
1013 av_free(c
->forcedQPTable
);
1015 memset(c
, 0, sizeof(PPContext
));
/*
 * Public entry point: postprocesses one picture made of three planes.
 *
 * src/srcStride, dst/dstStride  plane pointers and strides (index 0 is
 *                               filtered with isColor 0, indices 1 and 2
 *                               with isColor 1 and 2)
 * width, height                 dimensions of plane 0; shifted right by
 *                               c->hChromaSubSample / c->vChromaSubSample
 *                               before planes 1 and 2 are handled
 * QP_store, QPStride            quantizer table; may be NULL
 * vm, vc                        mode and context handles
 * pict_type                     flag word tested against PP_PICT_TYPE_QP2
 *                               and, masked with 7, against 3
 *
 * Visible steps:
 *  - grow the context buffers through reallocBuffers() when the larger of
 *    the plane-0 strides or |QPStride| exceeds what the context holds;
 *  - when QP_store is NULL or FORCE_QUANT is set in lumMode, switch to
 *    c->forcedQPTable with stride 0, filled with mode->forcedQuant or 1;
 *  - with PP_PICT_TYPE_QP2, copy the QP values halved into c->stdQPTable
 *    and use that table;
 *  - when (pict_type & 7) != 3, copy the QP values masked to 6 bits into
 *    c->nonBQPTable;
 *  - call postProcess() for plane 0, then for planes 1 and 2, or copy
 *    planes 1 and 2 unfiltered (linecpy when the strides match, otherwise
 *    row-wise memcpy).
 * NOTE(review): declarations, braces, else lines and the conditions
 * guarding the debug dump and the chroma branch are not visible in this
 * listing.
 */
1020 void pp_postprocess(uint8_t * src
[3], int srcStride
[3],
1021 uint8_t * dst
[3], int dstStride
[3],
1022 int width
, int height
,
1023 QP_STORE_T
*QP_store
, int QPStride
,
1024 pp_mode_t
*vm
, void *vc
, int pict_type
)
1026 int mbWidth
= (width
+15)>>4;
1027 int mbHeight
= (height
+15)>>4;
1028 PPMode
*mode
= (PPMode
*)vm
;
1029 PPContext
*c
= (PPContext
*)vc
;
1030 int minStride
= FFMAX(FFABS(srcStride
[0]), FFABS(dstStride
[0]));
1031 int absQPStride
= FFABS(QPStride
);
1033 // c->stride and c->QPStride are always positive
1034 if(c
->stride
< minStride
|| c
->qpStride
< absQPStride
)
1035 reallocBuffers(c
, width
, height
,
1036 FFMAX(minStride
, c
->stride
),
1037 FFMAX(c
->qpStride
, absQPStride
));
1039 if(QP_store
==NULL
|| (mode
->lumMode
& FORCE_QUANT
))
1042 QP_store
= c
->forcedQPTable
;
1043 absQPStride
= QPStride
= 0;
1044 if(mode
->lumMode
& FORCE_QUANT
)
1045 for(i
=0; i
<mbWidth
; i
++) QP_store
[i
]= mode
->forcedQuant
;
1047 for(i
=0; i
<mbWidth
; i
++) QP_store
[i
]= 1;
1050 if(pict_type
& PP_PICT_TYPE_QP2
){
1052 const int count
= mbHeight
* absQPStride
;
/* NOTE(review): the QP tables are accessed through uint32_t* casts here and
 * in the nonBQPTable copy further down (four entries per step, which
 * presumably relies on QP_STORE_T being one byte wide); this assumes
 * QP_store is suitably aligned for 32-bit access — confirm. */
1053 for(i
=0; i
<(count
>>2); i
++){
1054 ((uint32_t*)c
->stdQPTable
)[i
] = (((uint32_t*)QP_store
)[i
]>>1) & 0x7F7F7F7F;
/* handle the entries left over after the 4-at-a-time loop */
1056 for(i
<<=2; i
<count
; i
++){
1057 c
->stdQPTable
[i
] = QP_store
[i
]>>1;
1059 QP_store
= c
->stdQPTable
;
1060 QPStride
= absQPStride
;
1065 for(y
=0; y
<mbHeight
; y
++){
1066 for(x
=0; x
<mbWidth
; x
++){
1067 av_log(c
, AV_LOG_INFO
, "%2d ", QP_store
[x
+ y
*QPStride
]);
1069 av_log(c
, AV_LOG_INFO
, "\n");
1071 av_log(c
, AV_LOG_INFO
, "\n");
1074 if((pict_type
&7)!=3)
1076 if (QPStride
>= 0) {
1078 const int count
= mbHeight
* QPStride
;
1079 for(i
=0; i
<(count
>>2); i
++){
1080 ((uint32_t*)c
->nonBQPTable
)[i
] = ((uint32_t*)QP_store
)[i
] & 0x3F3F3F3F;
1082 for(i
<<=2; i
<count
; i
++){
1083 c
->nonBQPTable
[i
] = QP_store
[i
] & 0x3F;
/* row-by-row copy: destination rows use absQPStride, source rows QPStride */
1087 for(i
=0; i
<mbHeight
; i
++) {
1088 for(j
=0; j
<absQPStride
; j
++) {
1089 c
->nonBQPTable
[i
*absQPStride
+j
] = QP_store
[i
*QPStride
+j
] & 0x3F;
1095 av_log(c
, AV_LOG_DEBUG
, "using npp filters 0x%X/0x%X\n",
1096 mode
->lumMode
, mode
->chromMode
);
1098 postProcess(src
[0], srcStride
[0], dst
[0], dstStride
[0],
1099 width
, height
, QP_store
, QPStride
, 0, mode
, c
);
1101 width
= (width
)>>c
->hChromaSubSample
;
1102 height
= (height
)>>c
->vChromaSubSample
;
1106 postProcess(src
[1], srcStride
[1], dst
[1], dstStride
[1],
1107 width
, height
, QP_store
, QPStride
, 1, mode
, c
);
1108 postProcess(src
[2], srcStride
[2], dst
[2], dstStride
[2],
1109 width
, height
, QP_store
, QPStride
, 2, mode
, c
);
1111 else if(srcStride
[1] == dstStride
[1] && srcStride
[2] == dstStride
[2])
1113 linecpy(dst
[1], src
[1], height
, srcStride
[1]);
1114 linecpy(dst
[2], src
[2], height
, srcStride
[2]);
1119 for(y
=0; y
<height
; y
++)
1121 memcpy(&(dst
[1][y
*dstStride
[1]]), &(src
[1][y
*srcStride
[1]]), width
);
1122 memcpy(&(dst
[2][y
*dstStride
[2]]), &(src
[2][y
*srcStride
[2]]), width
);