Reportedly FFmpeg fails to compile on Cygwin with vhook enabled, but FFserver
[libav.git] / libavcodec / libpostproc / postprocess.c
CommitLineData
3057fa66 1/*
9858f773 2 Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
3057fa66 3
b0ac780a
MN
4 AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
5
3057fa66
A
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19*/
20
b304569a
MN
21/**
22 * @file postprocess.c
23 * postprocessing.
24 */
115329f1 25
3057fa66 26/*
b0ac780a
MN
27 C MMX MMX2 3DNow AltiVec
28isVertDC Ec Ec Ec
29isVertMinMaxOk Ec Ec Ec
30doVertLowPass E e e Ec
31doVertDefFilter Ec Ec e e Ec
20646267
RD
32isHorizDC Ec Ec Ec
33isHorizMinMaxOk a E Ec
34doHorizLowPass E e e Ec
35doHorizDefFilter Ec Ec e e Ec
792a5a7c 36do_a_deblock Ec E Ec E
b0ac780a 37deRing E e e* Ecp
3b58b885 38Vertical RKAlgo1 E a a
e5c30e06 39Horizontal RKAlgo1 a a
117e45b0
MN
40Vertical X1# a E E
41Horizontal X1# a E E
acced553
MN
42LinIpolDeinterlace e E E*
43CubicIpolDeinterlace a e e*
44LinBlendDeinterlace e E E*
9b1663fc 45MedianDeinterlace# E Ec Ec
20646267 46TempDeNoiser# E e e Ec
d5a1a995 47
117e45b0
MN
* I don't have a 3DNow CPU -> it's untested, but no one said it doesn't work, so it seems to work
# more or less self-invented filters, so the exactness isn't too meaningful
E = Exact implementation
e = almost exact implementation (slightly different rounding,...)
3057fa66
A
52a = alternative / approximate impl
53c = checked against the other implementations (-vo md5)
b0ac780a 54p = partially optimized, still some work to do
3057fa66
A
55*/
56
57/*
58TODO:
3057fa66 59reduce the time wasted on the mem transfer
3057fa66 60unroll stuff if instructions depend too much on the prior one
3057fa66 61move YScale thing to the end instead of fixing QP
13e00528 62write a faster and higher quality deblocking filter :)
d5a1a995
MN
63make the mainloop more flexible (variable number of blocks at once
64 (the if/else stuff per block is slowing things down)
9f45d04d 65compare the quality & speed of all filters
9f45d04d 66split this huge file
8405b3fd 67optimize c versions
117e45b0 68try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
3057fa66 69...
13e00528
A
70*/
71
a6be8111 72//Changelog: use the CVS log
3057fa66 73
9858f773 74#include "config.h"
3057fa66
A
75#include <inttypes.h>
76#include <stdio.h>
d5a1a995 77#include <stdlib.h>
911879d1 78#include <string.h>
dda87e9f
PL
79#ifdef HAVE_MALLOC_H
80#include <malloc.h>
81#endif
3057fa66 82//#undef HAVE_MMX2
13e00528 83//#define HAVE_3DNOW
3057fa66 84//#undef HAVE_MMX
cc9b0679 85//#undef ARCH_X86
7f16f6e6 86//#define DEBUG_BRIGHTNESS
bba9b16c 87#ifdef USE_FASTMEMCPY
0a87c409 88#include "fastmemcpy.h"
70d4f2da 89#endif
13e00528 90#include "postprocess.h"
c41d972d 91#include "postprocess_internal.h"
bba9b16c
MN
92
93#include "mangle.h" //FIXME should be supressed
3057fa66 94
a7b2871c
RD
95#ifdef HAVE_ALTIVEC_H
96#include <altivec.h>
97#endif
98
ca390e72
ZK
99#ifndef HAVE_MEMALIGN
100#define memalign(a,b) malloc(b)
101#endif
102
e939e1c3
A
103#define MIN(a,b) ((a) > (b) ? (b) : (a))
104#define MAX(a,b) ((a) < (b) ? (b) : (a))
105#define ABS(a) ((a) > 0 ? (a) : (-(a)))
106#define SIGN(a) ((a) > 0 ? 1 : -1)
107
911879d1
MN
108#define GET_MODE_BUFFER_SIZE 500
109#define OPTIONS_ARRAY_SIZE 10
9c9e467d
MN
110#define BLOCK_SIZE 8
111#define TEMP_STRIDE 8
112//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
911879d1 113
3f1d4e96
DB
114#if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0)
115# define attribute_used __attribute__((used))
12eebd26 116# define always_inline __attribute__((always_inline)) inline
3f1d4e96
DB
117#else
118# define attribute_used
12eebd26 119# define always_inline inline
3f1d4e96
DB
120#endif
121
#if defined(ARCH_X86) || defined(ARCH_X86_64)
/*
 * 64-bit constants, aligned to 8 bytes and tagged attribute_used so that they
 * are kept even when no C statement refers to them.
 *   bNN : the byte 0xNN repeated eight times
 *   wNN : the 16-bit word 0x00NN repeated four times
 * NOTE(review): presumably referenced from the inline-asm templates only --
 * confirm in postprocess_template.c.
 */
static uint64_t __attribute__((aligned(8))) attribute_used b00= 0x0000000000000000LL;
static uint64_t __attribute__((aligned(8))) attribute_used b01= 0x0101010101010101LL;
static uint64_t __attribute__((aligned(8))) attribute_used b02= 0x0202020202020202LL;
static uint64_t __attribute__((aligned(8))) attribute_used b08= 0x0808080808080808LL;
static uint64_t __attribute__((aligned(8))) attribute_used b80= 0x8080808080808080LL;

static uint64_t __attribute__((aligned(8))) attribute_used w04= 0x0004000400040004LL;
static uint64_t __attribute__((aligned(8))) attribute_used w05= 0x0005000500050005LL;
static uint64_t __attribute__((aligned(8))) attribute_used w20= 0x0020002000200020LL;
#endif
3057fa66 132
134eb1e5
MN
133static uint8_t clip_table[3*256];
134static uint8_t * const clip_tab= clip_table + 256;
135
4df8ca9d 136static const int verbose= 0;
45b4f285 137
3f1d4e96 138static const int attribute_used deringThreshold= 20;
3057fa66 139
9c9e467d 140
911879d1
MN
141static struct PPFilter filters[]=
142{
143 {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK},
144 {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK},
9c9e467d
MN
145/* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER},
146 {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/
911879d1
MN
147 {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER},
148 {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER},
12eebd26
MN
149 {"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK},
150 {"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK},
911879d1
MN
151 {"dr", "dering", 1, 5, 6, DERING},
152 {"al", "autolevels", 0, 1, 2, LEVEL_FIX},
43d52f76
MN
153 {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
154 {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
155 {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
156 {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER},
9c9e467d 157 {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER},
134eb1e5 158 {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER},
117e45b0 159 {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER},
8aaac435 160 {"fq", "forcequant", 1, 0, 0, FORCE_QUANT},
911879d1
MN
161 {NULL, NULL,0,0,0,0} //End Marker
162};
163
/*
 * Alias table used while parsing a mode string: entry 2*i is an alias and
 * entry 2*i + 1 is the filter list substituted for it. A NULL alias ends the
 * table.
 */
static char *replaceTable[]=
{
    /* alias */  /* expansion */
    "default",   "hdeblock:a,vdeblock:a,dering:a",
    "de",        "hdeblock:a,vdeblock:a,dering:a",
    "fast",      "x1hdeblock:a,x1vdeblock:a,dering:a",
    "fa",        "x1hdeblock:a,x1vdeblock:a,dering:a",
    "ac",        "ha:a:128:7,va:a,dering:a",
    NULL         /* end marker */
};
173
3057fa66 174
053dea12 175#if defined(ARCH_X86) || defined(ARCH_X86_64)
3057fa66
A
/** Issue the x86 "prefetchnta" instruction for the address p. */
static inline void prefetchnta(void *p)
{
    __asm__ __volatile__(
        "prefetchnta (%0)\n\t"
        : /* no outputs */
        : "r" (p)
    );
}
182
/** Issue the x86 "prefetcht0" instruction for the address p. */
static inline void prefetcht0(void *p)
{
    __asm__ __volatile__(
        "prefetcht0 (%0)\n\t"
        : /* no outputs */
        : "r" (p)
    );
}
189
/** Issue the x86 "prefetcht1" instruction for the address p. */
static inline void prefetcht1(void *p)
{
    __asm__ __volatile__(
        "prefetcht1 (%0)\n\t"
        : /* no outputs */
        : "r" (p)
    );
}
196
/** Issue the x86 "prefetcht2" instruction for the address p. */
static inline void prefetcht2(void *p)
{
    __asm__ __volatile__(
        "prefetcht2 (%0)\n\t"
        : /* no outputs */
        : "r" (p)
    );
}
9a722af7 203#endif
3057fa66 204
// The horizontal functions exist only in C because the MMX code is faster with vertical filters and transposing
3057fa66 206
cf5ec61d
MN
207/**
208 * Check if the given 8x8 Block is mostly "flat"
209 */
b0ac780a 210static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
cf5ec61d
MN
211{
212 int numEq= 0;
213 int y;
0426af31 214 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
ec487e5d 215 const int dcThreshold= dcOffset*2 + 1;
0426af31 216
cf5ec61d
MN
217 for(y=0; y<BLOCK_SIZE; y++)
218 {
9c9e467d
MN
219 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
220 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
221 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
222 if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
223 if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
224 if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
225 if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
226 src+= stride;
227 }
228 return numEq > c->ppMode.flatnessThreshold;
229}
230
231/**
232 * Check if the middle 8x8 Block in the given 8x16 block is flat
233 */
234static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){
235 int numEq= 0;
236 int y;
0426af31 237 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
ec487e5d 238 const int dcThreshold= dcOffset*2 + 1;
0426af31 239
9c9e467d
MN
240 src+= stride*4; // src points to begin of the 8x8 Block
241 for(y=0; y<BLOCK_SIZE-1; y++)
242 {
243 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
244 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
245 if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
246 if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
247 if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
248 if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
249 if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
250 if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
cf5ec61d
MN
251 src+= stride;
252 }
9c9e467d 253 return numEq > c->ppMode.flatnessThreshold;
cf5ec61d
MN
254}
255
/**
 * Cheap horizontal range test on an 8-row block.
 *
 * One pixel pair is sampled per row, cycling through the column pairs
 * (0,5), (2,7), (4,1), (6,3). The test fails as soon as a sampled difference
 * falls outside [-2*QP, 2*QP].
 *
 * @param src    top-left pixel of the block
 * @param stride distance between two rows, in bytes
 * @param QP     quantizer that scales the allowed difference
 * @return 1 if every sampled pair is within range, 0 otherwise
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{
    static const int colA[4]= { 0, 2, 4, 6 };
    static const int colB[4]= { 5, 7, 1, 3 };
    int row;

    for(row = 0; row < 8; row++)
    {
        const uint8_t *line= src + row*stride;
        const int a= line[colA[row & 3]];
        const int b= line[colB[row & 3]];

        /* the unsigned compare rejects both diff < -2*QP and diff > 2*QP */
        if((unsigned)(a - b + 2*QP) > 4*QP)
            return 0;
    }
    return 1;
}
cf5ec61d 278
cb482d25
MN
279static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
280{
281#if 1
282#if 1
283 int x;
284 src+= stride*4;
285 for(x=0; x<BLOCK_SIZE; x+=4)
286 {
287 if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0;
288 if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
289 if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
290 if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
291 }
292#else
293 int x;
294 src+= stride*3;
295 for(x=0; x<BLOCK_SIZE; x++)
296 {
297 if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
298 }
299#endif
300 return 1;
301#else
302 int x;
303 src+= stride*4;
304 for(x=0; x<BLOCK_SIZE; x++)
305 {
306 int min=255;
307 int max=0;
308 int y;
309 for(y=0; y<8; y++){
310 int v= src[x + y*stride];
311 if(v>max) max=v;
312 if(v<min) min=v;
313 }
314 if(max-min > 2*QP) return 0;
315 }
cf5ec61d 316 return 1;
cb482d25
MN
317#endif
318}
319
b0ac780a
MN
320static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c){
321 if( isHorizDC_C(src, stride, c) ){
322 if( isHorizMinMaxOk_C(src, stride, c->QP) )
323 return 1;
324 else
325 return 0;
326 }else{
327 return 2;
328 }
329}
330
cb482d25
MN
331static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c){
332 if( isVertDC_C(src, stride, c) ){
333 if( isVertMinMaxOk_C(src, stride, c->QP) )
334 return 1;
335 else
336 return 0;
337 }else{
338 return 2;
339 }
cf5ec61d
MN
340}
341
b0ac780a 342static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
cf5ec61d
MN
343{
344 int y;
345 for(y=0; y<BLOCK_SIZE; y++)
346 {
b0ac780a 347 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
cf5ec61d 348
b0ac780a 349 if(ABS(middleEnergy) < 8*c->QP)
cf5ec61d
MN
350 {
351 const int q=(dst[3] - dst[4])/2;
352 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
353 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
354
355 int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) );
356 d= MAX(d, 0);
357
358 d= (5*d + 32) >> 6;
359 d*= SIGN(-middleEnergy);
360
361 if(q>0)
362 {
363 d= d<0 ? 0 : d;
364 d= d>q ? q : d;
365 }
366 else
367 {
368 d= d>0 ? 0 : d;
369 d= d<q ? q : d;
370 }
371
372 dst[3]-= d;
373 dst[4]+= d;
374 }
375 dst+= stride;
376 }
377}
378
379/**
380 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
381 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
382 */
b0ac780a 383static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
cf5ec61d 384{
cf5ec61d
MN
385 int y;
386 for(y=0; y<BLOCK_SIZE; y++)
387 {
b0ac780a
MN
388 const int first= ABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
389 const int last= ABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
cf5ec61d 390
8c8bbd10
MN
391 int sums[10];
392 sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
393 sums[1] = sums[0] - first + dst[3];
394 sums[2] = sums[1] - first + dst[4];
395 sums[3] = sums[2] - first + dst[5];
396 sums[4] = sums[3] - first + dst[6];
397 sums[5] = sums[4] - dst[0] + dst[7];
398 sums[6] = sums[5] - dst[1] + last;
399 sums[7] = sums[6] - dst[2] + last;
400 sums[8] = sums[7] - dst[3] + last;
401 sums[9] = sums[8] - dst[4] + last;
402
403 dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
404 dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
405 dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
406 dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
407 dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
408 dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
409 dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
410 dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
cf5ec61d
MN
411
412 dst+= stride;
413 }
414}
415
4e4dcbc5 416/**
cc9b0679
MN
417 * Experimental Filter 1 (Horizontal)
418 * will not damage linear gradients
419 * Flat blocks should look like they where passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
420 * can only smooth blocks at the expected locations (it cant smooth them if they did move)
421 * MMX2 version does correct clipping C version doesnt
422 * not identical with the vertical one
4e4dcbc5 423 */
cc9b0679
MN
424static inline void horizX1Filter(uint8_t *src, int stride, int QP)
425{
117e45b0 426 int y;
cc9b0679
MN
427 static uint64_t *lut= NULL;
428 if(lut==NULL)
117e45b0 429 {
cc9b0679
MN
430 int i;
431 lut= (uint64_t*)memalign(8, 256*8);
432 for(i=0; i<256; i++)
117e45b0 433 {
cc9b0679 434 int v= i < 128 ? 2*i : 2*(i-256);
117e45b0 435/*
cc9b0679
MN
436//Simulate 112242211 9-Tap filter
437 uint64_t a= (v/16) & 0xFF;
438 uint64_t b= (v/8) & 0xFF;
439 uint64_t c= (v/4) & 0xFF;
440 uint64_t d= (3*v/8) & 0xFF;
117e45b0 441*/
cc9b0679
MN
442//Simulate piecewise linear interpolation
443 uint64_t a= (v/16) & 0xFF;
444 uint64_t b= (v*3/16) & 0xFF;
445 uint64_t c= (v*5/16) & 0xFF;
446 uint64_t d= (7*v/16) & 0xFF;
447 uint64_t A= (0x100 - a)&0xFF;
448 uint64_t B= (0x100 - b)&0xFF;
449 uint64_t C= (0x100 - c)&0xFF;
450 uint64_t D= (0x100 - c)&0xFF;
451
452 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
453 (D<<24) | (C<<16) | (B<<8) | (A);
454 //lut[i] = (v<<32) | (v<<24);
117e45b0
MN
455 }
456 }
cc9b0679
MN
457
458 for(y=0; y<BLOCK_SIZE; y++)
117e45b0 459 {
cc9b0679
MN
460 int a= src[1] - src[2];
461 int b= src[3] - src[4];
462 int c= src[5] - src[6];
463
464 int d= MAX(ABS(b) - (ABS(a) + ABS(c))/2, 0);
465
466 if(d < QP)
117e45b0 467 {
cc9b0679
MN
468 int v = d * SIGN(-b);
469
470 src[1] +=v/8;
471 src[2] +=v/4;
472 src[3] +=3*v/8;
473 src[4] -=3*v/8;
474 src[5] -=v/4;
475 src[6] -=v/8;
476
117e45b0 477 }
cc9b0679 478 src+=stride;
117e45b0 479 }
cc9b0679
MN
480}
481
12eebd26
MN
482/**
483 * accurate deblock filter
484 */
792a5a7c 485static always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
12eebd26
MN
486 int y;
487 const int QP= c->QP;
488 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
489 const int dcThreshold= dcOffset*2 + 1;
490//START_TIMER
491 src+= step*4; // src points to begin of the 8x8 Block
492 for(y=0; y<8; y++){
493 int numEq= 0;
494
495 if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
496 if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
497 if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
498 if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
499 if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
500 if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
501 if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
502 if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
503 if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
504 if(numEq > c->ppMode.flatnessThreshold){
505 int min, max, x;
115329f1 506
12eebd26
MN
507 if(src[0] > src[step]){
508 max= src[0];
509 min= src[step];
510 }else{
511 max= src[step];
512 min= src[0];
513 }
514 for(x=2; x<8; x+=2){
515 if(src[x*step] > src[(x+1)*step]){
516 if(src[x *step] > max) max= src[ x *step];
517 if(src[(x+1)*step] < min) min= src[(x+1)*step];
518 }else{
519 if(src[(x+1)*step] > max) max= src[(x+1)*step];
520 if(src[ x *step] < min) min= src[ x *step];
521 }
522 }
523 if(max-min < 2*QP){
524 const int first= ABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
525 const int last= ABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
115329f1 526
12eebd26
MN
527 int sums[10];
528 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
529 sums[1] = sums[0] - first + src[3*step];
530 sums[2] = sums[1] - first + src[4*step];
531 sums[3] = sums[2] - first + src[5*step];
532 sums[4] = sums[3] - first + src[6*step];
533 sums[5] = sums[4] - src[0*step] + src[7*step];
534 sums[6] = sums[5] - src[1*step] + last;
535 sums[7] = sums[6] - src[2*step] + last;
536 sums[8] = sums[7] - src[3*step] + last;
537 sums[9] = sums[8] - src[4*step] + last;
538
539 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
540 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
541 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
542 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
543 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
544 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
545 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
546 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
547 }
548 }else{
549 const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
550
551 if(ABS(middleEnergy) < 8*QP)
552 {
553 const int q=(src[3*step] - src[4*step])/2;
554 const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
555 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
556
557 int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) );
558 d= MAX(d, 0);
115329f1 559
12eebd26
MN
560 d= (5*d + 32) >> 6;
561 d*= SIGN(-middleEnergy);
115329f1 562
12eebd26
MN
563 if(q>0)
564 {
565 d= d<0 ? 0 : d;
566 d= d>q ? q : d;
567 }
568 else
569 {
570 d= d>0 ? 0 : d;
571 d= d<q ? q : d;
572 }
115329f1 573
12eebd26
MN
574 src[3*step]-= d;
575 src[4*step]+= d;
576 }
577 }
578
579 src += stride;
580 }
581/*if(step==16){
582 STOP_TIMER("step16")
583}else{
584 STOP_TIMER("stepX")
585}*/
586}
cc9b0679 587
e89952aa 588//Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
cc9b0679 589//Plain C versions
e89952aa
MN
590#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
591#define COMPILE_C
592#endif
593
b0ac780a
MN
594#ifdef ARCH_POWERPC
595#ifdef HAVE_ALTIVEC
596#define COMPILE_ALTIVEC
b0ac780a
MN
597#endif //HAVE_ALTIVEC
598#endif //ARCH_POWERPC
599
053dea12 600#if defined(ARCH_X86) || defined(ARCH_X86_64)
e89952aa
MN
601
602#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
603#define COMPILE_MMX
604#endif
605
606#if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
607#define COMPILE_MMX2
608#endif
609
610#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
611#define COMPILE_3DNOW
612#endif
9c9e467d 613#endif //ARCH_X86
e89952aa
MN
614
615#undef HAVE_MMX
616#undef HAVE_MMX2
617#undef HAVE_3DNOW
b0ac780a 618#undef HAVE_ALTIVEC
e89952aa
MN
619
620#ifdef COMPILE_C
cc9b0679
MN
621#undef HAVE_MMX
622#undef HAVE_MMX2
623#undef HAVE_3DNOW
cc9b0679
MN
624#define RENAME(a) a ## _C
625#include "postprocess_template.c"
e89952aa 626#endif
cc9b0679 627
b0ac780a
MN
628#ifdef ARCH_POWERPC
629#ifdef COMPILE_ALTIVEC
630#undef RENAME
631#define HAVE_ALTIVEC
632#define RENAME(a) a ## _altivec
633#include "postprocess_altivec_template.c"
634#include "postprocess_template.c"
635#endif
636#endif //ARCH_POWERPC
637
cc9b0679 638//MMX versions
e89952aa 639#ifdef COMPILE_MMX
cc9b0679
MN
640#undef RENAME
641#define HAVE_MMX
642#undef HAVE_MMX2
643#undef HAVE_3DNOW
cc9b0679
MN
644#define RENAME(a) a ## _MMX
645#include "postprocess_template.c"
e89952aa 646#endif
cc9b0679
MN
647
648//MMX2 versions
e89952aa 649#ifdef COMPILE_MMX2
cc9b0679
MN
650#undef RENAME
651#define HAVE_MMX
652#define HAVE_MMX2
653#undef HAVE_3DNOW
cc9b0679
MN
654#define RENAME(a) a ## _MMX2
655#include "postprocess_template.c"
e89952aa 656#endif
cc9b0679
MN
657
658//3DNOW versions
e89952aa 659#ifdef COMPILE_3DNOW
cc9b0679
MN
660#undef RENAME
661#define HAVE_MMX
662#undef HAVE_MMX2
663#define HAVE_3DNOW
cc9b0679
MN
664#define RENAME(a) a ## _3DNow
665#include "postprocess_template.c"
e89952aa 666#endif
cc9b0679
MN
667
// Minor note: the HAVE_xyz macros are messed up after this line, so don't rely on them
669
670static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
c41d972d 671 QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc)
cc9b0679 672{
9c9e467d 673 PPContext *c= (PPContext *)vc;
c41d972d 674 PPMode *ppMode= (PPMode *)vm;
9c9e467d
MN
675 c->ppMode= *ppMode; //FIXME
676
cc9b0679
MN
677 // useing ifs here as they are faster than function pointers allthough the
678 // difference wouldnt be messureable here but its much better because
679 // someone might exchange the cpu whithout restarting mplayer ;)
e89952aa 680#ifdef RUNTIME_CPUDETECT
053dea12 681#if defined(ARCH_X86) || defined(ARCH_X86_64)
cc9b0679 682 // ordered per speed fasterst first
fa6ea14e 683 if(c->cpuCaps & PP_CPU_CAPS_MMX2)
9c9e467d 684 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
fa6ea14e 685 else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
9c9e467d 686 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
fa6ea14e 687 else if(c->cpuCaps & PP_CPU_CAPS_MMX)
9c9e467d 688 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
cc9b0679 689 else
9c9e467d 690 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
cc9b0679 691#else
b0ac780a
MN
692#ifdef ARCH_POWERPC
693#ifdef HAVE_ALTIVEC
71487254 694 if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
b0ac780a
MN
695 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
696 else
697#endif
698#endif
9c9e467d 699 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
be44a4d7 700#endif
e89952aa
MN
701#else //RUNTIME_CPUDETECT
702#ifdef HAVE_MMX2
9c9e467d 703 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 704#elif defined (HAVE_3DNOW)
9c9e467d 705 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 706#elif defined (HAVE_MMX)
9c9e467d 707 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
b0ac780a
MN
708#elif defined (HAVE_ALTIVEC)
709 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 710#else
9c9e467d 711 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa
MN
712#endif
713#endif //!RUNTIME_CPUDETECT
117e45b0
MN
714}
715
cc9b0679
MN
716//static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
717// QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
13e00528 718
/**
 * Help text for the -pp command line option.
 *
 * The filter and option names listed here must be the ones the parser really
 * accepts: names are compared with strcmp() against the `filters` table, so
 * the long names of ha/va are "ahdeblock"/"avdeblock" and the long name of fq
 * is the all-lowercase "forcequant".
 */
char *pp_help=
"Available postprocessing filters:\n"
"Filters Options\n"
"short long name short long option Description\n"
"* * a autoq CPU power dependent enabler\n"
" c chrom chrominance filtering enabled\n"
" y nochrom chrominance filtering disabled\n"
" n noluma luma filtering disabled\n"
"hb hdeblock (2 threshold) horizontal deblocking filter\n"
" 1. difference factor: default=32, higher -> more deblocking\n"
" 2. flatness threshold: default=39, lower -> more deblocking\n"
" the h & v deblocking filters share these\n"
" so you can't set different thresholds for h / v\n"
"vb vdeblock (2 threshold) vertical deblocking filter\n"
"ha ahdeblock (2 threshold) horizontal deblocking filter\n"
"va avdeblock (2 threshold) vertical deblocking filter\n"
"h1 x1hdeblock experimental h deblock filter 1\n"
"v1 x1vdeblock experimental v deblock filter 1\n"
"dr dering deringing filter\n"
"al autolevels automatic brightness / contrast\n"
" f fullyrange stretch luminance to (0..255)\n"
"lb linblenddeint linear blend deinterlacer\n"
"li linipoldeint linear interpolating deinterlace\n"
"ci cubicipoldeint cubic interpolating deinterlacer\n"
"md mediandeint median deinterlacer\n"
"fd ffmpegdeint ffmpeg deinterlacer\n"
"l5 lowpass5 FIR lowpass deinterlacer\n"
"de default hb:a,vb:a,dr:a\n"
"fa fast h1:a,v1:a,dr:a\n"
"ac ha:a:128:7,va:a,dr:a\n"
"tn tmpnoise (3 threshold) temporal noise reducer\n"
" 1. <= 2. <= 3. larger -> stronger filtering\n"
"fq forcequant <quantizer> force quantizer\n"
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
;
911879d1 763
c41d972d 764pp_mode_t *pp_get_mode_by_name_and_quality(char *name, int quality)
911879d1
MN
765{
766 char temp[GET_MODE_BUFFER_SIZE];
767 char *p= temp;
9c9e467d 768 char *filterDelimiters= ",/";
911879d1 769 char *optionDelimiters= ":";
c41d972d 770 struct PPMode *ppMode;
911879d1
MN
771 char *filterToken;
772
c41d972d 773 ppMode= memalign(8, sizeof(PPMode));
115329f1 774
c41d972d
MN
775 ppMode->lumMode= 0;
776 ppMode->chromMode= 0;
777 ppMode->maxTmpNoise[0]= 700;
778 ppMode->maxTmpNoise[1]= 1500;
779 ppMode->maxTmpNoise[2]= 3000;
780 ppMode->maxAllowedY= 234;
781 ppMode->minAllowedY= 16;
68bf295e
MN
782 ppMode->baseDcDiff= 256/8;
783 ppMode->flatnessThreshold= 56-16-1;
c41d972d
MN
784 ppMode->maxClippedThreshold= 0.01;
785 ppMode->error=0;
df8d4d0e 786
911879d1
MN
787 strncpy(temp, name, GET_MODE_BUFFER_SIZE);
788
162c9c2e 789 if(verbose>1) printf("pp: %s\n", name);
117e45b0 790
911879d1 791 for(;;){
911879d1 792 char *filterName;
326d40af 793 int q= 1000000; //PP_QUALITY_MAX;
911879d1 794 int chrom=-1;
3da3c8d6 795 int luma=-1;
911879d1
MN
796 char *option;
797 char *options[OPTIONS_ARRAY_SIZE];
798 int i;
799 int filterNameOk=0;
800 int numOfUnknownOptions=0;
801 int enable=1; //does the user want us to enabled or disabled the filter
802
803 filterToken= strtok(p, filterDelimiters);
804 if(filterToken == NULL) break;
117e45b0 805 p+= strlen(filterToken) + 1; // p points to next filterToken
911879d1 806 filterName= strtok(filterToken, optionDelimiters);
162c9c2e 807 if(verbose>1) printf("pp: %s::%s\n", filterToken, filterName);
911879d1
MN
808
809 if(*filterName == '-')
810 {
811 enable=0;
812 filterName++;
813 }
117e45b0 814
911879d1
MN
815 for(;;){ //for all options
816 option= strtok(NULL, optionDelimiters);
817 if(option == NULL) break;
818
162c9c2e 819 if(verbose>1) printf("pp: option: %s\n", option);
911879d1
MN
820 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
821 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
822 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
3da3c8d6 823 else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
911879d1
MN
824 else
825 {
826 options[numOfUnknownOptions] = option;
827 numOfUnknownOptions++;
911879d1
MN
828 }
829 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
830 }
117e45b0 831 options[numOfUnknownOptions] = NULL;
911879d1
MN
832
833 /* replace stuff from the replace Table */
834 for(i=0; replaceTable[2*i]!=NULL; i++)
835 {
836 if(!strcmp(replaceTable[2*i], filterName))
837 {
838 int newlen= strlen(replaceTable[2*i + 1]);
839 int plen;
840 int spaceLeft;
841
842 if(p==NULL) p= temp, *p=0; //last filter
843 else p--, *p=','; //not last filter
844
845 plen= strlen(p);
8cd91a44 846 spaceLeft= p - temp + plen;
911879d1
MN
847 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE)
848 {
c41d972d 849 ppMode->error++;
911879d1
MN
850 break;
851 }
852 memmove(p + newlen, p, plen+1);
853 memcpy(p, replaceTable[2*i + 1], newlen);
854 filterNameOk=1;
855 }
856 }
857
858 for(i=0; filters[i].shortName!=NULL; i++)
859 {
117e45b0 860// printf("Compareing %s, %s, %s\n", filters[i].shortName,filters[i].longName, filterName);
911879d1
MN
861 if( !strcmp(filters[i].longName, filterName)
862 || !strcmp(filters[i].shortName, filterName))
863 {
c41d972d
MN
864 ppMode->lumMode &= ~filters[i].mask;
865 ppMode->chromMode &= ~filters[i].mask;
911879d1
MN
866
867 filterNameOk=1;
868 if(!enable) break; // user wants to disable it
869
3da3c8d6 870 if(q >= filters[i].minLumQuality && luma)
c41d972d 871 ppMode->lumMode|= filters[i].mask;
911879d1
MN
872 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
873 if(q >= filters[i].minChromQuality)
c41d972d 874 ppMode->chromMode|= filters[i].mask;
911879d1
MN
875
876 if(filters[i].mask == LEVEL_FIX)
877 {
878 int o;
c41d972d
MN
879 ppMode->minAllowedY= 16;
880 ppMode->maxAllowedY= 234;
911879d1 881 for(o=0; options[o]!=NULL; o++)
07f8991b 882 {
911879d1
MN
883 if( !strcmp(options[o],"fullyrange")
884 ||!strcmp(options[o],"f"))
885 {
c41d972d
MN
886 ppMode->minAllowedY= 0;
887 ppMode->maxAllowedY= 255;
911879d1
MN
888 numOfUnknownOptions--;
889 }
07f8991b 890 }
911879d1 891 }
117e45b0
MN
892 else if(filters[i].mask == TEMP_NOISE_FILTER)
893 {
894 int o;
895 int numOfNoises=0;
117e45b0
MN
896
897 for(o=0; options[o]!=NULL; o++)
898 {
899 char *tail;
c41d972d 900 ppMode->maxTmpNoise[numOfNoises]=
117e45b0
MN
901 strtol(options[o], &tail, 0);
902 if(tail!=options[o])
903 {
904 numOfNoises++;
905 numOfUnknownOptions--;
906 if(numOfNoises >= 3) break;
907 }
908 }
909 }
115329f1 910 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK
12eebd26 911 || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK)
43d52f76
MN
912 {
913 int o;
914
915 for(o=0; options[o]!=NULL && o<2; o++)
916 {
917 char *tail;
918 int val= strtol(options[o], &tail, 0);
919 if(tail==options[o]) break;
920
921 numOfUnknownOptions--;
c41d972d
MN
922 if(o==0) ppMode->baseDcDiff= val;
923 else ppMode->flatnessThreshold= val;
43d52f76
MN
924 }
925 }
8aaac435
MN
926 else if(filters[i].mask == FORCE_QUANT)
927 {
928 int o;
c41d972d 929 ppMode->forcedQuant= 15;
8aaac435
MN
930
931 for(o=0; options[o]!=NULL && o<1; o++)
932 {
933 char *tail;
934 int val= strtol(options[o], &tail, 0);
935 if(tail==options[o]) break;
936
937 numOfUnknownOptions--;
c41d972d 938 ppMode->forcedQuant= val;
8aaac435
MN
939 }
940 }
911879d1
MN
941 }
942 }
c41d972d
MN
943 if(!filterNameOk) ppMode->error++;
944 ppMode->error += numOfUnknownOptions;
911879d1
MN
945 }
946
c41d972d
MN
947 if(verbose>1) printf("pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
948 if(ppMode->error)
949 {
950 fprintf(stderr, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
951 free(ppMode);
952 return NULL;
953 }
911879d1
MN
954 return ppMode;
955}
956
c41d972d
MN
957void pp_free_mode(pp_mode_t *mode){
958 if(mode) free(mode);
959}
960
88c0bc7e
MN
/**
 * Replaces the buffer at *p with a fresh, zero-filled, aligned buffer.
 *
 * The previous buffer (if any) is freed; its contents are not carried over.
 *
 * @param p         address of the buffer pointer, updated in place
 * @param alignment alignment in bytes, handed to memalign()
 * @param size      number of bytes to allocate and clear
 *
 * On return *p is NULL if size was negative or the allocation failed.
 */
static void reallocAlign(void **p, int alignment, int size){
    free(*p);
    *p= NULL;

    // a negative int would be converted into a huge size_t request
    if(size < 0) return;

    *p= memalign(alignment, size);
    // clearing a failed allocation would dereference NULL
    if(*p) memset(*p, 0, size);
}
966
0426af31 967static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
ec487e5d
MN
968 int mbWidth = (width+15)>>4;
969 int mbHeight= (height+15)>>4;
88c0bc7e
MN
970 int i;
971
972 c->stride= stride;
0426af31 973 c->qpStride= qpStride;
9c9e467d 974
88c0bc7e
MN
975 reallocAlign((void **)&c->tempDst, 8, stride*24);
976 reallocAlign((void **)&c->tempSrc, 8, stride*24);
977 reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
978 reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
9c9e467d
MN
979 for(i=0; i<256; i++)
980 c->yHistogram[i]= width*height/64*15/256;
981
982 for(i=0; i<3; i++)
211c4920 983 {
9c9e467d 984 //Note:the +17*1024 is just there so i dont have to worry about r/w over te end
88c0bc7e
MN
985 reallocAlign((void **)&c->tempBlured[i], 8, stride*mbHeight*16 + 17*1024);
986 reallocAlign((void **)&c->tempBluredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
211c4920 987 }
45b4f285 988
134eb1e5 989 reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
0426af31
MN
990 reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
991 reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
88c0bc7e
MN
992 reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
993}
994
4cfbf61b 995static void global_init(void){
134eb1e5
MN
996 int i;
997 memset(clip_table, 0, 256);
998 for(i=256; i<512; i++)
999 clip_table[i]= i;
1000 memset(clip_table+512, 0, 256);
1001}
1002
88c0bc7e
MN
1003pp_context_t *pp_get_context(int width, int height, int cpuCaps){
1004 PPContext *c= memalign(32, sizeof(PPContext));
88c0bc7e 1005 int stride= (width+15)&(~15); //assumed / will realloc if needed
0426af31 1006 int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
115329f1 1007
134eb1e5
MN
1008 global_init();
1009
88c0bc7e
MN
1010 memset(c, 0, sizeof(PPContext));
1011 c->cpuCaps= cpuCaps;
e9effafd
MN
1012 if(cpuCaps&PP_FORMAT){
1013 c->hChromaSubSample= cpuCaps&0x3;
1014 c->vChromaSubSample= (cpuCaps>>4)&0x3;
1015 }else{
1016 c->hChromaSubSample= 1;
1017 c->vChromaSubSample= 1;
1018 }
88c0bc7e 1019
0426af31 1020 reallocBuffers(c, width, height, stride, qpStride);
115329f1 1021
9c9e467d 1022 c->frameNum=-1;
45b4f285 1023
9c9e467d 1024 return c;
45b4f285
MN
1025}
1026
9cb54f43 1027void pp_free_context(void *vc){
9c9e467d
MN
1028 PPContext *c = (PPContext*)vc;
1029 int i;
115329f1 1030
9c9e467d
MN
1031 for(i=0; i<3; i++) free(c->tempBlured[i]);
1032 for(i=0; i<3; i++) free(c->tempBluredPast[i]);
115329f1 1033
9c9e467d
MN
1034 free(c->tempBlocks);
1035 free(c->yHistogram);
1036 free(c->tempDst);
1037 free(c->tempSrc);
9c9e467d 1038 free(c->deintTemp);
0426af31 1039 free(c->stdQPTable);
ec487e5d 1040 free(c->nonBQPTable);
88c0bc7e 1041 free(c->forcedQPTable);
115329f1 1042
88c0bc7e
MN
1043 memset(c, 0, sizeof(PPContext));
1044
9c9e467d
MN
1045 free(c);
1046}
1047
9cb54f43 1048void pp_postprocess(uint8_t * src[3], int srcStride[3],
9c9e467d 1049 uint8_t * dst[3], int dstStride[3],
ec487e5d 1050 int width, int height,
9c9e467d 1051 QP_STORE_T *QP_store, int QPStride,
c41d972d 1052 pp_mode_t *vm, void *vc, int pict_type)
911879d1 1053{
ec487e5d
MN
1054 int mbWidth = (width+15)>>4;
1055 int mbHeight= (height+15)>>4;
c41d972d 1056 PPMode *mode = (PPMode*)vm;
ec487e5d 1057 PPContext *c = (PPContext*)vc;
655f688d
JM
1058 int minStride= MAX(ABS(srcStride[0]), ABS(dstStride[0]));
1059 int absQPStride = ABS(QPStride);
0426af31 1060
655f688d
JM
1061 // c->stride and c->QPStride are always positive
1062 if(c->stride < minStride || c->qpStride < absQPStride)
115329f1
DB
1063 reallocBuffers(c, width, height,
1064 MAX(minStride, c->stride),
655f688d 1065 MAX(c->qpStride, absQPStride));
9c9e467d 1066
115329f1 1067 if(QP_store==NULL || (mode->lumMode & FORCE_QUANT))
815cbfe7 1068 {
8aaac435 1069 int i;
88c0bc7e 1070 QP_store= c->forcedQPTable;
655f688d 1071 absQPStride = QPStride = 0;
8aaac435 1072 if(mode->lumMode & FORCE_QUANT)
88c0bc7e 1073 for(i=0; i<mbWidth; i++) QP_store[i]= mode->forcedQuant;
8aaac435 1074 else
88c0bc7e 1075 for(i=0; i<mbWidth; i++) QP_store[i]= 1;
815cbfe7 1076 }
0426af31
MN
1077//printf("pict_type:%d\n", pict_type);
1078
1079 if(pict_type & PP_PICT_TYPE_QP2){
1080 int i;
655f688d 1081 const int count= mbHeight * absQPStride;
0426af31
MN
1082 for(i=0; i<(count>>2); i++){
1083 ((uint32_t*)c->stdQPTable)[i] = (((uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1084 }
1085 for(i<<=2; i<count; i++){
1086 c->stdQPTable[i] = QP_store[i]>>1;
1087 }
1088 QP_store= c->stdQPTable;
115329f1 1089 QPStride= absQPStride;
0426af31
MN
1090 }
1091
ec487e5d
MN
1092if(0){
1093int x,y;
1094for(y=0; y<mbHeight; y++){
1095 for(x=0; x<mbWidth; x++){
1096 printf("%2d ", QP_store[x + y*QPStride]);
1097 }
1098 printf("\n");
1099}
1100 printf("\n");
1101}
51e19dcc 1102
0426af31 1103 if((pict_type&7)!=3)
ec487e5d 1104 {
655f688d
JM
1105 if (QPStride >= 0) {
1106 int i;
1107 const int count= mbHeight * QPStride;
1108 for(i=0; i<(count>>2); i++){
1109 ((uint32_t*)c->nonBQPTable)[i] = ((uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1110 }
1111 for(i<<=2; i<count; i++){
1112 c->nonBQPTable[i] = QP_store[i] & 0x3F;
1113 }
1114 } else {
1115 int i,j;
1116 for(i=0; i<mbHeight; i++) {
1117 for(j=0; j<absQPStride; j++) {
1118 c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1119 }
1120 }
ec487e5d
MN
1121 }
1122 }
815cbfe7 1123
df8d4d0e 1124 if(verbose>2)
162c9c2e
MN
1125 {
1126 printf("using npp filters 0x%X/0x%X\n", mode->lumMode, mode->chromMode);
162c9c2e
MN
1127 }
1128
9c9e467d 1129 postProcess(src[0], srcStride[0], dst[0], dstStride[0],
b2a3fcb7 1130 width, height, QP_store, QPStride, 0, mode, c);
911879d1 1131
e9effafd
MN
1132 width = (width )>>c->hChromaSubSample;
1133 height = (height)>>c->vChromaSubSample;
911879d1 1134
4e1349d4
MN
1135 if(mode->chromMode)
1136 {
9c9e467d 1137 postProcess(src[1], srcStride[1], dst[1], dstStride[1],
b2a3fcb7 1138 width, height, QP_store, QPStride, 1, mode, c);
9c9e467d 1139 postProcess(src[2], srcStride[2], dst[2], dstStride[2],
b2a3fcb7 1140 width, height, QP_store, QPStride, 2, mode, c);
4e1349d4 1141 }
9c9e467d 1142 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2])
4e1349d4 1143 {
655f688d
JM
1144 linecpy(dst[1], src[1], height, srcStride[1]);
1145 linecpy(dst[2], src[2], height, srcStride[2]);
4e1349d4
MN
1146 }
1147 else
1148 {
1149 int y;
ec487e5d 1150 for(y=0; y<height; y++)
4e1349d4 1151 {
ec487e5d
MN
1152 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1153 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
4e1349d4
MN
1154 }
1155 }
911879d1
MN
1156}
1157