spaces in path fixes
[libav.git] / libavcodec / libpostproc / postprocess.c
CommitLineData
3057fa66 1/*
9858f773 2 Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
3057fa66 3
b0ac780a
MN
4 AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
5
3057fa66
A
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19*/
20
b304569a
MN
21/**
22 * @file postprocess.c
23 * postprocessing.
24 */
25
3057fa66 26/*
b0ac780a
MN
27 C MMX MMX2 3DNow AltiVec
28isVertDC Ec Ec Ec
29isVertMinMaxOk Ec Ec Ec
30doVertLowPass E e e Ec
31doVertDefFilter Ec Ec e e Ec
20646267
RD
32isHorizDC Ec Ec Ec
33isHorizMinMaxOk a E Ec
34doHorizLowPass E e e Ec
35doHorizDefFilter Ec Ec e e Ec
792a5a7c 36do_a_deblock Ec E Ec E
b0ac780a 37deRing E e e* Ecp
3b58b885 38Vertical RKAlgo1 E a a
e5c30e06 39Horizontal RKAlgo1 a a
117e45b0
MN
40Vertical X1# a E E
41Horizontal X1# a E E
acced553
MN
42LinIpolDeinterlace e E E*
43CubicIpolDeinterlace a e e*
44LinBlendDeinterlace e E E*
9b1663fc 45MedianDeinterlace# E Ec Ec
20646267 46TempDeNoiser# E e e Ec
d5a1a995 47
117e45b0
MN
* I don't have a 3DNow! CPU -> it's untested, but no one said it doesn't work, so it seems to work
# more or less self-invented filters, so the exactness isn't too meaningful
3057fa66 50E = Exact implementation
e = almost exact implementation (slightly different rounding,...)
3057fa66
A
52a = alternative / approximate impl
53c = checked against the other implementations (-vo md5)
b0ac780a 54p = partially optimized, still some work to do
3057fa66
A
55*/
56
57/*
58TODO:
3057fa66 59reduce the time wasted on the mem transfer
3057fa66 60unroll stuff if instructions depend too much on the prior one
3057fa66 61move YScale thing to the end instead of fixing QP
13e00528 62write a faster and higher quality deblocking filter :)
d5a1a995
MN
63make the mainloop more flexible (variable number of blocks at once
64 (the if/else stuff per block is slowing things down)
9f45d04d 65compare the quality & speed of all filters
9f45d04d 66split this huge file
8405b3fd 67optimize c versions
117e45b0 68try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
3057fa66 69...
13e00528
A
70*/
71
a6be8111 72//Changelog: use the CVS log
3057fa66 73
9858f773 74#include "config.h"
3057fa66
A
75#include <inttypes.h>
76#include <stdio.h>
d5a1a995 77#include <stdlib.h>
911879d1 78#include <string.h>
dda87e9f
PL
79#ifdef HAVE_MALLOC_H
80#include <malloc.h>
81#endif
3057fa66 82//#undef HAVE_MMX2
13e00528 83//#define HAVE_3DNOW
3057fa66 84//#undef HAVE_MMX
cc9b0679 85//#undef ARCH_X86
7f16f6e6 86//#define DEBUG_BRIGHTNESS
bba9b16c 87#ifdef USE_FASTMEMCPY
0a87c409 88#include "fastmemcpy.h"
70d4f2da 89#endif
13e00528 90#include "postprocess.h"
c41d972d 91#include "postprocess_internal.h"
bba9b16c
MN
92
93#include "mangle.h" //FIXME should be supressed
3057fa66 94
a7b2871c
RD
95#ifdef HAVE_ALTIVEC_H
96#include <altivec.h>
97#endif
98
ca390e72
ZK
/* Fall back to plain malloc() when memalign() is unavailable; the requested
 * alignment is silently dropped in that case. */
#ifndef HAVE_MEMALIGN
#define memalign(a,b) malloc(b)
#endif

/* Small arithmetic helpers. Each argument may be evaluated more than once,
 * so never pass an expression with side effects. */
#define MIN(a,b) ((a) > (b) ? (b) : (a))
#define MAX(a,b) ((a) < (b) ? (b) : (a))
#define ABS(a) ((a) > 0 ? (a) : (-(a)))
/* Note: SIGN(0) evaluates to -1, not 0. */
#define SIGN(a) ((a) > 0 ? 1 : -1)
911879d1
MN
/* Capacity of the scratch copy of the mode string and of the per-filter
 * option pointer array used by pp_get_mode_by_name_and_quality(). */
#define GET_MODE_BUFFER_SIZE 500
#define OPTIONS_ARRAY_SIZE 10
/* Edge length (in pixels) of the blocks the filters operate on. */
#define BLOCK_SIZE 8
#define TEMP_STRIDE 8
//#define NUM_BLOCKS_AT_ONCE 16 //not used yet

/* GCC > 3.0 understands the "used" and "always_inline" attributes;
 * everything else gets harmless fallbacks. */
#if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0)
#    define attribute_used __attribute__((used))
#    define always_inline __attribute__((always_inline)) inline
#else
#    define attribute_used
#    define always_inline inline
#endif
121
053dea12 122#if defined(ARCH_X86) || defined(ARCH_X86_64)
3f1d4e96 123static uint64_t __attribute__((aligned(8))) attribute_used w05= 0x0005000500050005LL;
39d89b69 124static uint64_t __attribute__((aligned(8))) attribute_used w04= 0x0004000400040004LL;
3f1d4e96
DB
125static uint64_t __attribute__((aligned(8))) attribute_used w20= 0x0020002000200020LL;
126static uint64_t __attribute__((aligned(8))) attribute_used b00= 0x0000000000000000LL;
127static uint64_t __attribute__((aligned(8))) attribute_used b01= 0x0101010101010101LL;
128static uint64_t __attribute__((aligned(8))) attribute_used b02= 0x0202020202020202LL;
129static uint64_t __attribute__((aligned(8))) attribute_used b08= 0x0808080808080808LL;
130static uint64_t __attribute__((aligned(8))) attribute_used b80= 0x8080808080808080LL;
b28daef8 131#endif
3057fa66 132
134eb1e5
MN
133static uint8_t clip_table[3*256];
134static uint8_t * const clip_tab= clip_table + 256;
135
4df8ca9d 136static const int verbose= 0;
45b4f285 137
3f1d4e96 138static const int attribute_used deringThreshold= 20;
3057fa66 139
9c9e467d 140
911879d1
MN
141static struct PPFilter filters[]=
142{
143 {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK},
144 {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK},
9c9e467d
MN
145/* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER},
146 {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/
911879d1
MN
147 {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER},
148 {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER},
12eebd26
MN
149 {"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK},
150 {"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK},
911879d1
MN
151 {"dr", "dering", 1, 5, 6, DERING},
152 {"al", "autolevels", 0, 1, 2, LEVEL_FIX},
43d52f76
MN
153 {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
154 {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
155 {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
156 {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER},
9c9e467d 157 {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER},
134eb1e5 158 {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER},
117e45b0 159 {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER},
8aaac435 160 {"fq", "forcequant", 1, 0, 0, FORCE_QUANT},
911879d1
MN
161 {NULL, NULL,0,0,0,0} //End Marker
162};
163
/* Preset names and their expansions, stored as consecutive
 * (name, replacement) string pairs and terminated by a single NULL. */
static char *replaceTable[]=
{
	"default", 	"hdeblock:a,vdeblock:a,dering:a",
	"de", 		"hdeblock:a,vdeblock:a,dering:a",
	"fast", 	"x1hdeblock:a,x1vdeblock:a,dering:a",
	"fa", 		"x1hdeblock:a,x1vdeblock:a,dering:a",
	"ac", 		"ha:a:128:7,va:a,dering:a",
	NULL //End Marker
};
173
3057fa66 174
#if defined(ARCH_X86) || defined(ARCH_X86_64)
/* Thin wrappers, one per x86 prefetch instruction; each issues the
 * instruction of the same name on the address p. */
static inline void prefetchnta(void *p)
{
	asm volatile(	"prefetchnta (%0)\n\t"
		: : "r" (p)
	);
}

static inline void prefetcht0(void *p)
{
	asm volatile(	"prefetcht0 (%0)\n\t"
		: : "r" (p)
	);
}

static inline void prefetcht1(void *p)
{
	asm volatile(	"prefetcht1 (%0)\n\t"
		: : "r" (p)
	);
}

static inline void prefetcht2(void *p)
{
	asm volatile(	"prefetcht2 (%0)\n\t"
		: : "r" (p)
	);
}
#endif
3057fa66 204
// The horizontal functions exist only in C because the MMX code is faster with vertical filters and transposing
3057fa66 206
cf5ec61d
MN
207/**
208 * Check if the given 8x8 Block is mostly "flat"
209 */
b0ac780a 210static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
cf5ec61d
MN
211{
212 int numEq= 0;
213 int y;
0426af31 214 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
ec487e5d 215 const int dcThreshold= dcOffset*2 + 1;
0426af31 216
cf5ec61d
MN
217 for(y=0; y<BLOCK_SIZE; y++)
218 {
9c9e467d
MN
219 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
220 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
221 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
222 if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
223 if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
224 if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
225 if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
226 src+= stride;
227 }
228 return numEq > c->ppMode.flatnessThreshold;
229}
230
231/**
232 * Check if the middle 8x8 Block in the given 8x16 block is flat
233 */
234static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){
235 int numEq= 0;
236 int y;
0426af31 237 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
ec487e5d 238 const int dcThreshold= dcOffset*2 + 1;
0426af31 239
9c9e467d
MN
240 src+= stride*4; // src points to begin of the 8x8 Block
241 for(y=0; y<BLOCK_SIZE-1; y++)
242 {
243 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
244 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
245 if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
246 if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
247 if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
248 if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
249 if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
250 if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
cf5ec61d
MN
251 src+= stride;
252 }
9c9e467d 253 return numEq > c->ppMode.flatnessThreshold;
cf5ec61d
MN
254}
255
/**
 * Cheap horizontal range check on an 8 row block.
 *
 * Instead of computing min/max per row, one pixel pair per row is sampled
 * (columns 0/5, 2/7, 4/1, 6/3, repeated for rows 4..7) and each pair must
 * differ by at most 2*QP.
 * @param src    first pixel of the first row
 * @param stride distance in bytes between two rows
 * @param QP     quantizer scaling the allowed difference
 * @return 1 if all sampled pairs are within range, 0 otherwise
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{
	int i;
	for(i=0; i<2; i++){
		// (unsigned)(d + 2*QP) > 4*QP  <=>  d < -2*QP || d > 2*QP
		if((unsigned)(src[0] - src[5] + 2*QP) > 4*QP) return 0;
		src += stride;
		if((unsigned)(src[2] - src[7] + 2*QP) > 4*QP) return 0;
		src += stride;
		if((unsigned)(src[4] - src[1] + 2*QP) > 4*QP) return 0;
		src += stride;
		if((unsigned)(src[6] - src[3] + 2*QP) > 4*QP) return 0;
		src += stride;
	}
	return 1;
}
cf5ec61d 278
cb482d25
MN
279static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
280{
281#if 1
282#if 1
283 int x;
284 src+= stride*4;
285 for(x=0; x<BLOCK_SIZE; x+=4)
286 {
287 if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0;
288 if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
289 if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
290 if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
291 }
292#else
293 int x;
294 src+= stride*3;
295 for(x=0; x<BLOCK_SIZE; x++)
296 {
297 if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
298 }
299#endif
300 return 1;
301#else
302 int x;
303 src+= stride*4;
304 for(x=0; x<BLOCK_SIZE; x++)
305 {
306 int min=255;
307 int max=0;
308 int y;
309 for(y=0; y<8; y++){
310 int v= src[x + y*stride];
311 if(v>max) max=v;
312 if(v<min) min=v;
313 }
314 if(max-min > 2*QP) return 0;
315 }
cf5ec61d 316 return 1;
cb482d25
MN
317#endif
318}
319
b0ac780a
MN
320static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c){
321 if( isHorizDC_C(src, stride, c) ){
322 if( isHorizMinMaxOk_C(src, stride, c->QP) )
323 return 1;
324 else
325 return 0;
326 }else{
327 return 2;
328 }
329}
330
cb482d25
MN
331static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c){
332 if( isVertDC_C(src, stride, c) ){
333 if( isVertMinMaxOk_C(src, stride, c->QP) )
334 return 1;
335 else
336 return 0;
337 }else{
338 return 2;
339 }
cf5ec61d
MN
340}
341
b0ac780a 342static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
cf5ec61d
MN
343{
344 int y;
345 for(y=0; y<BLOCK_SIZE; y++)
346 {
b0ac780a 347 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
cf5ec61d 348
b0ac780a 349 if(ABS(middleEnergy) < 8*c->QP)
cf5ec61d
MN
350 {
351 const int q=(dst[3] - dst[4])/2;
352 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
353 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
354
355 int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) );
356 d= MAX(d, 0);
357
358 d= (5*d + 32) >> 6;
359 d*= SIGN(-middleEnergy);
360
361 if(q>0)
362 {
363 d= d<0 ? 0 : d;
364 d= d>q ? q : d;
365 }
366 else
367 {
368 d= d>0 ? 0 : d;
369 d= d<q ? q : d;
370 }
371
372 dst[3]-= d;
373 dst[4]+= d;
374 }
375 dst+= stride;
376 }
377}
378
379/**
380 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
381 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
382 */
b0ac780a 383static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
cf5ec61d 384{
cf5ec61d
MN
385 int y;
386 for(y=0; y<BLOCK_SIZE; y++)
387 {
b0ac780a
MN
388 const int first= ABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
389 const int last= ABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
cf5ec61d 390
8c8bbd10
MN
391 int sums[10];
392 sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
393 sums[1] = sums[0] - first + dst[3];
394 sums[2] = sums[1] - first + dst[4];
395 sums[3] = sums[2] - first + dst[5];
396 sums[4] = sums[3] - first + dst[6];
397 sums[5] = sums[4] - dst[0] + dst[7];
398 sums[6] = sums[5] - dst[1] + last;
399 sums[7] = sums[6] - dst[2] + last;
400 sums[8] = sums[7] - dst[3] + last;
401 sums[9] = sums[8] - dst[4] + last;
402
403 dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
404 dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
405 dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
406 dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
407 dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
408 dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
409 dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
410 dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
cf5ec61d
MN
411
412 dst+= stride;
413 }
414}
415
4e4dcbc5 416/**
cc9b0679
MN
417 * Experimental Filter 1 (Horizontal)
418 * will not damage linear gradients
419 * Flat blocks should look like they where passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
420 * can only smooth blocks at the expected locations (it cant smooth them if they did move)
421 * MMX2 version does correct clipping C version doesnt
422 * not identical with the vertical one
4e4dcbc5 423 */
cc9b0679
MN
424static inline void horizX1Filter(uint8_t *src, int stride, int QP)
425{
117e45b0 426 int y;
cc9b0679
MN
427 static uint64_t *lut= NULL;
428 if(lut==NULL)
117e45b0 429 {
cc9b0679
MN
430 int i;
431 lut= (uint64_t*)memalign(8, 256*8);
432 for(i=0; i<256; i++)
117e45b0 433 {
cc9b0679 434 int v= i < 128 ? 2*i : 2*(i-256);
117e45b0 435/*
cc9b0679
MN
436//Simulate 112242211 9-Tap filter
437 uint64_t a= (v/16) & 0xFF;
438 uint64_t b= (v/8) & 0xFF;
439 uint64_t c= (v/4) & 0xFF;
440 uint64_t d= (3*v/8) & 0xFF;
117e45b0 441*/
cc9b0679
MN
442//Simulate piecewise linear interpolation
443 uint64_t a= (v/16) & 0xFF;
444 uint64_t b= (v*3/16) & 0xFF;
445 uint64_t c= (v*5/16) & 0xFF;
446 uint64_t d= (7*v/16) & 0xFF;
447 uint64_t A= (0x100 - a)&0xFF;
448 uint64_t B= (0x100 - b)&0xFF;
449 uint64_t C= (0x100 - c)&0xFF;
450 uint64_t D= (0x100 - c)&0xFF;
451
452 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
453 (D<<24) | (C<<16) | (B<<8) | (A);
454 //lut[i] = (v<<32) | (v<<24);
117e45b0
MN
455 }
456 }
cc9b0679
MN
457
458 for(y=0; y<BLOCK_SIZE; y++)
117e45b0 459 {
cc9b0679
MN
460 int a= src[1] - src[2];
461 int b= src[3] - src[4];
462 int c= src[5] - src[6];
463
464 int d= MAX(ABS(b) - (ABS(a) + ABS(c))/2, 0);
465
466 if(d < QP)
117e45b0 467 {
cc9b0679
MN
468 int v = d * SIGN(-b);
469
470 src[1] +=v/8;
471 src[2] +=v/4;
472 src[3] +=3*v/8;
473 src[4] -=3*v/8;
474 src[5] -=v/4;
475 src[6] -=v/8;
476
117e45b0 477 }
cc9b0679 478 src+=stride;
117e45b0 479 }
cc9b0679
MN
480}
481
12eebd26
MN
482/**
483 * accurate deblock filter
484 */
792a5a7c 485static always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
12eebd26
MN
486 int y;
487 const int QP= c->QP;
488 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
489 const int dcThreshold= dcOffset*2 + 1;
490//START_TIMER
491 src+= step*4; // src points to begin of the 8x8 Block
492 for(y=0; y<8; y++){
493 int numEq= 0;
494
495 if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
496 if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
497 if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
498 if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
499 if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
500 if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
501 if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
502 if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
503 if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
504 if(numEq > c->ppMode.flatnessThreshold){
505 int min, max, x;
506
507 if(src[0] > src[step]){
508 max= src[0];
509 min= src[step];
510 }else{
511 max= src[step];
512 min= src[0];
513 }
514 for(x=2; x<8; x+=2){
515 if(src[x*step] > src[(x+1)*step]){
516 if(src[x *step] > max) max= src[ x *step];
517 if(src[(x+1)*step] < min) min= src[(x+1)*step];
518 }else{
519 if(src[(x+1)*step] > max) max= src[(x+1)*step];
520 if(src[ x *step] < min) min= src[ x *step];
521 }
522 }
523 if(max-min < 2*QP){
524 const int first= ABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
525 const int last= ABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
526
527 int sums[10];
528 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
529 sums[1] = sums[0] - first + src[3*step];
530 sums[2] = sums[1] - first + src[4*step];
531 sums[3] = sums[2] - first + src[5*step];
532 sums[4] = sums[3] - first + src[6*step];
533 sums[5] = sums[4] - src[0*step] + src[7*step];
534 sums[6] = sums[5] - src[1*step] + last;
535 sums[7] = sums[6] - src[2*step] + last;
536 sums[8] = sums[7] - src[3*step] + last;
537 sums[9] = sums[8] - src[4*step] + last;
538
539 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
540 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
541 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
542 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
543 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
544 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
545 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
546 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
547 }
548 }else{
549 const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
550
551 if(ABS(middleEnergy) < 8*QP)
552 {
553 const int q=(src[3*step] - src[4*step])/2;
554 const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
555 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
556
557 int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) );
558 d= MAX(d, 0);
559
560 d= (5*d + 32) >> 6;
561 d*= SIGN(-middleEnergy);
562
563 if(q>0)
564 {
565 d= d<0 ? 0 : d;
566 d= d>q ? q : d;
567 }
568 else
569 {
570 d= d>0 ? 0 : d;
571 d= d<q ? q : d;
572 }
573
574 src[3*step]-= d;
575 src[4*step]+= d;
576 }
577 }
578
579 src += stride;
580 }
581/*if(step==16){
582 STOP_TIMER("step16")
583}else{
584 STOP_TIMER("stepX")
585}*/
586}
cc9b0679 587
//Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
// Decide which variants of the template get compiled. With
// RUNTIME_CPUDETECT every variant available for the architecture is built;
// otherwise exactly one is selected from the HAVE_* configuration macros.
//Plain C versions
#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
#define COMPILE_C
#endif

#ifdef ARCH_POWERPC
#ifdef HAVE_ALTIVEC
#define COMPILE_ALTIVEC
#endif //HAVE_ALTIVEC
#endif //ARCH_POWERPC

#if defined(ARCH_X86) || defined(ARCH_X86_64)

#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
#define COMPILE_MMX
#endif

#if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
#define COMPILE_MMX2
#endif

#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
#define COMPILE_3DNOW
#endif
#endif //ARCH_X86
e89952aa
MN
614
// Instantiate postprocess_template.c once per selected variant. Before each
// inclusion the HAVE_* macros are set to describe that variant and RENAME()
// is redefined so the generated functions get a distinct suffix.
// The order of the sections matters: each one overrides the macro state left
// behind by the previous one.
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#undef HAVE_ALTIVEC

#ifdef COMPILE_C
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#define RENAME(a) a ## _C
#include "postprocess_template.c"
#endif

#ifdef ARCH_POWERPC
#ifdef COMPILE_ALTIVEC
#undef RENAME
#define HAVE_ALTIVEC
#define RENAME(a) a ## _altivec
#include "postprocess_altivec_template.c"
#include "postprocess_template.c"
#endif
#endif //ARCH_POWERPC

//MMX versions
#ifdef COMPILE_MMX
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#define RENAME(a) a ## _MMX
#include "postprocess_template.c"
#endif

//MMX2 versions
#ifdef COMPILE_MMX2
#undef RENAME
#define HAVE_MMX
#define HAVE_MMX2
#undef HAVE_3DNOW
#define RENAME(a) a ## _MMX2
#include "postprocess_template.c"
#endif

//3DNOW versions
#ifdef COMPILE_3DNOW
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#define HAVE_3DNOW
#define RENAME(a) a ## _3DNow
#include "postprocess_template.c"
#endif
MN
667
// minor note: the HAVE_xyz defines are messed up after that line, so don't rely on them
669
670static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
c41d972d 671 QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc)
cc9b0679 672{
9c9e467d 673 PPContext *c= (PPContext *)vc;
c41d972d 674 PPMode *ppMode= (PPMode *)vm;
9c9e467d
MN
675 c->ppMode= *ppMode; //FIXME
676
cc9b0679
MN
677 // useing ifs here as they are faster than function pointers allthough the
678 // difference wouldnt be messureable here but its much better because
679 // someone might exchange the cpu whithout restarting mplayer ;)
e89952aa 680#ifdef RUNTIME_CPUDETECT
053dea12 681#if defined(ARCH_X86) || defined(ARCH_X86_64)
cc9b0679 682 // ordered per speed fasterst first
fa6ea14e 683 if(c->cpuCaps & PP_CPU_CAPS_MMX2)
9c9e467d 684 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
fa6ea14e 685 else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
9c9e467d 686 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
fa6ea14e 687 else if(c->cpuCaps & PP_CPU_CAPS_MMX)
9c9e467d 688 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
cc9b0679 689 else
9c9e467d 690 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
cc9b0679 691#else
b0ac780a
MN
692#ifdef ARCH_POWERPC
693#ifdef HAVE_ALTIVEC
71487254 694 if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
b0ac780a
MN
695 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
696 else
697#endif
698#endif
9c9e467d 699 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
be44a4d7 700#endif
e89952aa
MN
701#else //RUNTIME_CPUDETECT
702#ifdef HAVE_MMX2
9c9e467d 703 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 704#elif defined (HAVE_3DNOW)
9c9e467d 705 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 706#elif defined (HAVE_MMX)
9c9e467d 707 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
b0ac780a
MN
708#elif defined (HAVE_ALTIVEC)
709 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 710#else
9c9e467d 711 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa
MN
712#endif
713#endif //!RUNTIME_CPUDETECT
117e45b0
MN
714}
715
cc9b0679
MN
716//static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
717// QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
13e00528 718
/* -pp Command line Help
 * The filter names listed here must match the names in filters[] and
 * replaceTable[], which are compared case-sensitively.
*/
char *pp_help=
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"Filters Options\n"
"short long name short long option Description\n"
"* * a autoq CPU power dependent enabler\n"
" c chrom chrominance filtering enabled\n"
" y nochrom chrominance filtering disabled\n"
"hb hdeblock (2 threshold) horizontal deblocking filter\n"
" 1. difference factor: default=32, higher -> more deblocking\n"
" 2. flatness threshold: default=39, lower -> more deblocking\n"
" the h & v deblocking filters share these\n"
" so you can't set different thresholds for h / v\n"
"vb vdeblock (2 threshold) vertical deblocking filter\n"
"ha ahdeblock (2 threshold) horizontal deblocking filter\n"
"va avdeblock (2 threshold) vertical deblocking filter\n"
"h1 x1hdeblock experimental h deblock filter 1\n"
"v1 x1vdeblock experimental v deblock filter 1\n"
"dr dering deringing filter\n"
"al autolevels automatic brightness / contrast\n"
" f fullyrange stretch luminance to (0..255)\n"
"lb linblenddeint linear blend deinterlacer\n"
"li linipoldeint linear interpolating deinterlace\n"
"ci cubicipoldeint cubic interpolating deinterlacer\n"
"md mediandeint median deinterlacer\n"
"fd ffmpegdeint ffmpeg deinterlacer\n"
"de default hb:a,vb:a,dr:a\n"
"fa fast h1:a,v1:a,dr:a\n"
"tn tmpnoise (3 threshold) temporal noise reducer\n"
" 1. <= 2. <= 3. larger -> stronger filtering\n"
"fq forcequant <quantizer> force quantizer\n"
;
911879d1 758
c41d972d 759pp_mode_t *pp_get_mode_by_name_and_quality(char *name, int quality)
911879d1
MN
760{
761 char temp[GET_MODE_BUFFER_SIZE];
762 char *p= temp;
9c9e467d 763 char *filterDelimiters= ",/";
911879d1 764 char *optionDelimiters= ":";
c41d972d 765 struct PPMode *ppMode;
911879d1
MN
766 char *filterToken;
767
c41d972d
MN
768 ppMode= memalign(8, sizeof(PPMode));
769
770 ppMode->lumMode= 0;
771 ppMode->chromMode= 0;
772 ppMode->maxTmpNoise[0]= 700;
773 ppMode->maxTmpNoise[1]= 1500;
774 ppMode->maxTmpNoise[2]= 3000;
775 ppMode->maxAllowedY= 234;
776 ppMode->minAllowedY= 16;
68bf295e
MN
777 ppMode->baseDcDiff= 256/8;
778 ppMode->flatnessThreshold= 56-16-1;
c41d972d
MN
779 ppMode->maxClippedThreshold= 0.01;
780 ppMode->error=0;
df8d4d0e 781
911879d1
MN
782 strncpy(temp, name, GET_MODE_BUFFER_SIZE);
783
162c9c2e 784 if(verbose>1) printf("pp: %s\n", name);
117e45b0 785
911879d1 786 for(;;){
911879d1 787 char *filterName;
326d40af 788 int q= 1000000; //PP_QUALITY_MAX;
911879d1
MN
789 int chrom=-1;
790 char *option;
791 char *options[OPTIONS_ARRAY_SIZE];
792 int i;
793 int filterNameOk=0;
794 int numOfUnknownOptions=0;
795 int enable=1; //does the user want us to enabled or disabled the filter
796
797 filterToken= strtok(p, filterDelimiters);
798 if(filterToken == NULL) break;
117e45b0 799 p+= strlen(filterToken) + 1; // p points to next filterToken
911879d1 800 filterName= strtok(filterToken, optionDelimiters);
162c9c2e 801 if(verbose>1) printf("pp: %s::%s\n", filterToken, filterName);
911879d1
MN
802
803 if(*filterName == '-')
804 {
805 enable=0;
806 filterName++;
807 }
117e45b0 808
911879d1
MN
809 for(;;){ //for all options
810 option= strtok(NULL, optionDelimiters);
811 if(option == NULL) break;
812
162c9c2e 813 if(verbose>1) printf("pp: option: %s\n", option);
911879d1
MN
814 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
815 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
816 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
817 else
818 {
819 options[numOfUnknownOptions] = option;
820 numOfUnknownOptions++;
911879d1
MN
821 }
822 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
823 }
117e45b0 824 options[numOfUnknownOptions] = NULL;
911879d1
MN
825
826 /* replace stuff from the replace Table */
827 for(i=0; replaceTable[2*i]!=NULL; i++)
828 {
829 if(!strcmp(replaceTable[2*i], filterName))
830 {
831 int newlen= strlen(replaceTable[2*i + 1]);
832 int plen;
833 int spaceLeft;
834
835 if(p==NULL) p= temp, *p=0; //last filter
836 else p--, *p=','; //not last filter
837
838 plen= strlen(p);
8cd91a44 839 spaceLeft= p - temp + plen;
911879d1
MN
840 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE)
841 {
c41d972d 842 ppMode->error++;
911879d1
MN
843 break;
844 }
845 memmove(p + newlen, p, plen+1);
846 memcpy(p, replaceTable[2*i + 1], newlen);
847 filterNameOk=1;
848 }
849 }
850
851 for(i=0; filters[i].shortName!=NULL; i++)
852 {
117e45b0 853// printf("Compareing %s, %s, %s\n", filters[i].shortName,filters[i].longName, filterName);
911879d1
MN
854 if( !strcmp(filters[i].longName, filterName)
855 || !strcmp(filters[i].shortName, filterName))
856 {
c41d972d
MN
857 ppMode->lumMode &= ~filters[i].mask;
858 ppMode->chromMode &= ~filters[i].mask;
911879d1
MN
859
860 filterNameOk=1;
861 if(!enable) break; // user wants to disable it
862
863 if(q >= filters[i].minLumQuality)
c41d972d 864 ppMode->lumMode|= filters[i].mask;
911879d1
MN
865 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
866 if(q >= filters[i].minChromQuality)
c41d972d 867 ppMode->chromMode|= filters[i].mask;
911879d1
MN
868
869 if(filters[i].mask == LEVEL_FIX)
870 {
871 int o;
c41d972d
MN
872 ppMode->minAllowedY= 16;
873 ppMode->maxAllowedY= 234;
911879d1 874 for(o=0; options[o]!=NULL; o++)
07f8991b 875 {
911879d1
MN
876 if( !strcmp(options[o],"fullyrange")
877 ||!strcmp(options[o],"f"))
878 {
c41d972d
MN
879 ppMode->minAllowedY= 0;
880 ppMode->maxAllowedY= 255;
911879d1
MN
881 numOfUnknownOptions--;
882 }
07f8991b 883 }
911879d1 884 }
117e45b0
MN
885 else if(filters[i].mask == TEMP_NOISE_FILTER)
886 {
887 int o;
888 int numOfNoises=0;
117e45b0
MN
889
890 for(o=0; options[o]!=NULL; o++)
891 {
892 char *tail;
c41d972d 893 ppMode->maxTmpNoise[numOfNoises]=
117e45b0
MN
894 strtol(options[o], &tail, 0);
895 if(tail!=options[o])
896 {
897 numOfNoises++;
898 numOfUnknownOptions--;
899 if(numOfNoises >= 3) break;
900 }
901 }
902 }
12eebd26
MN
903 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK
904 || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK)
43d52f76
MN
905 {
906 int o;
907
908 for(o=0; options[o]!=NULL && o<2; o++)
909 {
910 char *tail;
911 int val= strtol(options[o], &tail, 0);
912 if(tail==options[o]) break;
913
914 numOfUnknownOptions--;
c41d972d
MN
915 if(o==0) ppMode->baseDcDiff= val;
916 else ppMode->flatnessThreshold= val;
43d52f76
MN
917 }
918 }
8aaac435
MN
919 else if(filters[i].mask == FORCE_QUANT)
920 {
921 int o;
c41d972d 922 ppMode->forcedQuant= 15;
8aaac435
MN
923
924 for(o=0; options[o]!=NULL && o<1; o++)
925 {
926 char *tail;
927 int val= strtol(options[o], &tail, 0);
928 if(tail==options[o]) break;
929
930 numOfUnknownOptions--;
c41d972d 931 ppMode->forcedQuant= val;
8aaac435
MN
932 }
933 }
911879d1
MN
934 }
935 }
c41d972d
MN
936 if(!filterNameOk) ppMode->error++;
937 ppMode->error += numOfUnknownOptions;
911879d1
MN
938 }
939
c41d972d
MN
940 if(verbose>1) printf("pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
941 if(ppMode->error)
942 {
943 fprintf(stderr, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
944 free(ppMode);
945 return NULL;
946 }
911879d1
MN
947 return ppMode;
948}
949
c41d972d
MN
950void pp_free_mode(pp_mode_t *mode){
951 if(mode) free(mode);
952}
953
88c0bc7e
MN
/**
 * Replaces the buffer at *p with a fresh, zero-filled, aligned allocation.
 * The previous buffer (if any) is freed; its contents are NOT preserved.
 * @param p         address of the buffer pointer to replace; *p is NULL on
 *                  return if size is negative or the allocation fails
 * @param alignment alignment handed to memalign()
 * @param size      number of bytes to allocate and clear
 */
static void reallocAlign(void **p, int alignment, int size){
	free(*p);            // free(NULL) is a no-op
	*p= NULL;            // never leave a dangling pointer behind
	if(size < 0) return; // would be converted into a huge size_t request

	*p= memalign(alignment, size);
	if(*p) memset(*p, 0, size); // memset() on a failed allocation is undefined
}
959
0426af31 960static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
ec487e5d
MN
961 int mbWidth = (width+15)>>4;
962 int mbHeight= (height+15)>>4;
88c0bc7e
MN
963 int i;
964
965 c->stride= stride;
0426af31 966 c->qpStride= qpStride;
9c9e467d 967
88c0bc7e
MN
968 reallocAlign((void **)&c->tempDst, 8, stride*24);
969 reallocAlign((void **)&c->tempSrc, 8, stride*24);
970 reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
971 reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
9c9e467d
MN
972 for(i=0; i<256; i++)
973 c->yHistogram[i]= width*height/64*15/256;
974
975 for(i=0; i<3; i++)
211c4920 976 {
9c9e467d 977 //Note:the +17*1024 is just there so i dont have to worry about r/w over te end
88c0bc7e
MN
978 reallocAlign((void **)&c->tempBlured[i], 8, stride*mbHeight*16 + 17*1024);
979 reallocAlign((void **)&c->tempBluredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
211c4920 980 }
45b4f285 981
134eb1e5 982 reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
0426af31
MN
983 reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
984 reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
88c0bc7e
MN
985 reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
986}
987
4cfbf61b 988static void global_init(void){
134eb1e5
MN
989 int i;
990 memset(clip_table, 0, 256);
991 for(i=256; i<512; i++)
992 clip_table[i]= i;
993 memset(clip_table+512, 0, 256);
994}
995
88c0bc7e
MN
996pp_context_t *pp_get_context(int width, int height, int cpuCaps){
997 PPContext *c= memalign(32, sizeof(PPContext));
88c0bc7e 998 int stride= (width+15)&(~15); //assumed / will realloc if needed
0426af31 999 int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
88c0bc7e 1000
134eb1e5
MN
1001 global_init();
1002
88c0bc7e
MN
1003 memset(c, 0, sizeof(PPContext));
1004 c->cpuCaps= cpuCaps;
e9effafd
MN
1005 if(cpuCaps&PP_FORMAT){
1006 c->hChromaSubSample= cpuCaps&0x3;
1007 c->vChromaSubSample= (cpuCaps>>4)&0x3;
1008 }else{
1009 c->hChromaSubSample= 1;
1010 c->vChromaSubSample= 1;
1011 }
88c0bc7e 1012
0426af31 1013 reallocBuffers(c, width, height, stride, qpStride);
88c0bc7e 1014
9c9e467d 1015 c->frameNum=-1;
45b4f285 1016
9c9e467d 1017 return c;
45b4f285
MN
1018}
1019
9cb54f43 1020void pp_free_context(void *vc){
9c9e467d
MN
1021 PPContext *c = (PPContext*)vc;
1022 int i;
1023
1024 for(i=0; i<3; i++) free(c->tempBlured[i]);
1025 for(i=0; i<3; i++) free(c->tempBluredPast[i]);
1026
1027 free(c->tempBlocks);
1028 free(c->yHistogram);
1029 free(c->tempDst);
1030 free(c->tempSrc);
9c9e467d 1031 free(c->deintTemp);
0426af31 1032 free(c->stdQPTable);
ec487e5d 1033 free(c->nonBQPTable);
88c0bc7e
MN
1034 free(c->forcedQPTable);
1035
1036 memset(c, 0, sizeof(PPContext));
1037
9c9e467d
MN
1038 free(c);
1039}
1040
9cb54f43 1041void pp_postprocess(uint8_t * src[3], int srcStride[3],
9c9e467d 1042 uint8_t * dst[3], int dstStride[3],
ec487e5d 1043 int width, int height,
9c9e467d 1044 QP_STORE_T *QP_store, int QPStride,
c41d972d 1045 pp_mode_t *vm, void *vc, int pict_type)
911879d1 1046{
ec487e5d
MN
1047 int mbWidth = (width+15)>>4;
1048 int mbHeight= (height+15)>>4;
c41d972d 1049 PPMode *mode = (PPMode*)vm;
ec487e5d 1050 PPContext *c = (PPContext*)vc;
88c0bc7e 1051 int minStride= MAX(srcStride[0], dstStride[0]);
0426af31
MN
1052
1053 if(c->stride < minStride || c->qpStride < QPStride)
1054 reallocBuffers(c, width, height,
1055 MAX(minStride, c->stride),
1056 MAX(c->qpStride, QPStride));
9c9e467d 1057
8aaac435 1058 if(QP_store==NULL || (mode->lumMode & FORCE_QUANT))
815cbfe7 1059 {
8aaac435 1060 int i;
88c0bc7e 1061 QP_store= c->forcedQPTable;
9c9e467d 1062 QPStride= 0;
8aaac435 1063 if(mode->lumMode & FORCE_QUANT)
88c0bc7e 1064 for(i=0; i<mbWidth; i++) QP_store[i]= mode->forcedQuant;
8aaac435 1065 else
88c0bc7e 1066 for(i=0; i<mbWidth; i++) QP_store[i]= 1;
815cbfe7 1067 }
0426af31
MN
1068//printf("pict_type:%d\n", pict_type);
1069
1070 if(pict_type & PP_PICT_TYPE_QP2){
1071 int i;
1072 const int count= mbHeight * QPStride;
1073 for(i=0; i<(count>>2); i++){
1074 ((uint32_t*)c->stdQPTable)[i] = (((uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1075 }
1076 for(i<<=2; i<count; i++){
1077 c->stdQPTable[i] = QP_store[i]>>1;
1078 }
1079 QP_store= c->stdQPTable;
1080 }
1081
ec487e5d
MN
1082if(0){
1083int x,y;
1084for(y=0; y<mbHeight; y++){
1085 for(x=0; x<mbWidth; x++){
1086 printf("%2d ", QP_store[x + y*QPStride]);
1087 }
1088 printf("\n");
1089}
1090 printf("\n");
1091}
51e19dcc 1092
0426af31 1093 if((pict_type&7)!=3)
ec487e5d 1094 {
0426af31
MN
1095 int i;
1096 const int count= mbHeight * QPStride;
1097 for(i=0; i<(count>>2); i++){
2e90b37c 1098 ((uint32_t*)c->nonBQPTable)[i] = ((uint32_t*)QP_store)[i] & 0x3F3F3F3F;
0426af31
MN
1099 }
1100 for(i<<=2; i<count; i++){
2e90b37c 1101 c->nonBQPTable[i] = QP_store[i] & 0x3F;
ec487e5d
MN
1102 }
1103 }
815cbfe7 1104
df8d4d0e 1105 if(verbose>2)
162c9c2e
MN
1106 {
1107 printf("using npp filters 0x%X/0x%X\n", mode->lumMode, mode->chromMode);
162c9c2e
MN
1108 }
1109
9c9e467d 1110 postProcess(src[0], srcStride[0], dst[0], dstStride[0],
b2a3fcb7 1111 width, height, QP_store, QPStride, 0, mode, c);
911879d1 1112
e9effafd
MN
1113 width = (width )>>c->hChromaSubSample;
1114 height = (height)>>c->vChromaSubSample;
911879d1 1115
4e1349d4
MN
1116 if(mode->chromMode)
1117 {
9c9e467d 1118 postProcess(src[1], srcStride[1], dst[1], dstStride[1],
b2a3fcb7 1119 width, height, QP_store, QPStride, 1, mode, c);
9c9e467d 1120 postProcess(src[2], srcStride[2], dst[2], dstStride[2],
b2a3fcb7 1121 width, height, QP_store, QPStride, 2, mode, c);
4e1349d4 1122 }
9c9e467d 1123 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2])
4e1349d4 1124 {
ec487e5d
MN
1125 memcpy(dst[1], src[1], srcStride[1]*height);
1126 memcpy(dst[2], src[2], srcStride[2]*height);
4e1349d4
MN
1127 }
1128 else
1129 {
1130 int y;
ec487e5d 1131 for(y=0; y<height; y++)
4e1349d4 1132 {
ec487e5d
MN
1133 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1134 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
4e1349d4
MN
1135 }
1136 }
911879d1
MN
1137}
1138