typo fix by (Marcin 'Morgoth' Kurek <morgoth6 at box43 dot pl>)
[libav.git] / libavcodec / libpostproc / postprocess.c
CommitLineData
3057fa66 1/*
9858f773 2 Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
3057fa66 3
b0ac780a
MN
4 AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
5
3057fa66
A
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19*/
20
b304569a
MN
21/**
22 * @file postprocess.c
23 * postprocessing.
24 */
25
3057fa66 26/*
b0ac780a
MN
27 C MMX MMX2 3DNow AltiVec
28isVertDC Ec Ec Ec
29isVertMinMaxOk Ec Ec Ec
30doVertLowPass E e e Ec
31doVertDefFilter Ec Ec e e Ec
20646267
RD
32isHorizDC Ec Ec Ec
33isHorizMinMaxOk a E Ec
34doHorizLowPass E e e Ec
35doHorizDefFilter Ec Ec e e Ec
792a5a7c 36do_a_deblock Ec E Ec E
b0ac780a 37deRing E e e* Ecp
3b58b885 38Vertical RKAlgo1 E a a
e5c30e06 39Horizontal RKAlgo1 a a
117e45b0
MN
40Vertical X1# a E E
41Horizontal X1# a E E
acced553
MN
42LinIpolDeinterlace e E E*
43CubicIpolDeinterlace a e e*
44LinBlendDeinterlace e E E*
9b1663fc 45MedianDeinterlace# E Ec Ec
20646267 46TempDeNoiser# E e e Ec
d5a1a995 47
* I don't have a 3DNow! CPU -> it's untested, but no one said it doesn't work, so it seems to work
# more or less self-invented filters, so the exactness isn't too meaningful
E = Exact implementation
e = almost exact implementation (slightly different rounding,...)
3057fa66
A
52a = alternative / approximate impl
53c = checked against the other implementations (-vo md5)
b0ac780a 54p = partially optimized, still some work to do
3057fa66
A
55*/
56
57/*
58TODO:
3057fa66 59reduce the time wasted on the mem transfer
3057fa66 60unroll stuff if instructions depend too much on the prior one
3057fa66 61move YScale thing to the end instead of fixing QP
13e00528 62write a faster and higher quality deblocking filter :)
d5a1a995
MN
63make the mainloop more flexible (variable number of blocks at once
64 (the if/else stuff per block is slowing things down)
9f45d04d 65compare the quality & speed of all filters
9f45d04d 66split this huge file
8405b3fd 67optimize c versions
117e45b0 68try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
3057fa66 69...
13e00528
A
70*/
71
a6be8111 72//Changelog: use the CVS log
3057fa66 73
9858f773 74#include "config.h"
3057fa66
A
75#include <inttypes.h>
76#include <stdio.h>
d5a1a995 77#include <stdlib.h>
911879d1 78#include <string.h>
dda87e9f
PL
79#ifdef HAVE_MALLOC_H
80#include <malloc.h>
81#endif
3057fa66 82//#undef HAVE_MMX2
13e00528 83//#define HAVE_3DNOW
3057fa66 84//#undef HAVE_MMX
cc9b0679 85//#undef ARCH_X86
7f16f6e6 86//#define DEBUG_BRIGHTNESS
bba9b16c 87#ifdef USE_FASTMEMCPY
0a87c409 88#include "fastmemcpy.h"
70d4f2da 89#endif
13e00528 90#include "postprocess.h"
c41d972d 91#include "postprocess_internal.h"
bba9b16c
MN
92
93#include "mangle.h" //FIXME should be supressed
3057fa66 94
a7b2871c
RD
95#ifdef HAVE_ALTIVEC_H
96#include <altivec.h>
97#endif
98
ca390e72
ZK
99#ifndef HAVE_MEMALIGN
100#define memalign(a,b) malloc(b)
101#endif
102
e939e1c3
A
103#define MIN(a,b) ((a) > (b) ? (b) : (a))
104#define MAX(a,b) ((a) < (b) ? (b) : (a))
105#define ABS(a) ((a) > 0 ? (a) : (-(a)))
106#define SIGN(a) ((a) > 0 ? 1 : -1)
107
911879d1
MN
108#define GET_MODE_BUFFER_SIZE 500
109#define OPTIONS_ARRAY_SIZE 10
9c9e467d
MN
110#define BLOCK_SIZE 8
111#define TEMP_STRIDE 8
112//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
911879d1 113
3f1d4e96
DB
114#if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0)
115# define attribute_used __attribute__((used))
12eebd26 116# define always_inline __attribute__((always_inline)) inline
3f1d4e96
DB
117#else
118# define attribute_used
12eebd26 119# define always_inline inline
3f1d4e96
DB
120#endif
121
cc9b0679 122#ifdef ARCH_X86
3f1d4e96 123static uint64_t __attribute__((aligned(8))) attribute_used w05= 0x0005000500050005LL;
39d89b69 124static uint64_t __attribute__((aligned(8))) attribute_used w04= 0x0004000400040004LL;
3f1d4e96
DB
125static uint64_t __attribute__((aligned(8))) attribute_used w20= 0x0020002000200020LL;
126static uint64_t __attribute__((aligned(8))) attribute_used b00= 0x0000000000000000LL;
127static uint64_t __attribute__((aligned(8))) attribute_used b01= 0x0101010101010101LL;
128static uint64_t __attribute__((aligned(8))) attribute_used b02= 0x0202020202020202LL;
129static uint64_t __attribute__((aligned(8))) attribute_used b08= 0x0808080808080808LL;
130static uint64_t __attribute__((aligned(8))) attribute_used b80= 0x8080808080808080LL;
b28daef8 131#endif
3057fa66 132
134eb1e5
MN
133static uint8_t clip_table[3*256];
134static uint8_t * const clip_tab= clip_table + 256;
135
4df8ca9d 136static const int verbose= 0;
45b4f285 137
3f1d4e96 138static const int attribute_used deringThreshold= 20;
3057fa66 139
9c9e467d 140
911879d1
MN
141static struct PPFilter filters[]=
142{
143 {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK},
144 {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK},
9c9e467d
MN
145/* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER},
146 {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/
911879d1
MN
147 {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER},
148 {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER},
12eebd26
MN
149 {"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK},
150 {"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK},
911879d1
MN
151 {"dr", "dering", 1, 5, 6, DERING},
152 {"al", "autolevels", 0, 1, 2, LEVEL_FIX},
43d52f76
MN
153 {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
154 {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
155 {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
156 {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER},
9c9e467d 157 {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER},
134eb1e5 158 {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER},
117e45b0 159 {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER},
8aaac435 160 {"fq", "forcequant", 1, 0, 0, FORCE_QUANT},
911879d1
MN
161 {NULL, NULL,0,0,0,0} //End Marker
162};
163
/**
 * Alias table: pairs of (alias name, filter string it expands to).
 * Even indices hold the alias, odd indices the replacement text.
 * Terminated by a single NULL at an even index.
 */
static const char * const replaceTable[]=
{
    "default", "hdeblock:a,vdeblock:a,dering:a",
    "de",      "hdeblock:a,vdeblock:a,dering:a",
    "fast",    "x1hdeblock:a,x1vdeblock:a,dering:a",
    "fa",      "x1hdeblock:a,x1vdeblock:a,dering:a",
    "ac",      "ha:a:128:7,va:a,dering:a",
    NULL //End Marker
};
173
3057fa66 174
#ifdef ARCH_X86
/* Thin wrappers that each issue one x86 prefetch instruction for address p. */

/** Issue a "prefetchnta" instruction for the address p. */
static inline void prefetchnta(void *p)
{
    __asm__ volatile( "prefetchnta (%0)\n\t"
        : : "r" (p)
    );
}

/** Issue a "prefetcht0" instruction for the address p. */
static inline void prefetcht0(void *p)
{
    __asm__ volatile( "prefetcht0 (%0)\n\t"
        : : "r" (p)
    );
}

/** Issue a "prefetcht1" instruction for the address p. */
static inline void prefetcht1(void *p)
{
    __asm__ volatile( "prefetcht1 (%0)\n\t"
        : : "r" (p)
    );
}

/** Issue a "prefetcht2" instruction for the address p. */
static inline void prefetcht2(void *p)
{
    __asm__ volatile( "prefetcht2 (%0)\n\t"
        : : "r" (p)
    );
}
#endif
3057fa66 204
// The horizontal functions exist only in C because the MMX code is faster with vertical filters and transposing
3057fa66 206
cf5ec61d
MN
207/**
208 * Check if the given 8x8 Block is mostly "flat"
209 */
b0ac780a 210static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
cf5ec61d
MN
211{
212 int numEq= 0;
213 int y;
0426af31 214 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
ec487e5d 215 const int dcThreshold= dcOffset*2 + 1;
0426af31 216
cf5ec61d
MN
217 for(y=0; y<BLOCK_SIZE; y++)
218 {
9c9e467d
MN
219 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
220 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
221 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
222 if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
223 if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
224 if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
225 if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
226 src+= stride;
227 }
228 return numEq > c->ppMode.flatnessThreshold;
229}
230
231/**
232 * Check if the middle 8x8 Block in the given 8x16 block is flat
233 */
234static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){
235 int numEq= 0;
236 int y;
0426af31 237 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
ec487e5d 238 const int dcThreshold= dcOffset*2 + 1;
0426af31 239
9c9e467d
MN
240 src+= stride*4; // src points to begin of the 8x8 Block
241 for(y=0; y<BLOCK_SIZE-1; y++)
242 {
243 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
244 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
245 if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
246 if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
247 if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
248 if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
249 if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
250 if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
cf5ec61d
MN
251 src+= stride;
252 }
9c9e467d 253 return numEq > c->ppMode.flatnessThreshold;
cf5ec61d
MN
254}
255
/**
 * Cheap horizontal range check on an 8x8 block.
 *
 * Instead of computing a real min/max, one pixel pair is sampled per row
 * (columns 0/5, 2/7, 4/1, 6/3, repeated for rows 4..7) and each pair must
 * differ by at most 2*QP.
 *
 * @param src    top-left pixel of the block
 * @param stride distance in bytes between two rows
 * @param QP     quantizer used to scale the allowed difference
 * @return 1 if every sampled pair is within range, 0 otherwise
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{
    /* column pair sampled on row (y & 3) */
    static const uint8_t pairs[4][2]= { {0, 5}, {2, 7}, {4, 1}, {6, 3} };
    int y;

    for(y=0; y<8; y++){
        const uint8_t *pair= pairs[y & 3];
        /* the unsigned compare tests -2*QP <= diff <= 2*QP in one step */
        if((unsigned)(src[pair[0]] - src[pair[1]] + 2*QP) > (unsigned)(4*QP)) return 0;
        src += stride;
    }
    return 1;
}
cf5ec61d 278
cb482d25
MN
279static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
280{
281#if 1
282#if 1
283 int x;
284 src+= stride*4;
285 for(x=0; x<BLOCK_SIZE; x+=4)
286 {
287 if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0;
288 if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
289 if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
290 if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
291 }
292#else
293 int x;
294 src+= stride*3;
295 for(x=0; x<BLOCK_SIZE; x++)
296 {
297 if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
298 }
299#endif
300 return 1;
301#else
302 int x;
303 src+= stride*4;
304 for(x=0; x<BLOCK_SIZE; x++)
305 {
306 int min=255;
307 int max=0;
308 int y;
309 for(y=0; y<8; y++){
310 int v= src[x + y*stride];
311 if(v>max) max=v;
312 if(v<min) min=v;
313 }
314 if(max-min > 2*QP) return 0;
315 }
cf5ec61d 316 return 1;
cb482d25
MN
317#endif
318}
319
b0ac780a
MN
320static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c){
321 if( isHorizDC_C(src, stride, c) ){
322 if( isHorizMinMaxOk_C(src, stride, c->QP) )
323 return 1;
324 else
325 return 0;
326 }else{
327 return 2;
328 }
329}
330
cb482d25
MN
331static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c){
332 if( isVertDC_C(src, stride, c) ){
333 if( isVertMinMaxOk_C(src, stride, c->QP) )
334 return 1;
335 else
336 return 0;
337 }else{
338 return 2;
339 }
cf5ec61d
MN
340}
341
b0ac780a 342static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
cf5ec61d
MN
343{
344 int y;
345 for(y=0; y<BLOCK_SIZE; y++)
346 {
b0ac780a 347 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
cf5ec61d 348
b0ac780a 349 if(ABS(middleEnergy) < 8*c->QP)
cf5ec61d
MN
350 {
351 const int q=(dst[3] - dst[4])/2;
352 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
353 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
354
355 int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) );
356 d= MAX(d, 0);
357
358 d= (5*d + 32) >> 6;
359 d*= SIGN(-middleEnergy);
360
361 if(q>0)
362 {
363 d= d<0 ? 0 : d;
364 d= d>q ? q : d;
365 }
366 else
367 {
368 d= d>0 ? 0 : d;
369 d= d<q ? q : d;
370 }
371
372 dst[3]-= d;
373 dst[4]+= d;
374 }
375 dst+= stride;
376 }
377}
378
379/**
380 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
381 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
382 */
b0ac780a 383static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
cf5ec61d 384{
cf5ec61d
MN
385 int y;
386 for(y=0; y<BLOCK_SIZE; y++)
387 {
b0ac780a
MN
388 const int first= ABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
389 const int last= ABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
cf5ec61d 390
8c8bbd10
MN
391 int sums[10];
392 sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
393 sums[1] = sums[0] - first + dst[3];
394 sums[2] = sums[1] - first + dst[4];
395 sums[3] = sums[2] - first + dst[5];
396 sums[4] = sums[3] - first + dst[6];
397 sums[5] = sums[4] - dst[0] + dst[7];
398 sums[6] = sums[5] - dst[1] + last;
399 sums[7] = sums[6] - dst[2] + last;
400 sums[8] = sums[7] - dst[3] + last;
401 sums[9] = sums[8] - dst[4] + last;
402
403 dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
404 dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
405 dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
406 dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
407 dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
408 dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
409 dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
410 dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
cf5ec61d
MN
411
412 dst+= stride;
413 }
414}
415
4e4dcbc5 416/**
cc9b0679
MN
417 * Experimental Filter 1 (Horizontal)
418 * will not damage linear gradients
419 * Flat blocks should look like they where passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
420 * can only smooth blocks at the expected locations (it cant smooth them if they did move)
421 * MMX2 version does correct clipping C version doesnt
422 * not identical with the vertical one
4e4dcbc5 423 */
cc9b0679
MN
424static inline void horizX1Filter(uint8_t *src, int stride, int QP)
425{
117e45b0 426 int y;
cc9b0679
MN
427 static uint64_t *lut= NULL;
428 if(lut==NULL)
117e45b0 429 {
cc9b0679
MN
430 int i;
431 lut= (uint64_t*)memalign(8, 256*8);
432 for(i=0; i<256; i++)
117e45b0 433 {
cc9b0679 434 int v= i < 128 ? 2*i : 2*(i-256);
117e45b0 435/*
cc9b0679
MN
436//Simulate 112242211 9-Tap filter
437 uint64_t a= (v/16) & 0xFF;
438 uint64_t b= (v/8) & 0xFF;
439 uint64_t c= (v/4) & 0xFF;
440 uint64_t d= (3*v/8) & 0xFF;
117e45b0 441*/
cc9b0679
MN
442//Simulate piecewise linear interpolation
443 uint64_t a= (v/16) & 0xFF;
444 uint64_t b= (v*3/16) & 0xFF;
445 uint64_t c= (v*5/16) & 0xFF;
446 uint64_t d= (7*v/16) & 0xFF;
447 uint64_t A= (0x100 - a)&0xFF;
448 uint64_t B= (0x100 - b)&0xFF;
449 uint64_t C= (0x100 - c)&0xFF;
450 uint64_t D= (0x100 - c)&0xFF;
451
452 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
453 (D<<24) | (C<<16) | (B<<8) | (A);
454 //lut[i] = (v<<32) | (v<<24);
117e45b0
MN
455 }
456 }
cc9b0679
MN
457
458 for(y=0; y<BLOCK_SIZE; y++)
117e45b0 459 {
cc9b0679
MN
460 int a= src[1] - src[2];
461 int b= src[3] - src[4];
462 int c= src[5] - src[6];
463
464 int d= MAX(ABS(b) - (ABS(a) + ABS(c))/2, 0);
465
466 if(d < QP)
117e45b0 467 {
cc9b0679
MN
468 int v = d * SIGN(-b);
469
470 src[1] +=v/8;
471 src[2] +=v/4;
472 src[3] +=3*v/8;
473 src[4] -=3*v/8;
474 src[5] -=v/4;
475 src[6] -=v/8;
476
117e45b0 477 }
cc9b0679 478 src+=stride;
117e45b0 479 }
cc9b0679
MN
480}
481
12eebd26
MN
482/**
483 * accurate deblock filter
484 */
792a5a7c 485static always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
12eebd26
MN
486 int y;
487 const int QP= c->QP;
488 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
489 const int dcThreshold= dcOffset*2 + 1;
490//START_TIMER
491 src+= step*4; // src points to begin of the 8x8 Block
492 for(y=0; y<8; y++){
493 int numEq= 0;
494
495 if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
496 if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
497 if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
498 if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
499 if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
500 if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
501 if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
502 if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
503 if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
504 if(numEq > c->ppMode.flatnessThreshold){
505 int min, max, x;
506
507 if(src[0] > src[step]){
508 max= src[0];
509 min= src[step];
510 }else{
511 max= src[step];
512 min= src[0];
513 }
514 for(x=2; x<8; x+=2){
515 if(src[x*step] > src[(x+1)*step]){
516 if(src[x *step] > max) max= src[ x *step];
517 if(src[(x+1)*step] < min) min= src[(x+1)*step];
518 }else{
519 if(src[(x+1)*step] > max) max= src[(x+1)*step];
520 if(src[ x *step] < min) min= src[ x *step];
521 }
522 }
523 if(max-min < 2*QP){
524 const int first= ABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
525 const int last= ABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
526
527 int sums[10];
528 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
529 sums[1] = sums[0] - first + src[3*step];
530 sums[2] = sums[1] - first + src[4*step];
531 sums[3] = sums[2] - first + src[5*step];
532 sums[4] = sums[3] - first + src[6*step];
533 sums[5] = sums[4] - src[0*step] + src[7*step];
534 sums[6] = sums[5] - src[1*step] + last;
535 sums[7] = sums[6] - src[2*step] + last;
536 sums[8] = sums[7] - src[3*step] + last;
537 sums[9] = sums[8] - src[4*step] + last;
538
539 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
540 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
541 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
542 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
543 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
544 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
545 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
546 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
547 }
548 }else{
549 const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
550
551 if(ABS(middleEnergy) < 8*QP)
552 {
553 const int q=(src[3*step] - src[4*step])/2;
554 const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
555 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
556
557 int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) );
558 d= MAX(d, 0);
559
560 d= (5*d + 32) >> 6;
561 d*= SIGN(-middleEnergy);
562
563 if(q>0)
564 {
565 d= d<0 ? 0 : d;
566 d= d>q ? q : d;
567 }
568 else
569 {
570 d= d>0 ? 0 : d;
571 d= d<q ? q : d;
572 }
573
574 src[3*step]-= d;
575 src[4*step]+= d;
576 }
577 }
578
579 src += stride;
580 }
581/*if(step==16){
582 STOP_TIMER("step16")
583}else{
584 STOP_TIMER("stepX")
585}*/
586}
cc9b0679 587
e89952aa 588//Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
cc9b0679 589//Plain C versions
e89952aa
MN
590#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
591#define COMPILE_C
592#endif
593
b0ac780a
MN
594#ifdef ARCH_POWERPC
595#ifdef HAVE_ALTIVEC
596#define COMPILE_ALTIVEC
b0ac780a
MN
597#endif //HAVE_ALTIVEC
598#endif //ARCH_POWERPC
599
9c9e467d 600#ifdef ARCH_X86
e89952aa
MN
601
602#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
603#define COMPILE_MMX
604#endif
605
606#if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
607#define COMPILE_MMX2
608#endif
609
610#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
611#define COMPILE_3DNOW
612#endif
9c9e467d 613#endif //ARCH_X86
e89952aa
MN
614
615#undef HAVE_MMX
616#undef HAVE_MMX2
617#undef HAVE_3DNOW
b0ac780a 618#undef HAVE_ALTIVEC
e89952aa
MN
619#undef ARCH_X86
620
621#ifdef COMPILE_C
cc9b0679
MN
622#undef HAVE_MMX
623#undef HAVE_MMX2
624#undef HAVE_3DNOW
625#undef ARCH_X86
626#define RENAME(a) a ## _C
627#include "postprocess_template.c"
e89952aa 628#endif
cc9b0679 629
b0ac780a
MN
630#ifdef ARCH_POWERPC
631#ifdef COMPILE_ALTIVEC
632#undef RENAME
633#define HAVE_ALTIVEC
634#define RENAME(a) a ## _altivec
635#include "postprocess_altivec_template.c"
636#include "postprocess_template.c"
637#endif
638#endif //ARCH_POWERPC
639
cc9b0679 640//MMX versions
e89952aa 641#ifdef COMPILE_MMX
cc9b0679
MN
642#undef RENAME
643#define HAVE_MMX
644#undef HAVE_MMX2
645#undef HAVE_3DNOW
646#define ARCH_X86
647#define RENAME(a) a ## _MMX
648#include "postprocess_template.c"
e89952aa 649#endif
cc9b0679
MN
650
651//MMX2 versions
e89952aa 652#ifdef COMPILE_MMX2
cc9b0679
MN
653#undef RENAME
654#define HAVE_MMX
655#define HAVE_MMX2
656#undef HAVE_3DNOW
657#define ARCH_X86
658#define RENAME(a) a ## _MMX2
659#include "postprocess_template.c"
e89952aa 660#endif
cc9b0679
MN
661
662//3DNOW versions
e89952aa 663#ifdef COMPILE_3DNOW
cc9b0679
MN
664#undef RENAME
665#define HAVE_MMX
666#undef HAVE_MMX2
667#define HAVE_3DNOW
668#define ARCH_X86
669#define RENAME(a) a ## _3DNow
670#include "postprocess_template.c"
e89952aa 671#endif
cc9b0679
MN
672
673// minor note: the HAVE_xyz is messed up after that line so dont use it
674
675static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
c41d972d 676 QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc)
cc9b0679 677{
9c9e467d 678 PPContext *c= (PPContext *)vc;
c41d972d 679 PPMode *ppMode= (PPMode *)vm;
9c9e467d
MN
680 c->ppMode= *ppMode; //FIXME
681
cc9b0679
MN
682 // useing ifs here as they are faster than function pointers allthough the
683 // difference wouldnt be messureable here but its much better because
684 // someone might exchange the cpu whithout restarting mplayer ;)
e89952aa 685#ifdef RUNTIME_CPUDETECT
9c9e467d 686#ifdef ARCH_X86
cc9b0679 687 // ordered per speed fasterst first
fa6ea14e 688 if(c->cpuCaps & PP_CPU_CAPS_MMX2)
9c9e467d 689 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
fa6ea14e 690 else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
9c9e467d 691 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
fa6ea14e 692 else if(c->cpuCaps & PP_CPU_CAPS_MMX)
9c9e467d 693 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
cc9b0679 694 else
9c9e467d 695 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
cc9b0679 696#else
b0ac780a
MN
697#ifdef ARCH_POWERPC
698#ifdef HAVE_ALTIVEC
71487254 699 if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
b0ac780a
MN
700 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
701 else
702#endif
703#endif
9c9e467d 704 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
be44a4d7 705#endif
e89952aa
MN
706#else //RUNTIME_CPUDETECT
707#ifdef HAVE_MMX2
9c9e467d 708 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 709#elif defined (HAVE_3DNOW)
9c9e467d 710 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 711#elif defined (HAVE_MMX)
9c9e467d 712 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
b0ac780a
MN
713#elif defined (HAVE_ALTIVEC)
714 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 715#else
9c9e467d 716 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa
MN
717#endif
718#endif //!RUNTIME_CPUDETECT
117e45b0
MN
719}
720
cc9b0679
MN
721//static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
722// QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
13e00528 723
/* -pp Command line Help
 * The filter names listed here must match the names in the filters table,
 * because the parser compares them case-sensitively.
 */
char *pp_help=
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"Filters Options\n"
"short long name short long option Description\n"
"* * a autoq CPU power dependent enabler\n"
" c chrom chrominance filtering enabled\n"
" y nochrom chrominance filtering disabled\n"
"hb hdeblock (2 threshold) horizontal deblocking filter\n"
" 1. difference factor: default=32, higher -> more deblocking\n"
" 2. flatness threshold: default=39, lower -> more deblocking\n"
" the h & v deblocking filters share these\n"
" so you can't set different thresholds for h / v\n"
"vb vdeblock (2 threshold) vertical deblocking filter\n"
"ha ahdeblock (2 threshold) horizontal deblocking filter\n"
"va avdeblock (2 threshold) vertical deblocking filter\n"
"h1 x1hdeblock experimental h deblock filter 1\n"
"v1 x1vdeblock experimental v deblock filter 1\n"
"dr dering deringing filter\n"
"al autolevels automatic brightness / contrast\n"
" f fullyrange stretch luminance to (0..255)\n"
"lb linblenddeint linear blend deinterlacer\n"
"li linipoldeint linear interpolating deinterlace\n"
"ci cubicipoldeint cubic interpolating deinterlacer\n"
"md mediandeint median deinterlacer\n"
"fd ffmpegdeint ffmpeg deinterlacer\n"
"l5 lowpass5 lowpass deinterlacer\n"
"de default hb:a,vb:a,dr:a\n"
"fa fast h1:a,v1:a,dr:a\n"
"tn tmpnoise (3 threshold) temporal noise reducer\n"
" 1. <= 2. <= 3. larger -> stronger filtering\n"
"fq forcequant <quantizer> force quantizer\n"
;
911879d1 763
c41d972d 764pp_mode_t *pp_get_mode_by_name_and_quality(char *name, int quality)
911879d1
MN
765{
766 char temp[GET_MODE_BUFFER_SIZE];
767 char *p= temp;
9c9e467d 768 char *filterDelimiters= ",/";
911879d1 769 char *optionDelimiters= ":";
c41d972d 770 struct PPMode *ppMode;
911879d1
MN
771 char *filterToken;
772
c41d972d
MN
773 ppMode= memalign(8, sizeof(PPMode));
774
775 ppMode->lumMode= 0;
776 ppMode->chromMode= 0;
777 ppMode->maxTmpNoise[0]= 700;
778 ppMode->maxTmpNoise[1]= 1500;
779 ppMode->maxTmpNoise[2]= 3000;
780 ppMode->maxAllowedY= 234;
781 ppMode->minAllowedY= 16;
68bf295e
MN
782 ppMode->baseDcDiff= 256/8;
783 ppMode->flatnessThreshold= 56-16-1;
c41d972d
MN
784 ppMode->maxClippedThreshold= 0.01;
785 ppMode->error=0;
df8d4d0e 786
911879d1
MN
787 strncpy(temp, name, GET_MODE_BUFFER_SIZE);
788
162c9c2e 789 if(verbose>1) printf("pp: %s\n", name);
117e45b0 790
911879d1 791 for(;;){
911879d1 792 char *filterName;
326d40af 793 int q= 1000000; //PP_QUALITY_MAX;
911879d1
MN
794 int chrom=-1;
795 char *option;
796 char *options[OPTIONS_ARRAY_SIZE];
797 int i;
798 int filterNameOk=0;
799 int numOfUnknownOptions=0;
800 int enable=1; //does the user want us to enabled or disabled the filter
801
802 filterToken= strtok(p, filterDelimiters);
803 if(filterToken == NULL) break;
117e45b0 804 p+= strlen(filterToken) + 1; // p points to next filterToken
911879d1 805 filterName= strtok(filterToken, optionDelimiters);
162c9c2e 806 if(verbose>1) printf("pp: %s::%s\n", filterToken, filterName);
911879d1
MN
807
808 if(*filterName == '-')
809 {
810 enable=0;
811 filterName++;
812 }
117e45b0 813
911879d1
MN
814 for(;;){ //for all options
815 option= strtok(NULL, optionDelimiters);
816 if(option == NULL) break;
817
162c9c2e 818 if(verbose>1) printf("pp: option: %s\n", option);
911879d1
MN
819 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
820 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
821 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
822 else
823 {
824 options[numOfUnknownOptions] = option;
825 numOfUnknownOptions++;
911879d1
MN
826 }
827 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
828 }
117e45b0 829 options[numOfUnknownOptions] = NULL;
911879d1
MN
830
831 /* replace stuff from the replace Table */
832 for(i=0; replaceTable[2*i]!=NULL; i++)
833 {
834 if(!strcmp(replaceTable[2*i], filterName))
835 {
836 int newlen= strlen(replaceTable[2*i + 1]);
837 int plen;
838 int spaceLeft;
839
840 if(p==NULL) p= temp, *p=0; //last filter
841 else p--, *p=','; //not last filter
842
843 plen= strlen(p);
8cd91a44 844 spaceLeft= p - temp + plen;
911879d1
MN
845 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE)
846 {
c41d972d 847 ppMode->error++;
911879d1
MN
848 break;
849 }
850 memmove(p + newlen, p, plen+1);
851 memcpy(p, replaceTable[2*i + 1], newlen);
852 filterNameOk=1;
853 }
854 }
855
856 for(i=0; filters[i].shortName!=NULL; i++)
857 {
117e45b0 858// printf("Compareing %s, %s, %s\n", filters[i].shortName,filters[i].longName, filterName);
911879d1
MN
859 if( !strcmp(filters[i].longName, filterName)
860 || !strcmp(filters[i].shortName, filterName))
861 {
c41d972d
MN
862 ppMode->lumMode &= ~filters[i].mask;
863 ppMode->chromMode &= ~filters[i].mask;
911879d1
MN
864
865 filterNameOk=1;
866 if(!enable) break; // user wants to disable it
867
868 if(q >= filters[i].minLumQuality)
c41d972d 869 ppMode->lumMode|= filters[i].mask;
911879d1
MN
870 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
871 if(q >= filters[i].minChromQuality)
c41d972d 872 ppMode->chromMode|= filters[i].mask;
911879d1
MN
873
874 if(filters[i].mask == LEVEL_FIX)
875 {
876 int o;
c41d972d
MN
877 ppMode->minAllowedY= 16;
878 ppMode->maxAllowedY= 234;
911879d1 879 for(o=0; options[o]!=NULL; o++)
07f8991b 880 {
911879d1
MN
881 if( !strcmp(options[o],"fullyrange")
882 ||!strcmp(options[o],"f"))
883 {
c41d972d
MN
884 ppMode->minAllowedY= 0;
885 ppMode->maxAllowedY= 255;
911879d1
MN
886 numOfUnknownOptions--;
887 }
07f8991b 888 }
911879d1 889 }
117e45b0
MN
890 else if(filters[i].mask == TEMP_NOISE_FILTER)
891 {
892 int o;
893 int numOfNoises=0;
117e45b0
MN
894
895 for(o=0; options[o]!=NULL; o++)
896 {
897 char *tail;
c41d972d 898 ppMode->maxTmpNoise[numOfNoises]=
117e45b0
MN
899 strtol(options[o], &tail, 0);
900 if(tail!=options[o])
901 {
902 numOfNoises++;
903 numOfUnknownOptions--;
904 if(numOfNoises >= 3) break;
905 }
906 }
907 }
12eebd26
MN
908 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK
909 || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK)
43d52f76
MN
910 {
911 int o;
912
913 for(o=0; options[o]!=NULL && o<2; o++)
914 {
915 char *tail;
916 int val= strtol(options[o], &tail, 0);
917 if(tail==options[o]) break;
918
919 numOfUnknownOptions--;
c41d972d
MN
920 if(o==0) ppMode->baseDcDiff= val;
921 else ppMode->flatnessThreshold= val;
43d52f76
MN
922 }
923 }
8aaac435
MN
924 else if(filters[i].mask == FORCE_QUANT)
925 {
926 int o;
c41d972d 927 ppMode->forcedQuant= 15;
8aaac435
MN
928
929 for(o=0; options[o]!=NULL && o<1; o++)
930 {
931 char *tail;
932 int val= strtol(options[o], &tail, 0);
933 if(tail==options[o]) break;
934
935 numOfUnknownOptions--;
c41d972d 936 ppMode->forcedQuant= val;
8aaac435
MN
937 }
938 }
911879d1
MN
939 }
940 }
c41d972d
MN
941 if(!filterNameOk) ppMode->error++;
942 ppMode->error += numOfUnknownOptions;
911879d1
MN
943 }
944
c41d972d
MN
945 if(verbose>1) printf("pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
946 if(ppMode->error)
947 {
948 fprintf(stderr, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
949 free(ppMode);
950 return NULL;
951 }
911879d1
MN
952 return ppMode;
953}
954
c41d972d
MN
955void pp_free_mode(pp_mode_t *mode){
956 if(mode) free(mode);
957}
958
88c0bc7e
MN
/**
 * Replaces the buffer referenced by *p with a new zero-filled one.
 *
 * Any buffer currently stored in *p is released first; its contents are NOT
 * carried over to the new buffer.
 *
 * @param p         address of the pointer to (re)allocate; *p may be NULL
 * @param alignment alignment passed to memalign()
 * @param size      number of bytes to allocate and clear
 *
 * If the allocation fails, *p is left NULL and nothing is cleared.
 */
static void reallocAlign(void **p, int alignment, int size){
	free(*p); // free(NULL) is a no-op, no guard needed
	*p= memalign(alignment, size);
	// never hand a failed allocation to memset()
	if(*p) memset(*p, 0, size);
}
964
0426af31 965static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
ec487e5d
MN
966 int mbWidth = (width+15)>>4;
967 int mbHeight= (height+15)>>4;
88c0bc7e
MN
968 int i;
969
970 c->stride= stride;
0426af31 971 c->qpStride= qpStride;
9c9e467d 972
88c0bc7e
MN
973 reallocAlign((void **)&c->tempDst, 8, stride*24);
974 reallocAlign((void **)&c->tempSrc, 8, stride*24);
975 reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
976 reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
9c9e467d
MN
977 for(i=0; i<256; i++)
978 c->yHistogram[i]= width*height/64*15/256;
979
980 for(i=0; i<3; i++)
211c4920 981 {
9c9e467d 982 //Note:the +17*1024 is just there so i dont have to worry about r/w over te end
88c0bc7e
MN
983 reallocAlign((void **)&c->tempBlured[i], 8, stride*mbHeight*16 + 17*1024);
984 reallocAlign((void **)&c->tempBluredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
211c4920 985 }
45b4f285 986
134eb1e5 987 reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
0426af31
MN
988 reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
989 reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
88c0bc7e
MN
990 reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
991}
992
4cfbf61b 993static void global_init(void){
134eb1e5
MN
994 int i;
995 memset(clip_table, 0, 256);
996 for(i=256; i<512; i++)
997 clip_table[i]= i;
998 memset(clip_table+512, 0, 256);
999}
1000
88c0bc7e
MN
1001pp_context_t *pp_get_context(int width, int height, int cpuCaps){
1002 PPContext *c= memalign(32, sizeof(PPContext));
88c0bc7e 1003 int stride= (width+15)&(~15); //assumed / will realloc if needed
0426af31 1004 int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
88c0bc7e 1005
134eb1e5
MN
1006 global_init();
1007
88c0bc7e
MN
1008 memset(c, 0, sizeof(PPContext));
1009 c->cpuCaps= cpuCaps;
e9effafd
MN
1010 if(cpuCaps&PP_FORMAT){
1011 c->hChromaSubSample= cpuCaps&0x3;
1012 c->vChromaSubSample= (cpuCaps>>4)&0x3;
1013 }else{
1014 c->hChromaSubSample= 1;
1015 c->vChromaSubSample= 1;
1016 }
88c0bc7e 1017
0426af31 1018 reallocBuffers(c, width, height, stride, qpStride);
88c0bc7e 1019
9c9e467d 1020 c->frameNum=-1;
45b4f285 1021
9c9e467d 1022 return c;
45b4f285
MN
1023}
1024
9cb54f43 1025void pp_free_context(void *vc){
9c9e467d
MN
1026 PPContext *c = (PPContext*)vc;
1027 int i;
1028
1029 for(i=0; i<3; i++) free(c->tempBlured[i]);
1030 for(i=0; i<3; i++) free(c->tempBluredPast[i]);
1031
1032 free(c->tempBlocks);
1033 free(c->yHistogram);
1034 free(c->tempDst);
1035 free(c->tempSrc);
9c9e467d 1036 free(c->deintTemp);
0426af31 1037 free(c->stdQPTable);
ec487e5d 1038 free(c->nonBQPTable);
88c0bc7e
MN
1039 free(c->forcedQPTable);
1040
1041 memset(c, 0, sizeof(PPContext));
1042
9c9e467d
MN
1043 free(c);
1044}
1045
9cb54f43 1046void pp_postprocess(uint8_t * src[3], int srcStride[3],
9c9e467d 1047 uint8_t * dst[3], int dstStride[3],
ec487e5d 1048 int width, int height,
9c9e467d 1049 QP_STORE_T *QP_store, int QPStride,
c41d972d 1050 pp_mode_t *vm, void *vc, int pict_type)
911879d1 1051{
ec487e5d
MN
1052 int mbWidth = (width+15)>>4;
1053 int mbHeight= (height+15)>>4;
c41d972d 1054 PPMode *mode = (PPMode*)vm;
ec487e5d 1055 PPContext *c = (PPContext*)vc;
88c0bc7e 1056 int minStride= MAX(srcStride[0], dstStride[0]);
0426af31
MN
1057
1058 if(c->stride < minStride || c->qpStride < QPStride)
1059 reallocBuffers(c, width, height,
1060 MAX(minStride, c->stride),
1061 MAX(c->qpStride, QPStride));
9c9e467d 1062
8aaac435 1063 if(QP_store==NULL || (mode->lumMode & FORCE_QUANT))
815cbfe7 1064 {
8aaac435 1065 int i;
88c0bc7e 1066 QP_store= c->forcedQPTable;
9c9e467d 1067 QPStride= 0;
8aaac435 1068 if(mode->lumMode & FORCE_QUANT)
88c0bc7e 1069 for(i=0; i<mbWidth; i++) QP_store[i]= mode->forcedQuant;
8aaac435 1070 else
88c0bc7e 1071 for(i=0; i<mbWidth; i++) QP_store[i]= 1;
815cbfe7 1072 }
0426af31
MN
1073//printf("pict_type:%d\n", pict_type);
1074
1075 if(pict_type & PP_PICT_TYPE_QP2){
1076 int i;
1077 const int count= mbHeight * QPStride;
1078 for(i=0; i<(count>>2); i++){
1079 ((uint32_t*)c->stdQPTable)[i] = (((uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1080 }
1081 for(i<<=2; i<count; i++){
1082 c->stdQPTable[i] = QP_store[i]>>1;
1083 }
1084 QP_store= c->stdQPTable;
1085 }
1086
ec487e5d
MN
1087if(0){
1088int x,y;
1089for(y=0; y<mbHeight; y++){
1090 for(x=0; x<mbWidth; x++){
1091 printf("%2d ", QP_store[x + y*QPStride]);
1092 }
1093 printf("\n");
1094}
1095 printf("\n");
1096}
51e19dcc 1097
0426af31 1098 if((pict_type&7)!=3)
ec487e5d 1099 {
0426af31
MN
1100 int i;
1101 const int count= mbHeight * QPStride;
1102 for(i=0; i<(count>>2); i++){
2e90b37c 1103 ((uint32_t*)c->nonBQPTable)[i] = ((uint32_t*)QP_store)[i] & 0x3F3F3F3F;
0426af31
MN
1104 }
1105 for(i<<=2; i<count; i++){
2e90b37c 1106 c->nonBQPTable[i] = QP_store[i] & 0x3F;
ec487e5d
MN
1107 }
1108 }
815cbfe7 1109
df8d4d0e 1110 if(verbose>2)
162c9c2e
MN
1111 {
1112 printf("using npp filters 0x%X/0x%X\n", mode->lumMode, mode->chromMode);
162c9c2e
MN
1113 }
1114
9c9e467d 1115 postProcess(src[0], srcStride[0], dst[0], dstStride[0],
b2a3fcb7 1116 width, height, QP_store, QPStride, 0, mode, c);
911879d1 1117
e9effafd
MN
1118 width = (width )>>c->hChromaSubSample;
1119 height = (height)>>c->vChromaSubSample;
911879d1 1120
4e1349d4
MN
1121 if(mode->chromMode)
1122 {
9c9e467d 1123 postProcess(src[1], srcStride[1], dst[1], dstStride[1],
b2a3fcb7 1124 width, height, QP_store, QPStride, 1, mode, c);
9c9e467d 1125 postProcess(src[2], srcStride[2], dst[2], dstStride[2],
b2a3fcb7 1126 width, height, QP_store, QPStride, 2, mode, c);
4e1349d4 1127 }
9c9e467d 1128 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2])
4e1349d4 1129 {
ec487e5d
MN
1130 memcpy(dst[1], src[1], srcStride[1]*height);
1131 memcpy(dst[2], src[2], srcStride[2]*height);
4e1349d4
MN
1132 }
1133 else
1134 {
1135 int y;
ec487e5d 1136 for(y=0; y<height; y++)
4e1349d4 1137 {
ec487e5d
MN
1138 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1139 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
4e1349d4
MN
1140 }
1141 }
911879d1
MN
1142}
1143