per line lowpass filter in mmx
[libav.git] / libavcodec / libpostproc / postprocess.c
CommitLineData
3057fa66 1/*
9858f773 2 Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
3057fa66 3
b0ac780a
MN
4 AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
5
3057fa66
A
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19*/
20
b304569a
MN
21/**
22 * @file postprocess.c
23 * postprocessing.
24 */
25
3057fa66 26/*
b0ac780a
MN
27 C MMX MMX2 3DNow AltiVec
28isVertDC Ec Ec Ec
29isVertMinMaxOk Ec Ec Ec
30doVertLowPass E e e Ec
31doVertDefFilter Ec Ec e e Ec
3057fa66 32isHorizDC Ec Ec
4e4dcbc5
MN
33isHorizMinMaxOk a E
34doHorizLowPass E e e
7f16f6e6 35doHorizDefFilter Ec Ec e e
792a5a7c 36do_a_deblock Ec E Ec E
b0ac780a 37deRing E e e* Ecp
3b58b885 38Vertical RKAlgo1 E a a
e5c30e06 39Horizontal RKAlgo1 a a
117e45b0
MN
40Vertical X1# a E E
41Horizontal X1# a E E
acced553
MN
42LinIpolDeinterlace e E E*
43CubicIpolDeinterlace a e e*
44LinBlendDeinterlace e E E*
9b1663fc 45MedianDeinterlace# E Ec Ec
be44a4d7 46TempDeNoiser# E e e
d5a1a995 47
117e45b0
MN
48* I don't have a 3DNow! CPU -> it's untested, but no one said it doesn't work, so it seems to work
49# more or less self-invented filters, so the exactness isn't too meaningful
3057fa66 50E = Exact implementation
acced553 51e = almost exact implementation (slightly different rounding, ...)
3057fa66
A
52a = alternative / approximate impl
53c = checked against the other implementations (-vo md5)
b0ac780a 54p = partially optimized, still some work to do
3057fa66
A
55*/
56
57/*
58TODO:
3057fa66 59reduce the time wasted on the mem transfer
3057fa66 60unroll stuff if instructions depend too much on the prior one
3057fa66 61move YScale thing to the end instead of fixing QP
13e00528 62write a faster and higher quality deblocking filter :)
d5a1a995
MN
63make the mainloop more flexible (variable number of blocks at once
64 (the if/else stuff per block is slowing things down)
9f45d04d 65compare the quality & speed of all filters
9f45d04d 66split this huge file
8405b3fd 67optimize c versions
117e45b0 68try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
3057fa66 69...
13e00528
A
70*/
71
a6be8111 72//Changelog: use the CVS log
3057fa66 73
9858f773 74#include "config.h"
3057fa66
A
75#include <inttypes.h>
76#include <stdio.h>
d5a1a995 77#include <stdlib.h>
911879d1 78#include <string.h>
dda87e9f
PL
79#ifdef HAVE_MALLOC_H
80#include <malloc.h>
81#endif
3057fa66 82//#undef HAVE_MMX2
13e00528 83//#define HAVE_3DNOW
3057fa66 84//#undef HAVE_MMX
cc9b0679 85//#undef ARCH_X86
7f16f6e6 86//#define DEBUG_BRIGHTNESS
bba9b16c 87#ifdef USE_FASTMEMCPY
0a87c409 88#include "fastmemcpy.h"
70d4f2da 89#endif
13e00528 90#include "postprocess.h"
c41d972d 91#include "postprocess_internal.h"
bba9b16c
MN
92
93#include "mangle.h" //FIXME should be supressed
3057fa66 94
ca390e72
ZK
95#ifndef HAVE_MEMALIGN
96#define memalign(a,b) malloc(b)
97#endif
98
e939e1c3
A
99#define MIN(a,b) ((a) > (b) ? (b) : (a))
100#define MAX(a,b) ((a) < (b) ? (b) : (a))
101#define ABS(a) ((a) > 0 ? (a) : (-(a)))
102#define SIGN(a) ((a) > 0 ? 1 : -1)
103
911879d1
MN
104#define GET_MODE_BUFFER_SIZE 500
105#define OPTIONS_ARRAY_SIZE 10
9c9e467d
MN
106#define BLOCK_SIZE 8
107#define TEMP_STRIDE 8
108//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
911879d1 109
3f1d4e96
DB
110#if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0)
111# define attribute_used __attribute__((used))
12eebd26 112# define always_inline __attribute__((always_inline)) inline
3f1d4e96
DB
113#else
114# define attribute_used
12eebd26 115# define always_inline inline
3f1d4e96
DB
116#endif
117
cc9b0679 118#ifdef ARCH_X86
3f1d4e96 119static uint64_t __attribute__((aligned(8))) attribute_used w05= 0x0005000500050005LL;
39d89b69 120static uint64_t __attribute__((aligned(8))) attribute_used w04= 0x0004000400040004LL;
3f1d4e96
DB
121static uint64_t __attribute__((aligned(8))) attribute_used w20= 0x0020002000200020LL;
122static uint64_t __attribute__((aligned(8))) attribute_used b00= 0x0000000000000000LL;
123static uint64_t __attribute__((aligned(8))) attribute_used b01= 0x0101010101010101LL;
124static uint64_t __attribute__((aligned(8))) attribute_used b02= 0x0202020202020202LL;
125static uint64_t __attribute__((aligned(8))) attribute_used b08= 0x0808080808080808LL;
126static uint64_t __attribute__((aligned(8))) attribute_used b80= 0x8080808080808080LL;
b28daef8 127#endif
3057fa66 128
134eb1e5
MN
129static uint8_t clip_table[3*256];
130static uint8_t * const clip_tab= clip_table + 256;
131
4df8ca9d 132static const int verbose= 0;
45b4f285 133
3f1d4e96 134static const int attribute_used deringThreshold= 20;
3057fa66 135
9c9e467d 136
911879d1
MN
137static struct PPFilter filters[]=
138{
139 {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK},
140 {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK},
9c9e467d
MN
141/* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER},
142 {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/
911879d1
MN
143 {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER},
144 {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER},
12eebd26
MN
145 {"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK},
146 {"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK},
911879d1
MN
147 {"dr", "dering", 1, 5, 6, DERING},
148 {"al", "autolevels", 0, 1, 2, LEVEL_FIX},
43d52f76
MN
149 {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
150 {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
151 {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
152 {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER},
9c9e467d 153 {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER},
134eb1e5 154 {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER},
117e45b0 155 {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER},
8aaac435 156 {"fq", "forcequant", 1, 0, 0, FORCE_QUANT},
911879d1
MN
157 {NULL, NULL,0,0,0,0} //End Marker
158};
159
/**
 * Alias table used by pp_get_mode_by_name_and_quality().
 * Entries come in pairs: [2*i] is the alias, [2*i + 1] is the filter list that
 * is spliced into the mode string in its place. A NULL alias ends the table.
 */
static char *replaceTable[]=
{
	/* alias */	/* expansion */
	"default",
			"hdeblock:a,vdeblock:a,dering:a",
	"de",
			"hdeblock:a,vdeblock:a,dering:a",
	"fast",
			"x1hdeblock:a,x1vdeblock:a,dering:a",
	"fa",
			"x1hdeblock:a,x1vdeblock:a,dering:a",
	"ac",
			"ha:a:128:7,va:a,dering:a",
	NULL //End Marker
};
169
3057fa66 170
9c9e467d 171#ifdef ARCH_X86
3057fa66
A
172static inline void prefetchnta(void *p)
173{
174 asm volatile( "prefetchnta (%0)\n\t"
175 : : "r" (p)
176 );
177}
178
179static inline void prefetcht0(void *p)
180{
181 asm volatile( "prefetcht0 (%0)\n\t"
182 : : "r" (p)
183 );
184}
185
186static inline void prefetcht1(void *p)
187{
188 asm volatile( "prefetcht1 (%0)\n\t"
189 : : "r" (p)
190 );
191}
192
193static inline void prefetcht2(void *p)
194{
195 asm volatile( "prefetcht2 (%0)\n\t"
196 : : "r" (p)
197 );
198}
9a722af7 199#endif
3057fa66 200
cc9b0679 201// The horizontal functions exist only in C because the MMX code is faster with vertical filters and transposing
3057fa66 202
cf5ec61d
MN
203/**
204 * Check if the given 8x8 Block is mostly "flat"
205 */
b0ac780a 206static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
cf5ec61d
MN
207{
208 int numEq= 0;
209 int y;
0426af31 210 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
ec487e5d 211 const int dcThreshold= dcOffset*2 + 1;
0426af31 212
cf5ec61d
MN
213 for(y=0; y<BLOCK_SIZE; y++)
214 {
9c9e467d
MN
215 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
216 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
217 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
218 if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
219 if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
220 if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
221 if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
222 src+= stride;
223 }
224 return numEq > c->ppMode.flatnessThreshold;
225}
226
227/**
228 * Check if the middle 8x8 Block in the given 8x16 block is flat
229 */
230static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){
231 int numEq= 0;
232 int y;
0426af31 233 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
ec487e5d 234 const int dcThreshold= dcOffset*2 + 1;
0426af31 235
9c9e467d
MN
236 src+= stride*4; // src points to begin of the 8x8 Block
237 for(y=0; y<BLOCK_SIZE-1; y++)
238 {
239 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
240 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
241 if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
242 if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
243 if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
244 if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
245 if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
246 if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
cf5ec61d
MN
247 src+= stride;
248 }
9c9e467d 249 return numEq > c->ppMode.flatnessThreshold;
cf5ec61d
MN
250}
251
/**
 * Sparse horizontal range check over 8 rows.
 *
 * One pixel pair is sampled per row, cycling through the column pairs
 * (0,5), (2,7), (4,1), (6,3). The check fails as soon as
 * (unsigned)(a - b + 2*QP) exceeds 4*QP for a sampled pair.
 *
 * @param src    first pixel of the first row
 * @param stride offset between the starts of two consecutive rows
 * @param QP     quantizer used to scale the allowed difference
 * @return 1 if every sampled pair passes, 0 otherwise
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{
	static const uint8_t samplePairs[4][2]= { {0, 5}, {2, 7}, {4, 1}, {6, 3} };
	int row;

	for(row=0; row<8; row++, src+= stride)
	{
		const uint8_t *pair= samplePairs[row & 3];

		if((unsigned)(src[pair[0]] - src[pair[1]] + 2*QP) > 4*QP)
			return 0;
	}
	return 1;
}
cf5ec61d 274
cb482d25
MN
275static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
276{
277#if 1
278#if 1
279 int x;
280 src+= stride*4;
281 for(x=0; x<BLOCK_SIZE; x+=4)
282 {
283 if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0;
284 if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
285 if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
286 if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
287 }
288#else
289 int x;
290 src+= stride*3;
291 for(x=0; x<BLOCK_SIZE; x++)
292 {
293 if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
294 }
295#endif
296 return 1;
297#else
298 int x;
299 src+= stride*4;
300 for(x=0; x<BLOCK_SIZE; x++)
301 {
302 int min=255;
303 int max=0;
304 int y;
305 for(y=0; y<8; y++){
306 int v= src[x + y*stride];
307 if(v>max) max=v;
308 if(v<min) min=v;
309 }
310 if(max-min > 2*QP) return 0;
311 }
cf5ec61d 312 return 1;
cb482d25
MN
313#endif
314}
315
b0ac780a
MN
316static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c){
317 if( isHorizDC_C(src, stride, c) ){
318 if( isHorizMinMaxOk_C(src, stride, c->QP) )
319 return 1;
320 else
321 return 0;
322 }else{
323 return 2;
324 }
325}
326
cb482d25
MN
327static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c){
328 if( isVertDC_C(src, stride, c) ){
329 if( isVertMinMaxOk_C(src, stride, c->QP) )
330 return 1;
331 else
332 return 0;
333 }else{
334 return 2;
335 }
cf5ec61d
MN
336}
337
b0ac780a 338static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
cf5ec61d
MN
339{
340 int y;
341 for(y=0; y<BLOCK_SIZE; y++)
342 {
b0ac780a 343 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
cf5ec61d 344
b0ac780a 345 if(ABS(middleEnergy) < 8*c->QP)
cf5ec61d
MN
346 {
347 const int q=(dst[3] - dst[4])/2;
348 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
349 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
350
351 int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) );
352 d= MAX(d, 0);
353
354 d= (5*d + 32) >> 6;
355 d*= SIGN(-middleEnergy);
356
357 if(q>0)
358 {
359 d= d<0 ? 0 : d;
360 d= d>q ? q : d;
361 }
362 else
363 {
364 d= d>0 ? 0 : d;
365 d= d<q ? q : d;
366 }
367
368 dst[3]-= d;
369 dst[4]+= d;
370 }
371 dst+= stride;
372 }
373}
374
375/**
376 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
377 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
378 */
b0ac780a 379static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
cf5ec61d 380{
cf5ec61d
MN
381 int y;
382 for(y=0; y<BLOCK_SIZE; y++)
383 {
b0ac780a
MN
384 const int first= ABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
385 const int last= ABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
cf5ec61d 386
8c8bbd10
MN
387 int sums[10];
388 sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
389 sums[1] = sums[0] - first + dst[3];
390 sums[2] = sums[1] - first + dst[4];
391 sums[3] = sums[2] - first + dst[5];
392 sums[4] = sums[3] - first + dst[6];
393 sums[5] = sums[4] - dst[0] + dst[7];
394 sums[6] = sums[5] - dst[1] + last;
395 sums[7] = sums[6] - dst[2] + last;
396 sums[8] = sums[7] - dst[3] + last;
397 sums[9] = sums[8] - dst[4] + last;
398
399 dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
400 dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
401 dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
402 dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
403 dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
404 dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
405 dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
406 dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
cf5ec61d
MN
407
408 dst+= stride;
409 }
410}
411
4e4dcbc5 412/**
cc9b0679
MN
413 * Experimental Filter 1 (Horizontal)
414 * will not damage linear gradients
415 * Flat blocks should look like they where passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
416 * can only smooth blocks at the expected locations (it cant smooth them if they did move)
417 * MMX2 version does correct clipping C version doesnt
418 * not identical with the vertical one
4e4dcbc5 419 */
cc9b0679
MN
420static inline void horizX1Filter(uint8_t *src, int stride, int QP)
421{
117e45b0 422 int y;
cc9b0679
MN
423 static uint64_t *lut= NULL;
424 if(lut==NULL)
117e45b0 425 {
cc9b0679
MN
426 int i;
427 lut= (uint64_t*)memalign(8, 256*8);
428 for(i=0; i<256; i++)
117e45b0 429 {
cc9b0679 430 int v= i < 128 ? 2*i : 2*(i-256);
117e45b0 431/*
cc9b0679
MN
432//Simulate 112242211 9-Tap filter
433 uint64_t a= (v/16) & 0xFF;
434 uint64_t b= (v/8) & 0xFF;
435 uint64_t c= (v/4) & 0xFF;
436 uint64_t d= (3*v/8) & 0xFF;
117e45b0 437*/
cc9b0679
MN
438//Simulate piecewise linear interpolation
439 uint64_t a= (v/16) & 0xFF;
440 uint64_t b= (v*3/16) & 0xFF;
441 uint64_t c= (v*5/16) & 0xFF;
442 uint64_t d= (7*v/16) & 0xFF;
443 uint64_t A= (0x100 - a)&0xFF;
444 uint64_t B= (0x100 - b)&0xFF;
445 uint64_t C= (0x100 - c)&0xFF;
446 uint64_t D= (0x100 - c)&0xFF;
447
448 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
449 (D<<24) | (C<<16) | (B<<8) | (A);
450 //lut[i] = (v<<32) | (v<<24);
117e45b0
MN
451 }
452 }
cc9b0679
MN
453
454 for(y=0; y<BLOCK_SIZE; y++)
117e45b0 455 {
cc9b0679
MN
456 int a= src[1] - src[2];
457 int b= src[3] - src[4];
458 int c= src[5] - src[6];
459
460 int d= MAX(ABS(b) - (ABS(a) + ABS(c))/2, 0);
461
462 if(d < QP)
117e45b0 463 {
cc9b0679
MN
464 int v = d * SIGN(-b);
465
466 src[1] +=v/8;
467 src[2] +=v/4;
468 src[3] +=3*v/8;
469 src[4] -=3*v/8;
470 src[5] -=v/4;
471 src[6] -=v/8;
472
117e45b0 473 }
cc9b0679 474 src+=stride;
117e45b0 475 }
cc9b0679
MN
476}
477
12eebd26
MN
478/**
479 * accurate deblock filter
480 */
792a5a7c 481static always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
12eebd26
MN
482 int y;
483 const int QP= c->QP;
484 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
485 const int dcThreshold= dcOffset*2 + 1;
486//START_TIMER
487 src+= step*4; // src points to begin of the 8x8 Block
488 for(y=0; y<8; y++){
489 int numEq= 0;
490
491 if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
492 if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
493 if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
494 if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
495 if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
496 if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
497 if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
498 if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
499 if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
500 if(numEq > c->ppMode.flatnessThreshold){
501 int min, max, x;
502
503 if(src[0] > src[step]){
504 max= src[0];
505 min= src[step];
506 }else{
507 max= src[step];
508 min= src[0];
509 }
510 for(x=2; x<8; x+=2){
511 if(src[x*step] > src[(x+1)*step]){
512 if(src[x *step] > max) max= src[ x *step];
513 if(src[(x+1)*step] < min) min= src[(x+1)*step];
514 }else{
515 if(src[(x+1)*step] > max) max= src[(x+1)*step];
516 if(src[ x *step] < min) min= src[ x *step];
517 }
518 }
519 if(max-min < 2*QP){
520 const int first= ABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
521 const int last= ABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
522
523 int sums[10];
524 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
525 sums[1] = sums[0] - first + src[3*step];
526 sums[2] = sums[1] - first + src[4*step];
527 sums[3] = sums[2] - first + src[5*step];
528 sums[4] = sums[3] - first + src[6*step];
529 sums[5] = sums[4] - src[0*step] + src[7*step];
530 sums[6] = sums[5] - src[1*step] + last;
531 sums[7] = sums[6] - src[2*step] + last;
532 sums[8] = sums[7] - src[3*step] + last;
533 sums[9] = sums[8] - src[4*step] + last;
534
535 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
536 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
537 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
538 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
539 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
540 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
541 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
542 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
543 }
544 }else{
545 const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
546
547 if(ABS(middleEnergy) < 8*QP)
548 {
549 const int q=(src[3*step] - src[4*step])/2;
550 const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
551 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
552
553 int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) );
554 d= MAX(d, 0);
555
556 d= (5*d + 32) >> 6;
557 d*= SIGN(-middleEnergy);
558
559 if(q>0)
560 {
561 d= d<0 ? 0 : d;
562 d= d>q ? q : d;
563 }
564 else
565 {
566 d= d>0 ? 0 : d;
567 d= d<q ? q : d;
568 }
569
570 src[3*step]-= d;
571 src[4*step]+= d;
572 }
573 }
574
575 src += stride;
576 }
577/*if(step==16){
578 STOP_TIMER("step16")
579}else{
580 STOP_TIMER("stepX")
581}*/
582}
cc9b0679 583
e89952aa 584//Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
cc9b0679 585//Plain C versions
e89952aa
MN
586#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
587#define COMPILE_C
588#endif
589
b0ac780a
MN
590#ifdef ARCH_POWERPC
591#ifdef HAVE_ALTIVEC
592#define COMPILE_ALTIVEC
593#ifndef CONFIG_DARWIN
594#warning "################################################################################"
595#warning "WARNING: No gcc available as of today (2004-05-25) seems to be able to compile properly some of the code under non-Darwin PPC OSes. Some functions result in wrong results, while others simply won't compile (gcc explodes after allocating 1GiB+)."
596#warning "################################################################################"
597#endif //CONFIG_DARWIN
598#endif //HAVE_ALTIVEC
599#endif //ARCH_POWERPC
600
9c9e467d 601#ifdef ARCH_X86
e89952aa
MN
602
603#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
604#define COMPILE_MMX
605#endif
606
607#if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
608#define COMPILE_MMX2
609#endif
610
611#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
612#define COMPILE_3DNOW
613#endif
9c9e467d 614#endif //ARCH_X86
e89952aa
MN
615
616#undef HAVE_MMX
617#undef HAVE_MMX2
618#undef HAVE_3DNOW
b0ac780a 619#undef HAVE_ALTIVEC
e89952aa
MN
620#undef ARCH_X86
621
622#ifdef COMPILE_C
cc9b0679
MN
623#undef HAVE_MMX
624#undef HAVE_MMX2
625#undef HAVE_3DNOW
626#undef ARCH_X86
627#define RENAME(a) a ## _C
628#include "postprocess_template.c"
e89952aa 629#endif
cc9b0679 630
b0ac780a
MN
631#ifdef ARCH_POWERPC
632#ifdef COMPILE_ALTIVEC
633#undef RENAME
634#define HAVE_ALTIVEC
635#define RENAME(a) a ## _altivec
636#include "postprocess_altivec_template.c"
637#include "postprocess_template.c"
638#endif
639#endif //ARCH_POWERPC
640
cc9b0679 641//MMX versions
e89952aa 642#ifdef COMPILE_MMX
cc9b0679
MN
643#undef RENAME
644#define HAVE_MMX
645#undef HAVE_MMX2
646#undef HAVE_3DNOW
647#define ARCH_X86
648#define RENAME(a) a ## _MMX
649#include "postprocess_template.c"
e89952aa 650#endif
cc9b0679
MN
651
652//MMX2 versions
e89952aa 653#ifdef COMPILE_MMX2
cc9b0679
MN
654#undef RENAME
655#define HAVE_MMX
656#define HAVE_MMX2
657#undef HAVE_3DNOW
658#define ARCH_X86
659#define RENAME(a) a ## _MMX2
660#include "postprocess_template.c"
e89952aa 661#endif
cc9b0679
MN
662
663//3DNOW versions
e89952aa 664#ifdef COMPILE_3DNOW
cc9b0679
MN
665#undef RENAME
666#define HAVE_MMX
667#undef HAVE_MMX2
668#define HAVE_3DNOW
669#define ARCH_X86
670#define RENAME(a) a ## _3DNow
671#include "postprocess_template.c"
e89952aa 672#endif
cc9b0679
MN
673
674// minor note: the HAVE_xyz macros are messed up after this point, so don't use them
675
676static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
c41d972d 677 QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc)
cc9b0679 678{
9c9e467d 679 PPContext *c= (PPContext *)vc;
c41d972d 680 PPMode *ppMode= (PPMode *)vm;
9c9e467d
MN
681 c->ppMode= *ppMode; //FIXME
682
cc9b0679
MN
683 // useing ifs here as they are faster than function pointers allthough the
684 // difference wouldnt be messureable here but its much better because
685 // someone might exchange the cpu whithout restarting mplayer ;)
e89952aa 686#ifdef RUNTIME_CPUDETECT
9c9e467d 687#ifdef ARCH_X86
cc9b0679 688 // ordered per speed fasterst first
fa6ea14e 689 if(c->cpuCaps & PP_CPU_CAPS_MMX2)
9c9e467d 690 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
fa6ea14e 691 else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
9c9e467d 692 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
fa6ea14e 693 else if(c->cpuCaps & PP_CPU_CAPS_MMX)
9c9e467d 694 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
cc9b0679 695 else
9c9e467d 696 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
cc9b0679 697#else
b0ac780a
MN
698#ifdef ARCH_POWERPC
699#ifdef HAVE_ALTIVEC
700 else if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
701 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
702 else
703#endif
704#endif
9c9e467d 705 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
be44a4d7 706#endif
e89952aa
MN
707#else //RUNTIME_CPUDETECT
708#ifdef HAVE_MMX2
9c9e467d 709 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 710#elif defined (HAVE_3DNOW)
9c9e467d 711 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 712#elif defined (HAVE_MMX)
9c9e467d 713 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
b0ac780a
MN
714#elif defined (HAVE_ALTIVEC)
715 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 716#else
9c9e467d 717 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa
MN
718#endif
719#endif //!RUNTIME_CPUDETECT
117e45b0
MN
720}
721
cc9b0679
MN
722//static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
723// QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
13e00528 724
/* -pp Command line Help
 *
 * The filter and option names listed here must match what
 * pp_get_mode_by_name_and_quality() accepts: names are compared with strcmp()
 * against the filters[] table, so they are case sensitive.
 */
char *pp_help=
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"Filters Options\n"
"short long name short long option Description\n"
"* * a autoq CPU power dependent enabler\n"
" c chrom chrominance filtering enabled\n"
" y nochrom chrominance filtering disabled\n"
"hb hdeblock (2 threshold) horizontal deblocking filter\n"
" 1. difference factor: default=32, higher -> more deblocking\n"
" 2. flatness threshold: default=39, lower -> more deblocking\n"
" the h & v deblocking filters share these\n"
" so you can't set different thresholds for h / v\n"
"vb vdeblock (2 threshold) vertical deblocking filter\n"
"ha ahdeblock (2 threshold) horizontal deblocking filter\n"
"va avdeblock (2 threshold) vertical deblocking filter\n"
"h1 x1hdeblock experimental h deblock filter 1\n"
"v1 x1vdeblock experimental v deblock filter 1\n"
"dr dering deringing filter\n"
"al autolevels automatic brightness / contrast\n"
" f fullyrange stretch luminance to (0..255)\n"
"lb linblenddeint linear blend deinterlacer\n"
"li linipoldeint linear interpolating deinterlace\n"
"ci cubicipoldeint cubic interpolating deinterlacer\n"
"md mediandeint median deinterlacer\n"
"fd ffmpegdeint ffmpeg deinterlacer\n"
"de default hb:a,vb:a,dr:a\n"
"fa fast h1:a,v1:a,dr:a\n"
"tn tmpnoise (3 threshold) temporal noise reducer\n"
" 1. <= 2. <= 3. larger -> stronger filtering\n"
"fq forcequant <quantizer> force quantizer\n"
;
911879d1 764
c41d972d 765pp_mode_t *pp_get_mode_by_name_and_quality(char *name, int quality)
911879d1
MN
766{
767 char temp[GET_MODE_BUFFER_SIZE];
768 char *p= temp;
9c9e467d 769 char *filterDelimiters= ",/";
911879d1 770 char *optionDelimiters= ":";
c41d972d 771 struct PPMode *ppMode;
911879d1
MN
772 char *filterToken;
773
c41d972d
MN
774 ppMode= memalign(8, sizeof(PPMode));
775
776 ppMode->lumMode= 0;
777 ppMode->chromMode= 0;
778 ppMode->maxTmpNoise[0]= 700;
779 ppMode->maxTmpNoise[1]= 1500;
780 ppMode->maxTmpNoise[2]= 3000;
781 ppMode->maxAllowedY= 234;
782 ppMode->minAllowedY= 16;
68bf295e
MN
783 ppMode->baseDcDiff= 256/8;
784 ppMode->flatnessThreshold= 56-16-1;
c41d972d
MN
785 ppMode->maxClippedThreshold= 0.01;
786 ppMode->error=0;
df8d4d0e 787
911879d1
MN
788 strncpy(temp, name, GET_MODE_BUFFER_SIZE);
789
162c9c2e 790 if(verbose>1) printf("pp: %s\n", name);
117e45b0 791
911879d1 792 for(;;){
911879d1 793 char *filterName;
326d40af 794 int q= 1000000; //PP_QUALITY_MAX;
911879d1
MN
795 int chrom=-1;
796 char *option;
797 char *options[OPTIONS_ARRAY_SIZE];
798 int i;
799 int filterNameOk=0;
800 int numOfUnknownOptions=0;
801 int enable=1; //does the user want us to enabled or disabled the filter
802
803 filterToken= strtok(p, filterDelimiters);
804 if(filterToken == NULL) break;
117e45b0 805 p+= strlen(filterToken) + 1; // p points to next filterToken
911879d1 806 filterName= strtok(filterToken, optionDelimiters);
162c9c2e 807 if(verbose>1) printf("pp: %s::%s\n", filterToken, filterName);
911879d1
MN
808
809 if(*filterName == '-')
810 {
811 enable=0;
812 filterName++;
813 }
117e45b0 814
911879d1
MN
815 for(;;){ //for all options
816 option= strtok(NULL, optionDelimiters);
817 if(option == NULL) break;
818
162c9c2e 819 if(verbose>1) printf("pp: option: %s\n", option);
911879d1
MN
820 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
821 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
822 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
823 else
824 {
825 options[numOfUnknownOptions] = option;
826 numOfUnknownOptions++;
911879d1
MN
827 }
828 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
829 }
117e45b0 830 options[numOfUnknownOptions] = NULL;
911879d1
MN
831
832 /* replace stuff from the replace Table */
833 for(i=0; replaceTable[2*i]!=NULL; i++)
834 {
835 if(!strcmp(replaceTable[2*i], filterName))
836 {
837 int newlen= strlen(replaceTable[2*i + 1]);
838 int plen;
839 int spaceLeft;
840
841 if(p==NULL) p= temp, *p=0; //last filter
842 else p--, *p=','; //not last filter
843
844 plen= strlen(p);
8cd91a44 845 spaceLeft= p - temp + plen;
911879d1
MN
846 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE)
847 {
c41d972d 848 ppMode->error++;
911879d1
MN
849 break;
850 }
851 memmove(p + newlen, p, plen+1);
852 memcpy(p, replaceTable[2*i + 1], newlen);
853 filterNameOk=1;
854 }
855 }
856
857 for(i=0; filters[i].shortName!=NULL; i++)
858 {
117e45b0 859// printf("Compareing %s, %s, %s\n", filters[i].shortName,filters[i].longName, filterName);
911879d1
MN
860 if( !strcmp(filters[i].longName, filterName)
861 || !strcmp(filters[i].shortName, filterName))
862 {
c41d972d
MN
863 ppMode->lumMode &= ~filters[i].mask;
864 ppMode->chromMode &= ~filters[i].mask;
911879d1
MN
865
866 filterNameOk=1;
867 if(!enable) break; // user wants to disable it
868
869 if(q >= filters[i].minLumQuality)
c41d972d 870 ppMode->lumMode|= filters[i].mask;
911879d1
MN
871 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
872 if(q >= filters[i].minChromQuality)
c41d972d 873 ppMode->chromMode|= filters[i].mask;
911879d1
MN
874
875 if(filters[i].mask == LEVEL_FIX)
876 {
877 int o;
c41d972d
MN
878 ppMode->minAllowedY= 16;
879 ppMode->maxAllowedY= 234;
911879d1 880 for(o=0; options[o]!=NULL; o++)
07f8991b 881 {
911879d1
MN
882 if( !strcmp(options[o],"fullyrange")
883 ||!strcmp(options[o],"f"))
884 {
c41d972d
MN
885 ppMode->minAllowedY= 0;
886 ppMode->maxAllowedY= 255;
911879d1
MN
887 numOfUnknownOptions--;
888 }
07f8991b 889 }
911879d1 890 }
117e45b0
MN
891 else if(filters[i].mask == TEMP_NOISE_FILTER)
892 {
893 int o;
894 int numOfNoises=0;
117e45b0
MN
895
896 for(o=0; options[o]!=NULL; o++)
897 {
898 char *tail;
c41d972d 899 ppMode->maxTmpNoise[numOfNoises]=
117e45b0
MN
900 strtol(options[o], &tail, 0);
901 if(tail!=options[o])
902 {
903 numOfNoises++;
904 numOfUnknownOptions--;
905 if(numOfNoises >= 3) break;
906 }
907 }
908 }
12eebd26
MN
909 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK
910 || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK)
43d52f76
MN
911 {
912 int o;
913
914 for(o=0; options[o]!=NULL && o<2; o++)
915 {
916 char *tail;
917 int val= strtol(options[o], &tail, 0);
918 if(tail==options[o]) break;
919
920 numOfUnknownOptions--;
c41d972d
MN
921 if(o==0) ppMode->baseDcDiff= val;
922 else ppMode->flatnessThreshold= val;
43d52f76
MN
923 }
924 }
8aaac435
MN
925 else if(filters[i].mask == FORCE_QUANT)
926 {
927 int o;
c41d972d 928 ppMode->forcedQuant= 15;
8aaac435
MN
929
930 for(o=0; options[o]!=NULL && o<1; o++)
931 {
932 char *tail;
933 int val= strtol(options[o], &tail, 0);
934 if(tail==options[o]) break;
935
936 numOfUnknownOptions--;
c41d972d 937 ppMode->forcedQuant= val;
8aaac435
MN
938 }
939 }
911879d1
MN
940 }
941 }
c41d972d
MN
942 if(!filterNameOk) ppMode->error++;
943 ppMode->error += numOfUnknownOptions;
911879d1
MN
944 }
945
c41d972d
MN
946 if(verbose>1) printf("pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
947 if(ppMode->error)
948 {
949 fprintf(stderr, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
950 free(ppMode);
951 return NULL;
952 }
911879d1
MN
953 return ppMode;
954}
955
c41d972d
MN
956void pp_free_mode(pp_mode_t *mode){
957 if(mode) free(mode);
958}
959
88c0bc7e
MN
/**
 * Replaces the buffer at *p with a newly allocated, zero-filled one.
 * The old buffer (if any) is freed first; its contents are not preserved.
 * @param p         address of the buffer pointer to replace
 * @param alignment alignment handed to memalign()
 * @param size      size of the new buffer in bytes
 * If the allocation fails, *p is left NULL and nothing is cleared.
 */
static void reallocAlign(void **p, int alignment, int size){
	free(*p);	// free(NULL) is a no-op
	*p= memalign(alignment, size);
	// never hand a failed allocation to memset()
	if(*p) memset(*p, 0, size);
}
965
0426af31 966static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
ec487e5d
MN
967 int mbWidth = (width+15)>>4;
968 int mbHeight= (height+15)>>4;
88c0bc7e
MN
969 int i;
970
971 c->stride= stride;
0426af31 972 c->qpStride= qpStride;
9c9e467d 973
88c0bc7e
MN
974 reallocAlign((void **)&c->tempDst, 8, stride*24);
975 reallocAlign((void **)&c->tempSrc, 8, stride*24);
976 reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
977 reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
9c9e467d
MN
978 for(i=0; i<256; i++)
979 c->yHistogram[i]= width*height/64*15/256;
980
981 for(i=0; i<3; i++)
211c4920 982 {
9c9e467d 983 //Note:the +17*1024 is just there so i dont have to worry about r/w over te end
88c0bc7e
MN
984 reallocAlign((void **)&c->tempBlured[i], 8, stride*mbHeight*16 + 17*1024);
985 reallocAlign((void **)&c->tempBluredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
211c4920 986 }
45b4f285 987
134eb1e5 988 reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
0426af31
MN
989 reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
990 reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
88c0bc7e
MN
991 reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
992}
993
4cfbf61b 994static void global_init(void){
134eb1e5
MN
995 int i;
996 memset(clip_table, 0, 256);
997 for(i=256; i<512; i++)
998 clip_table[i]= i;
999 memset(clip_table+512, 0, 256);
1000}
1001
88c0bc7e
MN
1002pp_context_t *pp_get_context(int width, int height, int cpuCaps){
1003 PPContext *c= memalign(32, sizeof(PPContext));
88c0bc7e 1004 int stride= (width+15)&(~15); //assumed / will realloc if needed
0426af31 1005 int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
88c0bc7e 1006
134eb1e5
MN
1007 global_init();
1008
88c0bc7e
MN
1009 memset(c, 0, sizeof(PPContext));
1010 c->cpuCaps= cpuCaps;
e9effafd
MN
1011 if(cpuCaps&PP_FORMAT){
1012 c->hChromaSubSample= cpuCaps&0x3;
1013 c->vChromaSubSample= (cpuCaps>>4)&0x3;
1014 }else{
1015 c->hChromaSubSample= 1;
1016 c->vChromaSubSample= 1;
1017 }
88c0bc7e 1018
0426af31 1019 reallocBuffers(c, width, height, stride, qpStride);
88c0bc7e 1020
9c9e467d 1021 c->frameNum=-1;
45b4f285 1022
9c9e467d 1023 return c;
45b4f285
MN
1024}
1025
9cb54f43 1026void pp_free_context(void *vc){
9c9e467d
MN
1027 PPContext *c = (PPContext*)vc;
1028 int i;
1029
1030 for(i=0; i<3; i++) free(c->tempBlured[i]);
1031 for(i=0; i<3; i++) free(c->tempBluredPast[i]);
1032
1033 free(c->tempBlocks);
1034 free(c->yHistogram);
1035 free(c->tempDst);
1036 free(c->tempSrc);
9c9e467d 1037 free(c->deintTemp);
0426af31 1038 free(c->stdQPTable);
ec487e5d 1039 free(c->nonBQPTable);
88c0bc7e
MN
1040 free(c->forcedQPTable);
1041
1042 memset(c, 0, sizeof(PPContext));
1043
9c9e467d
MN
1044 free(c);
1045}
1046
9cb54f43 1047void pp_postprocess(uint8_t * src[3], int srcStride[3],
9c9e467d 1048 uint8_t * dst[3], int dstStride[3],
ec487e5d 1049 int width, int height,
9c9e467d 1050 QP_STORE_T *QP_store, int QPStride,
c41d972d 1051 pp_mode_t *vm, void *vc, int pict_type)
911879d1 1052{
ec487e5d
MN
1053 int mbWidth = (width+15)>>4;
1054 int mbHeight= (height+15)>>4;
c41d972d 1055 PPMode *mode = (PPMode*)vm;
ec487e5d 1056 PPContext *c = (PPContext*)vc;
88c0bc7e 1057 int minStride= MAX(srcStride[0], dstStride[0]);
0426af31
MN
1058
1059 if(c->stride < minStride || c->qpStride < QPStride)
1060 reallocBuffers(c, width, height,
1061 MAX(minStride, c->stride),
1062 MAX(c->qpStride, QPStride));
9c9e467d 1063
8aaac435 1064 if(QP_store==NULL || (mode->lumMode & FORCE_QUANT))
815cbfe7 1065 {
8aaac435 1066 int i;
88c0bc7e 1067 QP_store= c->forcedQPTable;
9c9e467d 1068 QPStride= 0;
8aaac435 1069 if(mode->lumMode & FORCE_QUANT)
88c0bc7e 1070 for(i=0; i<mbWidth; i++) QP_store[i]= mode->forcedQuant;
8aaac435 1071 else
88c0bc7e 1072 for(i=0; i<mbWidth; i++) QP_store[i]= 1;
815cbfe7 1073 }
0426af31
MN
1074//printf("pict_type:%d\n", pict_type);
1075
1076 if(pict_type & PP_PICT_TYPE_QP2){
1077 int i;
1078 const int count= mbHeight * QPStride;
1079 for(i=0; i<(count>>2); i++){
1080 ((uint32_t*)c->stdQPTable)[i] = (((uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1081 }
1082 for(i<<=2; i<count; i++){
1083 c->stdQPTable[i] = QP_store[i]>>1;
1084 }
1085 QP_store= c->stdQPTable;
1086 }
1087
ec487e5d
MN
1088if(0){
1089int x,y;
1090for(y=0; y<mbHeight; y++){
1091 for(x=0; x<mbWidth; x++){
1092 printf("%2d ", QP_store[x + y*QPStride]);
1093 }
1094 printf("\n");
1095}
1096 printf("\n");
1097}
51e19dcc 1098
0426af31 1099 if((pict_type&7)!=3)
ec487e5d 1100 {
0426af31
MN
1101 int i;
1102 const int count= mbHeight * QPStride;
1103 for(i=0; i<(count>>2); i++){
2e90b37c 1104 ((uint32_t*)c->nonBQPTable)[i] = ((uint32_t*)QP_store)[i] & 0x3F3F3F3F;
0426af31
MN
1105 }
1106 for(i<<=2; i<count; i++){
2e90b37c 1107 c->nonBQPTable[i] = QP_store[i] & 0x3F;
ec487e5d
MN
1108 }
1109 }
815cbfe7 1110
df8d4d0e 1111 if(verbose>2)
162c9c2e
MN
1112 {
1113 printf("using npp filters 0x%X/0x%X\n", mode->lumMode, mode->chromMode);
162c9c2e
MN
1114 }
1115
9c9e467d 1116 postProcess(src[0], srcStride[0], dst[0], dstStride[0],
b2a3fcb7 1117 width, height, QP_store, QPStride, 0, mode, c);
911879d1 1118
e9effafd
MN
1119 width = (width )>>c->hChromaSubSample;
1120 height = (height)>>c->vChromaSubSample;
911879d1 1121
4e1349d4
MN
1122 if(mode->chromMode)
1123 {
9c9e467d 1124 postProcess(src[1], srcStride[1], dst[1], dstStride[1],
b2a3fcb7 1125 width, height, QP_store, QPStride, 1, mode, c);
9c9e467d 1126 postProcess(src[2], srcStride[2], dst[2], dstStride[2],
b2a3fcb7 1127 width, height, QP_store, QPStride, 2, mode, c);
4e1349d4 1128 }
9c9e467d 1129 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2])
4e1349d4 1130 {
ec487e5d
MN
1131 memcpy(dst[1], src[1], srcStride[1]*height);
1132 memcpy(dst[2], src[2], srcStride[2]*height);
4e1349d4
MN
1133 }
1134 else
1135 {
1136 int y;
ec487e5d 1137 for(y=0; y<height; y++)
4e1349d4 1138 {
ec487e5d
MN
1139 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1140 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
4e1349d4
MN
1141 }
1142 }
911879d1
MN
1143}
1144