Fix default license header.
[libav.git] / libpostproc / postprocess.c
CommitLineData
3057fa66 1/*
b78e7197
DB
2 * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
3 *
4 * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
5 *
ef85972b 6 * This file is part of FFmpeg.
b78e7197
DB
7 *
8 * FFmpeg is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
3057fa66 22
b304569a
MN
23/**
24 * @file postprocess.c
25 * postprocessing.
26 */
115329f1 27
3057fa66 28/*
bb270c08
DB
29 C MMX MMX2 3DNow AltiVec
30isVertDC Ec Ec Ec
31isVertMinMaxOk Ec Ec Ec
32doVertLowPass E e e Ec
33doVertDefFilter Ec Ec e e Ec
34isHorizDC Ec Ec Ec
35isHorizMinMaxOk a E Ec
36doHorizLowPass E e e Ec
37doHorizDefFilter Ec Ec e e Ec
38do_a_deblock Ec E Ec E
39deRing E e e* Ecp
40Vertical RKAlgo1 E a a
41Horizontal RKAlgo1 a a
42Vertical X1# a E E
43Horizontal X1# a E E
44LinIpolDeinterlace e E E*
45CubicIpolDeinterlace a e e*
46LinBlendDeinterlace e E E*
47MedianDeinterlace# E Ec Ec
48TempDeNoiser# E e e Ec
d5a1a995 49
117e45b0
MN
50* I don't have a 3DNow! CPU -> it's untested, but no one said it doesn't work, so it seems to work
51# more or less self-invented filters, so the exactness isn't too meaningful
3057fa66 52E = Exact implementation
acced553 53e = almost exact implementation (slightly different rounding,...)
3057fa66
A
54a = alternative / approximate impl
55c = checked against the other implementations (-vo md5)
b0ac780a 56p = partially optimized, still some work to do
3057fa66
A
57*/
58
59/*
60TODO:
3057fa66 61reduce the time wasted on the mem transfer
3057fa66 62unroll stuff if instructions depend too much on the prior one
3057fa66 63move YScale thing to the end instead of fixing QP
13e00528 64write a faster and higher quality deblocking filter :)
d5a1a995 65make the mainloop more flexible (variable number of blocks at once
bb270c08 66 (the if/else stuff per block is slowing things down)
9f45d04d 67compare the quality & speed of all filters
9f45d04d 68split this huge file
8405b3fd 69optimize c versions
117e45b0 70try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
3057fa66 71...
13e00528
A
72*/
73
36b1b0bc 74//Changelog: use the Subversion log
3057fa66 75
9858f773 76#include "config.h"
6ab6c7c3 77#include "avutil.h"
3057fa66
A
78#include <inttypes.h>
79#include <stdio.h>
d5a1a995 80#include <stdlib.h>
911879d1 81#include <string.h>
dda87e9f
PL
82#ifdef HAVE_MALLOC_H
83#include <malloc.h>
84#endif
3057fa66 85//#undef HAVE_MMX2
13e00528 86//#define HAVE_3DNOW
3057fa66 87//#undef HAVE_MMX
cc9b0679 88//#undef ARCH_X86
7f16f6e6 89//#define DEBUG_BRIGHTNESS
bba9b16c 90#ifdef USE_FASTMEMCPY
f4bd289a 91#include "libvo/fastmemcpy.h"
70d4f2da 92#endif
13e00528 93#include "postprocess.h"
c41d972d 94#include "postprocess_internal.h"
bba9b16c
MN
95
96#include "mangle.h" //FIXME should be supressed
3057fa66 97
a7b2871c
RD
98#ifdef HAVE_ALTIVEC_H
99#include <altivec.h>
100#endif
101
e939e1c3
A
102#define MIN(a,b) ((a) > (b) ? (b) : (a))
103#define MAX(a,b) ((a) < (b) ? (b) : (a))
104#define ABS(a) ((a) > 0 ? (a) : (-(a)))
105#define SIGN(a) ((a) > 0 ? 1 : -1)
106
911879d1
MN
107#define GET_MODE_BUFFER_SIZE 500
108#define OPTIONS_ARRAY_SIZE 10
9c9e467d
MN
109#define BLOCK_SIZE 8
110#define TEMP_STRIDE 8
111//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
911879d1 112
3f1d4e96
DB
113#if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0)
114# define attribute_used __attribute__((used))
12eebd26 115# define always_inline __attribute__((always_inline)) inline
3f1d4e96
DB
116#else
117# define attribute_used
12eebd26 118# define always_inline inline
3f1d4e96
DB
119#endif
120
053dea12 121#if defined(ARCH_X86) || defined(ARCH_X86_64)
bb270c08
DB
122static uint64_t __attribute__((aligned(8))) attribute_used w05= 0x0005000500050005LL;
123static uint64_t __attribute__((aligned(8))) attribute_used w04= 0x0004000400040004LL;
124static uint64_t __attribute__((aligned(8))) attribute_used w20= 0x0020002000200020LL;
125static uint64_t __attribute__((aligned(8))) attribute_used b00= 0x0000000000000000LL;
126static uint64_t __attribute__((aligned(8))) attribute_used b01= 0x0101010101010101LL;
127static uint64_t __attribute__((aligned(8))) attribute_used b02= 0x0202020202020202LL;
128static uint64_t __attribute__((aligned(8))) attribute_used b08= 0x0808080808080808LL;
129static uint64_t __attribute__((aligned(8))) attribute_used b80= 0x8080808080808080LL;
b28daef8 130#endif
3057fa66 131
134eb1e5
MN
132static uint8_t clip_table[3*256];
133static uint8_t * const clip_tab= clip_table + 256;
134
4df8ca9d 135static const int verbose= 0;
45b4f285 136
3f1d4e96 137static const int attribute_used deringThreshold= 20;
3057fa66 138
9c9e467d 139
911879d1
MN
140static struct PPFilter filters[]=
141{
bb270c08
DB
142 {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK},
143 {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK},
144/* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER},
145 {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/
146 {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER},
147 {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER},
148 {"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK},
149 {"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK},
150 {"dr", "dering", 1, 5, 6, DERING},
151 {"al", "autolevels", 0, 1, 2, LEVEL_FIX},
152 {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
153 {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
154 {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
155 {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER},
156 {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER},
157 {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER},
158 {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER},
159 {"fq", "forcequant", 1, 0, 0, FORCE_QUANT},
160 {NULL, NULL,0,0,0,0} //End Marker
911879d1
MN
161};
162
7b49ce2e 163static const char *replaceTable[]=
911879d1 164{
bb270c08
DB
165 "default", "hdeblock:a,vdeblock:a,dering:a",
166 "de", "hdeblock:a,vdeblock:a,dering:a",
167 "fast", "x1hdeblock:a,x1vdeblock:a,dering:a",
168 "fa", "x1hdeblock:a,x1vdeblock:a,dering:a",
169 "ac", "ha:a:128:7,va:a,dering:a",
170 NULL //End Marker
911879d1
MN
171};
172
3057fa66 173
053dea12 174#if defined(ARCH_X86) || defined(ARCH_X86_64)
3057fa66
A
175static inline void prefetchnta(void *p)
176{
bb270c08
DB
177 asm volatile( "prefetchnta (%0)\n\t"
178 : : "r" (p)
179 );
3057fa66
A
180}
181
182static inline void prefetcht0(void *p)
183{
bb270c08
DB
184 asm volatile( "prefetcht0 (%0)\n\t"
185 : : "r" (p)
186 );
3057fa66
A
187}
188
189static inline void prefetcht1(void *p)
190{
bb270c08
DB
191 asm volatile( "prefetcht1 (%0)\n\t"
192 : : "r" (p)
193 );
3057fa66
A
194}
195
196static inline void prefetcht2(void *p)
197{
bb270c08
DB
198 asm volatile( "prefetcht2 (%0)\n\t"
199 : : "r" (p)
200 );
3057fa66 201}
9a722af7 202#endif
3057fa66 203
cc9b0679 204// The horizontal functions exist only in C because the MMX code is faster with vertical filters and transposing
3057fa66 205
cf5ec61d
MN
206/**
207 * Check if the given 8x8 Block is mostly "flat"
208 */
b0ac780a 209static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
cf5ec61d 210{
bb270c08
DB
211 int numEq= 0;
212 int y;
213 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
214 const int dcThreshold= dcOffset*2 + 1;
215
216 for(y=0; y<BLOCK_SIZE; y++)
217 {
218 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
219 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
220 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
221 if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
222 if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
223 if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
224 if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
225 src+= stride;
226 }
227 return numEq > c->ppMode.flatnessThreshold;
9c9e467d
MN
228}
229
230/**
231 * Check if the middle 8x8 Block in the given 8x16 block is flat
232 */
233static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){
bb270c08
DB
234 int numEq= 0;
235 int y;
236 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
237 const int dcThreshold= dcOffset*2 + 1;
238
239 src+= stride*4; // src points to begin of the 8x8 Block
240 for(y=0; y<BLOCK_SIZE-1; y++)
241 {
242 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
243 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
244 if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
245 if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
246 if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
247 if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
248 if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
249 if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
250 src+= stride;
251 }
252 return numEq > c->ppMode.flatnessThreshold;
cf5ec61d
MN
253}
254
/**
 * Cheap horizontal range check on 8 rows.
 *
 * One pair of samples is probed per row, using the column pairs
 * (0,5) (2,7) (4,1) (6,3) for rows 0..3 and the same pairs again for
 * rows 4..7.
 *
 * @param src    first sample of the first row
 * @param stride distance between two rows
 * @param QP     quantizer; the test per pair is
 *               (unsigned)(difference + 2*QP) > 4*QP
 * @return 0 as soon as one probed pair fails the test, 1 otherwise
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{
    static const uint8_t probe[4][2]= { {0, 5}, {2, 7}, {4, 1}, {6, 3} };
    int row;

    for(row=0; row<8; row++)
    {
        const uint8_t *pair= probe[row & 3];

        if((unsigned)(src[pair[0]] - src[pair[1]] + 2*QP) > 4*QP) return 0;
        src += stride;
    }
    return 1;
}
cf5ec61d 277
cb482d25
MN
278static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
279{
280#if 1
281#if 1
bb270c08
DB
282 int x;
283 src+= stride*4;
284 for(x=0; x<BLOCK_SIZE; x+=4)
285 {
286 if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0;
287 if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
288 if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
289 if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
290 }
cb482d25 291#else
bb270c08
DB
292 int x;
293 src+= stride*3;
294 for(x=0; x<BLOCK_SIZE; x++)
295 {
296 if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
297 }
cb482d25 298#endif
bb270c08 299 return 1;
cb482d25 300#else
bb270c08
DB
301 int x;
302 src+= stride*4;
303 for(x=0; x<BLOCK_SIZE; x++)
304 {
305 int min=255;
306 int max=0;
307 int y;
308 for(y=0; y<8; y++){
309 int v= src[x + y*stride];
310 if(v>max) max=v;
311 if(v<min) min=v;
312 }
313 if(max-min > 2*QP) return 0;
314 }
315 return 1;
cb482d25
MN
316#endif
317}
318
b0ac780a 319static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c){
bb270c08
DB
320 if( isHorizDC_C(src, stride, c) ){
321 if( isHorizMinMaxOk_C(src, stride, c->QP) )
322 return 1;
323 else
324 return 0;
325 }else{
326 return 2;
327 }
b0ac780a
MN
328}
329
cb482d25 330static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c){
bb270c08
DB
331 if( isVertDC_C(src, stride, c) ){
332 if( isVertMinMaxOk_C(src, stride, c->QP) )
333 return 1;
334 else
335 return 0;
336 }else{
337 return 2;
338 }
cf5ec61d
MN
339}
340
b0ac780a 341static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
cf5ec61d 342{
bb270c08
DB
343 int y;
344 for(y=0; y<BLOCK_SIZE; y++)
345 {
346 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
347
348 if(ABS(middleEnergy) < 8*c->QP)
349 {
350 const int q=(dst[3] - dst[4])/2;
351 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
352 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
353
354 int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) );
355 d= MAX(d, 0);
356
357 d= (5*d + 32) >> 6;
358 d*= SIGN(-middleEnergy);
359
360 if(q>0)
361 {
362 d= d<0 ? 0 : d;
363 d= d>q ? q : d;
364 }
365 else
366 {
367 d= d>0 ? 0 : d;
368 d= d<q ? q : d;
369 }
370
371 dst[3]-= d;
372 dst[4]+= d;
373 }
374 dst+= stride;
375 }
cf5ec61d
MN
376}
377
378/**
379 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
380 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
381 */
b0ac780a 382static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
cf5ec61d 383{
bb270c08
DB
384 int y;
385 for(y=0; y<BLOCK_SIZE; y++)
386 {
387 const int first= ABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
388 const int last= ABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
389
390 int sums[10];
391 sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
392 sums[1] = sums[0] - first + dst[3];
393 sums[2] = sums[1] - first + dst[4];
394 sums[3] = sums[2] - first + dst[5];
395 sums[4] = sums[3] - first + dst[6];
396 sums[5] = sums[4] - dst[0] + dst[7];
397 sums[6] = sums[5] - dst[1] + last;
398 sums[7] = sums[6] - dst[2] + last;
399 sums[8] = sums[7] - dst[3] + last;
400 sums[9] = sums[8] - dst[4] + last;
401
402 dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
403 dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
404 dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
405 dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
406 dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
407 dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
408 dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
409 dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
410
411 dst+= stride;
412 }
cf5ec61d
MN
413}
414
4e4dcbc5 415/**
cc9b0679
MN
416 * Experimental Filter 1 (Horizontal)
417 * will not damage linear gradients
418 * Flat blocks should look like they where passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
419 * can only smooth blocks at the expected locations (it cant smooth them if they did move)
420 * MMX2 version does correct clipping C version doesnt
421 * not identical with the vertical one
4e4dcbc5 422 */
cc9b0679
MN
423static inline void horizX1Filter(uint8_t *src, int stride, int QP)
424{
bb270c08
DB
425 int y;
426 static uint64_t *lut= NULL;
427 if(lut==NULL)
428 {
429 int i;
6ab6c7c3 430 lut = av_malloc(256*8);
bb270c08
DB
431 for(i=0; i<256; i++)
432 {
433 int v= i < 128 ? 2*i : 2*(i-256);
117e45b0 434/*
cc9b0679 435//Simulate 112242211 9-Tap filter
bb270c08
DB
436 uint64_t a= (v/16) & 0xFF;
437 uint64_t b= (v/8) & 0xFF;
438 uint64_t c= (v/4) & 0xFF;
439 uint64_t d= (3*v/8) & 0xFF;
117e45b0 440*/
cc9b0679 441//Simulate piecewise linear interpolation
bb270c08
DB
442 uint64_t a= (v/16) & 0xFF;
443 uint64_t b= (v*3/16) & 0xFF;
444 uint64_t c= (v*5/16) & 0xFF;
445 uint64_t d= (7*v/16) & 0xFF;
446 uint64_t A= (0x100 - a)&0xFF;
447 uint64_t B= (0x100 - b)&0xFF;
448 uint64_t C= (0x100 - c)&0xFF;
449 uint64_t D= (0x100 - c)&0xFF;
450
451 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
452 (D<<24) | (C<<16) | (B<<8) | (A);
453 //lut[i] = (v<<32) | (v<<24);
454 }
455 }
456
457 for(y=0; y<BLOCK_SIZE; y++)
458 {
459 int a= src[1] - src[2];
460 int b= src[3] - src[4];
461 int c= src[5] - src[6];
462
463 int d= MAX(ABS(b) - (ABS(a) + ABS(c))/2, 0);
464
465 if(d < QP)
466 {
467 int v = d * SIGN(-b);
468
469 src[1] +=v/8;
470 src[2] +=v/4;
471 src[3] +=3*v/8;
472 src[4] -=3*v/8;
473 src[5] -=v/4;
474 src[6] -=v/8;
475
476 }
477 src+=stride;
478 }
cc9b0679
MN
479}
480
12eebd26
MN
481/**
482 * accurate deblock filter
483 */
792a5a7c 484static always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
bb270c08
DB
485 int y;
486 const int QP= c->QP;
487 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
488 const int dcThreshold= dcOffset*2 + 1;
12eebd26 489//START_TIMER
bb270c08
DB
490 src+= step*4; // src points to begin of the 8x8 Block
491 for(y=0; y<8; y++){
492 int numEq= 0;
493
494 if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
495 if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
496 if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
497 if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
498 if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
499 if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
500 if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
501 if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
502 if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
503 if(numEq > c->ppMode.flatnessThreshold){
504 int min, max, x;
505
506 if(src[0] > src[step]){
507 max= src[0];
508 min= src[step];
509 }else{
510 max= src[step];
511 min= src[0];
512 }
513 for(x=2; x<8; x+=2){
514 if(src[x*step] > src[(x+1)*step]){
515 if(src[x *step] > max) max= src[ x *step];
516 if(src[(x+1)*step] < min) min= src[(x+1)*step];
517 }else{
518 if(src[(x+1)*step] > max) max= src[(x+1)*step];
519 if(src[ x *step] < min) min= src[ x *step];
520 }
521 }
522 if(max-min < 2*QP){
523 const int first= ABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
524 const int last= ABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
525
526 int sums[10];
527 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
528 sums[1] = sums[0] - first + src[3*step];
529 sums[2] = sums[1] - first + src[4*step];
530 sums[3] = sums[2] - first + src[5*step];
531 sums[4] = sums[3] - first + src[6*step];
532 sums[5] = sums[4] - src[0*step] + src[7*step];
533 sums[6] = sums[5] - src[1*step] + last;
534 sums[7] = sums[6] - src[2*step] + last;
535 sums[8] = sums[7] - src[3*step] + last;
536 sums[9] = sums[8] - src[4*step] + last;
537
538 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
539 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
540 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
541 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
542 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
543 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
544 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
545 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
546 }
547 }else{
548 const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
549
550 if(ABS(middleEnergy) < 8*QP)
551 {
552 const int q=(src[3*step] - src[4*step])/2;
553 const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
554 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
555
556 int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) );
557 d= MAX(d, 0);
558
559 d= (5*d + 32) >> 6;
560 d*= SIGN(-middleEnergy);
561
562 if(q>0)
563 {
564 d= d<0 ? 0 : d;
565 d= d>q ? q : d;
566 }
567 else
568 {
569 d= d>0 ? 0 : d;
570 d= d<q ? q : d;
571 }
572
573 src[3*step]-= d;
574 src[4*step]+= d;
575 }
576 }
577
578 src += stride;
579 }
12eebd26
MN
580/*if(step==16){
581 STOP_TIMER("step16")
582}else{
583 STOP_TIMER("stepX")
584}*/
585}
cc9b0679 586
e89952aa 587//Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
cc9b0679 588//Plain C versions
e89952aa
MN
589#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
590#define COMPILE_C
591#endif
592
b0ac780a
MN
593#ifdef ARCH_POWERPC
594#ifdef HAVE_ALTIVEC
595#define COMPILE_ALTIVEC
b0ac780a
MN
596#endif //HAVE_ALTIVEC
597#endif //ARCH_POWERPC
598
053dea12 599#if defined(ARCH_X86) || defined(ARCH_X86_64)
e89952aa
MN
600
601#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
602#define COMPILE_MMX
603#endif
604
605#if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
606#define COMPILE_MMX2
607#endif
608
609#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
610#define COMPILE_3DNOW
611#endif
9c9e467d 612#endif //ARCH_X86
e89952aa
MN
613
614#undef HAVE_MMX
615#undef HAVE_MMX2
616#undef HAVE_3DNOW
b0ac780a 617#undef HAVE_ALTIVEC
e89952aa
MN
618
619#ifdef COMPILE_C
cc9b0679
MN
620#undef HAVE_MMX
621#undef HAVE_MMX2
622#undef HAVE_3DNOW
cc9b0679
MN
623#define RENAME(a) a ## _C
624#include "postprocess_template.c"
e89952aa 625#endif
cc9b0679 626
b0ac780a
MN
627#ifdef ARCH_POWERPC
628#ifdef COMPILE_ALTIVEC
629#undef RENAME
630#define HAVE_ALTIVEC
631#define RENAME(a) a ## _altivec
632#include "postprocess_altivec_template.c"
633#include "postprocess_template.c"
634#endif
635#endif //ARCH_POWERPC
636
cc9b0679 637//MMX versions
e89952aa 638#ifdef COMPILE_MMX
cc9b0679
MN
639#undef RENAME
640#define HAVE_MMX
641#undef HAVE_MMX2
642#undef HAVE_3DNOW
cc9b0679
MN
643#define RENAME(a) a ## _MMX
644#include "postprocess_template.c"
e89952aa 645#endif
cc9b0679
MN
646
647//MMX2 versions
e89952aa 648#ifdef COMPILE_MMX2
cc9b0679
MN
649#undef RENAME
650#define HAVE_MMX
651#define HAVE_MMX2
652#undef HAVE_3DNOW
cc9b0679
MN
653#define RENAME(a) a ## _MMX2
654#include "postprocess_template.c"
e89952aa 655#endif
cc9b0679
MN
656
657//3DNOW versions
e89952aa 658#ifdef COMPILE_3DNOW
cc9b0679
MN
659#undef RENAME
660#define HAVE_MMX
661#undef HAVE_MMX2
662#define HAVE_3DNOW
cc9b0679
MN
663#define RENAME(a) a ## _3DNow
664#include "postprocess_template.c"
e89952aa 665#endif
cc9b0679
MN
666
667// Note: the HAVE_xyz macros are left in an inconsistent state after this point, so don't rely on them
668
669static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
bb270c08 670 QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc)
cc9b0679 671{
bb270c08
DB
672 PPContext *c= (PPContext *)vc;
673 PPMode *ppMode= (PPMode *)vm;
674 c->ppMode= *ppMode; //FIXME
9c9e467d 675
bb270c08
DB
676 // useing ifs here as they are faster than function pointers allthough the
677 // difference wouldnt be messureable here but its much better because
678 // someone might exchange the cpu whithout restarting mplayer ;)
e89952aa 679#ifdef RUNTIME_CPUDETECT
053dea12 680#if defined(ARCH_X86) || defined(ARCH_X86_64)
bb270c08
DB
681 // ordered per speed fasterst first
682 if(c->cpuCaps & PP_CPU_CAPS_MMX2)
683 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
684 else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
685 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
686 else if(c->cpuCaps & PP_CPU_CAPS_MMX)
687 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
688 else
689 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
cc9b0679 690#else
b0ac780a
MN
691#ifdef ARCH_POWERPC
692#ifdef HAVE_ALTIVEC
71487254 693 if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
bb270c08 694 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
b0ac780a
MN
695 else
696#endif
697#endif
bb270c08 698 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
be44a4d7 699#endif
e89952aa
MN
700#else //RUNTIME_CPUDETECT
701#ifdef HAVE_MMX2
bb270c08 702 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 703#elif defined (HAVE_3DNOW)
bb270c08 704 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 705#elif defined (HAVE_MMX)
bb270c08 706 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
b0ac780a 707#elif defined (HAVE_ALTIVEC)
bb270c08 708 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 709#else
bb270c08 710 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa
MN
711#endif
712#endif //!RUNTIME_CPUDETECT
117e45b0
MN
713}
714
cc9b0679 715//static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
bb270c08 716// QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
13e00528 717
/* -pp Command line Help
 *
 * The filter names listed here must match the short / long names in the
 * filters[] and replaceTable[] tables, as the parser compares them with a
 * case sensitive strcmp().
 */
char *pp_help=
"Available postprocessing filters:\n"
"Filters Options\n"
"short long name short long option Description\n"
"* * a autoq CPU power dependent enabler\n"
" c chrom chrominance filtering enabled\n"
" y nochrom chrominance filtering disabled\n"
" n noluma luma filtering disabled\n"
"hb hdeblock (2 threshold) horizontal deblocking filter\n"
" 1. difference factor: default=32, higher -> more deblocking\n"
" 2. flatness threshold: default=39, lower -> more deblocking\n"
" the h & v deblocking filters share these\n"
" so you can't set different thresholds for h / v\n"
"vb vdeblock (2 threshold) vertical deblocking filter\n"
"ha ahdeblock (2 threshold) horizontal deblocking filter\n"
"va avdeblock (2 threshold) vertical deblocking filter\n"
"h1 x1hdeblock experimental h deblock filter 1\n"
"v1 x1vdeblock experimental v deblock filter 1\n"
"dr dering deringing filter\n"
"al autolevels automatic brightness / contrast\n"
" f fullyrange stretch luminance to (0..255)\n"
"lb linblenddeint linear blend deinterlacer\n"
"li linipoldeint linear interpolating deinterlace\n"
"ci cubicipoldeint cubic interpolating deinterlacer\n"
"md mediandeint median deinterlacer\n"
"fd ffmpegdeint ffmpeg deinterlacer\n"
"l5 lowpass5 FIR lowpass deinterlacer\n"
"de default hb:a,vb:a,dr:a\n"
"fa fast h1:a,v1:a,dr:a\n"
"ac ha:a:128:7,va:a,dr:a\n"
"tn tmpnoise (3 threshold) temporal noise reducer\n"
" 1. <= 2. <= 3. larger -> stronger filtering\n"
"fq forcequant <quantizer> force quantizer\n"
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"\n"
;
911879d1 763
c41d972d 764pp_mode_t *pp_get_mode_by_name_and_quality(char *name, int quality)
911879d1 765{
bb270c08
DB
766 char temp[GET_MODE_BUFFER_SIZE];
767 char *p= temp;
7b49ce2e
SH
768 const char *filterDelimiters= ",/";
769 const char *optionDelimiters= ":";
bb270c08
DB
770 struct PPMode *ppMode;
771 char *filterToken;
772
6ab6c7c3 773 ppMode= av_malloc(sizeof(PPMode));
bb270c08
DB
774
775 ppMode->lumMode= 0;
776 ppMode->chromMode= 0;
777 ppMode->maxTmpNoise[0]= 700;
778 ppMode->maxTmpNoise[1]= 1500;
779 ppMode->maxTmpNoise[2]= 3000;
780 ppMode->maxAllowedY= 234;
781 ppMode->minAllowedY= 16;
782 ppMode->baseDcDiff= 256/8;
783 ppMode->flatnessThreshold= 56-16-1;
784 ppMode->maxClippedThreshold= 0.01;
785 ppMode->error=0;
786
787 strncpy(temp, name, GET_MODE_BUFFER_SIZE);
788
789 if(verbose>1) printf("pp: %s\n", name);
790
791 for(;;){
792 char *filterName;
793 int q= 1000000; //PP_QUALITY_MAX;
794 int chrom=-1;
795 int luma=-1;
796 char *option;
797 char *options[OPTIONS_ARRAY_SIZE];
798 int i;
799 int filterNameOk=0;
800 int numOfUnknownOptions=0;
801 int enable=1; //does the user want us to enabled or disabled the filter
802
803 filterToken= strtok(p, filterDelimiters);
804 if(filterToken == NULL) break;
805 p+= strlen(filterToken) + 1; // p points to next filterToken
806 filterName= strtok(filterToken, optionDelimiters);
807 if(verbose>1) printf("pp: %s::%s\n", filterToken, filterName);
808
809 if(*filterName == '-')
810 {
811 enable=0;
812 filterName++;
813 }
814
815 for(;;){ //for all options
816 option= strtok(NULL, optionDelimiters);
817 if(option == NULL) break;
818
819 if(verbose>1) printf("pp: option: %s\n", option);
820 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
821 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
822 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
823 else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
824 else
825 {
826 options[numOfUnknownOptions] = option;
827 numOfUnknownOptions++;
828 }
829 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
830 }
831 options[numOfUnknownOptions] = NULL;
832
833 /* replace stuff from the replace Table */
834 for(i=0; replaceTable[2*i]!=NULL; i++)
835 {
836 if(!strcmp(replaceTable[2*i], filterName))
837 {
838 int newlen= strlen(replaceTable[2*i + 1]);
839 int plen;
840 int spaceLeft;
841
842 if(p==NULL) p= temp, *p=0; //last filter
843 else p--, *p=','; //not last filter
844
845 plen= strlen(p);
846 spaceLeft= p - temp + plen;
847 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE)
848 {
849 ppMode->error++;
850 break;
851 }
852 memmove(p + newlen, p, plen+1);
853 memcpy(p, replaceTable[2*i + 1], newlen);
854 filterNameOk=1;
855 }
856 }
857
858 for(i=0; filters[i].shortName!=NULL; i++)
859 {
860// printf("Compareing %s, %s, %s\n", filters[i].shortName,filters[i].longName, filterName);
861 if( !strcmp(filters[i].longName, filterName)
862 || !strcmp(filters[i].shortName, filterName))
863 {
864 ppMode->lumMode &= ~filters[i].mask;
865 ppMode->chromMode &= ~filters[i].mask;
866
867 filterNameOk=1;
868 if(!enable) break; // user wants to disable it
869
870 if(q >= filters[i].minLumQuality && luma)
871 ppMode->lumMode|= filters[i].mask;
872 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
873 if(q >= filters[i].minChromQuality)
874 ppMode->chromMode|= filters[i].mask;
875
876 if(filters[i].mask == LEVEL_FIX)
877 {
878 int o;
879 ppMode->minAllowedY= 16;
880 ppMode->maxAllowedY= 234;
881 for(o=0; options[o]!=NULL; o++)
882 {
883 if( !strcmp(options[o],"fullyrange")
884 ||!strcmp(options[o],"f"))
885 {
886 ppMode->minAllowedY= 0;
887 ppMode->maxAllowedY= 255;
888 numOfUnknownOptions--;
889 }
890 }
891 }
892 else if(filters[i].mask == TEMP_NOISE_FILTER)
893 {
894 int o;
895 int numOfNoises=0;
896
897 for(o=0; options[o]!=NULL; o++)
898 {
899 char *tail;
900 ppMode->maxTmpNoise[numOfNoises]=
901 strtol(options[o], &tail, 0);
902 if(tail!=options[o])
903 {
904 numOfNoises++;
905 numOfUnknownOptions--;
906 if(numOfNoises >= 3) break;
907 }
908 }
909 }
910 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK
911 || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK)
912 {
913 int o;
914
915 for(o=0; options[o]!=NULL && o<2; o++)
916 {
917 char *tail;
918 int val= strtol(options[o], &tail, 0);
919 if(tail==options[o]) break;
920
921 numOfUnknownOptions--;
922 if(o==0) ppMode->baseDcDiff= val;
923 else ppMode->flatnessThreshold= val;
924 }
925 }
926 else if(filters[i].mask == FORCE_QUANT)
927 {
928 int o;
929 ppMode->forcedQuant= 15;
930
931 for(o=0; options[o]!=NULL && o<1; o++)
932 {
933 char *tail;
934 int val= strtol(options[o], &tail, 0);
935 if(tail==options[o]) break;
936
937 numOfUnknownOptions--;
938 ppMode->forcedQuant= val;
939 }
940 }
941 }
942 }
943 if(!filterNameOk) ppMode->error++;
944 ppMode->error += numOfUnknownOptions;
945 }
946
947 if(verbose>1) printf("pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
948 if(ppMode->error)
949 {
950 fprintf(stderr, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
6ab6c7c3 951 av_free(ppMode);
bb270c08
DB
952 return NULL;
953 }
954 return ppMode;
911879d1
MN
955}
956
c41d972d 957void pp_free_mode(pp_mode_t *mode){
6ab6c7c3 958 av_free(mode);
c41d972d
MN
959}
960
/**
 * Replace the buffer *p by a new one of the given size obtained from
 * av_mallocz(). The old buffer is freed first and its content is not kept.
 *
 * @param p         address of the buffer pointer; receives the av_mallocz()
 *                  result without any check
 * @param alignment not used by this function
 * @param size      size handed to av_mallocz()
 */
static void reallocAlign(void **p, int alignment, int size)
{
    void *fresh;

    av_free(*p);
    fresh= av_mallocz(size);
    *p= fresh;
}
965
0426af31 966static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
bb270c08
DB
967 int mbWidth = (width+15)>>4;
968 int mbHeight= (height+15)>>4;
969 int i;
970
971 c->stride= stride;
972 c->qpStride= qpStride;
973
974 reallocAlign((void **)&c->tempDst, 8, stride*24);
975 reallocAlign((void **)&c->tempSrc, 8, stride*24);
976 reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
977 reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
978 for(i=0; i<256; i++)
979 c->yHistogram[i]= width*height/64*15/256;
980
981 for(i=0; i<3; i++)
982 {
983 //Note:the +17*1024 is just there so i dont have to worry about r/w over te end
984 reallocAlign((void **)&c->tempBlured[i], 8, stride*mbHeight*16 + 17*1024);
985 reallocAlign((void **)&c->tempBluredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
986 }
987
988 reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
989 reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
990 reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
991 reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
88c0bc7e
MN
992}
993
4cfbf61b 994static void global_init(void){
bb270c08
DB
995 int i;
996 memset(clip_table, 0, 256);
997 for(i=256; i<512; i++)
998 clip_table[i]= i;
999 memset(clip_table+512, 0, 256);
134eb1e5
MN
1000}
1001
88c0bc7e 1002pp_context_t *pp_get_context(int width, int height, int cpuCaps){
6ab6c7c3 1003 PPContext *c= av_malloc(sizeof(PPContext));
bb270c08
DB
1004 int stride= (width+15)&(~15); //assumed / will realloc if needed
1005 int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
115329f1 1006
bb270c08 1007 global_init();
134eb1e5 1008
bb270c08
DB
1009 memset(c, 0, sizeof(PPContext));
1010 c->cpuCaps= cpuCaps;
1011 if(cpuCaps&PP_FORMAT){
1012 c->hChromaSubSample= cpuCaps&0x3;
1013 c->vChromaSubSample= (cpuCaps>>4)&0x3;
1014 }else{
1015 c->hChromaSubSample= 1;
1016 c->vChromaSubSample= 1;
1017 }
88c0bc7e 1018
bb270c08 1019 reallocBuffers(c, width, height, stride, qpStride);
115329f1 1020
bb270c08 1021 c->frameNum=-1;
45b4f285 1022
bb270c08 1023 return c;
45b4f285
MN
1024}
1025
9cb54f43 1026void pp_free_context(void *vc){
bb270c08
DB
1027 PPContext *c = (PPContext*)vc;
1028 int i;
115329f1 1029
6ab6c7c3
LB
1030 for(i=0; i<3; i++) av_free(c->tempBlured[i]);
1031 for(i=0; i<3; i++) av_free(c->tempBluredPast[i]);
115329f1 1032
6ab6c7c3
LB
1033 av_free(c->tempBlocks);
1034 av_free(c->yHistogram);
1035 av_free(c->tempDst);
1036 av_free(c->tempSrc);
1037 av_free(c->deintTemp);
1038 av_free(c->stdQPTable);
1039 av_free(c->nonBQPTable);
1040 av_free(c->forcedQPTable);
115329f1 1041
bb270c08 1042 memset(c, 0, sizeof(PPContext));
88c0bc7e 1043
6ab6c7c3 1044 av_free(c);
9c9e467d
MN
1045}
1046
9cb54f43 1047void pp_postprocess(uint8_t * src[3], int srcStride[3],
9c9e467d 1048 uint8_t * dst[3], int dstStride[3],
ec487e5d 1049 int width, int height,
9c9e467d 1050 QP_STORE_T *QP_store, int QPStride,
bb270c08 1051 pp_mode_t *vm, void *vc, int pict_type)
911879d1 1052{
bb270c08
DB
1053 int mbWidth = (width+15)>>4;
1054 int mbHeight= (height+15)>>4;
1055 PPMode *mode = (PPMode*)vm;
1056 PPContext *c = (PPContext*)vc;
1057 int minStride= MAX(ABS(srcStride[0]), ABS(dstStride[0]));
1058 int absQPStride = ABS(QPStride);
1059
1060 // c->stride and c->QPStride are always positive
1061 if(c->stride < minStride || c->qpStride < absQPStride)
1062 reallocBuffers(c, width, height,
1063 MAX(minStride, c->stride),
1064 MAX(c->qpStride, absQPStride));
1065
1066 if(QP_store==NULL || (mode->lumMode & FORCE_QUANT))
1067 {
1068 int i;
1069 QP_store= c->forcedQPTable;
1070 absQPStride = QPStride = 0;
1071 if(mode->lumMode & FORCE_QUANT)
1072 for(i=0; i<mbWidth; i++) QP_store[i]= mode->forcedQuant;
1073 else
1074 for(i=0; i<mbWidth; i++) QP_store[i]= 1;
1075 }
0426af31
MN
1076//printf("pict_type:%d\n", pict_type);
1077
bb270c08
DB
1078 if(pict_type & PP_PICT_TYPE_QP2){
1079 int i;
1080 const int count= mbHeight * absQPStride;
1081 for(i=0; i<(count>>2); i++){
1082 ((uint32_t*)c->stdQPTable)[i] = (((uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1083 }
1084 for(i<<=2; i<count; i++){
1085 c->stdQPTable[i] = QP_store[i]>>1;
1086 }
0426af31 1087 QP_store= c->stdQPTable;
bb270c08
DB
1088 QPStride= absQPStride;
1089 }
0426af31 1090
ec487e5d
MN
1091if(0){
1092int x,y;
1093for(y=0; y<mbHeight; y++){
bb270c08
DB
1094 for(x=0; x<mbWidth; x++){
1095 printf("%2d ", QP_store[x + y*QPStride]);
1096 }
1097 printf("\n");
ec487e5d 1098}
bb270c08 1099 printf("\n");
ec487e5d 1100}
51e19dcc 1101
bb270c08
DB
1102 if((pict_type&7)!=3)
1103 {
1104 if (QPStride >= 0) {
1105 int i;
1106 const int count= mbHeight * QPStride;
1107 for(i=0; i<(count>>2); i++){
1108 ((uint32_t*)c->nonBQPTable)[i] = ((uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1109 }
1110 for(i<<=2; i<count; i++){
1111 c->nonBQPTable[i] = QP_store[i] & 0x3F;
1112 }
1113 } else {
1114 int i,j;
1115 for(i=0; i<mbHeight; i++) {
1116 for(j=0; j<absQPStride; j++) {
1117 c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1118 }
1119 }
1120 }
1121 }
1122
1123 if(verbose>2)
1124 {
1125 printf("using npp filters 0x%X/0x%X\n", mode->lumMode, mode->chromMode);
1126 }
1127
1128 postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1129 width, height, QP_store, QPStride, 0, mode, c);
1130
1131 width = (width )>>c->hChromaSubSample;
1132 height = (height)>>c->vChromaSubSample;
1133
1134 if(mode->chromMode)
1135 {
1136 postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1137 width, height, QP_store, QPStride, 1, mode, c);
1138 postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1139 width, height, QP_store, QPStride, 2, mode, c);
1140 }
1141 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2])
1142 {
1143 linecpy(dst[1], src[1], height, srcStride[1]);
1144 linecpy(dst[2], src[2], height, srcStride[2]);
1145 }
1146 else
1147 {
1148 int y;
1149 for(y=0; y<height; y++)
1150 {
1151 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1152 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
1153 }
1154 }
911879d1
MN
1155}
1156