Move sign macro to libavutil.
[libav.git] / libpostproc / postprocess.c
CommitLineData
3057fa66 1/*
b78e7197
DB
2 * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
3 *
4 * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
5 *
ef85972b 6 * This file is part of FFmpeg.
b78e7197
DB
7 *
8 * FFmpeg is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
3057fa66 22
b304569a
MN
23/**
24 * @file postprocess.c
25 * postprocessing.
26 */
115329f1 27
3057fa66 28/*
bb270c08
DB
29 C MMX MMX2 3DNow AltiVec
30isVertDC Ec Ec Ec
31isVertMinMaxOk Ec Ec Ec
32doVertLowPass E e e Ec
33doVertDefFilter Ec Ec e e Ec
34isHorizDC Ec Ec Ec
35isHorizMinMaxOk a E Ec
36doHorizLowPass E e e Ec
37doHorizDefFilter Ec Ec e e Ec
38do_a_deblock Ec E Ec E
39deRing E e e* Ecp
40Vertical RKAlgo1 E a a
41Horizontal RKAlgo1 a a
42Vertical X1# a E E
43Horizontal X1# a E E
44LinIpolDeinterlace e E E*
45CubicIpolDeinterlace a e e*
46LinBlendDeinterlace e E E*
47MedianDeinterlace# E Ec Ec
48TempDeNoiser# E e e Ec
d5a1a995 49
117e45b0
MN
* I don't have a 3DNow! CPU, so that code is untested; nobody has reported that it doesn't work, so it is assumed to work
# more or less self-invented filters, so exactness isn't very meaningful
3057fa66 52E = Exact implementation
e = almost exact implementation (slightly different rounding, ...)
3057fa66
A
54a = alternative / approximate impl
55c = checked against the other implementations (-vo md5)
b0ac780a 56p = partially optimized, still some work to do
3057fa66
A
57*/
58
59/*
60TODO:
3057fa66 61reduce the time wasted on the mem transfer
3057fa66 62unroll stuff if instructions depend too much on the prior one
3057fa66 63move YScale thing to the end instead of fixing QP
13e00528 64write a faster and higher quality deblocking filter :)
d5a1a995 65make the mainloop more flexible (variable number of blocks at once
bb270c08 66 (the if/else stuff per block is slowing things down)
9f45d04d 67compare the quality & speed of all filters
9f45d04d 68split this huge file
8405b3fd 69optimize c versions
117e45b0 70try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
3057fa66 71...
13e00528
A
72*/
73
36b1b0bc 74//Changelog: use the Subversion log
3057fa66 75
9858f773 76#include "config.h"
6ab6c7c3 77#include "avutil.h"
3057fa66
A
78#include <inttypes.h>
79#include <stdio.h>
d5a1a995 80#include <stdlib.h>
911879d1 81#include <string.h>
dda87e9f
PL
82#ifdef HAVE_MALLOC_H
83#include <malloc.h>
84#endif
3057fa66 85//#undef HAVE_MMX2
13e00528 86//#define HAVE_3DNOW
3057fa66 87//#undef HAVE_MMX
cc9b0679 88//#undef ARCH_X86
7f16f6e6 89//#define DEBUG_BRIGHTNESS
bba9b16c 90#ifdef USE_FASTMEMCPY
f4bd289a 91#include "libvo/fastmemcpy.h"
70d4f2da 92#endif
13e00528 93#include "postprocess.h"
c41d972d 94#include "postprocess_internal.h"
bba9b16c
MN
95
96#include "mangle.h" //FIXME should be supressed
3057fa66 97
a7b2871c
RD
98#ifdef HAVE_ALTIVEC_H
99#include <altivec.h>
100#endif
101
e939e1c3
A
102#define MIN(a,b) ((a) > (b) ? (b) : (a))
103#define MAX(a,b) ((a) < (b) ? (b) : (a))
e939e1c3 104
911879d1
MN
105#define GET_MODE_BUFFER_SIZE 500
106#define OPTIONS_ARRAY_SIZE 10
9c9e467d
MN
107#define BLOCK_SIZE 8
108#define TEMP_STRIDE 8
109//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
911879d1 110
053dea12 111#if defined(ARCH_X86) || defined(ARCH_X86_64)
bb270c08
DB
112static uint64_t __attribute__((aligned(8))) attribute_used w05= 0x0005000500050005LL;
113static uint64_t __attribute__((aligned(8))) attribute_used w04= 0x0004000400040004LL;
114static uint64_t __attribute__((aligned(8))) attribute_used w20= 0x0020002000200020LL;
115static uint64_t __attribute__((aligned(8))) attribute_used b00= 0x0000000000000000LL;
116static uint64_t __attribute__((aligned(8))) attribute_used b01= 0x0101010101010101LL;
117static uint64_t __attribute__((aligned(8))) attribute_used b02= 0x0202020202020202LL;
118static uint64_t __attribute__((aligned(8))) attribute_used b08= 0x0808080808080808LL;
119static uint64_t __attribute__((aligned(8))) attribute_used b80= 0x8080808080808080LL;
b28daef8 120#endif
3057fa66 121
134eb1e5
MN
122static uint8_t clip_table[3*256];
123static uint8_t * const clip_tab= clip_table + 256;
124
4df8ca9d 125static const int verbose= 0;
45b4f285 126
3f1d4e96 127static const int attribute_used deringThreshold= 20;
3057fa66 128
9c9e467d 129
911879d1
MN
130static struct PPFilter filters[]=
131{
bb270c08
DB
132 {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK},
133 {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK},
134/* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER},
135 {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/
136 {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER},
137 {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER},
138 {"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK},
139 {"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK},
140 {"dr", "dering", 1, 5, 6, DERING},
141 {"al", "autolevels", 0, 1, 2, LEVEL_FIX},
142 {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
143 {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
144 {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
145 {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER},
146 {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER},
147 {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER},
148 {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER},
149 {"fq", "forcequant", 1, 0, 0, FORCE_QUANT},
150 {NULL, NULL,0,0,0,0} //End Marker
911879d1
MN
151};
152
/**
 * Alias table used by the mode parser: entries come in pairs
 * (alias, replacement filter string) and the list ends with a single NULL.
 */
static const char *replaceTable[]=
{
    "default",
        "hdeblock:a,vdeblock:a,dering:a",
    "de",
        "hdeblock:a,vdeblock:a,dering:a",
    "fast",
        "x1hdeblock:a,x1vdeblock:a,dering:a",
    "fa",
        "x1hdeblock:a,x1vdeblock:a,dering:a",
    "ac",
        "ha:a:128:7,va:a,dering:a",
    NULL  /* end marker */
};
162
3057fa66 163
053dea12 164#if defined(ARCH_X86) || defined(ARCH_X86_64)
3057fa66
A
/** Issue the x86 "prefetchnta" instruction for the address p. */
static inline void prefetchnta(void *p)
{
    __asm__ __volatile__(
        "prefetchnta (%0)\n\t"
        : /* no outputs */
        : "r" (p)
    );
}
171
/** Issue the x86 "prefetcht0" instruction for the address p. */
static inline void prefetcht0(void *p)
{
    __asm__ __volatile__(
        "prefetcht0 (%0)\n\t"
        : /* no outputs */
        : "r" (p)
    );
}
178
/** Issue the x86 "prefetcht1" instruction for the address p. */
static inline void prefetcht1(void *p)
{
    __asm__ __volatile__(
        "prefetcht1 (%0)\n\t"
        : /* no outputs */
        : "r" (p)
    );
}
185
/** Issue the x86 "prefetcht2" instruction for the address p. */
static inline void prefetcht2(void *p)
{
    __asm__ __volatile__(
        "prefetcht2 (%0)\n\t"
        : /* no outputs */
        : "r" (p)
    );
}
9a722af7 192#endif
3057fa66 193
// The horizontal functions exist only in C because the MMX code is faster using the vertical filters plus transposing
3057fa66 195
cf5ec61d
MN
196/**
197 * Check if the given 8x8 Block is mostly "flat"
198 */
b0ac780a 199static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
cf5ec61d 200{
bb270c08
DB
201 int numEq= 0;
202 int y;
203 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
204 const int dcThreshold= dcOffset*2 + 1;
205
206 for(y=0; y<BLOCK_SIZE; y++)
207 {
208 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
209 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
210 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
211 if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
212 if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
213 if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
214 if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
215 src+= stride;
216 }
217 return numEq > c->ppMode.flatnessThreshold;
9c9e467d
MN
218}
219
220/**
221 * Check if the middle 8x8 Block in the given 8x16 block is flat
222 */
223static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){
bb270c08
DB
224 int numEq= 0;
225 int y;
226 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
227 const int dcThreshold= dcOffset*2 + 1;
228
229 src+= stride*4; // src points to begin of the 8x8 Block
230 for(y=0; y<BLOCK_SIZE-1; y++)
231 {
232 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
233 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
234 if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
235 if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
236 if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
237 if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
238 if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
239 if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
240 src+= stride;
241 }
242 return numEq > c->ppMode.flatnessThreshold;
cf5ec61d
MN
243}
244
/**
 * Cheap horizontal range check on an 8x8 block.
 *
 * One pair of pixels is sampled per row, cycling through the column pairs
 * (0,5), (2,7), (4,1), (6,3); the check fails as soon as a sampled
 * difference lies outside -2*QP .. +2*QP.
 *
 * @param src    top-left pixel of the block
 * @param stride distance in bytes between two rows
 * @param QP     quantizer used to scale the allowed difference
 * @return 1 if all sampled differences are within range, 0 otherwise
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{
    static const uint8_t columns[4][2] = { {0, 5}, {2, 7}, {4, 1}, {6, 3} };
    const unsigned range = 4*QP;
    int row;

    for (row = 0; row < 8; row++, src += stride) {
        const uint8_t *pair = columns[row & 3];

        /* unsigned compare rejects both diff < -2*QP and diff > 2*QP */
        if ((unsigned)(src[pair[0]] - src[pair[1]] + 2*QP) > range)
            return 0;
    }
    return 1;
}
cf5ec61d 267
cb482d25
MN
268static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
269{
270#if 1
271#if 1
bb270c08
DB
272 int x;
273 src+= stride*4;
274 for(x=0; x<BLOCK_SIZE; x+=4)
275 {
276 if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0;
277 if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
278 if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
279 if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
280 }
cb482d25 281#else
bb270c08
DB
282 int x;
283 src+= stride*3;
284 for(x=0; x<BLOCK_SIZE; x++)
285 {
286 if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
287 }
cb482d25 288#endif
bb270c08 289 return 1;
cb482d25 290#else
bb270c08
DB
291 int x;
292 src+= stride*4;
293 for(x=0; x<BLOCK_SIZE; x++)
294 {
295 int min=255;
296 int max=0;
297 int y;
298 for(y=0; y<8; y++){
299 int v= src[x + y*stride];
300 if(v>max) max=v;
301 if(v<min) min=v;
302 }
303 if(max-min > 2*QP) return 0;
304 }
305 return 1;
cb482d25
MN
306#endif
307}
308
b0ac780a 309static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c){
bb270c08
DB
310 if( isHorizDC_C(src, stride, c) ){
311 if( isHorizMinMaxOk_C(src, stride, c->QP) )
312 return 1;
313 else
314 return 0;
315 }else{
316 return 2;
317 }
b0ac780a
MN
318}
319
cb482d25 320static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c){
bb270c08
DB
321 if( isVertDC_C(src, stride, c) ){
322 if( isVertMinMaxOk_C(src, stride, c->QP) )
323 return 1;
324 else
325 return 0;
326 }else{
327 return 2;
328 }
cf5ec61d
MN
329}
330
b0ac780a 331static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
cf5ec61d 332{
bb270c08
DB
333 int y;
334 for(y=0; y<BLOCK_SIZE; y++)
335 {
336 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
337
338 if(ABS(middleEnergy) < 8*c->QP)
339 {
340 const int q=(dst[3] - dst[4])/2;
341 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
342 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
343
344 int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) );
345 d= MAX(d, 0);
346
347 d= (5*d + 32) >> 6;
348 d*= SIGN(-middleEnergy);
349
350 if(q>0)
351 {
352 d= d<0 ? 0 : d;
353 d= d>q ? q : d;
354 }
355 else
356 {
357 d= d>0 ? 0 : d;
358 d= d<q ? q : d;
359 }
360
361 dst[3]-= d;
362 dst[4]+= d;
363 }
364 dst+= stride;
365 }
cf5ec61d
MN
366}
367
368/**
369 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
370 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
371 */
b0ac780a 372static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
cf5ec61d 373{
bb270c08
DB
374 int y;
375 for(y=0; y<BLOCK_SIZE; y++)
376 {
377 const int first= ABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
378 const int last= ABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
379
380 int sums[10];
381 sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
382 sums[1] = sums[0] - first + dst[3];
383 sums[2] = sums[1] - first + dst[4];
384 sums[3] = sums[2] - first + dst[5];
385 sums[4] = sums[3] - first + dst[6];
386 sums[5] = sums[4] - dst[0] + dst[7];
387 sums[6] = sums[5] - dst[1] + last;
388 sums[7] = sums[6] - dst[2] + last;
389 sums[8] = sums[7] - dst[3] + last;
390 sums[9] = sums[8] - dst[4] + last;
391
392 dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
393 dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
394 dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
395 dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
396 dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
397 dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
398 dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
399 dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
400
401 dst+= stride;
402 }
cf5ec61d
MN
403}
404
4e4dcbc5 405/**
cc9b0679
MN
406 * Experimental Filter 1 (Horizontal)
407 * will not damage linear gradients
408 * Flat blocks should look like they where passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
409 * can only smooth blocks at the expected locations (it cant smooth them if they did move)
410 * MMX2 version does correct clipping C version doesnt
411 * not identical with the vertical one
4e4dcbc5 412 */
cc9b0679
MN
413static inline void horizX1Filter(uint8_t *src, int stride, int QP)
414{
bb270c08
DB
415 int y;
416 static uint64_t *lut= NULL;
417 if(lut==NULL)
418 {
419 int i;
6ab6c7c3 420 lut = av_malloc(256*8);
bb270c08
DB
421 for(i=0; i<256; i++)
422 {
423 int v= i < 128 ? 2*i : 2*(i-256);
117e45b0 424/*
cc9b0679 425//Simulate 112242211 9-Tap filter
bb270c08
DB
426 uint64_t a= (v/16) & 0xFF;
427 uint64_t b= (v/8) & 0xFF;
428 uint64_t c= (v/4) & 0xFF;
429 uint64_t d= (3*v/8) & 0xFF;
117e45b0 430*/
cc9b0679 431//Simulate piecewise linear interpolation
bb270c08
DB
432 uint64_t a= (v/16) & 0xFF;
433 uint64_t b= (v*3/16) & 0xFF;
434 uint64_t c= (v*5/16) & 0xFF;
435 uint64_t d= (7*v/16) & 0xFF;
436 uint64_t A= (0x100 - a)&0xFF;
437 uint64_t B= (0x100 - b)&0xFF;
438 uint64_t C= (0x100 - c)&0xFF;
439 uint64_t D= (0x100 - c)&0xFF;
440
441 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
442 (D<<24) | (C<<16) | (B<<8) | (A);
443 //lut[i] = (v<<32) | (v<<24);
444 }
445 }
446
447 for(y=0; y<BLOCK_SIZE; y++)
448 {
449 int a= src[1] - src[2];
450 int b= src[3] - src[4];
451 int c= src[5] - src[6];
452
453 int d= MAX(ABS(b) - (ABS(a) + ABS(c))/2, 0);
454
455 if(d < QP)
456 {
457 int v = d * SIGN(-b);
458
459 src[1] +=v/8;
460 src[2] +=v/4;
461 src[3] +=3*v/8;
462 src[4] -=3*v/8;
463 src[5] -=v/4;
464 src[6] -=v/8;
465
466 }
467 src+=stride;
468 }
cc9b0679
MN
469}
470
12eebd26
MN
471/**
472 * accurate deblock filter
473 */
792a5a7c 474static always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
bb270c08
DB
475 int y;
476 const int QP= c->QP;
477 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
478 const int dcThreshold= dcOffset*2 + 1;
12eebd26 479//START_TIMER
bb270c08
DB
480 src+= step*4; // src points to begin of the 8x8 Block
481 for(y=0; y<8; y++){
482 int numEq= 0;
483
484 if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
485 if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
486 if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
487 if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
488 if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
489 if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
490 if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
491 if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
492 if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
493 if(numEq > c->ppMode.flatnessThreshold){
494 int min, max, x;
495
496 if(src[0] > src[step]){
497 max= src[0];
498 min= src[step];
499 }else{
500 max= src[step];
501 min= src[0];
502 }
503 for(x=2; x<8; x+=2){
504 if(src[x*step] > src[(x+1)*step]){
505 if(src[x *step] > max) max= src[ x *step];
506 if(src[(x+1)*step] < min) min= src[(x+1)*step];
507 }else{
508 if(src[(x+1)*step] > max) max= src[(x+1)*step];
509 if(src[ x *step] < min) min= src[ x *step];
510 }
511 }
512 if(max-min < 2*QP){
513 const int first= ABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
514 const int last= ABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
515
516 int sums[10];
517 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
518 sums[1] = sums[0] - first + src[3*step];
519 sums[2] = sums[1] - first + src[4*step];
520 sums[3] = sums[2] - first + src[5*step];
521 sums[4] = sums[3] - first + src[6*step];
522 sums[5] = sums[4] - src[0*step] + src[7*step];
523 sums[6] = sums[5] - src[1*step] + last;
524 sums[7] = sums[6] - src[2*step] + last;
525 sums[8] = sums[7] - src[3*step] + last;
526 sums[9] = sums[8] - src[4*step] + last;
527
528 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
529 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
530 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
531 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
532 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
533 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
534 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
535 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
536 }
537 }else{
538 const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
539
540 if(ABS(middleEnergy) < 8*QP)
541 {
542 const int q=(src[3*step] - src[4*step])/2;
543 const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
544 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
545
546 int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) );
547 d= MAX(d, 0);
548
549 d= (5*d + 32) >> 6;
550 d*= SIGN(-middleEnergy);
551
552 if(q>0)
553 {
554 d= d<0 ? 0 : d;
555 d= d>q ? q : d;
556 }
557 else
558 {
559 d= d>0 ? 0 : d;
560 d= d<q ? q : d;
561 }
562
563 src[3*step]-= d;
564 src[4*step]+= d;
565 }
566 }
567
568 src += stride;
569 }
12eebd26
MN
570/*if(step==16){
571 STOP_TIMER("step16")
572}else{
573 STOP_TIMER("stepX")
574}*/
575}
cc9b0679 576
//Note: there are C, MMX, MMX2 and 3DNow versions; there is no combined 3DNow+MMX2 one
cc9b0679 578//Plain C versions
e89952aa
MN
579#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
580#define COMPILE_C
581#endif
582
b0ac780a
MN
583#ifdef ARCH_POWERPC
584#ifdef HAVE_ALTIVEC
585#define COMPILE_ALTIVEC
b0ac780a
MN
586#endif //HAVE_ALTIVEC
587#endif //ARCH_POWERPC
588
053dea12 589#if defined(ARCH_X86) || defined(ARCH_X86_64)
e89952aa
MN
590
591#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
592#define COMPILE_MMX
593#endif
594
595#if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
596#define COMPILE_MMX2
597#endif
598
599#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
600#define COMPILE_3DNOW
601#endif
9c9e467d 602#endif //ARCH_X86
e89952aa
MN
603
604#undef HAVE_MMX
605#undef HAVE_MMX2
606#undef HAVE_3DNOW
b0ac780a 607#undef HAVE_ALTIVEC
e89952aa
MN
608
609#ifdef COMPILE_C
cc9b0679
MN
610#undef HAVE_MMX
611#undef HAVE_MMX2
612#undef HAVE_3DNOW
cc9b0679
MN
613#define RENAME(a) a ## _C
614#include "postprocess_template.c"
e89952aa 615#endif
cc9b0679 616
b0ac780a
MN
617#ifdef ARCH_POWERPC
618#ifdef COMPILE_ALTIVEC
619#undef RENAME
620#define HAVE_ALTIVEC
621#define RENAME(a) a ## _altivec
622#include "postprocess_altivec_template.c"
623#include "postprocess_template.c"
624#endif
625#endif //ARCH_POWERPC
626
cc9b0679 627//MMX versions
e89952aa 628#ifdef COMPILE_MMX
cc9b0679
MN
629#undef RENAME
630#define HAVE_MMX
631#undef HAVE_MMX2
632#undef HAVE_3DNOW
cc9b0679
MN
633#define RENAME(a) a ## _MMX
634#include "postprocess_template.c"
e89952aa 635#endif
cc9b0679
MN
636
637//MMX2 versions
e89952aa 638#ifdef COMPILE_MMX2
cc9b0679
MN
639#undef RENAME
640#define HAVE_MMX
641#define HAVE_MMX2
642#undef HAVE_3DNOW
cc9b0679
MN
643#define RENAME(a) a ## _MMX2
644#include "postprocess_template.c"
e89952aa 645#endif
cc9b0679
MN
646
647//3DNOW versions
e89952aa 648#ifdef COMPILE_3DNOW
cc9b0679
MN
649#undef RENAME
650#define HAVE_MMX
651#undef HAVE_MMX2
652#define HAVE_3DNOW
cc9b0679
MN
653#define RENAME(a) a ## _3DNow
654#include "postprocess_template.c"
e89952aa 655#endif
cc9b0679
MN
656
// minor note: the HAVE_xyz macros are messed up after this line, so don't use them below
658
659static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
bb270c08 660 QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc)
cc9b0679 661{
bb270c08
DB
662 PPContext *c= (PPContext *)vc;
663 PPMode *ppMode= (PPMode *)vm;
664 c->ppMode= *ppMode; //FIXME
9c9e467d 665
bb270c08
DB
666 // useing ifs here as they are faster than function pointers allthough the
667 // difference wouldnt be messureable here but its much better because
668 // someone might exchange the cpu whithout restarting mplayer ;)
e89952aa 669#ifdef RUNTIME_CPUDETECT
053dea12 670#if defined(ARCH_X86) || defined(ARCH_X86_64)
bb270c08
DB
671 // ordered per speed fasterst first
672 if(c->cpuCaps & PP_CPU_CAPS_MMX2)
673 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
674 else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
675 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
676 else if(c->cpuCaps & PP_CPU_CAPS_MMX)
677 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
678 else
679 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
cc9b0679 680#else
b0ac780a
MN
681#ifdef ARCH_POWERPC
682#ifdef HAVE_ALTIVEC
71487254 683 if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
bb270c08 684 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
b0ac780a
MN
685 else
686#endif
687#endif
bb270c08 688 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
be44a4d7 689#endif
e89952aa
MN
690#else //RUNTIME_CPUDETECT
691#ifdef HAVE_MMX2
bb270c08 692 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 693#elif defined (HAVE_3DNOW)
bb270c08 694 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 695#elif defined (HAVE_MMX)
bb270c08 696 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
b0ac780a 697#elif defined (HAVE_ALTIVEC)
bb270c08 698 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 699#else
bb270c08 700 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa
MN
701#endif
702#endif //!RUNTIME_CPUDETECT
117e45b0
MN
703}
704
cc9b0679 705//static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
bb270c08 706// QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
13e00528 707
/**
 * -pp command line help.
 * The filter names listed here must match the names in the filters[] and
 * replaceTable[] tables, because the parser compares them case-sensitively.
 */
char *pp_help=
"Available postprocessing filters:\n"
"Filters Options\n"
"short long name short long option Description\n"
"* * a autoq CPU power dependent enabler\n"
" c chrom chrominance filtering enabled\n"
" y nochrom chrominance filtering disabled\n"
" n noluma luma filtering disabled\n"
"hb hdeblock (2 threshold) horizontal deblocking filter\n"
" 1. difference factor: default=32, higher -> more deblocking\n"
" 2. flatness threshold: default=39, lower -> more deblocking\n"
" the h & v deblocking filters share these\n"
" so you can't set different thresholds for h / v\n"
"vb vdeblock (2 threshold) vertical deblocking filter\n"
"ha ahdeblock (2 threshold) horizontal deblocking filter\n"
"va avdeblock (2 threshold) vertical deblocking filter\n"
"h1 x1hdeblock experimental h deblock filter 1\n"
"v1 x1vdeblock experimental v deblock filter 1\n"
"dr dering deringing filter\n"
"al autolevels automatic brightness / contrast\n"
" f fullyrange stretch luminance to (0..255)\n"
"lb linblenddeint linear blend deinterlacer\n"
"li linipoldeint linear interpolating deinterlace\n"
"ci cubicipoldeint cubic interpolating deinterlacer\n"
"md mediandeint median deinterlacer\n"
"fd ffmpegdeint ffmpeg deinterlacer\n"
"l5 lowpass5 FIR lowpass deinterlacer\n"
"de default hb:a,vb:a,dr:a\n"
"fa fast h1:a,v1:a,dr:a\n"
"ac ha:a:128:7,va:a,dr:a\n"
"tn tmpnoise (3 threshold) temporal noise reducer\n"
" 1. <= 2. <= 3. larger -> stronger filtering\n"
"fq forcequant <quantizer> force quantizer\n"
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"\n"
;
911879d1 753
c41d972d 754pp_mode_t *pp_get_mode_by_name_and_quality(char *name, int quality)
911879d1 755{
bb270c08
DB
756 char temp[GET_MODE_BUFFER_SIZE];
757 char *p= temp;
7b49ce2e
SH
758 const char *filterDelimiters= ",/";
759 const char *optionDelimiters= ":";
bb270c08
DB
760 struct PPMode *ppMode;
761 char *filterToken;
762
6ab6c7c3 763 ppMode= av_malloc(sizeof(PPMode));
bb270c08
DB
764
765 ppMode->lumMode= 0;
766 ppMode->chromMode= 0;
767 ppMode->maxTmpNoise[0]= 700;
768 ppMode->maxTmpNoise[1]= 1500;
769 ppMode->maxTmpNoise[2]= 3000;
770 ppMode->maxAllowedY= 234;
771 ppMode->minAllowedY= 16;
772 ppMode->baseDcDiff= 256/8;
773 ppMode->flatnessThreshold= 56-16-1;
774 ppMode->maxClippedThreshold= 0.01;
775 ppMode->error=0;
776
777 strncpy(temp, name, GET_MODE_BUFFER_SIZE);
778
779 if(verbose>1) printf("pp: %s\n", name);
780
781 for(;;){
782 char *filterName;
783 int q= 1000000; //PP_QUALITY_MAX;
784 int chrom=-1;
785 int luma=-1;
786 char *option;
787 char *options[OPTIONS_ARRAY_SIZE];
788 int i;
789 int filterNameOk=0;
790 int numOfUnknownOptions=0;
791 int enable=1; //does the user want us to enabled or disabled the filter
792
793 filterToken= strtok(p, filterDelimiters);
794 if(filterToken == NULL) break;
795 p+= strlen(filterToken) + 1; // p points to next filterToken
796 filterName= strtok(filterToken, optionDelimiters);
797 if(verbose>1) printf("pp: %s::%s\n", filterToken, filterName);
798
799 if(*filterName == '-')
800 {
801 enable=0;
802 filterName++;
803 }
804
805 for(;;){ //for all options
806 option= strtok(NULL, optionDelimiters);
807 if(option == NULL) break;
808
809 if(verbose>1) printf("pp: option: %s\n", option);
810 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
811 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
812 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
813 else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
814 else
815 {
816 options[numOfUnknownOptions] = option;
817 numOfUnknownOptions++;
818 }
819 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
820 }
821 options[numOfUnknownOptions] = NULL;
822
823 /* replace stuff from the replace Table */
824 for(i=0; replaceTable[2*i]!=NULL; i++)
825 {
826 if(!strcmp(replaceTable[2*i], filterName))
827 {
828 int newlen= strlen(replaceTable[2*i + 1]);
829 int plen;
830 int spaceLeft;
831
832 if(p==NULL) p= temp, *p=0; //last filter
833 else p--, *p=','; //not last filter
834
835 plen= strlen(p);
836 spaceLeft= p - temp + plen;
837 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE)
838 {
839 ppMode->error++;
840 break;
841 }
842 memmove(p + newlen, p, plen+1);
843 memcpy(p, replaceTable[2*i + 1], newlen);
844 filterNameOk=1;
845 }
846 }
847
848 for(i=0; filters[i].shortName!=NULL; i++)
849 {
850// printf("Compareing %s, %s, %s\n", filters[i].shortName,filters[i].longName, filterName);
851 if( !strcmp(filters[i].longName, filterName)
852 || !strcmp(filters[i].shortName, filterName))
853 {
854 ppMode->lumMode &= ~filters[i].mask;
855 ppMode->chromMode &= ~filters[i].mask;
856
857 filterNameOk=1;
858 if(!enable) break; // user wants to disable it
859
860 if(q >= filters[i].minLumQuality && luma)
861 ppMode->lumMode|= filters[i].mask;
862 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
863 if(q >= filters[i].minChromQuality)
864 ppMode->chromMode|= filters[i].mask;
865
866 if(filters[i].mask == LEVEL_FIX)
867 {
868 int o;
869 ppMode->minAllowedY= 16;
870 ppMode->maxAllowedY= 234;
871 for(o=0; options[o]!=NULL; o++)
872 {
873 if( !strcmp(options[o],"fullyrange")
874 ||!strcmp(options[o],"f"))
875 {
876 ppMode->minAllowedY= 0;
877 ppMode->maxAllowedY= 255;
878 numOfUnknownOptions--;
879 }
880 }
881 }
882 else if(filters[i].mask == TEMP_NOISE_FILTER)
883 {
884 int o;
885 int numOfNoises=0;
886
887 for(o=0; options[o]!=NULL; o++)
888 {
889 char *tail;
890 ppMode->maxTmpNoise[numOfNoises]=
891 strtol(options[o], &tail, 0);
892 if(tail!=options[o])
893 {
894 numOfNoises++;
895 numOfUnknownOptions--;
896 if(numOfNoises >= 3) break;
897 }
898 }
899 }
900 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK
901 || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK)
902 {
903 int o;
904
905 for(o=0; options[o]!=NULL && o<2; o++)
906 {
907 char *tail;
908 int val= strtol(options[o], &tail, 0);
909 if(tail==options[o]) break;
910
911 numOfUnknownOptions--;
912 if(o==0) ppMode->baseDcDiff= val;
913 else ppMode->flatnessThreshold= val;
914 }
915 }
916 else if(filters[i].mask == FORCE_QUANT)
917 {
918 int o;
919 ppMode->forcedQuant= 15;
920
921 for(o=0; options[o]!=NULL && o<1; o++)
922 {
923 char *tail;
924 int val= strtol(options[o], &tail, 0);
925 if(tail==options[o]) break;
926
927 numOfUnknownOptions--;
928 ppMode->forcedQuant= val;
929 }
930 }
931 }
932 }
933 if(!filterNameOk) ppMode->error++;
934 ppMode->error += numOfUnknownOptions;
935 }
936
937 if(verbose>1) printf("pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
938 if(ppMode->error)
939 {
940 fprintf(stderr, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
6ab6c7c3 941 av_free(ppMode);
bb270c08
DB
942 return NULL;
943 }
944 return ppMode;
911879d1
MN
945}
946
c41d972d 947void pp_free_mode(pp_mode_t *mode){
6ab6c7c3 948 av_free(mode);
c41d972d
MN
949}
950
/**
 * Free the buffer *p and replace it with a new zero-initialized one.
 * The old contents are not preserved.
 *
 * @param p         address of the buffer pointer; receives the result of
 *                  av_mallocz()
 * @param alignment not used by this implementation
 * @param size      size of the new buffer in bytes
 */
static void reallocAlign(void **p, int alignment, int size)
{
    void *fresh;

    av_free(*p);
    fresh = av_mallocz(size);
    *p = fresh;
}
955
0426af31 956static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
bb270c08
DB
957 int mbWidth = (width+15)>>4;
958 int mbHeight= (height+15)>>4;
959 int i;
960
961 c->stride= stride;
962 c->qpStride= qpStride;
963
964 reallocAlign((void **)&c->tempDst, 8, stride*24);
965 reallocAlign((void **)&c->tempSrc, 8, stride*24);
966 reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
967 reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
968 for(i=0; i<256; i++)
969 c->yHistogram[i]= width*height/64*15/256;
970
971 for(i=0; i<3; i++)
972 {
973 //Note:the +17*1024 is just there so i dont have to worry about r/w over te end
974 reallocAlign((void **)&c->tempBlured[i], 8, stride*mbHeight*16 + 17*1024);
975 reallocAlign((void **)&c->tempBluredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
976 }
977
978 reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
979 reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
980 reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
981 reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
88c0bc7e
MN
982}
983
4cfbf61b 984static void global_init(void){
bb270c08
DB
985 int i;
986 memset(clip_table, 0, 256);
987 for(i=256; i<512; i++)
988 clip_table[i]= i;
989 memset(clip_table+512, 0, 256);
134eb1e5
MN
990}
991
88c0bc7e 992pp_context_t *pp_get_context(int width, int height, int cpuCaps){
6ab6c7c3 993 PPContext *c= av_malloc(sizeof(PPContext));
bb270c08
DB
994 int stride= (width+15)&(~15); //assumed / will realloc if needed
995 int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
115329f1 996
bb270c08 997 global_init();
134eb1e5 998
bb270c08
DB
999 memset(c, 0, sizeof(PPContext));
1000 c->cpuCaps= cpuCaps;
1001 if(cpuCaps&PP_FORMAT){
1002 c->hChromaSubSample= cpuCaps&0x3;
1003 c->vChromaSubSample= (cpuCaps>>4)&0x3;
1004 }else{
1005 c->hChromaSubSample= 1;
1006 c->vChromaSubSample= 1;
1007 }
88c0bc7e 1008
bb270c08 1009 reallocBuffers(c, width, height, stride, qpStride);
115329f1 1010
bb270c08 1011 c->frameNum=-1;
45b4f285 1012
bb270c08 1013 return c;
45b4f285
MN
1014}
1015
9cb54f43 1016void pp_free_context(void *vc){
bb270c08
DB
1017 PPContext *c = (PPContext*)vc;
1018 int i;
115329f1 1019
6ab6c7c3
LB
1020 for(i=0; i<3; i++) av_free(c->tempBlured[i]);
1021 for(i=0; i<3; i++) av_free(c->tempBluredPast[i]);
115329f1 1022
6ab6c7c3
LB
1023 av_free(c->tempBlocks);
1024 av_free(c->yHistogram);
1025 av_free(c->tempDst);
1026 av_free(c->tempSrc);
1027 av_free(c->deintTemp);
1028 av_free(c->stdQPTable);
1029 av_free(c->nonBQPTable);
1030 av_free(c->forcedQPTable);
115329f1 1031
bb270c08 1032 memset(c, 0, sizeof(PPContext));
88c0bc7e 1033
6ab6c7c3 1034 av_free(c);
9c9e467d
MN
1035}
1036
9cb54f43 1037void pp_postprocess(uint8_t * src[3], int srcStride[3],
9c9e467d 1038 uint8_t * dst[3], int dstStride[3],
ec487e5d 1039 int width, int height,
9c9e467d 1040 QP_STORE_T *QP_store, int QPStride,
bb270c08 1041 pp_mode_t *vm, void *vc, int pict_type)
911879d1 1042{
bb270c08
DB
1043 int mbWidth = (width+15)>>4;
1044 int mbHeight= (height+15)>>4;
1045 PPMode *mode = (PPMode*)vm;
1046 PPContext *c = (PPContext*)vc;
1047 int minStride= MAX(ABS(srcStride[0]), ABS(dstStride[0]));
1048 int absQPStride = ABS(QPStride);
1049
1050 // c->stride and c->QPStride are always positive
1051 if(c->stride < minStride || c->qpStride < absQPStride)
1052 reallocBuffers(c, width, height,
1053 MAX(minStride, c->stride),
1054 MAX(c->qpStride, absQPStride));
1055
1056 if(QP_store==NULL || (mode->lumMode & FORCE_QUANT))
1057 {
1058 int i;
1059 QP_store= c->forcedQPTable;
1060 absQPStride = QPStride = 0;
1061 if(mode->lumMode & FORCE_QUANT)
1062 for(i=0; i<mbWidth; i++) QP_store[i]= mode->forcedQuant;
1063 else
1064 for(i=0; i<mbWidth; i++) QP_store[i]= 1;
1065 }
0426af31
MN
1066//printf("pict_type:%d\n", pict_type);
1067
bb270c08
DB
1068 if(pict_type & PP_PICT_TYPE_QP2){
1069 int i;
1070 const int count= mbHeight * absQPStride;
1071 for(i=0; i<(count>>2); i++){
1072 ((uint32_t*)c->stdQPTable)[i] = (((uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1073 }
1074 for(i<<=2; i<count; i++){
1075 c->stdQPTable[i] = QP_store[i]>>1;
1076 }
0426af31 1077 QP_store= c->stdQPTable;
bb270c08
DB
1078 QPStride= absQPStride;
1079 }
0426af31 1080
ec487e5d
MN
1081if(0){
1082int x,y;
1083for(y=0; y<mbHeight; y++){
bb270c08
DB
1084 for(x=0; x<mbWidth; x++){
1085 printf("%2d ", QP_store[x + y*QPStride]);
1086 }
1087 printf("\n");
ec487e5d 1088}
bb270c08 1089 printf("\n");
ec487e5d 1090}
51e19dcc 1091
bb270c08
DB
1092 if((pict_type&7)!=3)
1093 {
1094 if (QPStride >= 0) {
1095 int i;
1096 const int count= mbHeight * QPStride;
1097 for(i=0; i<(count>>2); i++){
1098 ((uint32_t*)c->nonBQPTable)[i] = ((uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1099 }
1100 for(i<<=2; i<count; i++){
1101 c->nonBQPTable[i] = QP_store[i] & 0x3F;
1102 }
1103 } else {
1104 int i,j;
1105 for(i=0; i<mbHeight; i++) {
1106 for(j=0; j<absQPStride; j++) {
1107 c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1108 }
1109 }
1110 }
1111 }
1112
1113 if(verbose>2)
1114 {
1115 printf("using npp filters 0x%X/0x%X\n", mode->lumMode, mode->chromMode);
1116 }
1117
1118 postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1119 width, height, QP_store, QPStride, 0, mode, c);
1120
1121 width = (width )>>c->hChromaSubSample;
1122 height = (height)>>c->vChromaSubSample;
1123
1124 if(mode->chromMode)
1125 {
1126 postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1127 width, height, QP_store, QPStride, 1, mode, c);
1128 postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1129 width, height, QP_store, QPStride, 2, mode, c);
1130 }
1131 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2])
1132 {
1133 linecpy(dst[1], src[1], height, srcStride[1]);
1134 linecpy(dst[2], src[2], height, srcStride[2]);
1135 }
1136 else
1137 {
1138 int y;
1139 for(y=0; y<height; y++)
1140 {
1141 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1142 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
1143 }
1144 }
911879d1
MN
1145}
1146