Use ABS macro from libavutil.
[libav.git] / libpostproc / postprocess.c
CommitLineData
3057fa66 1/*
b78e7197
DB
2 * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
3 *
4 * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
5 *
ef85972b 6 * This file is part of FFmpeg.
b78e7197
DB
7 *
8 * FFmpeg is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
3057fa66 22
b304569a
MN
23/**
24 * @file postprocess.c
25 * postprocessing.
26 */
115329f1 27
3057fa66 28/*
bb270c08
DB
29 C MMX MMX2 3DNow AltiVec
30isVertDC Ec Ec Ec
31isVertMinMaxOk Ec Ec Ec
32doVertLowPass E e e Ec
33doVertDefFilter Ec Ec e e Ec
34isHorizDC Ec Ec Ec
35isHorizMinMaxOk a E Ec
36doHorizLowPass E e e Ec
37doHorizDefFilter Ec Ec e e Ec
38do_a_deblock Ec E Ec E
39deRing E e e* Ecp
40Vertical RKAlgo1 E a a
41Horizontal RKAlgo1 a a
42Vertical X1# a E E
43Horizontal X1# a E E
44LinIpolDeinterlace e E E*
45CubicIpolDeinterlace a e e*
46LinBlendDeinterlace e E E*
47MedianDeinterlace# E Ec Ec
48TempDeNoiser# E e e Ec
d5a1a995 49
117e45b0
MN
* I don't have a 3DNow! CPU -> it's untested, but no one said it doesn't work, so it seems to work
# more or less self-invented filters, so the exactness isn't too meaningful
3057fa66 52E = Exact implementation
e = almost exact implementation (slightly different rounding,...)
3057fa66
A
54a = alternative / approximate impl
55c = checked against the other implementations (-vo md5)
b0ac780a 56p = partially optimized, still some work to do
3057fa66
A
57*/
58
59/*
60TODO:
3057fa66 61reduce the time wasted on the mem transfer
3057fa66 62unroll stuff if instructions depend too much on the prior one
3057fa66 63move YScale thing to the end instead of fixing QP
13e00528 64write a faster and higher quality deblocking filter :)
d5a1a995 65make the mainloop more flexible (variable number of blocks at once
bb270c08 66 (the if/else stuff per block is slowing things down)
9f45d04d 67compare the quality & speed of all filters
9f45d04d 68split this huge file
8405b3fd 69optimize c versions
117e45b0 70try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
3057fa66 71...
13e00528
A
72*/
73
36b1b0bc 74//Changelog: use the Subversion log
3057fa66 75
9858f773 76#include "config.h"
6ab6c7c3 77#include "avutil.h"
3057fa66
A
78#include <inttypes.h>
79#include <stdio.h>
d5a1a995 80#include <stdlib.h>
911879d1 81#include <string.h>
dda87e9f
PL
82#ifdef HAVE_MALLOC_H
83#include <malloc.h>
84#endif
3057fa66 85//#undef HAVE_MMX2
13e00528 86//#define HAVE_3DNOW
3057fa66 87//#undef HAVE_MMX
cc9b0679 88//#undef ARCH_X86
7f16f6e6 89//#define DEBUG_BRIGHTNESS
bba9b16c 90#ifdef USE_FASTMEMCPY
f4bd289a 91#include "libvo/fastmemcpy.h"
70d4f2da 92#endif
13e00528 93#include "postprocess.h"
c41d972d 94#include "postprocess_internal.h"
bba9b16c
MN
95
96#include "mangle.h" //FIXME should be supressed
3057fa66 97
a7b2871c
RD
98#ifdef HAVE_ALTIVEC_H
99#include <altivec.h>
100#endif
101
e939e1c3
A
102#define MIN(a,b) ((a) > (b) ? (b) : (a))
103#define MAX(a,b) ((a) < (b) ? (b) : (a))
e939e1c3
A
104#define SIGN(a) ((a) > 0 ? 1 : -1)
105
911879d1
MN
106#define GET_MODE_BUFFER_SIZE 500
107#define OPTIONS_ARRAY_SIZE 10
9c9e467d
MN
108#define BLOCK_SIZE 8
109#define TEMP_STRIDE 8
110//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
911879d1 111
053dea12 112#if defined(ARCH_X86) || defined(ARCH_X86_64)
bb270c08
DB
113static uint64_t __attribute__((aligned(8))) attribute_used w05= 0x0005000500050005LL;
114static uint64_t __attribute__((aligned(8))) attribute_used w04= 0x0004000400040004LL;
115static uint64_t __attribute__((aligned(8))) attribute_used w20= 0x0020002000200020LL;
116static uint64_t __attribute__((aligned(8))) attribute_used b00= 0x0000000000000000LL;
117static uint64_t __attribute__((aligned(8))) attribute_used b01= 0x0101010101010101LL;
118static uint64_t __attribute__((aligned(8))) attribute_used b02= 0x0202020202020202LL;
119static uint64_t __attribute__((aligned(8))) attribute_used b08= 0x0808080808080808LL;
120static uint64_t __attribute__((aligned(8))) attribute_used b80= 0x8080808080808080LL;
b28daef8 121#endif
3057fa66 122
134eb1e5
MN
123static uint8_t clip_table[3*256];
124static uint8_t * const clip_tab= clip_table + 256;
125
4df8ca9d 126static const int verbose= 0;
45b4f285 127
3f1d4e96 128static const int attribute_used deringThreshold= 20;
3057fa66 129
9c9e467d 130
911879d1
MN
131static struct PPFilter filters[]=
132{
bb270c08
DB
133 {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK},
134 {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK},
135/* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER},
136 {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/
137 {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER},
138 {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER},
139 {"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK},
140 {"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK},
141 {"dr", "dering", 1, 5, 6, DERING},
142 {"al", "autolevels", 0, 1, 2, LEVEL_FIX},
143 {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
144 {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
145 {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
146 {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER},
147 {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER},
148 {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER},
149 {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER},
150 {"fq", "forcequant", 1, 0, 0, FORCE_QUANT},
151 {NULL, NULL,0,0,0,0} //End Marker
911879d1
MN
152};
153
7b49ce2e 154static const char *replaceTable[]=
911879d1 155{
bb270c08
DB
156 "default", "hdeblock:a,vdeblock:a,dering:a",
157 "de", "hdeblock:a,vdeblock:a,dering:a",
158 "fast", "x1hdeblock:a,x1vdeblock:a,dering:a",
159 "fa", "x1hdeblock:a,x1vdeblock:a,dering:a",
160 "ac", "ha:a:128:7,va:a,dering:a",
161 NULL //End Marker
911879d1
MN
162};
163
3057fa66 164
053dea12 165#if defined(ARCH_X86) || defined(ARCH_X86_64)
3057fa66
A
166static inline void prefetchnta(void *p)
167{
bb270c08
DB
168 asm volatile( "prefetchnta (%0)\n\t"
169 : : "r" (p)
170 );
3057fa66
A
171}
172
173static inline void prefetcht0(void *p)
174{
bb270c08
DB
175 asm volatile( "prefetcht0 (%0)\n\t"
176 : : "r" (p)
177 );
3057fa66
A
178}
179
180static inline void prefetcht1(void *p)
181{
bb270c08
DB
182 asm volatile( "prefetcht1 (%0)\n\t"
183 : : "r" (p)
184 );
3057fa66
A
185}
186
187static inline void prefetcht2(void *p)
188{
bb270c08
DB
189 asm volatile( "prefetcht2 (%0)\n\t"
190 : : "r" (p)
191 );
3057fa66 192}
9a722af7 193#endif
3057fa66 194
// The horizontal functions exist only in C because the MMX code is faster with vertical filters and transposing
3057fa66 196
cf5ec61d
MN
197/**
198 * Check if the given 8x8 Block is mostly "flat"
199 */
b0ac780a 200static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
cf5ec61d 201{
bb270c08
DB
202 int numEq= 0;
203 int y;
204 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
205 const int dcThreshold= dcOffset*2 + 1;
206
207 for(y=0; y<BLOCK_SIZE; y++)
208 {
209 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
210 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
211 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
212 if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
213 if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
214 if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
215 if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
216 src+= stride;
217 }
218 return numEq > c->ppMode.flatnessThreshold;
9c9e467d
MN
219}
220
221/**
222 * Check if the middle 8x8 Block in the given 8x16 block is flat
223 */
224static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){
bb270c08
DB
225 int numEq= 0;
226 int y;
227 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
228 const int dcThreshold= dcOffset*2 + 1;
229
230 src+= stride*4; // src points to begin of the 8x8 Block
231 for(y=0; y<BLOCK_SIZE-1; y++)
232 {
233 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
234 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
235 if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
236 if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
237 if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
238 if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
239 if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
240 if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
241 src+= stride;
242 }
243 return numEq > c->ppMode.flatnessThreshold;
cf5ec61d
MN
244}
245
/**
 * Sparse range check on an 8x8 block: on each of the 8 rows one pair of
 * pixels is probed and must differ by at most 2*QP.
 *
 * The probed column pairs are (0,5), (2,7), (4,1), (6,3) for rows 0..3 and
 * the same pattern again for rows 4..7.
 *
 * @param src    top-left pixel of the block
 * @param stride distance in bytes between two rows
 * @param QP     quantizer used to scale the allowed difference
 * @return 1 if every probed pair is within range, 0 otherwise
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{
    static const uint8_t probe[4][2]= { {0, 5}, {2, 7}, {4, 1}, {6, 3} };
    int row;

    for(row=0; row<8; row++)
    {
        const uint8_t *line= src + row*stride;
        const int diff= line[probe[row&3][0]] - line[probe[row&3][1]];

        // the unsigned cast makes values below -2*QP wrap to huge numbers
        if((unsigned)(diff + 2*QP) > 4*QP) return 0;
    }
    return 1;
}
cf5ec61d 268
cb482d25
MN
269static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
270{
271#if 1
272#if 1
bb270c08
DB
273 int x;
274 src+= stride*4;
275 for(x=0; x<BLOCK_SIZE; x+=4)
276 {
277 if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0;
278 if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
279 if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
280 if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
281 }
cb482d25 282#else
bb270c08
DB
283 int x;
284 src+= stride*3;
285 for(x=0; x<BLOCK_SIZE; x++)
286 {
287 if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
288 }
cb482d25 289#endif
bb270c08 290 return 1;
cb482d25 291#else
bb270c08
DB
292 int x;
293 src+= stride*4;
294 for(x=0; x<BLOCK_SIZE; x++)
295 {
296 int min=255;
297 int max=0;
298 int y;
299 for(y=0; y<8; y++){
300 int v= src[x + y*stride];
301 if(v>max) max=v;
302 if(v<min) min=v;
303 }
304 if(max-min > 2*QP) return 0;
305 }
306 return 1;
cb482d25
MN
307#endif
308}
309
b0ac780a 310static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c){
bb270c08
DB
311 if( isHorizDC_C(src, stride, c) ){
312 if( isHorizMinMaxOk_C(src, stride, c->QP) )
313 return 1;
314 else
315 return 0;
316 }else{
317 return 2;
318 }
b0ac780a
MN
319}
320
cb482d25 321static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c){
bb270c08
DB
322 if( isVertDC_C(src, stride, c) ){
323 if( isVertMinMaxOk_C(src, stride, c->QP) )
324 return 1;
325 else
326 return 0;
327 }else{
328 return 2;
329 }
cf5ec61d
MN
330}
331
b0ac780a 332static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
cf5ec61d 333{
bb270c08
DB
334 int y;
335 for(y=0; y<BLOCK_SIZE; y++)
336 {
337 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
338
339 if(ABS(middleEnergy) < 8*c->QP)
340 {
341 const int q=(dst[3] - dst[4])/2;
342 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
343 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
344
345 int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) );
346 d= MAX(d, 0);
347
348 d= (5*d + 32) >> 6;
349 d*= SIGN(-middleEnergy);
350
351 if(q>0)
352 {
353 d= d<0 ? 0 : d;
354 d= d>q ? q : d;
355 }
356 else
357 {
358 d= d>0 ? 0 : d;
359 d= d<q ? q : d;
360 }
361
362 dst[3]-= d;
363 dst[4]+= d;
364 }
365 dst+= stride;
366 }
cf5ec61d
MN
367}
368
369/**
370 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
371 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
372 */
b0ac780a 373static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
cf5ec61d 374{
bb270c08
DB
375 int y;
376 for(y=0; y<BLOCK_SIZE; y++)
377 {
378 const int first= ABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
379 const int last= ABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
380
381 int sums[10];
382 sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
383 sums[1] = sums[0] - first + dst[3];
384 sums[2] = sums[1] - first + dst[4];
385 sums[3] = sums[2] - first + dst[5];
386 sums[4] = sums[3] - first + dst[6];
387 sums[5] = sums[4] - dst[0] + dst[7];
388 sums[6] = sums[5] - dst[1] + last;
389 sums[7] = sums[6] - dst[2] + last;
390 sums[8] = sums[7] - dst[3] + last;
391 sums[9] = sums[8] - dst[4] + last;
392
393 dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
394 dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
395 dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
396 dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
397 dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
398 dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
399 dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
400 dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
401
402 dst+= stride;
403 }
cf5ec61d
MN
404}
405
4e4dcbc5 406/**
cc9b0679
MN
407 * Experimental Filter 1 (Horizontal)
408 * will not damage linear gradients
409 * Flat blocks should look like they where passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
410 * can only smooth blocks at the expected locations (it cant smooth them if they did move)
411 * MMX2 version does correct clipping C version doesnt
412 * not identical with the vertical one
4e4dcbc5 413 */
cc9b0679
MN
414static inline void horizX1Filter(uint8_t *src, int stride, int QP)
415{
bb270c08
DB
416 int y;
417 static uint64_t *lut= NULL;
418 if(lut==NULL)
419 {
420 int i;
6ab6c7c3 421 lut = av_malloc(256*8);
bb270c08
DB
422 for(i=0; i<256; i++)
423 {
424 int v= i < 128 ? 2*i : 2*(i-256);
117e45b0 425/*
cc9b0679 426//Simulate 112242211 9-Tap filter
bb270c08
DB
427 uint64_t a= (v/16) & 0xFF;
428 uint64_t b= (v/8) & 0xFF;
429 uint64_t c= (v/4) & 0xFF;
430 uint64_t d= (3*v/8) & 0xFF;
117e45b0 431*/
cc9b0679 432//Simulate piecewise linear interpolation
bb270c08
DB
433 uint64_t a= (v/16) & 0xFF;
434 uint64_t b= (v*3/16) & 0xFF;
435 uint64_t c= (v*5/16) & 0xFF;
436 uint64_t d= (7*v/16) & 0xFF;
437 uint64_t A= (0x100 - a)&0xFF;
438 uint64_t B= (0x100 - b)&0xFF;
439 uint64_t C= (0x100 - c)&0xFF;
440 uint64_t D= (0x100 - c)&0xFF;
441
442 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
443 (D<<24) | (C<<16) | (B<<8) | (A);
444 //lut[i] = (v<<32) | (v<<24);
445 }
446 }
447
448 for(y=0; y<BLOCK_SIZE; y++)
449 {
450 int a= src[1] - src[2];
451 int b= src[3] - src[4];
452 int c= src[5] - src[6];
453
454 int d= MAX(ABS(b) - (ABS(a) + ABS(c))/2, 0);
455
456 if(d < QP)
457 {
458 int v = d * SIGN(-b);
459
460 src[1] +=v/8;
461 src[2] +=v/4;
462 src[3] +=3*v/8;
463 src[4] -=3*v/8;
464 src[5] -=v/4;
465 src[6] -=v/8;
466
467 }
468 src+=stride;
469 }
cc9b0679
MN
470}
471
12eebd26
MN
472/**
473 * accurate deblock filter
474 */
792a5a7c 475static always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
bb270c08
DB
476 int y;
477 const int QP= c->QP;
478 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
479 const int dcThreshold= dcOffset*2 + 1;
12eebd26 480//START_TIMER
bb270c08
DB
481 src+= step*4; // src points to begin of the 8x8 Block
482 for(y=0; y<8; y++){
483 int numEq= 0;
484
485 if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
486 if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
487 if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
488 if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
489 if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
490 if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
491 if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
492 if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
493 if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
494 if(numEq > c->ppMode.flatnessThreshold){
495 int min, max, x;
496
497 if(src[0] > src[step]){
498 max= src[0];
499 min= src[step];
500 }else{
501 max= src[step];
502 min= src[0];
503 }
504 for(x=2; x<8; x+=2){
505 if(src[x*step] > src[(x+1)*step]){
506 if(src[x *step] > max) max= src[ x *step];
507 if(src[(x+1)*step] < min) min= src[(x+1)*step];
508 }else{
509 if(src[(x+1)*step] > max) max= src[(x+1)*step];
510 if(src[ x *step] < min) min= src[ x *step];
511 }
512 }
513 if(max-min < 2*QP){
514 const int first= ABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
515 const int last= ABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
516
517 int sums[10];
518 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
519 sums[1] = sums[0] - first + src[3*step];
520 sums[2] = sums[1] - first + src[4*step];
521 sums[3] = sums[2] - first + src[5*step];
522 sums[4] = sums[3] - first + src[6*step];
523 sums[5] = sums[4] - src[0*step] + src[7*step];
524 sums[6] = sums[5] - src[1*step] + last;
525 sums[7] = sums[6] - src[2*step] + last;
526 sums[8] = sums[7] - src[3*step] + last;
527 sums[9] = sums[8] - src[4*step] + last;
528
529 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
530 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
531 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
532 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
533 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
534 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
535 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
536 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
537 }
538 }else{
539 const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
540
541 if(ABS(middleEnergy) < 8*QP)
542 {
543 const int q=(src[3*step] - src[4*step])/2;
544 const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
545 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
546
547 int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) );
548 d= MAX(d, 0);
549
550 d= (5*d + 32) >> 6;
551 d*= SIGN(-middleEnergy);
552
553 if(q>0)
554 {
555 d= d<0 ? 0 : d;
556 d= d>q ? q : d;
557 }
558 else
559 {
560 d= d>0 ? 0 : d;
561 d= d<q ? q : d;
562 }
563
564 src[3*step]-= d;
565 src[4*step]+= d;
566 }
567 }
568
569 src += stride;
570 }
12eebd26
MN
571/*if(step==16){
572 STOP_TIMER("step16")
573}else{
574 STOP_TIMER("stepX")
575}*/
576}
cc9b0679 577
e89952aa 578//Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
cc9b0679 579//Plain C versions
e89952aa
MN
580#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
581#define COMPILE_C
582#endif
583
b0ac780a
MN
584#ifdef ARCH_POWERPC
585#ifdef HAVE_ALTIVEC
586#define COMPILE_ALTIVEC
b0ac780a
MN
587#endif //HAVE_ALTIVEC
588#endif //ARCH_POWERPC
589
053dea12 590#if defined(ARCH_X86) || defined(ARCH_X86_64)
e89952aa
MN
591
592#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
593#define COMPILE_MMX
594#endif
595
596#if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
597#define COMPILE_MMX2
598#endif
599
600#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
601#define COMPILE_3DNOW
602#endif
9c9e467d 603#endif //ARCH_X86
e89952aa
MN
604
605#undef HAVE_MMX
606#undef HAVE_MMX2
607#undef HAVE_3DNOW
b0ac780a 608#undef HAVE_ALTIVEC
e89952aa
MN
609
610#ifdef COMPILE_C
cc9b0679
MN
611#undef HAVE_MMX
612#undef HAVE_MMX2
613#undef HAVE_3DNOW
cc9b0679
MN
614#define RENAME(a) a ## _C
615#include "postprocess_template.c"
e89952aa 616#endif
cc9b0679 617
b0ac780a
MN
618#ifdef ARCH_POWERPC
619#ifdef COMPILE_ALTIVEC
620#undef RENAME
621#define HAVE_ALTIVEC
622#define RENAME(a) a ## _altivec
623#include "postprocess_altivec_template.c"
624#include "postprocess_template.c"
625#endif
626#endif //ARCH_POWERPC
627
cc9b0679 628//MMX versions
e89952aa 629#ifdef COMPILE_MMX
cc9b0679
MN
630#undef RENAME
631#define HAVE_MMX
632#undef HAVE_MMX2
633#undef HAVE_3DNOW
cc9b0679
MN
634#define RENAME(a) a ## _MMX
635#include "postprocess_template.c"
e89952aa 636#endif
cc9b0679
MN
637
638//MMX2 versions
e89952aa 639#ifdef COMPILE_MMX2
cc9b0679
MN
640#undef RENAME
641#define HAVE_MMX
642#define HAVE_MMX2
643#undef HAVE_3DNOW
cc9b0679
MN
644#define RENAME(a) a ## _MMX2
645#include "postprocess_template.c"
e89952aa 646#endif
cc9b0679
MN
647
648//3DNOW versions
e89952aa 649#ifdef COMPILE_3DNOW
cc9b0679
MN
650#undef RENAME
651#define HAVE_MMX
652#undef HAVE_MMX2
653#define HAVE_3DNOW
cc9b0679
MN
654#define RENAME(a) a ## _3DNow
655#include "postprocess_template.c"
e89952aa 656#endif
cc9b0679
MN
657
658// minor note: the HAVE_xyz is messed up after that line so dont use it
659
660static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
bb270c08 661 QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc)
cc9b0679 662{
bb270c08
DB
663 PPContext *c= (PPContext *)vc;
664 PPMode *ppMode= (PPMode *)vm;
665 c->ppMode= *ppMode; //FIXME
9c9e467d 666
bb270c08
DB
667 // useing ifs here as they are faster than function pointers allthough the
668 // difference wouldnt be messureable here but its much better because
669 // someone might exchange the cpu whithout restarting mplayer ;)
e89952aa 670#ifdef RUNTIME_CPUDETECT
053dea12 671#if defined(ARCH_X86) || defined(ARCH_X86_64)
bb270c08
DB
672 // ordered per speed fasterst first
673 if(c->cpuCaps & PP_CPU_CAPS_MMX2)
674 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
675 else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
676 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
677 else if(c->cpuCaps & PP_CPU_CAPS_MMX)
678 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
679 else
680 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
cc9b0679 681#else
b0ac780a
MN
682#ifdef ARCH_POWERPC
683#ifdef HAVE_ALTIVEC
71487254 684 if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
bb270c08 685 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
b0ac780a
MN
686 else
687#endif
688#endif
bb270c08 689 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
be44a4d7 690#endif
e89952aa
MN
691#else //RUNTIME_CPUDETECT
692#ifdef HAVE_MMX2
bb270c08 693 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 694#elif defined (HAVE_3DNOW)
bb270c08 695 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 696#elif defined (HAVE_MMX)
bb270c08 697 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
b0ac780a 698#elif defined (HAVE_ALTIVEC)
bb270c08 699 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 700#else
bb270c08 701 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa
MN
702#endif
703#endif //!RUNTIME_CPUDETECT
117e45b0
MN
704}
705
cc9b0679 706//static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
bb270c08 707// QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
13e00528 708
/* -pp Command line Help
 *
 * The filter and option names listed here must match what the mode parser
 * accepts: names are compared with strcmp(), so spelling and case matter.
 */
char *pp_help=
"Available postprocessing filters:\n"
"Filters Options\n"
"short long name short long option Description\n"
"* * a autoq CPU power dependent enabler\n"
" c chrom chrominance filtering enabled\n"
" y nochrom chrominance filtering disabled\n"
" n noluma luma filtering disabled\n"
"hb hdeblock (2 threshold) horizontal deblocking filter\n"
" 1. difference factor: default=32, higher -> more deblocking\n"
" 2. flatness threshold: default=39, lower -> more deblocking\n"
" the h & v deblocking filters share these\n"
" so you can't set different thresholds for h / v\n"
"vb vdeblock (2 threshold) vertical deblocking filter\n"
"ha ahdeblock (2 threshold) horizontal deblocking filter\n"
"va avdeblock (2 threshold) vertical deblocking filter\n"
"h1 x1hdeblock experimental h deblock filter 1\n"
"v1 x1vdeblock experimental v deblock filter 1\n"
"dr dering deringing filter\n"
"al autolevels automatic brightness / contrast\n"
" f fullyrange stretch luminance to (0..255)\n"
"lb linblenddeint linear blend deinterlacer\n"
"li linipoldeint linear interpolating deinterlace\n"
"ci cubicipoldeint cubic interpolating deinterlacer\n"
"md mediandeint median deinterlacer\n"
"fd ffmpegdeint ffmpeg deinterlacer\n"
"l5 lowpass5 FIR lowpass deinterlacer\n"
"de default hb:a,vb:a,dr:a\n"
"fa fast h1:a,v1:a,dr:a\n"
"ac ha:a:128:7,va:a,dr:a\n"
"tn tmpnoise (3 threshold) temporal noise reducer\n"
" 1. <= 2. <= 3. larger -> stronger filtering\n"
"fq forcequant <quantizer> force quantizer\n"
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"\n"
;
911879d1 754
c41d972d 755pp_mode_t *pp_get_mode_by_name_and_quality(char *name, int quality)
911879d1 756{
bb270c08
DB
757 char temp[GET_MODE_BUFFER_SIZE];
758 char *p= temp;
7b49ce2e
SH
759 const char *filterDelimiters= ",/";
760 const char *optionDelimiters= ":";
bb270c08
DB
761 struct PPMode *ppMode;
762 char *filterToken;
763
6ab6c7c3 764 ppMode= av_malloc(sizeof(PPMode));
bb270c08
DB
765
766 ppMode->lumMode= 0;
767 ppMode->chromMode= 0;
768 ppMode->maxTmpNoise[0]= 700;
769 ppMode->maxTmpNoise[1]= 1500;
770 ppMode->maxTmpNoise[2]= 3000;
771 ppMode->maxAllowedY= 234;
772 ppMode->minAllowedY= 16;
773 ppMode->baseDcDiff= 256/8;
774 ppMode->flatnessThreshold= 56-16-1;
775 ppMode->maxClippedThreshold= 0.01;
776 ppMode->error=0;
777
778 strncpy(temp, name, GET_MODE_BUFFER_SIZE);
779
780 if(verbose>1) printf("pp: %s\n", name);
781
782 for(;;){
783 char *filterName;
784 int q= 1000000; //PP_QUALITY_MAX;
785 int chrom=-1;
786 int luma=-1;
787 char *option;
788 char *options[OPTIONS_ARRAY_SIZE];
789 int i;
790 int filterNameOk=0;
791 int numOfUnknownOptions=0;
792 int enable=1; //does the user want us to enabled or disabled the filter
793
794 filterToken= strtok(p, filterDelimiters);
795 if(filterToken == NULL) break;
796 p+= strlen(filterToken) + 1; // p points to next filterToken
797 filterName= strtok(filterToken, optionDelimiters);
798 if(verbose>1) printf("pp: %s::%s\n", filterToken, filterName);
799
800 if(*filterName == '-')
801 {
802 enable=0;
803 filterName++;
804 }
805
806 for(;;){ //for all options
807 option= strtok(NULL, optionDelimiters);
808 if(option == NULL) break;
809
810 if(verbose>1) printf("pp: option: %s\n", option);
811 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
812 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
813 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
814 else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
815 else
816 {
817 options[numOfUnknownOptions] = option;
818 numOfUnknownOptions++;
819 }
820 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
821 }
822 options[numOfUnknownOptions] = NULL;
823
824 /* replace stuff from the replace Table */
825 for(i=0; replaceTable[2*i]!=NULL; i++)
826 {
827 if(!strcmp(replaceTable[2*i], filterName))
828 {
829 int newlen= strlen(replaceTable[2*i + 1]);
830 int plen;
831 int spaceLeft;
832
833 if(p==NULL) p= temp, *p=0; //last filter
834 else p--, *p=','; //not last filter
835
836 plen= strlen(p);
837 spaceLeft= p - temp + plen;
838 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE)
839 {
840 ppMode->error++;
841 break;
842 }
843 memmove(p + newlen, p, plen+1);
844 memcpy(p, replaceTable[2*i + 1], newlen);
845 filterNameOk=1;
846 }
847 }
848
849 for(i=0; filters[i].shortName!=NULL; i++)
850 {
851// printf("Compareing %s, %s, %s\n", filters[i].shortName,filters[i].longName, filterName);
852 if( !strcmp(filters[i].longName, filterName)
853 || !strcmp(filters[i].shortName, filterName))
854 {
855 ppMode->lumMode &= ~filters[i].mask;
856 ppMode->chromMode &= ~filters[i].mask;
857
858 filterNameOk=1;
859 if(!enable) break; // user wants to disable it
860
861 if(q >= filters[i].minLumQuality && luma)
862 ppMode->lumMode|= filters[i].mask;
863 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
864 if(q >= filters[i].minChromQuality)
865 ppMode->chromMode|= filters[i].mask;
866
867 if(filters[i].mask == LEVEL_FIX)
868 {
869 int o;
870 ppMode->minAllowedY= 16;
871 ppMode->maxAllowedY= 234;
872 for(o=0; options[o]!=NULL; o++)
873 {
874 if( !strcmp(options[o],"fullyrange")
875 ||!strcmp(options[o],"f"))
876 {
877 ppMode->minAllowedY= 0;
878 ppMode->maxAllowedY= 255;
879 numOfUnknownOptions--;
880 }
881 }
882 }
883 else if(filters[i].mask == TEMP_NOISE_FILTER)
884 {
885 int o;
886 int numOfNoises=0;
887
888 for(o=0; options[o]!=NULL; o++)
889 {
890 char *tail;
891 ppMode->maxTmpNoise[numOfNoises]=
892 strtol(options[o], &tail, 0);
893 if(tail!=options[o])
894 {
895 numOfNoises++;
896 numOfUnknownOptions--;
897 if(numOfNoises >= 3) break;
898 }
899 }
900 }
901 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK
902 || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK)
903 {
904 int o;
905
906 for(o=0; options[o]!=NULL && o<2; o++)
907 {
908 char *tail;
909 int val= strtol(options[o], &tail, 0);
910 if(tail==options[o]) break;
911
912 numOfUnknownOptions--;
913 if(o==0) ppMode->baseDcDiff= val;
914 else ppMode->flatnessThreshold= val;
915 }
916 }
917 else if(filters[i].mask == FORCE_QUANT)
918 {
919 int o;
920 ppMode->forcedQuant= 15;
921
922 for(o=0; options[o]!=NULL && o<1; o++)
923 {
924 char *tail;
925 int val= strtol(options[o], &tail, 0);
926 if(tail==options[o]) break;
927
928 numOfUnknownOptions--;
929 ppMode->forcedQuant= val;
930 }
931 }
932 }
933 }
934 if(!filterNameOk) ppMode->error++;
935 ppMode->error += numOfUnknownOptions;
936 }
937
938 if(verbose>1) printf("pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
939 if(ppMode->error)
940 {
941 fprintf(stderr, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
6ab6c7c3 942 av_free(ppMode);
bb270c08
DB
943 return NULL;
944 }
945 return ppMode;
911879d1
MN
946}
947
c41d972d 948void pp_free_mode(pp_mode_t *mode){
6ab6c7c3 949 av_free(mode);
c41d972d
MN
950}
951
/**
 * Replace the buffer *p with a fresh block of size bytes obtained from
 * av_mallocz(). The previous buffer is released first; its contents are
 * not carried over.
 *
 * @param p         address of the buffer pointer to replace
 * @param alignment currently unused by this function
 * @param size      size in bytes of the new buffer
 */
static void reallocAlign(void **p, int alignment, int size)
{
    void *fresh;

    av_free(*p);
    fresh= av_mallocz(size);
    *p= fresh;
}
955}
956
0426af31 957static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
bb270c08
DB
958 int mbWidth = (width+15)>>4;
959 int mbHeight= (height+15)>>4;
960 int i;
961
962 c->stride= stride;
963 c->qpStride= qpStride;
964
965 reallocAlign((void **)&c->tempDst, 8, stride*24);
966 reallocAlign((void **)&c->tempSrc, 8, stride*24);
967 reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
968 reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
969 for(i=0; i<256; i++)
970 c->yHistogram[i]= width*height/64*15/256;
971
972 for(i=0; i<3; i++)
973 {
974 //Note:the +17*1024 is just there so i dont have to worry about r/w over te end
975 reallocAlign((void **)&c->tempBlured[i], 8, stride*mbHeight*16 + 17*1024);
976 reallocAlign((void **)&c->tempBluredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
977 }
978
979 reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
980 reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
981 reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
982 reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
88c0bc7e
MN
983}
984
4cfbf61b 985static void global_init(void){
bb270c08
DB
986 int i;
987 memset(clip_table, 0, 256);
988 for(i=256; i<512; i++)
989 clip_table[i]= i;
990 memset(clip_table+512, 0, 256);
134eb1e5
MN
991}
992
88c0bc7e 993pp_context_t *pp_get_context(int width, int height, int cpuCaps){
6ab6c7c3 994 PPContext *c= av_malloc(sizeof(PPContext));
bb270c08
DB
995 int stride= (width+15)&(~15); //assumed / will realloc if needed
996 int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
115329f1 997
bb270c08 998 global_init();
134eb1e5 999
bb270c08
DB
1000 memset(c, 0, sizeof(PPContext));
1001 c->cpuCaps= cpuCaps;
1002 if(cpuCaps&PP_FORMAT){
1003 c->hChromaSubSample= cpuCaps&0x3;
1004 c->vChromaSubSample= (cpuCaps>>4)&0x3;
1005 }else{
1006 c->hChromaSubSample= 1;
1007 c->vChromaSubSample= 1;
1008 }
88c0bc7e 1009
bb270c08 1010 reallocBuffers(c, width, height, stride, qpStride);
115329f1 1011
bb270c08 1012 c->frameNum=-1;
45b4f285 1013
bb270c08 1014 return c;
45b4f285
MN
1015}
1016
9cb54f43 1017void pp_free_context(void *vc){
bb270c08
DB
1018 PPContext *c = (PPContext*)vc;
1019 int i;
115329f1 1020
6ab6c7c3
LB
1021 for(i=0; i<3; i++) av_free(c->tempBlured[i]);
1022 for(i=0; i<3; i++) av_free(c->tempBluredPast[i]);
115329f1 1023
6ab6c7c3
LB
1024 av_free(c->tempBlocks);
1025 av_free(c->yHistogram);
1026 av_free(c->tempDst);
1027 av_free(c->tempSrc);
1028 av_free(c->deintTemp);
1029 av_free(c->stdQPTable);
1030 av_free(c->nonBQPTable);
1031 av_free(c->forcedQPTable);
115329f1 1032
bb270c08 1033 memset(c, 0, sizeof(PPContext));
88c0bc7e 1034
6ab6c7c3 1035 av_free(c);
9c9e467d
MN
1036}
1037
9cb54f43 1038void pp_postprocess(uint8_t * src[3], int srcStride[3],
9c9e467d 1039 uint8_t * dst[3], int dstStride[3],
ec487e5d 1040 int width, int height,
9c9e467d 1041 QP_STORE_T *QP_store, int QPStride,
bb270c08 1042 pp_mode_t *vm, void *vc, int pict_type)
911879d1 1043{
bb270c08
DB
1044 int mbWidth = (width+15)>>4;
1045 int mbHeight= (height+15)>>4;
1046 PPMode *mode = (PPMode*)vm;
1047 PPContext *c = (PPContext*)vc;
1048 int minStride= MAX(ABS(srcStride[0]), ABS(dstStride[0]));
1049 int absQPStride = ABS(QPStride);
1050
1051 // c->stride and c->QPStride are always positive
1052 if(c->stride < minStride || c->qpStride < absQPStride)
1053 reallocBuffers(c, width, height,
1054 MAX(minStride, c->stride),
1055 MAX(c->qpStride, absQPStride));
1056
1057 if(QP_store==NULL || (mode->lumMode & FORCE_QUANT))
1058 {
1059 int i;
1060 QP_store= c->forcedQPTable;
1061 absQPStride = QPStride = 0;
1062 if(mode->lumMode & FORCE_QUANT)
1063 for(i=0; i<mbWidth; i++) QP_store[i]= mode->forcedQuant;
1064 else
1065 for(i=0; i<mbWidth; i++) QP_store[i]= 1;
1066 }
0426af31
MN
1067//printf("pict_type:%d\n", pict_type);
1068
bb270c08
DB
1069 if(pict_type & PP_PICT_TYPE_QP2){
1070 int i;
1071 const int count= mbHeight * absQPStride;
1072 for(i=0; i<(count>>2); i++){
1073 ((uint32_t*)c->stdQPTable)[i] = (((uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1074 }
1075 for(i<<=2; i<count; i++){
1076 c->stdQPTable[i] = QP_store[i]>>1;
1077 }
0426af31 1078 QP_store= c->stdQPTable;
bb270c08
DB
1079 QPStride= absQPStride;
1080 }
0426af31 1081
ec487e5d
MN
1082if(0){
1083int x,y;
1084for(y=0; y<mbHeight; y++){
bb270c08
DB
1085 for(x=0; x<mbWidth; x++){
1086 printf("%2d ", QP_store[x + y*QPStride]);
1087 }
1088 printf("\n");
ec487e5d 1089}
bb270c08 1090 printf("\n");
ec487e5d 1091}
51e19dcc 1092
bb270c08
DB
1093 if((pict_type&7)!=3)
1094 {
1095 if (QPStride >= 0) {
1096 int i;
1097 const int count= mbHeight * QPStride;
1098 for(i=0; i<(count>>2); i++){
1099 ((uint32_t*)c->nonBQPTable)[i] = ((uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1100 }
1101 for(i<<=2; i<count; i++){
1102 c->nonBQPTable[i] = QP_store[i] & 0x3F;
1103 }
1104 } else {
1105 int i,j;
1106 for(i=0; i<mbHeight; i++) {
1107 for(j=0; j<absQPStride; j++) {
1108 c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1109 }
1110 }
1111 }
1112 }
1113
1114 if(verbose>2)
1115 {
1116 printf("using npp filters 0x%X/0x%X\n", mode->lumMode, mode->chromMode);
1117 }
1118
1119 postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1120 width, height, QP_store, QPStride, 0, mode, c);
1121
1122 width = (width )>>c->hChromaSubSample;
1123 height = (height)>>c->vChromaSubSample;
1124
1125 if(mode->chromMode)
1126 {
1127 postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1128 width, height, QP_store, QPStride, 1, mode, c);
1129 postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1130 width, height, QP_store, QPStride, 2, mode, c);
1131 }
1132 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2])
1133 {
1134 linecpy(dst[1], src[1], height, srcStride[1]);
1135 linecpy(dst[2], src[2], height, srcStride[2]);
1136 }
1137 else
1138 {
1139 int y;
1140 for(y=0; y<height; y++)
1141 {
1142 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1143 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
1144 }
1145 }
911879d1
MN
1146}
1147