1000000l for lu_zero (commiting untested? code)
[libav.git] / libpostproc / postprocess.c
CommitLineData
3057fa66 1/*
9858f773 2 Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
3057fa66 3
b0ac780a
MN
4 AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
5
3057fa66
A
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
5509bffa 18 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
3057fa66
A
19*/
20
b304569a
MN
21/**
22 * @file postprocess.c
23 * postprocessing.
24 */
115329f1 25
3057fa66 26/*
bb270c08
DB
27 C MMX MMX2 3DNow AltiVec
28isVertDC Ec Ec Ec
29isVertMinMaxOk Ec Ec Ec
30doVertLowPass E e e Ec
31doVertDefFilter Ec Ec e e Ec
32isHorizDC Ec Ec Ec
33isHorizMinMaxOk a E Ec
34doHorizLowPass E e e Ec
35doHorizDefFilter Ec Ec e e Ec
36do_a_deblock Ec E Ec E
37deRing E e e* Ecp
38Vertical RKAlgo1 E a a
39Horizontal RKAlgo1 a a
40Vertical X1# a E E
41Horizontal X1# a E E
42LinIpolDeinterlace e E E*
43CubicIpolDeinterlace a e e*
44LinBlendDeinterlace e E E*
45MedianDeinterlace# E Ec Ec
46TempDeNoiser# E e e Ec
d5a1a995 47
117e45b0
MN
48* I don't have a 3DNow! CPU -> it's untested, but no one said it doesn't work, so it seems to work
49# more or less self-invented filters, so the exactness isn't too meaningful
3057fa66 50E = Exact implementation
acced553 51e = almost exact implementation (slightly different rounding,...)
3057fa66
A
52a = alternative / approximate impl
53c = checked against the other implementations (-vo md5)
b0ac780a 54p = partially optimized, still some work to do
3057fa66
A
55*/
56
57/*
58TODO:
3057fa66 59reduce the time wasted on the mem transfer
3057fa66 60unroll stuff if instructions depend too much on the prior one
3057fa66 61move YScale thing to the end instead of fixing QP
13e00528 62write a faster and higher quality deblocking filter :)
d5a1a995 63make the mainloop more flexible (variable number of blocks at once
bb270c08 64 (the if/else stuff per block is slowing things down)
9f45d04d 65compare the quality & speed of all filters
9f45d04d 66split this huge file
8405b3fd 67optimize c versions
117e45b0 68try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
3057fa66 69...
13e00528
A
70*/
71
36b1b0bc 72//Changelog: use the Subversion log
3057fa66 73
9858f773 74#include "config.h"
6ab6c7c3 75#include "avutil.h"
3057fa66
A
76#include <inttypes.h>
77#include <stdio.h>
d5a1a995 78#include <stdlib.h>
911879d1 79#include <string.h>
dda87e9f
PL
80#ifdef HAVE_MALLOC_H
81#include <malloc.h>
82#endif
3057fa66 83//#undef HAVE_MMX2
13e00528 84//#define HAVE_3DNOW
3057fa66 85//#undef HAVE_MMX
cc9b0679 86//#undef ARCH_X86
7f16f6e6 87//#define DEBUG_BRIGHTNESS
bba9b16c 88#ifdef USE_FASTMEMCPY
f4bd289a 89#include "libvo/fastmemcpy.h"
70d4f2da 90#endif
13e00528 91#include "postprocess.h"
c41d972d 92#include "postprocess_internal.h"
bba9b16c
MN
93
94#include "mangle.h" //FIXME should be supressed
3057fa66 95
a7b2871c
RD
96#ifdef HAVE_ALTIVEC_H
97#include <altivec.h>
98#endif
99
e939e1c3
A
100#define MIN(a,b) ((a) > (b) ? (b) : (a))
101#define MAX(a,b) ((a) < (b) ? (b) : (a))
102#define ABS(a) ((a) > 0 ? (a) : (-(a)))
103#define SIGN(a) ((a) > 0 ? 1 : -1)
104
911879d1
MN
105#define GET_MODE_BUFFER_SIZE 500
106#define OPTIONS_ARRAY_SIZE 10
9c9e467d
MN
107#define BLOCK_SIZE 8
108#define TEMP_STRIDE 8
109//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
911879d1 110
3f1d4e96
DB
111#if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0)
112# define attribute_used __attribute__((used))
12eebd26 113# define always_inline __attribute__((always_inline)) inline
3f1d4e96
DB
114#else
115# define attribute_used
12eebd26 116# define always_inline inline
3f1d4e96
DB
117#endif
118
053dea12 119#if defined(ARCH_X86) || defined(ARCH_X86_64)
bb270c08
DB
120static uint64_t __attribute__((aligned(8))) attribute_used w05= 0x0005000500050005LL;
121static uint64_t __attribute__((aligned(8))) attribute_used w04= 0x0004000400040004LL;
122static uint64_t __attribute__((aligned(8))) attribute_used w20= 0x0020002000200020LL;
123static uint64_t __attribute__((aligned(8))) attribute_used b00= 0x0000000000000000LL;
124static uint64_t __attribute__((aligned(8))) attribute_used b01= 0x0101010101010101LL;
125static uint64_t __attribute__((aligned(8))) attribute_used b02= 0x0202020202020202LL;
126static uint64_t __attribute__((aligned(8))) attribute_used b08= 0x0808080808080808LL;
127static uint64_t __attribute__((aligned(8))) attribute_used b80= 0x8080808080808080LL;
b28daef8 128#endif
3057fa66 129
134eb1e5
MN
130static uint8_t clip_table[3*256];
131static uint8_t * const clip_tab= clip_table + 256;
132
4df8ca9d 133static const int verbose= 0;
45b4f285 134
3f1d4e96 135static const int attribute_used deringThreshold= 20;
3057fa66 136
9c9e467d 137
911879d1
MN
138static struct PPFilter filters[]=
139{
bb270c08
DB
140 {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK},
141 {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK},
142/* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER},
143 {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/
144 {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER},
145 {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER},
146 {"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK},
147 {"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK},
148 {"dr", "dering", 1, 5, 6, DERING},
149 {"al", "autolevels", 0, 1, 2, LEVEL_FIX},
150 {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
151 {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
152 {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
153 {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER},
154 {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER},
155 {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER},
156 {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER},
157 {"fq", "forcequant", 1, 0, 0, FORCE_QUANT},
158 {NULL, NULL,0,0,0,0} //End Marker
911879d1
MN
159};
160
/*
 * Alias expansion table: entries come in (alias, expansion) pairs and the
 * list ends with a single NULL. The parser substitutes the expansion text
 * into the filter string whenever a filter name equals an alias.
 */
static const char *replaceTable[]=
{
    /* alias */    /* expansion */
    "default",     "hdeblock:a,vdeblock:a,dering:a",
    "de",          "hdeblock:a,vdeblock:a,dering:a",
    "fast",        "x1hdeblock:a,x1vdeblock:a,dering:a",
    "fa",          "x1hdeblock:a,x1vdeblock:a,dering:a",
    "ac",          "ha:a:128:7,va:a,dering:a",
    NULL           /* end marker */
};
170
3057fa66 171
053dea12 172#if defined(ARCH_X86) || defined(ARCH_X86_64)
3057fa66
A
/**
 * Emit the x86 "prefetchnta" instruction for the address p.
 */
static inline void prefetchnta(void *p)
{
    __asm__ __volatile__(
        "prefetchnta (%0)\n\t"
        : /* no outputs */
        : "r" (p)
    );
}
179
/**
 * Emit the x86 "prefetcht0" instruction for the address p.
 */
static inline void prefetcht0(void *p)
{
    __asm__ __volatile__(
        "prefetcht0 (%0)\n\t"
        : /* no outputs */
        : "r" (p)
    );
}
186
/**
 * Emit the x86 "prefetcht1" instruction for the address p.
 */
static inline void prefetcht1(void *p)
{
    __asm__ __volatile__(
        "prefetcht1 (%0)\n\t"
        : /* no outputs */
        : "r" (p)
    );
}
193
/**
 * Emit the x86 "prefetcht2" instruction for the address p.
 */
static inline void prefetcht2(void *p)
{
    __asm__ __volatile__(
        "prefetcht2 (%0)\n\t"
        : /* no outputs */
        : "r" (p)
    );
}
9a722af7 200#endif
3057fa66 201
cc9b0679 202// The horizontal functions exist only in C because the MMX code is faster with vertical filters and transposing
3057fa66 203
cf5ec61d
MN
204/**
205 * Check if the given 8x8 Block is mostly "flat"
206 */
b0ac780a 207static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
cf5ec61d 208{
bb270c08
DB
209 int numEq= 0;
210 int y;
211 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
212 const int dcThreshold= dcOffset*2 + 1;
213
214 for(y=0; y<BLOCK_SIZE; y++)
215 {
216 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
217 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
218 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
219 if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
220 if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
221 if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
222 if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
223 src+= stride;
224 }
225 return numEq > c->ppMode.flatnessThreshold;
9c9e467d
MN
226}
227
228/**
229 * Check if the middle 8x8 Block in the given 8x16 block is flat
230 */
231static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){
bb270c08
DB
232 int numEq= 0;
233 int y;
234 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
235 const int dcThreshold= dcOffset*2 + 1;
236
237 src+= stride*4; // src points to begin of the 8x8 Block
238 for(y=0; y<BLOCK_SIZE-1; y++)
239 {
240 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
241 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
242 if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
243 if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
244 if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
245 if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
246 if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
247 if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
248 src+= stride;
249 }
250 return numEq > c->ppMode.flatnessThreshold;
cf5ec61d
MN
251}
252
/**
 * Sparse horizontal range check over 8 rows, one pixel pair per row.
 *
 * Row r compares column 2*(r&3) with the column 5 further on (modulo 8),
 * i.e. the pairs (0,5) (2,7) (4,1) (6,3), repeated for rows 4..7.
 * For non-negative QP a pair passes when its difference lies in
 * [-2*QP, 2*QP].
 *
 * @param src    first pixel of the block
 * @param stride distance between two rows
 * @param QP     quantizer used to scale the allowed range
 * @return 1 if every sampled pair passes, 0 as soon as one fails
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{
    const uint8_t *row= src;
    int r;

    for(r=0; r<8; r++, row+= stride)
    {
        const int colA= 2*(r&3);
        const int colB= (colA + 5)&7;

        if((unsigned)(row[colA] - row[colB] + 2*QP) > 4*QP) return 0;
    }
    return 1;
}
cf5ec61d 275
cb482d25
MN
276static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
277{
278#if 1
279#if 1
bb270c08
DB
280 int x;
281 src+= stride*4;
282 for(x=0; x<BLOCK_SIZE; x+=4)
283 {
284 if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0;
285 if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
286 if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
287 if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
288 }
cb482d25 289#else
bb270c08
DB
290 int x;
291 src+= stride*3;
292 for(x=0; x<BLOCK_SIZE; x++)
293 {
294 if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
295 }
cb482d25 296#endif
bb270c08 297 return 1;
cb482d25 298#else
bb270c08
DB
299 int x;
300 src+= stride*4;
301 for(x=0; x<BLOCK_SIZE; x++)
302 {
303 int min=255;
304 int max=0;
305 int y;
306 for(y=0; y<8; y++){
307 int v= src[x + y*stride];
308 if(v>max) max=v;
309 if(v<min) min=v;
310 }
311 if(max-min > 2*QP) return 0;
312 }
313 return 1;
cb482d25
MN
314#endif
315}
316
b0ac780a 317static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c){
bb270c08
DB
318 if( isHorizDC_C(src, stride, c) ){
319 if( isHorizMinMaxOk_C(src, stride, c->QP) )
320 return 1;
321 else
322 return 0;
323 }else{
324 return 2;
325 }
b0ac780a
MN
326}
327
cb482d25 328static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c){
bb270c08
DB
329 if( isVertDC_C(src, stride, c) ){
330 if( isVertMinMaxOk_C(src, stride, c->QP) )
331 return 1;
332 else
333 return 0;
334 }else{
335 return 2;
336 }
cf5ec61d
MN
337}
338
b0ac780a 339static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
cf5ec61d 340{
bb270c08
DB
341 int y;
342 for(y=0; y<BLOCK_SIZE; y++)
343 {
344 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
345
346 if(ABS(middleEnergy) < 8*c->QP)
347 {
348 const int q=(dst[3] - dst[4])/2;
349 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
350 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
351
352 int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) );
353 d= MAX(d, 0);
354
355 d= (5*d + 32) >> 6;
356 d*= SIGN(-middleEnergy);
357
358 if(q>0)
359 {
360 d= d<0 ? 0 : d;
361 d= d>q ? q : d;
362 }
363 else
364 {
365 d= d>0 ? 0 : d;
366 d= d<q ? q : d;
367 }
368
369 dst[3]-= d;
370 dst[4]+= d;
371 }
372 dst+= stride;
373 }
cf5ec61d
MN
374}
375
376/**
377 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
378 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
379 */
b0ac780a 380static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
cf5ec61d 381{
bb270c08
DB
382 int y;
383 for(y=0; y<BLOCK_SIZE; y++)
384 {
385 const int first= ABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
386 const int last= ABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
387
388 int sums[10];
389 sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
390 sums[1] = sums[0] - first + dst[3];
391 sums[2] = sums[1] - first + dst[4];
392 sums[3] = sums[2] - first + dst[5];
393 sums[4] = sums[3] - first + dst[6];
394 sums[5] = sums[4] - dst[0] + dst[7];
395 sums[6] = sums[5] - dst[1] + last;
396 sums[7] = sums[6] - dst[2] + last;
397 sums[8] = sums[7] - dst[3] + last;
398 sums[9] = sums[8] - dst[4] + last;
399
400 dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
401 dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
402 dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
403 dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
404 dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
405 dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
406 dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
407 dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
408
409 dst+= stride;
410 }
cf5ec61d
MN
411}
412
4e4dcbc5 413/**
cc9b0679
MN
414 * Experimental Filter 1 (Horizontal)
415 * will not damage linear gradients
416 * Flat blocks should look like they where passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
417 * can only smooth blocks at the expected locations (it cant smooth them if they did move)
418 * MMX2 version does correct clipping C version doesnt
419 * not identical with the vertical one
4e4dcbc5 420 */
cc9b0679
MN
421static inline void horizX1Filter(uint8_t *src, int stride, int QP)
422{
bb270c08
DB
423 int y;
424 static uint64_t *lut= NULL;
425 if(lut==NULL)
426 {
427 int i;
6ab6c7c3 428 lut = av_malloc(256*8);
bb270c08
DB
429 for(i=0; i<256; i++)
430 {
431 int v= i < 128 ? 2*i : 2*(i-256);
117e45b0 432/*
cc9b0679 433//Simulate 112242211 9-Tap filter
bb270c08
DB
434 uint64_t a= (v/16) & 0xFF;
435 uint64_t b= (v/8) & 0xFF;
436 uint64_t c= (v/4) & 0xFF;
437 uint64_t d= (3*v/8) & 0xFF;
117e45b0 438*/
cc9b0679 439//Simulate piecewise linear interpolation
bb270c08
DB
440 uint64_t a= (v/16) & 0xFF;
441 uint64_t b= (v*3/16) & 0xFF;
442 uint64_t c= (v*5/16) & 0xFF;
443 uint64_t d= (7*v/16) & 0xFF;
444 uint64_t A= (0x100 - a)&0xFF;
445 uint64_t B= (0x100 - b)&0xFF;
446 uint64_t C= (0x100 - c)&0xFF;
447 uint64_t D= (0x100 - c)&0xFF;
448
449 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
450 (D<<24) | (C<<16) | (B<<8) | (A);
451 //lut[i] = (v<<32) | (v<<24);
452 }
453 }
454
455 for(y=0; y<BLOCK_SIZE; y++)
456 {
457 int a= src[1] - src[2];
458 int b= src[3] - src[4];
459 int c= src[5] - src[6];
460
461 int d= MAX(ABS(b) - (ABS(a) + ABS(c))/2, 0);
462
463 if(d < QP)
464 {
465 int v = d * SIGN(-b);
466
467 src[1] +=v/8;
468 src[2] +=v/4;
469 src[3] +=3*v/8;
470 src[4] -=3*v/8;
471 src[5] -=v/4;
472 src[6] -=v/8;
473
474 }
475 src+=stride;
476 }
cc9b0679
MN
477}
478
12eebd26
MN
479/**
480 * accurate deblock filter
481 */
792a5a7c 482static always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
bb270c08
DB
483 int y;
484 const int QP= c->QP;
485 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
486 const int dcThreshold= dcOffset*2 + 1;
12eebd26 487//START_TIMER
bb270c08
DB
488 src+= step*4; // src points to begin of the 8x8 Block
489 for(y=0; y<8; y++){
490 int numEq= 0;
491
492 if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
493 if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
494 if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
495 if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
496 if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
497 if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
498 if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
499 if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
500 if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
501 if(numEq > c->ppMode.flatnessThreshold){
502 int min, max, x;
503
504 if(src[0] > src[step]){
505 max= src[0];
506 min= src[step];
507 }else{
508 max= src[step];
509 min= src[0];
510 }
511 for(x=2; x<8; x+=2){
512 if(src[x*step] > src[(x+1)*step]){
513 if(src[x *step] > max) max= src[ x *step];
514 if(src[(x+1)*step] < min) min= src[(x+1)*step];
515 }else{
516 if(src[(x+1)*step] > max) max= src[(x+1)*step];
517 if(src[ x *step] < min) min= src[ x *step];
518 }
519 }
520 if(max-min < 2*QP){
521 const int first= ABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
522 const int last= ABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
523
524 int sums[10];
525 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
526 sums[1] = sums[0] - first + src[3*step];
527 sums[2] = sums[1] - first + src[4*step];
528 sums[3] = sums[2] - first + src[5*step];
529 sums[4] = sums[3] - first + src[6*step];
530 sums[5] = sums[4] - src[0*step] + src[7*step];
531 sums[6] = sums[5] - src[1*step] + last;
532 sums[7] = sums[6] - src[2*step] + last;
533 sums[8] = sums[7] - src[3*step] + last;
534 sums[9] = sums[8] - src[4*step] + last;
535
536 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
537 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
538 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
539 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
540 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
541 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
542 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
543 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
544 }
545 }else{
546 const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
547
548 if(ABS(middleEnergy) < 8*QP)
549 {
550 const int q=(src[3*step] - src[4*step])/2;
551 const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
552 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
553
554 int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) );
555 d= MAX(d, 0);
556
557 d= (5*d + 32) >> 6;
558 d*= SIGN(-middleEnergy);
559
560 if(q>0)
561 {
562 d= d<0 ? 0 : d;
563 d= d>q ? q : d;
564 }
565 else
566 {
567 d= d>0 ? 0 : d;
568 d= d<q ? q : d;
569 }
570
571 src[3*step]-= d;
572 src[4*step]+= d;
573 }
574 }
575
576 src += stride;
577 }
12eebd26
MN
578/*if(step==16){
579 STOP_TIMER("step16")
580}else{
581 STOP_TIMER("stepX")
582}*/
583}
cc9b0679 584
e89952aa 585//Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
cc9b0679 586//Plain C versions
e89952aa
MN
587#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
588#define COMPILE_C
589#endif
590
b0ac780a
MN
591#ifdef ARCH_POWERPC
592#ifdef HAVE_ALTIVEC
593#define COMPILE_ALTIVEC
b0ac780a
MN
594#endif //HAVE_ALTIVEC
595#endif //ARCH_POWERPC
596
053dea12 597#if defined(ARCH_X86) || defined(ARCH_X86_64)
e89952aa
MN
598
599#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
600#define COMPILE_MMX
601#endif
602
603#if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
604#define COMPILE_MMX2
605#endif
606
607#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
608#define COMPILE_3DNOW
609#endif
9c9e467d 610#endif //ARCH_X86
e89952aa
MN
611
612#undef HAVE_MMX
613#undef HAVE_MMX2
614#undef HAVE_3DNOW
b0ac780a 615#undef HAVE_ALTIVEC
e89952aa
MN
616
617#ifdef COMPILE_C
cc9b0679
MN
618#undef HAVE_MMX
619#undef HAVE_MMX2
620#undef HAVE_3DNOW
cc9b0679
MN
621#define RENAME(a) a ## _C
622#include "postprocess_template.c"
e89952aa 623#endif
cc9b0679 624
b0ac780a
MN
625#ifdef ARCH_POWERPC
626#ifdef COMPILE_ALTIVEC
627#undef RENAME
628#define HAVE_ALTIVEC
629#define RENAME(a) a ## _altivec
630#include "postprocess_altivec_template.c"
631#include "postprocess_template.c"
632#endif
633#endif //ARCH_POWERPC
634
cc9b0679 635//MMX versions
e89952aa 636#ifdef COMPILE_MMX
cc9b0679
MN
637#undef RENAME
638#define HAVE_MMX
639#undef HAVE_MMX2
640#undef HAVE_3DNOW
cc9b0679
MN
641#define RENAME(a) a ## _MMX
642#include "postprocess_template.c"
e89952aa 643#endif
cc9b0679
MN
644
645//MMX2 versions
e89952aa 646#ifdef COMPILE_MMX2
cc9b0679
MN
647#undef RENAME
648#define HAVE_MMX
649#define HAVE_MMX2
650#undef HAVE_3DNOW
cc9b0679
MN
651#define RENAME(a) a ## _MMX2
652#include "postprocess_template.c"
e89952aa 653#endif
cc9b0679
MN
654
655//3DNOW versions
e89952aa 656#ifdef COMPILE_3DNOW
cc9b0679
MN
657#undef RENAME
658#define HAVE_MMX
659#undef HAVE_MMX2
660#define HAVE_3DNOW
cc9b0679
MN
661#define RENAME(a) a ## _3DNow
662#include "postprocess_template.c"
e89952aa 663#endif
cc9b0679
MN
664
665// minor note: the HAVE_xyz macros are messed up after this point, so don't rely on them
666
667static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
bb270c08 668 QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc)
cc9b0679 669{
bb270c08
DB
670 PPContext *c= (PPContext *)vc;
671 PPMode *ppMode= (PPMode *)vm;
672 c->ppMode= *ppMode; //FIXME
9c9e467d 673
bb270c08
DB
674 // useing ifs here as they are faster than function pointers allthough the
675 // difference wouldnt be messureable here but its much better because
676 // someone might exchange the cpu whithout restarting mplayer ;)
e89952aa 677#ifdef RUNTIME_CPUDETECT
053dea12 678#if defined(ARCH_X86) || defined(ARCH_X86_64)
bb270c08
DB
679 // ordered per speed fasterst first
680 if(c->cpuCaps & PP_CPU_CAPS_MMX2)
681 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
682 else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
683 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
684 else if(c->cpuCaps & PP_CPU_CAPS_MMX)
685 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
686 else
687 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
cc9b0679 688#else
b0ac780a
MN
689#ifdef ARCH_POWERPC
690#ifdef HAVE_ALTIVEC
71487254 691 if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
bb270c08 692 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
b0ac780a
MN
693 else
694#endif
695#endif
bb270c08 696 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
be44a4d7 697#endif
e89952aa
MN
698#else //RUNTIME_CPUDETECT
699#ifdef HAVE_MMX2
bb270c08 700 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 701#elif defined (HAVE_3DNOW)
bb270c08 702 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 703#elif defined (HAVE_MMX)
bb270c08 704 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
b0ac780a 705#elif defined (HAVE_ALTIVEC)
bb270c08 706 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 707#else
bb270c08 708 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa
MN
709#endif
710#endif //!RUNTIME_CPUDETECT
117e45b0
MN
711}
712
cc9b0679 713//static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
bb270c08 714// QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
13e00528 715
/* -pp Command line Help
 *
 * The long names listed here must match the longName column of filters[]
 * exactly (the parser compares with strcmp): the accurate deblockers are
 * "ahdeblock"/"avdeblock" and the quantizer override is "forcequant".
 */
char *pp_help=
"Available postprocessing filters:\n"
"Filters Options\n"
"short long name short long option Description\n"
"* * a autoq CPU power dependent enabler\n"
" c chrom chrominance filtering enabled\n"
" y nochrom chrominance filtering disabled\n"
" n noluma luma filtering disabled\n"
"hb hdeblock (2 threshold) horizontal deblocking filter\n"
" 1. difference factor: default=32, higher -> more deblocking\n"
" 2. flatness threshold: default=39, lower -> more deblocking\n"
" the h & v deblocking filters share these\n"
" so you can't set different thresholds for h / v\n"
"vb vdeblock (2 threshold) vertical deblocking filter\n"
"ha ahdeblock (2 threshold) horizontal deblocking filter\n"
"va avdeblock (2 threshold) vertical deblocking filter\n"
"h1 x1hdeblock experimental h deblock filter 1\n"
"v1 x1vdeblock experimental v deblock filter 1\n"
"dr dering deringing filter\n"
"al autolevels automatic brightness / contrast\n"
" f fullyrange stretch luminance to (0..255)\n"
"lb linblenddeint linear blend deinterlacer\n"
"li linipoldeint linear interpolating deinterlace\n"
"ci cubicipoldeint cubic interpolating deinterlacer\n"
"md mediandeint median deinterlacer\n"
"fd ffmpegdeint ffmpeg deinterlacer\n"
"l5 lowpass5 FIR lowpass deinterlacer\n"
"de default hb:a,vb:a,dr:a\n"
"fa fast h1:a,v1:a,dr:a\n"
"ac ha:a:128:7,va:a,dr:a\n"
"tn tmpnoise (3 threshold) temporal noise reducer\n"
" 1. <= 2. <= 3. larger -> stronger filtering\n"
"fq forcequant <quantizer> force quantizer\n"
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"\n"
;
911879d1 761
c41d972d 762pp_mode_t *pp_get_mode_by_name_and_quality(char *name, int quality)
911879d1 763{
bb270c08
DB
764 char temp[GET_MODE_BUFFER_SIZE];
765 char *p= temp;
7b49ce2e
SH
766 const char *filterDelimiters= ",/";
767 const char *optionDelimiters= ":";
bb270c08
DB
768 struct PPMode *ppMode;
769 char *filterToken;
770
6ab6c7c3 771 ppMode= av_malloc(sizeof(PPMode));
bb270c08
DB
772
773 ppMode->lumMode= 0;
774 ppMode->chromMode= 0;
775 ppMode->maxTmpNoise[0]= 700;
776 ppMode->maxTmpNoise[1]= 1500;
777 ppMode->maxTmpNoise[2]= 3000;
778 ppMode->maxAllowedY= 234;
779 ppMode->minAllowedY= 16;
780 ppMode->baseDcDiff= 256/8;
781 ppMode->flatnessThreshold= 56-16-1;
782 ppMode->maxClippedThreshold= 0.01;
783 ppMode->error=0;
784
785 strncpy(temp, name, GET_MODE_BUFFER_SIZE);
786
787 if(verbose>1) printf("pp: %s\n", name);
788
789 for(;;){
790 char *filterName;
791 int q= 1000000; //PP_QUALITY_MAX;
792 int chrom=-1;
793 int luma=-1;
794 char *option;
795 char *options[OPTIONS_ARRAY_SIZE];
796 int i;
797 int filterNameOk=0;
798 int numOfUnknownOptions=0;
799 int enable=1; //does the user want us to enabled or disabled the filter
800
801 filterToken= strtok(p, filterDelimiters);
802 if(filterToken == NULL) break;
803 p+= strlen(filterToken) + 1; // p points to next filterToken
804 filterName= strtok(filterToken, optionDelimiters);
805 if(verbose>1) printf("pp: %s::%s\n", filterToken, filterName);
806
807 if(*filterName == '-')
808 {
809 enable=0;
810 filterName++;
811 }
812
813 for(;;){ //for all options
814 option= strtok(NULL, optionDelimiters);
815 if(option == NULL) break;
816
817 if(verbose>1) printf("pp: option: %s\n", option);
818 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
819 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
820 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
821 else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
822 else
823 {
824 options[numOfUnknownOptions] = option;
825 numOfUnknownOptions++;
826 }
827 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
828 }
829 options[numOfUnknownOptions] = NULL;
830
831 /* replace stuff from the replace Table */
832 for(i=0; replaceTable[2*i]!=NULL; i++)
833 {
834 if(!strcmp(replaceTable[2*i], filterName))
835 {
836 int newlen= strlen(replaceTable[2*i + 1]);
837 int plen;
838 int spaceLeft;
839
840 if(p==NULL) p= temp, *p=0; //last filter
841 else p--, *p=','; //not last filter
842
843 plen= strlen(p);
844 spaceLeft= p - temp + plen;
845 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE)
846 {
847 ppMode->error++;
848 break;
849 }
850 memmove(p + newlen, p, plen+1);
851 memcpy(p, replaceTable[2*i + 1], newlen);
852 filterNameOk=1;
853 }
854 }
855
856 for(i=0; filters[i].shortName!=NULL; i++)
857 {
858// printf("Compareing %s, %s, %s\n", filters[i].shortName,filters[i].longName, filterName);
859 if( !strcmp(filters[i].longName, filterName)
860 || !strcmp(filters[i].shortName, filterName))
861 {
862 ppMode->lumMode &= ~filters[i].mask;
863 ppMode->chromMode &= ~filters[i].mask;
864
865 filterNameOk=1;
866 if(!enable) break; // user wants to disable it
867
868 if(q >= filters[i].minLumQuality && luma)
869 ppMode->lumMode|= filters[i].mask;
870 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
871 if(q >= filters[i].minChromQuality)
872 ppMode->chromMode|= filters[i].mask;
873
874 if(filters[i].mask == LEVEL_FIX)
875 {
876 int o;
877 ppMode->minAllowedY= 16;
878 ppMode->maxAllowedY= 234;
879 for(o=0; options[o]!=NULL; o++)
880 {
881 if( !strcmp(options[o],"fullyrange")
882 ||!strcmp(options[o],"f"))
883 {
884 ppMode->minAllowedY= 0;
885 ppMode->maxAllowedY= 255;
886 numOfUnknownOptions--;
887 }
888 }
889 }
890 else if(filters[i].mask == TEMP_NOISE_FILTER)
891 {
892 int o;
893 int numOfNoises=0;
894
895 for(o=0; options[o]!=NULL; o++)
896 {
897 char *tail;
898 ppMode->maxTmpNoise[numOfNoises]=
899 strtol(options[o], &tail, 0);
900 if(tail!=options[o])
901 {
902 numOfNoises++;
903 numOfUnknownOptions--;
904 if(numOfNoises >= 3) break;
905 }
906 }
907 }
908 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK
909 || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK)
910 {
911 int o;
912
913 for(o=0; options[o]!=NULL && o<2; o++)
914 {
915 char *tail;
916 int val= strtol(options[o], &tail, 0);
917 if(tail==options[o]) break;
918
919 numOfUnknownOptions--;
920 if(o==0) ppMode->baseDcDiff= val;
921 else ppMode->flatnessThreshold= val;
922 }
923 }
924 else if(filters[i].mask == FORCE_QUANT)
925 {
926 int o;
927 ppMode->forcedQuant= 15;
928
929 for(o=0; options[o]!=NULL && o<1; o++)
930 {
931 char *tail;
932 int val= strtol(options[o], &tail, 0);
933 if(tail==options[o]) break;
934
935 numOfUnknownOptions--;
936 ppMode->forcedQuant= val;
937 }
938 }
939 }
940 }
941 if(!filterNameOk) ppMode->error++;
942 ppMode->error += numOfUnknownOptions;
943 }
944
945 if(verbose>1) printf("pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
946 if(ppMode->error)
947 {
948 fprintf(stderr, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
6ab6c7c3 949 av_free(ppMode);
bb270c08
DB
950 return NULL;
951 }
952 return ppMode;
911879d1
MN
953}
954
c41d972d 955void pp_free_mode(pp_mode_t *mode){
6ab6c7c3 956 av_free(mode);
c41d972d
MN
957}
958
/**
 * Replace the buffer at *p by a fresh one obtained from av_mallocz().
 * The old buffer is freed first; its contents are not carried over.
 *
 * @param p         address of the buffer pointer to replace
 * @param alignment not used by this implementation
 * @param size      size in bytes of the new buffer
 */
static void reallocAlign(void **p, int alignment, int size)
{
    void *fresh;

    av_free(*p);
    fresh= av_mallocz(size);
    *p= fresh;
}
963
0426af31 964static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
bb270c08
DB
965 int mbWidth = (width+15)>>4;
966 int mbHeight= (height+15)>>4;
967 int i;
968
969 c->stride= stride;
970 c->qpStride= qpStride;
971
972 reallocAlign((void **)&c->tempDst, 8, stride*24);
973 reallocAlign((void **)&c->tempSrc, 8, stride*24);
974 reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
975 reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
976 for(i=0; i<256; i++)
977 c->yHistogram[i]= width*height/64*15/256;
978
979 for(i=0; i<3; i++)
980 {
981 //Note:the +17*1024 is just there so i dont have to worry about r/w over te end
982 reallocAlign((void **)&c->tempBlured[i], 8, stride*mbHeight*16 + 17*1024);
983 reallocAlign((void **)&c->tempBluredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
984 }
985
986 reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
987 reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
988 reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
989 reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
88c0bc7e
MN
990}
991
4cfbf61b 992static void global_init(void){
bb270c08
DB
993 int i;
994 memset(clip_table, 0, 256);
995 for(i=256; i<512; i++)
996 clip_table[i]= i;
997 memset(clip_table+512, 0, 256);
134eb1e5
MN
998}
999
88c0bc7e 1000pp_context_t *pp_get_context(int width, int height, int cpuCaps){
6ab6c7c3 1001 PPContext *c= av_malloc(sizeof(PPContext));
bb270c08
DB
1002 int stride= (width+15)&(~15); //assumed / will realloc if needed
1003 int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
115329f1 1004
bb270c08 1005 global_init();
134eb1e5 1006
bb270c08
DB
1007 memset(c, 0, sizeof(PPContext));
1008 c->cpuCaps= cpuCaps;
1009 if(cpuCaps&PP_FORMAT){
1010 c->hChromaSubSample= cpuCaps&0x3;
1011 c->vChromaSubSample= (cpuCaps>>4)&0x3;
1012 }else{
1013 c->hChromaSubSample= 1;
1014 c->vChromaSubSample= 1;
1015 }
88c0bc7e 1016
bb270c08 1017 reallocBuffers(c, width, height, stride, qpStride);
115329f1 1018
bb270c08 1019 c->frameNum=-1;
45b4f285 1020
bb270c08 1021 return c;
45b4f285
MN
1022}
1023
9cb54f43 1024void pp_free_context(void *vc){
bb270c08
DB
1025 PPContext *c = (PPContext*)vc;
1026 int i;
115329f1 1027
6ab6c7c3
LB
1028 for(i=0; i<3; i++) av_free(c->tempBlured[i]);
1029 for(i=0; i<3; i++) av_free(c->tempBluredPast[i]);
115329f1 1030
6ab6c7c3
LB
1031 av_free(c->tempBlocks);
1032 av_free(c->yHistogram);
1033 av_free(c->tempDst);
1034 av_free(c->tempSrc);
1035 av_free(c->deintTemp);
1036 av_free(c->stdQPTable);
1037 av_free(c->nonBQPTable);
1038 av_free(c->forcedQPTable);
115329f1 1039
bb270c08 1040 memset(c, 0, sizeof(PPContext));
88c0bc7e 1041
6ab6c7c3 1042 av_free(c);
9c9e467d
MN
1043}
1044
9cb54f43 1045void pp_postprocess(uint8_t * src[3], int srcStride[3],
9c9e467d 1046 uint8_t * dst[3], int dstStride[3],
ec487e5d 1047 int width, int height,
9c9e467d 1048 QP_STORE_T *QP_store, int QPStride,
bb270c08 1049 pp_mode_t *vm, void *vc, int pict_type)
911879d1 1050{
bb270c08
DB
1051 int mbWidth = (width+15)>>4;
1052 int mbHeight= (height+15)>>4;
1053 PPMode *mode = (PPMode*)vm;
1054 PPContext *c = (PPContext*)vc;
1055 int minStride= MAX(ABS(srcStride[0]), ABS(dstStride[0]));
1056 int absQPStride = ABS(QPStride);
1057
1058 // c->stride and c->QPStride are always positive
1059 if(c->stride < minStride || c->qpStride < absQPStride)
1060 reallocBuffers(c, width, height,
1061 MAX(minStride, c->stride),
1062 MAX(c->qpStride, absQPStride));
1063
1064 if(QP_store==NULL || (mode->lumMode & FORCE_QUANT))
1065 {
1066 int i;
1067 QP_store= c->forcedQPTable;
1068 absQPStride = QPStride = 0;
1069 if(mode->lumMode & FORCE_QUANT)
1070 for(i=0; i<mbWidth; i++) QP_store[i]= mode->forcedQuant;
1071 else
1072 for(i=0; i<mbWidth; i++) QP_store[i]= 1;
1073 }
0426af31
MN
1074//printf("pict_type:%d\n", pict_type);
1075
bb270c08
DB
1076 if(pict_type & PP_PICT_TYPE_QP2){
1077 int i;
1078 const int count= mbHeight * absQPStride;
1079 for(i=0; i<(count>>2); i++){
1080 ((uint32_t*)c->stdQPTable)[i] = (((uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1081 }
1082 for(i<<=2; i<count; i++){
1083 c->stdQPTable[i] = QP_store[i]>>1;
1084 }
0426af31 1085 QP_store= c->stdQPTable;
bb270c08
DB
1086 QPStride= absQPStride;
1087 }
0426af31 1088
ec487e5d
MN
1089if(0){
1090int x,y;
1091for(y=0; y<mbHeight; y++){
bb270c08
DB
1092 for(x=0; x<mbWidth; x++){
1093 printf("%2d ", QP_store[x + y*QPStride]);
1094 }
1095 printf("\n");
ec487e5d 1096}
bb270c08 1097 printf("\n");
ec487e5d 1098}
51e19dcc 1099
bb270c08
DB
1100 if((pict_type&7)!=3)
1101 {
1102 if (QPStride >= 0) {
1103 int i;
1104 const int count= mbHeight * QPStride;
1105 for(i=0; i<(count>>2); i++){
1106 ((uint32_t*)c->nonBQPTable)[i] = ((uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1107 }
1108 for(i<<=2; i<count; i++){
1109 c->nonBQPTable[i] = QP_store[i] & 0x3F;
1110 }
1111 } else {
1112 int i,j;
1113 for(i=0; i<mbHeight; i++) {
1114 for(j=0; j<absQPStride; j++) {
1115 c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1116 }
1117 }
1118 }
1119 }
1120
1121 if(verbose>2)
1122 {
1123 printf("using npp filters 0x%X/0x%X\n", mode->lumMode, mode->chromMode);
1124 }
1125
1126 postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1127 width, height, QP_store, QPStride, 0, mode, c);
1128
1129 width = (width )>>c->hChromaSubSample;
1130 height = (height)>>c->vChromaSubSample;
1131
1132 if(mode->chromMode)
1133 {
1134 postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1135 width, height, QP_store, QPStride, 1, mode, c);
1136 postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1137 width, height, QP_store, QPStride, 2, mode, c);
1138 }
1139 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2])
1140 {
1141 linecpy(dst[1], src[1], height, srcStride[1]);
1142 linecpy(dst[2], src[2], height, srcStride[2]);
1143 }
1144 else
1145 {
1146 int y;
1147 for(y=0; y<height; y++)
1148 {
1149 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1150 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
1151 }
1152 }
911879d1
MN
1153}
1154