CVS --> Subversion
[libav.git] / libpostproc / postprocess.c
CommitLineData
3057fa66 1/*
9858f773 2 Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
3057fa66 3
b0ac780a
MN
4 AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
5
3057fa66
A
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
5509bffa 18 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
3057fa66
A
19*/
20
b304569a
MN
21/**
22 * @file postprocess.c
23 * postprocessing.
24 */
115329f1 25
3057fa66 26/*
bb270c08
DB
27 C MMX MMX2 3DNow AltiVec
28isVertDC Ec Ec Ec
29isVertMinMaxOk Ec Ec Ec
30doVertLowPass E e e Ec
31doVertDefFilter Ec Ec e e Ec
32isHorizDC Ec Ec Ec
33isHorizMinMaxOk a E Ec
34doHorizLowPass E e e Ec
35doHorizDefFilter Ec Ec e e Ec
36do_a_deblock Ec E Ec E
37deRing E e e* Ecp
38Vertical RKAlgo1 E a a
39Horizontal RKAlgo1 a a
40Vertical X1# a E E
41Horizontal X1# a E E
42LinIpolDeinterlace e E E*
43CubicIpolDeinterlace a e e*
44LinBlendDeinterlace e E E*
45MedianDeinterlace# E Ec Ec
46TempDeNoiser# E e e Ec
d5a1a995 47
117e45b0
MN
48* i dont have a 3dnow CPU -> its untested, but noone said it doesnt work so it seems to work
49# more or less selfinvented filters so the exactness isnt too meaningfull
3057fa66 50E = Exact implementation
acced553 51e = allmost exact implementation (slightly different rounding,...)
3057fa66
A
52a = alternative / approximate impl
53c = checked against the other implementations (-vo md5)
b0ac780a 54p = partially optimized, still some work to do
3057fa66
A
55*/
56
57/*
58TODO:
3057fa66 59reduce the time wasted on the mem transfer
3057fa66 60unroll stuff if instructions depend too much on the prior one
3057fa66 61move YScale thing to the end instead of fixing QP
13e00528 62write a faster and higher quality deblocking filter :)
d5a1a995 63make the mainloop more flexible (variable number of blocks at once
bb270c08 64 (the if/else stuff per block is slowing things down)
9f45d04d 65compare the quality & speed of all filters
9f45d04d 66split this huge file
8405b3fd 67optimize c versions
117e45b0 68try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
3057fa66 69...
13e00528
A
70*/
71
36b1b0bc 72//Changelog: use the Subversion log
3057fa66 73
9858f773 74#include "config.h"
3057fa66
A
75#include <inttypes.h>
76#include <stdio.h>
d5a1a995 77#include <stdlib.h>
911879d1 78#include <string.h>
dda87e9f
PL
79#ifdef HAVE_MALLOC_H
80#include <malloc.h>
81#endif
3057fa66 82//#undef HAVE_MMX2
13e00528 83//#define HAVE_3DNOW
3057fa66 84//#undef HAVE_MMX
cc9b0679 85//#undef ARCH_X86
7f16f6e6 86//#define DEBUG_BRIGHTNESS
bba9b16c 87#ifdef USE_FASTMEMCPY
0a87c409 88#include "fastmemcpy.h"
70d4f2da 89#endif
13e00528 90#include "postprocess.h"
c41d972d 91#include "postprocess_internal.h"
bba9b16c
MN
92
93#include "mangle.h" //FIXME should be supressed
3057fa66 94
a7b2871c
RD
95#ifdef HAVE_ALTIVEC_H
96#include <altivec.h>
97#endif
98
ca390e72
ZK
99#ifndef HAVE_MEMALIGN
100#define memalign(a,b) malloc(b)
101#endif
102
e939e1c3
A
/*
 * Small arithmetic helpers.
 * All of them are plain macros, so every argument may be evaluated more than
 * once: never pass an expression with side effects (i++, function calls, ...).
 * Note that SIGN(0) yields -1, not 0.
 */
#define MIN(a,b) ((a) > (b) ? (b) : (a))
#define MAX(a,b) ((a) < (b) ? (b) : (a))
#define ABS(a) ((a) > 0 ? (a) : (-(a)))
#define SIGN(a) ((a) > 0 ? 1 : -1)
107
911879d1
MN
/* Size of the scratch buffer used while parsing a filter string. */
#define GET_MODE_BUFFER_SIZE 500
/* Maximum number of per-filter options kept (including the NULL end marker). */
#define OPTIONS_ARRAY_SIZE 10
/* Edge length of the blocks the C filters in this file operate on. */
#define BLOCK_SIZE 8
#define TEMP_STRIDE 8
//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
911879d1 113
3f1d4e96
DB
/*
 * attribute_used : keeps a static object even if the compiler sees no C-level
 *                  reference to it.
 * always_inline  : forces inlining where the compiler supports it.
 * Both degrade to nothing / plain "inline" on compilers older than GCC 3.1
 * and on non-GNU compilers.
 */
#if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 0))
#    define attribute_used __attribute__((used))
#    define always_inline __attribute__((always_inline)) inline
#else
#    define attribute_used
#    define always_inline inline
#endif
121
#if defined(ARCH_X86) || defined(ARCH_X86_64)
/*
 * 8-byte aligned 64-bit constants for the x86 SIMD code paths.
 * "wNN" holds the 16-bit value 0xNN replicated four times, "bNN" holds the
 * byte 0xNN replicated eight times. They are marked attribute_used because
 * nothing in this part of the file references them from C.
 */
static uint64_t __attribute__((aligned(8))) attribute_used w05= 0x0005000500050005LL;
static uint64_t __attribute__((aligned(8))) attribute_used w04= 0x0004000400040004LL;
static uint64_t __attribute__((aligned(8))) attribute_used w20= 0x0020002000200020LL;
static uint64_t __attribute__((aligned(8))) attribute_used b00= 0x0000000000000000LL;
static uint64_t __attribute__((aligned(8))) attribute_used b01= 0x0101010101010101LL;
static uint64_t __attribute__((aligned(8))) attribute_used b02= 0x0202020202020202LL;
static uint64_t __attribute__((aligned(8))) attribute_used b08= 0x0808080808080808LL;
static uint64_t __attribute__((aligned(8))) attribute_used b80= 0x8080808080808080LL;
#endif
3057fa66 132
134eb1e5
MN
133static uint8_t clip_table[3*256];
134static uint8_t * const clip_tab= clip_table + 256;
135
4df8ca9d 136static const int verbose= 0;
45b4f285 137
3f1d4e96 138static const int attribute_used deringThreshold= 20;
3057fa66 139
9c9e467d 140
911879d1
MN
141static struct PPFilter filters[]=
142{
bb270c08
DB
143 {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK},
144 {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK},
145/* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER},
146 {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/
147 {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER},
148 {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER},
149 {"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK},
150 {"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK},
151 {"dr", "dering", 1, 5, 6, DERING},
152 {"al", "autolevels", 0, 1, 2, LEVEL_FIX},
153 {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
154 {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
155 {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
156 {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER},
157 {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER},
158 {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER},
159 {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER},
160 {"fq", "forcequant", 1, 0, 0, FORCE_QUANT},
161 {NULL, NULL,0,0,0,0} //End Marker
911879d1
MN
162};
163
/*
 * Alias table used by the filter-string parser: entries come in pairs
 * (alias name, replacement filter string) and the list ends with a single
 * NULL. An alias found in the user's string is expanded in place to its
 * replacement before parsing continues.
 */
static char *replaceTable[]=
{
        "default", "hdeblock:a,vdeblock:a,dering:a",
        "de",      "hdeblock:a,vdeblock:a,dering:a",
        "fast",    "x1hdeblock:a,x1vdeblock:a,dering:a",
        "fa",      "x1hdeblock:a,x1vdeblock:a,dering:a",
        "ac",      "ha:a:128:7,va:a,dering:a",
        NULL //End Marker
};
173
3057fa66 174
053dea12 175#if defined(ARCH_X86) || defined(ARCH_X86_64)
3057fa66
A
/**
 * Emit a PREFETCHNTA instruction for the address p.
 * __asm__/__volatile__ are used instead of the bare asm/volatile spelling so
 * that the code also builds in strict ISO modes, where "asm" is not a keyword.
 */
static inline void prefetchnta(void *p)
{
        __asm__ __volatile__( "prefetchnta (%0)\n\t"
                : : "r" (p)
        );
}
182
/**
 * Emit a PREFETCHT0 instruction for the address p.
 * __asm__/__volatile__ are used instead of the bare asm/volatile spelling so
 * that the code also builds in strict ISO modes, where "asm" is not a keyword.
 */
static inline void prefetcht0(void *p)
{
        __asm__ __volatile__( "prefetcht0 (%0)\n\t"
                : : "r" (p)
        );
}
189
/**
 * Emit a PREFETCHT1 instruction for the address p.
 * __asm__/__volatile__ are used instead of the bare asm/volatile spelling so
 * that the code also builds in strict ISO modes, where "asm" is not a keyword.
 */
static inline void prefetcht1(void *p)
{
        __asm__ __volatile__( "prefetcht1 (%0)\n\t"
                : : "r" (p)
        );
}
196
/**
 * Emit a PREFETCHT2 instruction for the address p.
 * __asm__/__volatile__ are used instead of the bare asm/volatile spelling so
 * that the code also builds in strict ISO modes, where "asm" is not a keyword.
 */
static inline void prefetcht2(void *p)
{
        __asm__ __volatile__( "prefetcht2 (%0)\n\t"
                : : "r" (p)
        );
}
9a722af7 203#endif
3057fa66 204
// The horizontal functions exist only in C because the MMX code is faster with vertical filters and transposing.
3057fa66 206
cf5ec61d
MN
207/**
208 * Check if the given 8x8 Block is mostly "flat"
209 */
b0ac780a 210static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
cf5ec61d 211{
bb270c08
DB
212 int numEq= 0;
213 int y;
214 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
215 const int dcThreshold= dcOffset*2 + 1;
216
217 for(y=0; y<BLOCK_SIZE; y++)
218 {
219 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
220 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
221 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
222 if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
223 if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
224 if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
225 if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
226 src+= stride;
227 }
228 return numEq > c->ppMode.flatnessThreshold;
9c9e467d
MN
229}
230
231/**
232 * Check if the middle 8x8 Block in the given 8x16 block is flat
233 */
234static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){
bb270c08
DB
235 int numEq= 0;
236 int y;
237 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
238 const int dcThreshold= dcOffset*2 + 1;
239
240 src+= stride*4; // src points to begin of the 8x8 Block
241 for(y=0; y<BLOCK_SIZE-1; y++)
242 {
243 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
244 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
245 if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
246 if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
247 if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
248 if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
249 if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
250 if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
251 src+= stride;
252 }
253 return numEq > c->ppMode.flatnessThreshold;
cf5ec61d
MN
254}
255
/**
 * Cheap horizontal range test on an 8-row block.
 *
 * Instead of computing the true min/max of every row, one pair of samples
 * five columns apart is probed per row (a different pair on each of four
 * consecutive rows, repeated twice). The test fails as soon as one probed
 * pair differs by more than 2*QP in either direction.
 *
 * @param src    first sample of the first row
 * @param stride distance in bytes between two rows
 * @param QP     quantizer; the allowed difference is +-2*QP
 * @return 1 if all probed pairs are within range, 0 otherwise
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{
        int i;
#if 1
        for(i=0; i<2; i++){
                // (unsigned)(diff + 2*QP) > 4*QP  <=>  diff < -2*QP || diff > 2*QP
                if((unsigned)(src[0] - src[5] + 2*QP) > 4*QP) return 0;
                src += stride;
                if((unsigned)(src[2] - src[7] + 2*QP) > 4*QP) return 0;
                src += stride;
                if((unsigned)(src[4] - src[1] + 2*QP) > 4*QP) return 0;
                src += stride;
                if((unsigned)(src[6] - src[3] + 2*QP) > 4*QP) return 0;
                src += stride;
        }
#else
        // alternative: compare the first and the last sample of every row
        for(i=0; i<8; i++){
                if((unsigned)(src[0] - src[7] + 2*QP) > 4*QP) return 0;
                src += stride;
        }
#endif
        return 1;
}
cf5ec61d 278
cb482d25
MN
279static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
280{
281#if 1
282#if 1
bb270c08
DB
283 int x;
284 src+= stride*4;
285 for(x=0; x<BLOCK_SIZE; x+=4)
286 {
287 if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0;
288 if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
289 if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
290 if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
291 }
cb482d25 292#else
bb270c08
DB
293 int x;
294 src+= stride*3;
295 for(x=0; x<BLOCK_SIZE; x++)
296 {
297 if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
298 }
cb482d25 299#endif
bb270c08 300 return 1;
cb482d25 301#else
bb270c08
DB
302 int x;
303 src+= stride*4;
304 for(x=0; x<BLOCK_SIZE; x++)
305 {
306 int min=255;
307 int max=0;
308 int y;
309 for(y=0; y<8; y++){
310 int v= src[x + y*stride];
311 if(v>max) max=v;
312 if(v<min) min=v;
313 }
314 if(max-min > 2*QP) return 0;
315 }
316 return 1;
cb482d25
MN
317#endif
318}
319
b0ac780a 320static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c){
bb270c08
DB
321 if( isHorizDC_C(src, stride, c) ){
322 if( isHorizMinMaxOk_C(src, stride, c->QP) )
323 return 1;
324 else
325 return 0;
326 }else{
327 return 2;
328 }
b0ac780a
MN
329}
330
cb482d25 331static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c){
bb270c08
DB
332 if( isVertDC_C(src, stride, c) ){
333 if( isVertMinMaxOk_C(src, stride, c->QP) )
334 return 1;
335 else
336 return 0;
337 }else{
338 return 2;
339 }
cf5ec61d
MN
340}
341
b0ac780a 342static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
cf5ec61d 343{
bb270c08
DB
344 int y;
345 for(y=0; y<BLOCK_SIZE; y++)
346 {
347 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
348
349 if(ABS(middleEnergy) < 8*c->QP)
350 {
351 const int q=(dst[3] - dst[4])/2;
352 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
353 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
354
355 int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) );
356 d= MAX(d, 0);
357
358 d= (5*d + 32) >> 6;
359 d*= SIGN(-middleEnergy);
360
361 if(q>0)
362 {
363 d= d<0 ? 0 : d;
364 d= d>q ? q : d;
365 }
366 else
367 {
368 d= d>0 ? 0 : d;
369 d= d<q ? q : d;
370 }
371
372 dst[3]-= d;
373 dst[4]+= d;
374 }
375 dst+= stride;
376 }
cf5ec61d
MN
377}
378
379/**
380 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
381 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
382 */
b0ac780a 383static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
cf5ec61d 384{
bb270c08
DB
385 int y;
386 for(y=0; y<BLOCK_SIZE; y++)
387 {
388 const int first= ABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
389 const int last= ABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
390
391 int sums[10];
392 sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
393 sums[1] = sums[0] - first + dst[3];
394 sums[2] = sums[1] - first + dst[4];
395 sums[3] = sums[2] - first + dst[5];
396 sums[4] = sums[3] - first + dst[6];
397 sums[5] = sums[4] - dst[0] + dst[7];
398 sums[6] = sums[5] - dst[1] + last;
399 sums[7] = sums[6] - dst[2] + last;
400 sums[8] = sums[7] - dst[3] + last;
401 sums[9] = sums[8] - dst[4] + last;
402
403 dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
404 dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
405 dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
406 dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
407 dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
408 dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
409 dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
410 dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
411
412 dst+= stride;
413 }
cf5ec61d
MN
414}
415
4e4dcbc5 416/**
cc9b0679
MN
417 * Experimental Filter 1 (Horizontal)
418 * will not damage linear gradients
419 * Flat blocks should look like they where passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
420 * can only smooth blocks at the expected locations (it cant smooth them if they did move)
421 * MMX2 version does correct clipping C version doesnt
422 * not identical with the vertical one
4e4dcbc5 423 */
cc9b0679
MN
424static inline void horizX1Filter(uint8_t *src, int stride, int QP)
425{
bb270c08
DB
426 int y;
427 static uint64_t *lut= NULL;
428 if(lut==NULL)
429 {
430 int i;
431 lut= (uint64_t*)memalign(8, 256*8);
432 for(i=0; i<256; i++)
433 {
434 int v= i < 128 ? 2*i : 2*(i-256);
117e45b0 435/*
cc9b0679 436//Simulate 112242211 9-Tap filter
bb270c08
DB
437 uint64_t a= (v/16) & 0xFF;
438 uint64_t b= (v/8) & 0xFF;
439 uint64_t c= (v/4) & 0xFF;
440 uint64_t d= (3*v/8) & 0xFF;
117e45b0 441*/
cc9b0679 442//Simulate piecewise linear interpolation
bb270c08
DB
443 uint64_t a= (v/16) & 0xFF;
444 uint64_t b= (v*3/16) & 0xFF;
445 uint64_t c= (v*5/16) & 0xFF;
446 uint64_t d= (7*v/16) & 0xFF;
447 uint64_t A= (0x100 - a)&0xFF;
448 uint64_t B= (0x100 - b)&0xFF;
449 uint64_t C= (0x100 - c)&0xFF;
450 uint64_t D= (0x100 - c)&0xFF;
451
452 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
453 (D<<24) | (C<<16) | (B<<8) | (A);
454 //lut[i] = (v<<32) | (v<<24);
455 }
456 }
457
458 for(y=0; y<BLOCK_SIZE; y++)
459 {
460 int a= src[1] - src[2];
461 int b= src[3] - src[4];
462 int c= src[5] - src[6];
463
464 int d= MAX(ABS(b) - (ABS(a) + ABS(c))/2, 0);
465
466 if(d < QP)
467 {
468 int v = d * SIGN(-b);
469
470 src[1] +=v/8;
471 src[2] +=v/4;
472 src[3] +=3*v/8;
473 src[4] -=3*v/8;
474 src[5] -=v/4;
475 src[6] -=v/8;
476
477 }
478 src+=stride;
479 }
cc9b0679
MN
480}
481
12eebd26
MN
482/**
483 * accurate deblock filter
484 */
792a5a7c 485static always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
bb270c08
DB
486 int y;
487 const int QP= c->QP;
488 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
489 const int dcThreshold= dcOffset*2 + 1;
12eebd26 490//START_TIMER
bb270c08
DB
491 src+= step*4; // src points to begin of the 8x8 Block
492 for(y=0; y<8; y++){
493 int numEq= 0;
494
495 if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
496 if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
497 if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
498 if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
499 if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
500 if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
501 if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
502 if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
503 if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
504 if(numEq > c->ppMode.flatnessThreshold){
505 int min, max, x;
506
507 if(src[0] > src[step]){
508 max= src[0];
509 min= src[step];
510 }else{
511 max= src[step];
512 min= src[0];
513 }
514 for(x=2; x<8; x+=2){
515 if(src[x*step] > src[(x+1)*step]){
516 if(src[x *step] > max) max= src[ x *step];
517 if(src[(x+1)*step] < min) min= src[(x+1)*step];
518 }else{
519 if(src[(x+1)*step] > max) max= src[(x+1)*step];
520 if(src[ x *step] < min) min= src[ x *step];
521 }
522 }
523 if(max-min < 2*QP){
524 const int first= ABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
525 const int last= ABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
526
527 int sums[10];
528 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
529 sums[1] = sums[0] - first + src[3*step];
530 sums[2] = sums[1] - first + src[4*step];
531 sums[3] = sums[2] - first + src[5*step];
532 sums[4] = sums[3] - first + src[6*step];
533 sums[5] = sums[4] - src[0*step] + src[7*step];
534 sums[6] = sums[5] - src[1*step] + last;
535 sums[7] = sums[6] - src[2*step] + last;
536 sums[8] = sums[7] - src[3*step] + last;
537 sums[9] = sums[8] - src[4*step] + last;
538
539 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
540 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
541 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
542 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
543 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
544 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
545 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
546 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
547 }
548 }else{
549 const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
550
551 if(ABS(middleEnergy) < 8*QP)
552 {
553 const int q=(src[3*step] - src[4*step])/2;
554 const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
555 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
556
557 int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) );
558 d= MAX(d, 0);
559
560 d= (5*d + 32) >> 6;
561 d*= SIGN(-middleEnergy);
562
563 if(q>0)
564 {
565 d= d<0 ? 0 : d;
566 d= d>q ? q : d;
567 }
568 else
569 {
570 d= d>0 ? 0 : d;
571 d= d<q ? q : d;
572 }
573
574 src[3*step]-= d;
575 src[4*step]+= d;
576 }
577 }
578
579 src += stride;
580 }
12eebd26
MN
581/*if(step==16){
582 STOP_TIMER("step16")
583}else{
584 STOP_TIMER("stepX")
585}*/
586}
cc9b0679 587
e89952aa 588//Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
cc9b0679 589//Plain C versions
e89952aa
MN
590#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
591#define COMPILE_C
592#endif
593
b0ac780a
MN
594#ifdef ARCH_POWERPC
595#ifdef HAVE_ALTIVEC
596#define COMPILE_ALTIVEC
b0ac780a
MN
597#endif //HAVE_ALTIVEC
598#endif //ARCH_POWERPC
599
053dea12 600#if defined(ARCH_X86) || defined(ARCH_X86_64)
e89952aa
MN
601
602#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
603#define COMPILE_MMX
604#endif
605
606#if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
607#define COMPILE_MMX2
608#endif
609
610#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
611#define COMPILE_3DNOW
612#endif
9c9e467d 613#endif //ARCH_X86
e89952aa
MN
614
615#undef HAVE_MMX
616#undef HAVE_MMX2
617#undef HAVE_3DNOW
b0ac780a 618#undef HAVE_ALTIVEC
e89952aa
MN
619
620#ifdef COMPILE_C
cc9b0679
MN
621#undef HAVE_MMX
622#undef HAVE_MMX2
623#undef HAVE_3DNOW
cc9b0679
MN
624#define RENAME(a) a ## _C
625#include "postprocess_template.c"
e89952aa 626#endif
cc9b0679 627
b0ac780a
MN
628#ifdef ARCH_POWERPC
629#ifdef COMPILE_ALTIVEC
630#undef RENAME
631#define HAVE_ALTIVEC
632#define RENAME(a) a ## _altivec
633#include "postprocess_altivec_template.c"
634#include "postprocess_template.c"
635#endif
636#endif //ARCH_POWERPC
637
cc9b0679 638//MMX versions
e89952aa 639#ifdef COMPILE_MMX
cc9b0679
MN
640#undef RENAME
641#define HAVE_MMX
642#undef HAVE_MMX2
643#undef HAVE_3DNOW
cc9b0679
MN
644#define RENAME(a) a ## _MMX
645#include "postprocess_template.c"
e89952aa 646#endif
cc9b0679
MN
647
648//MMX2 versions
e89952aa 649#ifdef COMPILE_MMX2
cc9b0679
MN
650#undef RENAME
651#define HAVE_MMX
652#define HAVE_MMX2
653#undef HAVE_3DNOW
cc9b0679
MN
654#define RENAME(a) a ## _MMX2
655#include "postprocess_template.c"
e89952aa 656#endif
cc9b0679
MN
657
658//3DNOW versions
e89952aa 659#ifdef COMPILE_3DNOW
cc9b0679
MN
660#undef RENAME
661#define HAVE_MMX
662#undef HAVE_MMX2
663#define HAVE_3DNOW
cc9b0679
MN
664#define RENAME(a) a ## _3DNow
665#include "postprocess_template.c"
e89952aa 666#endif
cc9b0679
MN
667
668// minor note: the HAVE_xyz is messed up after that line so dont use it
669
670static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
bb270c08 671 QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc)
cc9b0679 672{
bb270c08
DB
673 PPContext *c= (PPContext *)vc;
674 PPMode *ppMode= (PPMode *)vm;
675 c->ppMode= *ppMode; //FIXME
9c9e467d 676
bb270c08
DB
677 // useing ifs here as they are faster than function pointers allthough the
678 // difference wouldnt be messureable here but its much better because
679 // someone might exchange the cpu whithout restarting mplayer ;)
e89952aa 680#ifdef RUNTIME_CPUDETECT
053dea12 681#if defined(ARCH_X86) || defined(ARCH_X86_64)
bb270c08
DB
682 // ordered per speed fasterst first
683 if(c->cpuCaps & PP_CPU_CAPS_MMX2)
684 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
685 else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
686 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
687 else if(c->cpuCaps & PP_CPU_CAPS_MMX)
688 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
689 else
690 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
cc9b0679 691#else
b0ac780a
MN
692#ifdef ARCH_POWERPC
693#ifdef HAVE_ALTIVEC
71487254 694 if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
bb270c08 695 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
b0ac780a
MN
696 else
697#endif
698#endif
bb270c08 699 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
be44a4d7 700#endif
e89952aa
MN
701#else //RUNTIME_CPUDETECT
702#ifdef HAVE_MMX2
bb270c08 703 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 704#elif defined (HAVE_3DNOW)
bb270c08 705 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 706#elif defined (HAVE_MMX)
bb270c08 707 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
b0ac780a 708#elif defined (HAVE_ALTIVEC)
bb270c08 709 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 710#else
bb270c08 711 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa
MN
712#endif
713#endif //!RUNTIME_CPUDETECT
117e45b0
MN
714}
715
cc9b0679 716//static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
bb270c08 717// QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
13e00528 718
/* -pp Command line Help
 *
 * The long names printed here must match the names in the filters[] table,
 * because the parser compares them with a case-sensitive strcmp(); the
 * entries for "ha", "va" and "fq" were corrected accordingly
 * (ahdeblock / avdeblock / forcequant).
 * NOTE(review): the column padding inside the strings looks collapsed to
 * single spaces; confirm the intended alignment before release.
 */
char *pp_help=
"Available postprocessing filters:\n"
"Filters Options\n"
"short long name short long option Description\n"
"* * a autoq CPU power dependent enabler\n"
" c chrom chrominance filtering enabled\n"
" y nochrom chrominance filtering disabled\n"
" n noluma luma filtering disabled\n"
"hb hdeblock (2 threshold) horizontal deblocking filter\n"
" 1. difference factor: default=32, higher -> more deblocking\n"
" 2. flatness threshold: default=39, lower -> more deblocking\n"
" the h & v deblocking filters share these\n"
" so you can't set different thresholds for h / v\n"
"vb vdeblock (2 threshold) vertical deblocking filter\n"
"ha ahdeblock (2 threshold) horizontal deblocking filter\n"
"va avdeblock (2 threshold) vertical deblocking filter\n"
"h1 x1hdeblock experimental h deblock filter 1\n"
"v1 x1vdeblock experimental v deblock filter 1\n"
"dr dering deringing filter\n"
"al autolevels automatic brightness / contrast\n"
" f fullyrange stretch luminance to (0..255)\n"
"lb linblenddeint linear blend deinterlacer\n"
"li linipoldeint linear interpolating deinterlace\n"
"ci cubicipoldeint cubic interpolating deinterlacer\n"
"md mediandeint median deinterlacer\n"
"fd ffmpegdeint ffmpeg deinterlacer\n"
"l5 lowpass5 FIR lowpass deinterlacer\n"
"de default hb:a,vb:a,dr:a\n"
"fa fast h1:a,v1:a,dr:a\n"
"ac ha:a:128:7,va:a,dr:a\n"
"tn tmpnoise (3 threshold) temporal noise reducer\n"
" 1. <= 2. <= 3. larger -> stronger filtering\n"
"fq forcequant <quantizer> force quantizer\n"
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"\n"
;
911879d1 764
c41d972d 765pp_mode_t *pp_get_mode_by_name_and_quality(char *name, int quality)
911879d1 766{
bb270c08
DB
767 char temp[GET_MODE_BUFFER_SIZE];
768 char *p= temp;
769 char *filterDelimiters= ",/";
770 char *optionDelimiters= ":";
771 struct PPMode *ppMode;
772 char *filterToken;
773
774 ppMode= memalign(8, sizeof(PPMode));
775
776 ppMode->lumMode= 0;
777 ppMode->chromMode= 0;
778 ppMode->maxTmpNoise[0]= 700;
779 ppMode->maxTmpNoise[1]= 1500;
780 ppMode->maxTmpNoise[2]= 3000;
781 ppMode->maxAllowedY= 234;
782 ppMode->minAllowedY= 16;
783 ppMode->baseDcDiff= 256/8;
784 ppMode->flatnessThreshold= 56-16-1;
785 ppMode->maxClippedThreshold= 0.01;
786 ppMode->error=0;
787
788 strncpy(temp, name, GET_MODE_BUFFER_SIZE);
789
790 if(verbose>1) printf("pp: %s\n", name);
791
792 for(;;){
793 char *filterName;
794 int q= 1000000; //PP_QUALITY_MAX;
795 int chrom=-1;
796 int luma=-1;
797 char *option;
798 char *options[OPTIONS_ARRAY_SIZE];
799 int i;
800 int filterNameOk=0;
801 int numOfUnknownOptions=0;
802 int enable=1; //does the user want us to enabled or disabled the filter
803
804 filterToken= strtok(p, filterDelimiters);
805 if(filterToken == NULL) break;
806 p+= strlen(filterToken) + 1; // p points to next filterToken
807 filterName= strtok(filterToken, optionDelimiters);
808 if(verbose>1) printf("pp: %s::%s\n", filterToken, filterName);
809
810 if(*filterName == '-')
811 {
812 enable=0;
813 filterName++;
814 }
815
816 for(;;){ //for all options
817 option= strtok(NULL, optionDelimiters);
818 if(option == NULL) break;
819
820 if(verbose>1) printf("pp: option: %s\n", option);
821 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
822 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
823 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
824 else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
825 else
826 {
827 options[numOfUnknownOptions] = option;
828 numOfUnknownOptions++;
829 }
830 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
831 }
832 options[numOfUnknownOptions] = NULL;
833
834 /* replace stuff from the replace Table */
835 for(i=0; replaceTable[2*i]!=NULL; i++)
836 {
837 if(!strcmp(replaceTable[2*i], filterName))
838 {
839 int newlen= strlen(replaceTable[2*i + 1]);
840 int plen;
841 int spaceLeft;
842
843 if(p==NULL) p= temp, *p=0; //last filter
844 else p--, *p=','; //not last filter
845
846 plen= strlen(p);
847 spaceLeft= p - temp + plen;
848 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE)
849 {
850 ppMode->error++;
851 break;
852 }
853 memmove(p + newlen, p, plen+1);
854 memcpy(p, replaceTable[2*i + 1], newlen);
855 filterNameOk=1;
856 }
857 }
858
859 for(i=0; filters[i].shortName!=NULL; i++)
860 {
861// printf("Compareing %s, %s, %s\n", filters[i].shortName,filters[i].longName, filterName);
862 if( !strcmp(filters[i].longName, filterName)
863 || !strcmp(filters[i].shortName, filterName))
864 {
865 ppMode->lumMode &= ~filters[i].mask;
866 ppMode->chromMode &= ~filters[i].mask;
867
868 filterNameOk=1;
869 if(!enable) break; // user wants to disable it
870
871 if(q >= filters[i].minLumQuality && luma)
872 ppMode->lumMode|= filters[i].mask;
873 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
874 if(q >= filters[i].minChromQuality)
875 ppMode->chromMode|= filters[i].mask;
876
877 if(filters[i].mask == LEVEL_FIX)
878 {
879 int o;
880 ppMode->minAllowedY= 16;
881 ppMode->maxAllowedY= 234;
882 for(o=0; options[o]!=NULL; o++)
883 {
884 if( !strcmp(options[o],"fullyrange")
885 ||!strcmp(options[o],"f"))
886 {
887 ppMode->minAllowedY= 0;
888 ppMode->maxAllowedY= 255;
889 numOfUnknownOptions--;
890 }
891 }
892 }
893 else if(filters[i].mask == TEMP_NOISE_FILTER)
894 {
895 int o;
896 int numOfNoises=0;
897
898 for(o=0; options[o]!=NULL; o++)
899 {
900 char *tail;
901 ppMode->maxTmpNoise[numOfNoises]=
902 strtol(options[o], &tail, 0);
903 if(tail!=options[o])
904 {
905 numOfNoises++;
906 numOfUnknownOptions--;
907 if(numOfNoises >= 3) break;
908 }
909 }
910 }
911 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK
912 || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK)
913 {
914 int o;
915
916 for(o=0; options[o]!=NULL && o<2; o++)
917 {
918 char *tail;
919 int val= strtol(options[o], &tail, 0);
920 if(tail==options[o]) break;
921
922 numOfUnknownOptions--;
923 if(o==0) ppMode->baseDcDiff= val;
924 else ppMode->flatnessThreshold= val;
925 }
926 }
927 else if(filters[i].mask == FORCE_QUANT)
928 {
929 int o;
930 ppMode->forcedQuant= 15;
931
932 for(o=0; options[o]!=NULL && o<1; o++)
933 {
934 char *tail;
935 int val= strtol(options[o], &tail, 0);
936 if(tail==options[o]) break;
937
938 numOfUnknownOptions--;
939 ppMode->forcedQuant= val;
940 }
941 }
942 }
943 }
944 if(!filterNameOk) ppMode->error++;
945 ppMode->error += numOfUnknownOptions;
946 }
947
948 if(verbose>1) printf("pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
949 if(ppMode->error)
950 {
951 fprintf(stderr, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
952 free(ppMode);
953 return NULL;
954 }
955 return ppMode;
911879d1
MN
956}
957
c41d972d
MN
958void pp_free_mode(pp_mode_t *mode){
959 if(mode) free(mode);
960}
961
/**
 * Replace the buffer *p by a freshly allocated, zero-filled one.
 *
 * The old buffer (if any) is freed; its contents are NOT carried over.
 * If the allocation fails, *p is left NULL.
 *
 * @param p         address of the buffer pointer to replace
 * @param alignment alignment passed on to memalign()
 * @param size      size of the new buffer in bytes
 */
static void reallocAlign(void **p, int alignment, int size){
        free(*p);
        *p= memalign(alignment, size);
        if(*p) memset(*p, 0, size);
}
967
0426af31 968static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
bb270c08
DB
969 int mbWidth = (width+15)>>4;
970 int mbHeight= (height+15)>>4;
971 int i;
972
973 c->stride= stride;
974 c->qpStride= qpStride;
975
976 reallocAlign((void **)&c->tempDst, 8, stride*24);
977 reallocAlign((void **)&c->tempSrc, 8, stride*24);
978 reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
979 reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
980 for(i=0; i<256; i++)
981 c->yHistogram[i]= width*height/64*15/256;
982
983 for(i=0; i<3; i++)
984 {
985 //Note:the +17*1024 is just there so i dont have to worry about r/w over te end
986 reallocAlign((void **)&c->tempBlured[i], 8, stride*mbHeight*16 + 17*1024);
987 reallocAlign((void **)&c->tempBluredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
988 }
989
990 reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
991 reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
992 reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
993 reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
88c0bc7e
MN
994}
995
4cfbf61b 996static void global_init(void){
bb270c08
DB
997 int i;
998 memset(clip_table, 0, 256);
999 for(i=256; i<512; i++)
1000 clip_table[i]= i;
1001 memset(clip_table+512, 0, 256);
134eb1e5
MN
1002}
1003
88c0bc7e 1004pp_context_t *pp_get_context(int width, int height, int cpuCaps){
bb270c08
DB
1005 PPContext *c= memalign(32, sizeof(PPContext));
1006 int stride= (width+15)&(~15); //assumed / will realloc if needed
1007 int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
115329f1 1008
bb270c08 1009 global_init();
134eb1e5 1010
bb270c08
DB
1011 memset(c, 0, sizeof(PPContext));
1012 c->cpuCaps= cpuCaps;
1013 if(cpuCaps&PP_FORMAT){
1014 c->hChromaSubSample= cpuCaps&0x3;
1015 c->vChromaSubSample= (cpuCaps>>4)&0x3;
1016 }else{
1017 c->hChromaSubSample= 1;
1018 c->vChromaSubSample= 1;
1019 }
88c0bc7e 1020
bb270c08 1021 reallocBuffers(c, width, height, stride, qpStride);
115329f1 1022
bb270c08 1023 c->frameNum=-1;
45b4f285 1024
bb270c08 1025 return c;
45b4f285
MN
1026}
1027
9cb54f43 1028void pp_free_context(void *vc){
bb270c08
DB
1029 PPContext *c = (PPContext*)vc;
1030 int i;
115329f1 1031
bb270c08
DB
1032 for(i=0; i<3; i++) free(c->tempBlured[i]);
1033 for(i=0; i<3; i++) free(c->tempBluredPast[i]);
115329f1 1034
bb270c08
DB
1035 free(c->tempBlocks);
1036 free(c->yHistogram);
1037 free(c->tempDst);
1038 free(c->tempSrc);
1039 free(c->deintTemp);
1040 free(c->stdQPTable);
1041 free(c->nonBQPTable);
1042 free(c->forcedQPTable);
115329f1 1043
bb270c08 1044 memset(c, 0, sizeof(PPContext));
88c0bc7e 1045
bb270c08 1046 free(c);
9c9e467d
MN
1047}
1048
9cb54f43 1049void pp_postprocess(uint8_t * src[3], int srcStride[3],
9c9e467d 1050 uint8_t * dst[3], int dstStride[3],
ec487e5d 1051 int width, int height,
9c9e467d 1052 QP_STORE_T *QP_store, int QPStride,
bb270c08 1053 pp_mode_t *vm, void *vc, int pict_type)
911879d1 1054{
bb270c08
DB
1055 int mbWidth = (width+15)>>4;
1056 int mbHeight= (height+15)>>4;
1057 PPMode *mode = (PPMode*)vm;
1058 PPContext *c = (PPContext*)vc;
1059 int minStride= MAX(ABS(srcStride[0]), ABS(dstStride[0]));
1060 int absQPStride = ABS(QPStride);
1061
1062 // c->stride and c->QPStride are always positive
1063 if(c->stride < minStride || c->qpStride < absQPStride)
1064 reallocBuffers(c, width, height,
1065 MAX(minStride, c->stride),
1066 MAX(c->qpStride, absQPStride));
1067
1068 if(QP_store==NULL || (mode->lumMode & FORCE_QUANT))
1069 {
1070 int i;
1071 QP_store= c->forcedQPTable;
1072 absQPStride = QPStride = 0;
1073 if(mode->lumMode & FORCE_QUANT)
1074 for(i=0; i<mbWidth; i++) QP_store[i]= mode->forcedQuant;
1075 else
1076 for(i=0; i<mbWidth; i++) QP_store[i]= 1;
1077 }
0426af31
MN
1078//printf("pict_type:%d\n", pict_type);
1079
bb270c08
DB
1080 if(pict_type & PP_PICT_TYPE_QP2){
1081 int i;
1082 const int count= mbHeight * absQPStride;
1083 for(i=0; i<(count>>2); i++){
1084 ((uint32_t*)c->stdQPTable)[i] = (((uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1085 }
1086 for(i<<=2; i<count; i++){
1087 c->stdQPTable[i] = QP_store[i]>>1;
1088 }
0426af31 1089 QP_store= c->stdQPTable;
bb270c08
DB
1090 QPStride= absQPStride;
1091 }
0426af31 1092
ec487e5d
MN
1093if(0){
1094int x,y;
1095for(y=0; y<mbHeight; y++){
bb270c08
DB
1096 for(x=0; x<mbWidth; x++){
1097 printf("%2d ", QP_store[x + y*QPStride]);
1098 }
1099 printf("\n");
ec487e5d 1100}
bb270c08 1101 printf("\n");
ec487e5d 1102}
51e19dcc 1103
bb270c08
DB
1104 if((pict_type&7)!=3)
1105 {
1106 if (QPStride >= 0) {
1107 int i;
1108 const int count= mbHeight * QPStride;
1109 for(i=0; i<(count>>2); i++){
1110 ((uint32_t*)c->nonBQPTable)[i] = ((uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1111 }
1112 for(i<<=2; i<count; i++){
1113 c->nonBQPTable[i] = QP_store[i] & 0x3F;
1114 }
1115 } else {
1116 int i,j;
1117 for(i=0; i<mbHeight; i++) {
1118 for(j=0; j<absQPStride; j++) {
1119 c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1120 }
1121 }
1122 }
1123 }
1124
1125 if(verbose>2)
1126 {
1127 printf("using npp filters 0x%X/0x%X\n", mode->lumMode, mode->chromMode);
1128 }
1129
1130 postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1131 width, height, QP_store, QPStride, 0, mode, c);
1132
1133 width = (width )>>c->hChromaSubSample;
1134 height = (height)>>c->vChromaSubSample;
1135
1136 if(mode->chromMode)
1137 {
1138 postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1139 width, height, QP_store, QPStride, 1, mode, c);
1140 postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1141 width, height, QP_store, QPStride, 2, mode, c);
1142 }
1143 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2])
1144 {
1145 linecpy(dst[1], src[1], height, srcStride[1]);
1146 linecpy(dst[2], src[2], height, srcStride[2]);
1147 }
1148 else
1149 {
1150 int y;
1151 for(y=0; y<height; y++)
1152 {
1153 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1154 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
1155 }
1156 }
911879d1
MN
1157}
1158