unused #define
[libav.git] / libpostproc / postprocess.c
CommitLineData
3057fa66 1/*
b78e7197
DB
2 * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
3 *
4 * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
5 *
ef85972b 6 * This file is part of FFmpeg.
b78e7197
DB
7 *
8 * FFmpeg is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
3057fa66 22
b304569a
MN
23/**
24 * @file postprocess.c
25 * postprocessing.
26 */
115329f1 27
3057fa66 28/*
bb270c08
DB
29 C MMX MMX2 3DNow AltiVec
30isVertDC Ec Ec Ec
31isVertMinMaxOk Ec Ec Ec
32doVertLowPass E e e Ec
33doVertDefFilter Ec Ec e e Ec
34isHorizDC Ec Ec Ec
35isHorizMinMaxOk a E Ec
36doHorizLowPass E e e Ec
37doHorizDefFilter Ec Ec e e Ec
38do_a_deblock Ec E Ec E
39deRing E e e* Ecp
40Vertical RKAlgo1 E a a
41Horizontal RKAlgo1 a a
42Vertical X1# a E E
43Horizontal X1# a E E
44LinIpolDeinterlace e E E*
45CubicIpolDeinterlace a e e*
46LinBlendDeinterlace e E E*
47MedianDeinterlace# E Ec Ec
48TempDeNoiser# E e e Ec
d5a1a995 49
117e45b0
MN
* I don't have a 3DNow! CPU -> it's untested, but no one said it doesn't work, so it seems to work
# more or less self-invented filters, so the exactness isn't too meaningful
3057fa66 52E = Exact implementation
e = almost exact implementation (slightly different rounding, ...)
3057fa66
A
54a = alternative / approximate impl
55c = checked against the other implementations (-vo md5)
b0ac780a 56p = partially optimized, still some work to do
3057fa66
A
57*/
58
59/*
60TODO:
3057fa66 61reduce the time wasted on the mem transfer
3057fa66 62unroll stuff if instructions depend too much on the prior one
3057fa66 63move YScale thing to the end instead of fixing QP
13e00528 64write a faster and higher quality deblocking filter :)
d5a1a995 65make the mainloop more flexible (variable number of blocks at once
bb270c08 66 (the if/else stuff per block is slowing things down)
9f45d04d 67compare the quality & speed of all filters
9f45d04d 68split this huge file
8405b3fd 69optimize c versions
117e45b0 70try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
3057fa66 71...
13e00528
A
72*/
73
36b1b0bc 74//Changelog: use the Subversion log
3057fa66 75
9858f773 76#include "config.h"
6ab6c7c3 77#include "avutil.h"
3057fa66
A
78#include <inttypes.h>
79#include <stdio.h>
d5a1a995 80#include <stdlib.h>
911879d1 81#include <string.h>
dda87e9f
PL
82#ifdef HAVE_MALLOC_H
83#include <malloc.h>
84#endif
3057fa66 85//#undef HAVE_MMX2
13e00528 86//#define HAVE_3DNOW
3057fa66 87//#undef HAVE_MMX
cc9b0679 88//#undef ARCH_X86
7f16f6e6 89//#define DEBUG_BRIGHTNESS
bba9b16c 90#ifdef USE_FASTMEMCPY
f4bd289a 91#include "libvo/fastmemcpy.h"
70d4f2da 92#endif
13e00528 93#include "postprocess.h"
c41d972d 94#include "postprocess_internal.h"
bba9b16c
MN
95
96#include "mangle.h" //FIXME should be supressed
3057fa66 97
a7b2871c
RD
98#ifdef HAVE_ALTIVEC_H
99#include <altivec.h>
100#endif
101
911879d1
MN
102#define GET_MODE_BUFFER_SIZE 500
103#define OPTIONS_ARRAY_SIZE 10
9c9e467d
MN
104#define BLOCK_SIZE 8
105#define TEMP_STRIDE 8
106//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
911879d1 107
053dea12 108#if defined(ARCH_X86) || defined(ARCH_X86_64)
bb270c08
DB
109static uint64_t __attribute__((aligned(8))) attribute_used w05= 0x0005000500050005LL;
110static uint64_t __attribute__((aligned(8))) attribute_used w04= 0x0004000400040004LL;
111static uint64_t __attribute__((aligned(8))) attribute_used w20= 0x0020002000200020LL;
112static uint64_t __attribute__((aligned(8))) attribute_used b00= 0x0000000000000000LL;
113static uint64_t __attribute__((aligned(8))) attribute_used b01= 0x0101010101010101LL;
114static uint64_t __attribute__((aligned(8))) attribute_used b02= 0x0202020202020202LL;
115static uint64_t __attribute__((aligned(8))) attribute_used b08= 0x0808080808080808LL;
116static uint64_t __attribute__((aligned(8))) attribute_used b80= 0x8080808080808080LL;
b28daef8 117#endif
3057fa66 118
134eb1e5
MN
119static uint8_t clip_table[3*256];
120static uint8_t * const clip_tab= clip_table + 256;
121
4df8ca9d 122static const int verbose= 0;
45b4f285 123
3f1d4e96 124static const int attribute_used deringThreshold= 20;
3057fa66 125
9c9e467d 126
911879d1
MN
127static struct PPFilter filters[]=
128{
bb270c08
DB
129 {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK},
130 {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK},
131/* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER},
132 {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/
133 {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER},
134 {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER},
135 {"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK},
136 {"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK},
137 {"dr", "dering", 1, 5, 6, DERING},
138 {"al", "autolevels", 0, 1, 2, LEVEL_FIX},
139 {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
140 {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
141 {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
142 {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER},
143 {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER},
144 {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER},
145 {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER},
146 {"fq", "forcequant", 1, 0, 0, FORCE_QUANT},
147 {NULL, NULL,0,0,0,0} //End Marker
911879d1
MN
148};
149
/**
 * Alias table used by the mode-string parser.
 * Entries come in pairs: element 2*i is the alias name and element 2*i+1 is
 * the filter list that is spliced into the string in its place.
 * A single NULL terminates the list.
 */
static const char *replaceTable[]=
{
    /* alias */     /* expansion */
    "default",      "hdeblock:a,vdeblock:a,dering:a",
    "de",           "hdeblock:a,vdeblock:a,dering:a",
    "fast",         "x1hdeblock:a,x1vdeblock:a,dering:a",
    "fa",           "x1hdeblock:a,x1vdeblock:a,dering:a",
    "ac",           "ha:a:128:7,va:a,dering:a",
    NULL //End Marker
};
159
3057fa66 160
053dea12 161#if defined(ARCH_X86) || defined(ARCH_X86_64)
3057fa66
A
/** Execute the x86 "prefetchnta" instruction on the address p. */
static inline void prefetchnta(void *p)
{
    __asm__ volatile(
        "prefetchnta (%0)\n\t"
        :
        : "r" (p)
    );
}

/** Execute the x86 "prefetcht0" instruction on the address p. */
static inline void prefetcht0(void *p)
{
    __asm__ volatile(
        "prefetcht0 (%0)\n\t"
        :
        : "r" (p)
    );
}

/** Execute the x86 "prefetcht1" instruction on the address p. */
static inline void prefetcht1(void *p)
{
    __asm__ volatile(
        "prefetcht1 (%0)\n\t"
        :
        : "r" (p)
    );
}

/** Execute the x86 "prefetcht2" instruction on the address p. */
static inline void prefetcht2(void *p)
{
    __asm__ volatile(
        "prefetcht2 (%0)\n\t"
        :
        : "r" (p)
    );
}
9a722af7 189#endif
3057fa66 190
// The horizontal functions exist only in C because the MMX code is faster with vertical filters and transposing
3057fa66 192
cf5ec61d
MN
193/**
194 * Check if the given 8x8 Block is mostly "flat"
195 */
b0ac780a 196static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
cf5ec61d 197{
bb270c08
DB
198 int numEq= 0;
199 int y;
200 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
201 const int dcThreshold= dcOffset*2 + 1;
202
203 for(y=0; y<BLOCK_SIZE; y++)
204 {
205 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
206 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
207 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
208 if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
209 if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
210 if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
211 if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
212 src+= stride;
213 }
214 return numEq > c->ppMode.flatnessThreshold;
9c9e467d
MN
215}
216
217/**
218 * Check if the middle 8x8 Block in the given 8x16 block is flat
219 */
220static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){
bb270c08
DB
221 int numEq= 0;
222 int y;
223 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
224 const int dcThreshold= dcOffset*2 + 1;
225
226 src+= stride*4; // src points to begin of the 8x8 Block
227 for(y=0; y<BLOCK_SIZE-1; y++)
228 {
229 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
230 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
231 if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
232 if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
233 if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
234 if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
235 if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
236 if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
237 src+= stride;
238 }
239 return numEq > c->ppMode.flatnessThreshold;
cf5ec61d
MN
240}
241
/**
 * Cheap horizontal range test on an 8x8 block.
 *
 * One pair of pixels is sampled per row (columns 0/5, 2/7, 4/1, 6/3, then
 * the same pattern again for the lower four rows). The block passes only if
 * every sampled difference lies within +-2*QP.
 *
 * @param src    top-left pixel of the block
 * @param stride distance in bytes between two rows
 * @param QP     quantizer used to scale the allowed difference
 * @return 1 if all sampled pairs are within range, 0 otherwise
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{
    static const int pairs[4][2]= { {0, 5}, {2, 7}, {4, 1}, {6, 3} };
    const int bias= 2*QP;
    const int limit= 4*QP;
    int row;

    for(row=0; row<8; row++, src+= stride)
    {
        const int *pr= pairs[row & 3];

        // the unsigned compare rejects differences below -2*QP as well as above 2*QP
        if((unsigned)(src[pr[0]] - src[pr[1]] + bias) > limit) return 0;
    }
    return 1;
}
cf5ec61d 264
cb482d25
MN
265static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
266{
267#if 1
268#if 1
bb270c08
DB
269 int x;
270 src+= stride*4;
271 for(x=0; x<BLOCK_SIZE; x+=4)
272 {
273 if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0;
274 if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
275 if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
276 if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
277 }
cb482d25 278#else
bb270c08
DB
279 int x;
280 src+= stride*3;
281 for(x=0; x<BLOCK_SIZE; x++)
282 {
283 if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
284 }
cb482d25 285#endif
bb270c08 286 return 1;
cb482d25 287#else
bb270c08
DB
288 int x;
289 src+= stride*4;
290 for(x=0; x<BLOCK_SIZE; x++)
291 {
292 int min=255;
293 int max=0;
294 int y;
295 for(y=0; y<8; y++){
296 int v= src[x + y*stride];
297 if(v>max) max=v;
298 if(v<min) min=v;
299 }
300 if(max-min > 2*QP) return 0;
301 }
302 return 1;
cb482d25
MN
303#endif
304}
305
b0ac780a 306static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c){
bb270c08
DB
307 if( isHorizDC_C(src, stride, c) ){
308 if( isHorizMinMaxOk_C(src, stride, c->QP) )
309 return 1;
310 else
311 return 0;
312 }else{
313 return 2;
314 }
b0ac780a
MN
315}
316
cb482d25 317static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c){
bb270c08
DB
318 if( isVertDC_C(src, stride, c) ){
319 if( isVertMinMaxOk_C(src, stride, c->QP) )
320 return 1;
321 else
322 return 0;
323 }else{
324 return 2;
325 }
cf5ec61d
MN
326}
327
b0ac780a 328static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
cf5ec61d 329{
bb270c08
DB
330 int y;
331 for(y=0; y<BLOCK_SIZE; y++)
332 {
333 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
334
335 if(ABS(middleEnergy) < 8*c->QP)
336 {
337 const int q=(dst[3] - dst[4])/2;
338 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
339 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
340
8925915f
DB
341 int d= ABS(middleEnergy) - FFMIN( ABS(leftEnergy), ABS(rightEnergy) );
342 d= FFMAX(d, 0);
bb270c08
DB
343
344 d= (5*d + 32) >> 6;
345 d*= SIGN(-middleEnergy);
346
347 if(q>0)
348 {
349 d= d<0 ? 0 : d;
350 d= d>q ? q : d;
351 }
352 else
353 {
354 d= d>0 ? 0 : d;
355 d= d<q ? q : d;
356 }
357
358 dst[3]-= d;
359 dst[4]+= d;
360 }
361 dst+= stride;
362 }
cf5ec61d
MN
363}
364
365/**
366 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
367 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
368 */
b0ac780a 369static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
cf5ec61d 370{
bb270c08
DB
371 int y;
372 for(y=0; y<BLOCK_SIZE; y++)
373 {
374 const int first= ABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
375 const int last= ABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
376
377 int sums[10];
378 sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
379 sums[1] = sums[0] - first + dst[3];
380 sums[2] = sums[1] - first + dst[4];
381 sums[3] = sums[2] - first + dst[5];
382 sums[4] = sums[3] - first + dst[6];
383 sums[5] = sums[4] - dst[0] + dst[7];
384 sums[6] = sums[5] - dst[1] + last;
385 sums[7] = sums[6] - dst[2] + last;
386 sums[8] = sums[7] - dst[3] + last;
387 sums[9] = sums[8] - dst[4] + last;
388
389 dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
390 dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
391 dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
392 dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
393 dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
394 dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
395 dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
396 dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
397
398 dst+= stride;
399 }
cf5ec61d
MN
400}
401
4e4dcbc5 402/**
cc9b0679
MN
403 * Experimental Filter 1 (Horizontal)
404 * will not damage linear gradients
405 * Flat blocks should look like they where passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
406 * can only smooth blocks at the expected locations (it cant smooth them if they did move)
407 * MMX2 version does correct clipping C version doesnt
408 * not identical with the vertical one
4e4dcbc5 409 */
cc9b0679
MN
410static inline void horizX1Filter(uint8_t *src, int stride, int QP)
411{
bb270c08
DB
412 int y;
413 static uint64_t *lut= NULL;
414 if(lut==NULL)
415 {
416 int i;
6ab6c7c3 417 lut = av_malloc(256*8);
bb270c08
DB
418 for(i=0; i<256; i++)
419 {
420 int v= i < 128 ? 2*i : 2*(i-256);
117e45b0 421/*
cc9b0679 422//Simulate 112242211 9-Tap filter
bb270c08
DB
423 uint64_t a= (v/16) & 0xFF;
424 uint64_t b= (v/8) & 0xFF;
425 uint64_t c= (v/4) & 0xFF;
426 uint64_t d= (3*v/8) & 0xFF;
117e45b0 427*/
cc9b0679 428//Simulate piecewise linear interpolation
bb270c08
DB
429 uint64_t a= (v/16) & 0xFF;
430 uint64_t b= (v*3/16) & 0xFF;
431 uint64_t c= (v*5/16) & 0xFF;
432 uint64_t d= (7*v/16) & 0xFF;
433 uint64_t A= (0x100 - a)&0xFF;
434 uint64_t B= (0x100 - b)&0xFF;
435 uint64_t C= (0x100 - c)&0xFF;
436 uint64_t D= (0x100 - c)&0xFF;
437
438 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
439 (D<<24) | (C<<16) | (B<<8) | (A);
440 //lut[i] = (v<<32) | (v<<24);
441 }
442 }
443
444 for(y=0; y<BLOCK_SIZE; y++)
445 {
446 int a= src[1] - src[2];
447 int b= src[3] - src[4];
448 int c= src[5] - src[6];
449
8925915f 450 int d= FFMAX(ABS(b) - (ABS(a) + ABS(c))/2, 0);
bb270c08
DB
451
452 if(d < QP)
453 {
454 int v = d * SIGN(-b);
455
456 src[1] +=v/8;
457 src[2] +=v/4;
458 src[3] +=3*v/8;
459 src[4] -=3*v/8;
460 src[5] -=v/4;
461 src[6] -=v/8;
462
463 }
464 src+=stride;
465 }
cc9b0679
MN
466}
467
12eebd26
MN
468/**
469 * accurate deblock filter
470 */
792a5a7c 471static always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
bb270c08
DB
472 int y;
473 const int QP= c->QP;
474 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
475 const int dcThreshold= dcOffset*2 + 1;
12eebd26 476//START_TIMER
bb270c08
DB
477 src+= step*4; // src points to begin of the 8x8 Block
478 for(y=0; y<8; y++){
479 int numEq= 0;
480
481 if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
482 if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
483 if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
484 if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
485 if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
486 if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
487 if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
488 if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
489 if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
490 if(numEq > c->ppMode.flatnessThreshold){
491 int min, max, x;
492
493 if(src[0] > src[step]){
494 max= src[0];
495 min= src[step];
496 }else{
497 max= src[step];
498 min= src[0];
499 }
500 for(x=2; x<8; x+=2){
501 if(src[x*step] > src[(x+1)*step]){
502 if(src[x *step] > max) max= src[ x *step];
503 if(src[(x+1)*step] < min) min= src[(x+1)*step];
504 }else{
505 if(src[(x+1)*step] > max) max= src[(x+1)*step];
506 if(src[ x *step] < min) min= src[ x *step];
507 }
508 }
509 if(max-min < 2*QP){
510 const int first= ABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
511 const int last= ABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
512
513 int sums[10];
514 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
515 sums[1] = sums[0] - first + src[3*step];
516 sums[2] = sums[1] - first + src[4*step];
517 sums[3] = sums[2] - first + src[5*step];
518 sums[4] = sums[3] - first + src[6*step];
519 sums[5] = sums[4] - src[0*step] + src[7*step];
520 sums[6] = sums[5] - src[1*step] + last;
521 sums[7] = sums[6] - src[2*step] + last;
522 sums[8] = sums[7] - src[3*step] + last;
523 sums[9] = sums[8] - src[4*step] + last;
524
525 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
526 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
527 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
528 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
529 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
530 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
531 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
532 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
533 }
534 }else{
535 const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
536
537 if(ABS(middleEnergy) < 8*QP)
538 {
539 const int q=(src[3*step] - src[4*step])/2;
540 const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
541 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
542
8925915f
DB
543 int d= ABS(middleEnergy) - FFMIN( ABS(leftEnergy), ABS(rightEnergy) );
544 d= FFMAX(d, 0);
bb270c08
DB
545
546 d= (5*d + 32) >> 6;
547 d*= SIGN(-middleEnergy);
548
549 if(q>0)
550 {
551 d= d<0 ? 0 : d;
552 d= d>q ? q : d;
553 }
554 else
555 {
556 d= d>0 ? 0 : d;
557 d= d<q ? q : d;
558 }
559
560 src[3*step]-= d;
561 src[4*step]+= d;
562 }
563 }
564
565 src += stride;
566 }
12eebd26
MN
567/*if(step==16){
568 STOP_TIMER("step16")
569}else{
570 STOP_TIMER("stepX")
571}*/
572}
cc9b0679 573
e89952aa 574//Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
cc9b0679 575//Plain C versions
e89952aa
MN
576#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
577#define COMPILE_C
578#endif
579
b0ac780a
MN
580#ifdef ARCH_POWERPC
581#ifdef HAVE_ALTIVEC
582#define COMPILE_ALTIVEC
b0ac780a
MN
583#endif //HAVE_ALTIVEC
584#endif //ARCH_POWERPC
585
053dea12 586#if defined(ARCH_X86) || defined(ARCH_X86_64)
e89952aa
MN
587
588#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
589#define COMPILE_MMX
590#endif
591
592#if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
593#define COMPILE_MMX2
594#endif
595
596#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
597#define COMPILE_3DNOW
598#endif
9c9e467d 599#endif //ARCH_X86
e89952aa
MN
600
601#undef HAVE_MMX
602#undef HAVE_MMX2
603#undef HAVE_3DNOW
b0ac780a 604#undef HAVE_ALTIVEC
e89952aa
MN
605
606#ifdef COMPILE_C
cc9b0679
MN
607#undef HAVE_MMX
608#undef HAVE_MMX2
609#undef HAVE_3DNOW
cc9b0679
MN
610#define RENAME(a) a ## _C
611#include "postprocess_template.c"
e89952aa 612#endif
cc9b0679 613
b0ac780a
MN
614#ifdef ARCH_POWERPC
615#ifdef COMPILE_ALTIVEC
616#undef RENAME
617#define HAVE_ALTIVEC
618#define RENAME(a) a ## _altivec
619#include "postprocess_altivec_template.c"
620#include "postprocess_template.c"
621#endif
622#endif //ARCH_POWERPC
623
cc9b0679 624//MMX versions
e89952aa 625#ifdef COMPILE_MMX
cc9b0679
MN
626#undef RENAME
627#define HAVE_MMX
628#undef HAVE_MMX2
629#undef HAVE_3DNOW
cc9b0679
MN
630#define RENAME(a) a ## _MMX
631#include "postprocess_template.c"
e89952aa 632#endif
cc9b0679
MN
633
634//MMX2 versions
e89952aa 635#ifdef COMPILE_MMX2
cc9b0679
MN
636#undef RENAME
637#define HAVE_MMX
638#define HAVE_MMX2
639#undef HAVE_3DNOW
cc9b0679
MN
640#define RENAME(a) a ## _MMX2
641#include "postprocess_template.c"
e89952aa 642#endif
cc9b0679
MN
643
644//3DNOW versions
e89952aa 645#ifdef COMPILE_3DNOW
cc9b0679
MN
646#undef RENAME
647#define HAVE_MMX
648#undef HAVE_MMX2
649#define HAVE_3DNOW
cc9b0679
MN
650#define RENAME(a) a ## _3DNow
651#include "postprocess_template.c"
e89952aa 652#endif
cc9b0679
MN
653
654// minor note: the HAVE_xyz is messed up after that line so dont use it
655
656static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
bb270c08 657 QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc)
cc9b0679 658{
bb270c08
DB
659 PPContext *c= (PPContext *)vc;
660 PPMode *ppMode= (PPMode *)vm;
661 c->ppMode= *ppMode; //FIXME
9c9e467d 662
bb270c08
DB
663 // useing ifs here as they are faster than function pointers allthough the
664 // difference wouldnt be messureable here but its much better because
665 // someone might exchange the cpu whithout restarting mplayer ;)
e89952aa 666#ifdef RUNTIME_CPUDETECT
053dea12 667#if defined(ARCH_X86) || defined(ARCH_X86_64)
bb270c08
DB
668 // ordered per speed fasterst first
669 if(c->cpuCaps & PP_CPU_CAPS_MMX2)
670 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
671 else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
672 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
673 else if(c->cpuCaps & PP_CPU_CAPS_MMX)
674 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
675 else
676 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
cc9b0679 677#else
b0ac780a
MN
678#ifdef ARCH_POWERPC
679#ifdef HAVE_ALTIVEC
71487254 680 if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
bb270c08 681 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
b0ac780a
MN
682 else
683#endif
684#endif
bb270c08 685 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
be44a4d7 686#endif
e89952aa
MN
687#else //RUNTIME_CPUDETECT
688#ifdef HAVE_MMX2
bb270c08 689 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 690#elif defined (HAVE_3DNOW)
bb270c08 691 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 692#elif defined (HAVE_MMX)
bb270c08 693 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
b0ac780a 694#elif defined (HAVE_ALTIVEC)
bb270c08 695 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 696#else
bb270c08 697 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa
MN
698#endif
699#endif //!RUNTIME_CPUDETECT
117e45b0
MN
700}
701
cc9b0679 702//static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
bb270c08 703// QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
13e00528 704
/**
 * -pp Command line Help
 *
 * The filter names listed here have to match the entries of filters[] and
 * replaceTable[], because the parser compares names case-sensitively.
 * The long names "ahdeblock", "avdeblock" and "forcequant" are therefore
 * spelled exactly as in filters[].
 */
char *pp_help=
"Available postprocessing filters:\n"
"Filters Options\n"
"short long name short long option Description\n"
"* * a autoq CPU power dependent enabler\n"
" c chrom chrominance filtering enabled\n"
" y nochrom chrominance filtering disabled\n"
" n noluma luma filtering disabled\n"
"hb hdeblock (2 threshold) horizontal deblocking filter\n"
" 1. difference factor: default=32, higher -> more deblocking\n"
" 2. flatness threshold: default=39, lower -> more deblocking\n"
" the h & v deblocking filters share these\n"
" so you can't set different thresholds for h / v\n"
"vb vdeblock (2 threshold) vertical deblocking filter\n"
"ha ahdeblock (2 threshold) horizontal deblocking filter\n"
"va avdeblock (2 threshold) vertical deblocking filter\n"
"h1 x1hdeblock experimental h deblock filter 1\n"
"v1 x1vdeblock experimental v deblock filter 1\n"
"dr dering deringing filter\n"
"al autolevels automatic brightness / contrast\n"
" f fullyrange stretch luminance to (0..255)\n"
"lb linblenddeint linear blend deinterlacer\n"
"li linipoldeint linear interpolating deinterlace\n"
"ci cubicipoldeint cubic interpolating deinterlacer\n"
"md mediandeint median deinterlacer\n"
"fd ffmpegdeint ffmpeg deinterlacer\n"
"l5 lowpass5 FIR lowpass deinterlacer\n"
"de default hb:a,vb:a,dr:a\n"
"fa fast h1:a,v1:a,dr:a\n"
"ac ha:a:128:7,va:a,dr:a\n"
"tn tmpnoise (3 threshold) temporal noise reducer\n"
" 1. <= 2. <= 3. larger -> stronger filtering\n"
"fq forcequant <quantizer> force quantizer\n"
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"\n"
;
911879d1 750
c41d972d 751pp_mode_t *pp_get_mode_by_name_and_quality(char *name, int quality)
911879d1 752{
bb270c08
DB
753 char temp[GET_MODE_BUFFER_SIZE];
754 char *p= temp;
7b49ce2e
SH
755 const char *filterDelimiters= ",/";
756 const char *optionDelimiters= ":";
bb270c08
DB
757 struct PPMode *ppMode;
758 char *filterToken;
759
6ab6c7c3 760 ppMode= av_malloc(sizeof(PPMode));
bb270c08
DB
761
762 ppMode->lumMode= 0;
763 ppMode->chromMode= 0;
764 ppMode->maxTmpNoise[0]= 700;
765 ppMode->maxTmpNoise[1]= 1500;
766 ppMode->maxTmpNoise[2]= 3000;
767 ppMode->maxAllowedY= 234;
768 ppMode->minAllowedY= 16;
769 ppMode->baseDcDiff= 256/8;
770 ppMode->flatnessThreshold= 56-16-1;
771 ppMode->maxClippedThreshold= 0.01;
772 ppMode->error=0;
773
774 strncpy(temp, name, GET_MODE_BUFFER_SIZE);
775
776 if(verbose>1) printf("pp: %s\n", name);
777
778 for(;;){
779 char *filterName;
780 int q= 1000000; //PP_QUALITY_MAX;
781 int chrom=-1;
782 int luma=-1;
783 char *option;
784 char *options[OPTIONS_ARRAY_SIZE];
785 int i;
786 int filterNameOk=0;
787 int numOfUnknownOptions=0;
788 int enable=1; //does the user want us to enabled or disabled the filter
789
790 filterToken= strtok(p, filterDelimiters);
791 if(filterToken == NULL) break;
792 p+= strlen(filterToken) + 1; // p points to next filterToken
793 filterName= strtok(filterToken, optionDelimiters);
794 if(verbose>1) printf("pp: %s::%s\n", filterToken, filterName);
795
796 if(*filterName == '-')
797 {
798 enable=0;
799 filterName++;
800 }
801
802 for(;;){ //for all options
803 option= strtok(NULL, optionDelimiters);
804 if(option == NULL) break;
805
806 if(verbose>1) printf("pp: option: %s\n", option);
807 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
808 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
809 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
810 else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
811 else
812 {
813 options[numOfUnknownOptions] = option;
814 numOfUnknownOptions++;
815 }
816 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
817 }
818 options[numOfUnknownOptions] = NULL;
819
820 /* replace stuff from the replace Table */
821 for(i=0; replaceTable[2*i]!=NULL; i++)
822 {
823 if(!strcmp(replaceTable[2*i], filterName))
824 {
825 int newlen= strlen(replaceTable[2*i + 1]);
826 int plen;
827 int spaceLeft;
828
829 if(p==NULL) p= temp, *p=0; //last filter
830 else p--, *p=','; //not last filter
831
832 plen= strlen(p);
833 spaceLeft= p - temp + plen;
834 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE)
835 {
836 ppMode->error++;
837 break;
838 }
839 memmove(p + newlen, p, plen+1);
840 memcpy(p, replaceTable[2*i + 1], newlen);
841 filterNameOk=1;
842 }
843 }
844
845 for(i=0; filters[i].shortName!=NULL; i++)
846 {
847// printf("Compareing %s, %s, %s\n", filters[i].shortName,filters[i].longName, filterName);
848 if( !strcmp(filters[i].longName, filterName)
849 || !strcmp(filters[i].shortName, filterName))
850 {
851 ppMode->lumMode &= ~filters[i].mask;
852 ppMode->chromMode &= ~filters[i].mask;
853
854 filterNameOk=1;
855 if(!enable) break; // user wants to disable it
856
857 if(q >= filters[i].minLumQuality && luma)
858 ppMode->lumMode|= filters[i].mask;
859 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
860 if(q >= filters[i].minChromQuality)
861 ppMode->chromMode|= filters[i].mask;
862
863 if(filters[i].mask == LEVEL_FIX)
864 {
865 int o;
866 ppMode->minAllowedY= 16;
867 ppMode->maxAllowedY= 234;
868 for(o=0; options[o]!=NULL; o++)
869 {
870 if( !strcmp(options[o],"fullyrange")
871 ||!strcmp(options[o],"f"))
872 {
873 ppMode->minAllowedY= 0;
874 ppMode->maxAllowedY= 255;
875 numOfUnknownOptions--;
876 }
877 }
878 }
879 else if(filters[i].mask == TEMP_NOISE_FILTER)
880 {
881 int o;
882 int numOfNoises=0;
883
884 for(o=0; options[o]!=NULL; o++)
885 {
886 char *tail;
887 ppMode->maxTmpNoise[numOfNoises]=
888 strtol(options[o], &tail, 0);
889 if(tail!=options[o])
890 {
891 numOfNoises++;
892 numOfUnknownOptions--;
893 if(numOfNoises >= 3) break;
894 }
895 }
896 }
897 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK
898 || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK)
899 {
900 int o;
901
902 for(o=0; options[o]!=NULL && o<2; o++)
903 {
904 char *tail;
905 int val= strtol(options[o], &tail, 0);
906 if(tail==options[o]) break;
907
908 numOfUnknownOptions--;
909 if(o==0) ppMode->baseDcDiff= val;
910 else ppMode->flatnessThreshold= val;
911 }
912 }
913 else if(filters[i].mask == FORCE_QUANT)
914 {
915 int o;
916 ppMode->forcedQuant= 15;
917
918 for(o=0; options[o]!=NULL && o<1; o++)
919 {
920 char *tail;
921 int val= strtol(options[o], &tail, 0);
922 if(tail==options[o]) break;
923
924 numOfUnknownOptions--;
925 ppMode->forcedQuant= val;
926 }
927 }
928 }
929 }
930 if(!filterNameOk) ppMode->error++;
931 ppMode->error += numOfUnknownOptions;
932 }
933
934 if(verbose>1) printf("pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
935 if(ppMode->error)
936 {
937 fprintf(stderr, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
6ab6c7c3 938 av_free(ppMode);
bb270c08
DB
939 return NULL;
940 }
941 return ppMode;
911879d1
MN
942}
943
c41d972d 944void pp_free_mode(pp_mode_t *mode){
6ab6c7c3 945 av_free(mode);
c41d972d
MN
946}
947
/**
 * Replace the buffer *p by a new zero-filled one of size bytes.
 * The old buffer is released first; its content is not carried over.
 *
 * @param p         location of the buffer pointer to replace
 * @param alignment not used by this implementation
 * @param size      size of the new buffer in bytes
 */
static void reallocAlign(void **p, int alignment, int size)
{
    void *fresh;

    av_free(*p);
    fresh= av_mallocz(size);
    *p= fresh;
}
952
0426af31 953static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
bb270c08
DB
954 int mbWidth = (width+15)>>4;
955 int mbHeight= (height+15)>>4;
956 int i;
957
958 c->stride= stride;
959 c->qpStride= qpStride;
960
961 reallocAlign((void **)&c->tempDst, 8, stride*24);
962 reallocAlign((void **)&c->tempSrc, 8, stride*24);
963 reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
964 reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
965 for(i=0; i<256; i++)
966 c->yHistogram[i]= width*height/64*15/256;
967
968 for(i=0; i<3; i++)
969 {
970 //Note:the +17*1024 is just there so i dont have to worry about r/w over te end
971 reallocAlign((void **)&c->tempBlured[i], 8, stride*mbHeight*16 + 17*1024);
972 reallocAlign((void **)&c->tempBluredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
973 }
974
975 reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
976 reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
977 reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
978 reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
88c0bc7e
MN
979}
980
4cfbf61b 981static void global_init(void){
bb270c08
DB
982 int i;
983 memset(clip_table, 0, 256);
984 for(i=256; i<512; i++)
985 clip_table[i]= i;
986 memset(clip_table+512, 0, 256);
134eb1e5
MN
987}
988
88c0bc7e 989pp_context_t *pp_get_context(int width, int height, int cpuCaps){
6ab6c7c3 990 PPContext *c= av_malloc(sizeof(PPContext));
bb270c08
DB
991 int stride= (width+15)&(~15); //assumed / will realloc if needed
992 int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
115329f1 993
bb270c08 994 global_init();
134eb1e5 995
bb270c08
DB
996 memset(c, 0, sizeof(PPContext));
997 c->cpuCaps= cpuCaps;
998 if(cpuCaps&PP_FORMAT){
999 c->hChromaSubSample= cpuCaps&0x3;
1000 c->vChromaSubSample= (cpuCaps>>4)&0x3;
1001 }else{
1002 c->hChromaSubSample= 1;
1003 c->vChromaSubSample= 1;
1004 }
88c0bc7e 1005
bb270c08 1006 reallocBuffers(c, width, height, stride, qpStride);
115329f1 1007
bb270c08 1008 c->frameNum=-1;
45b4f285 1009
bb270c08 1010 return c;
45b4f285
MN
1011}
1012
9cb54f43 1013void pp_free_context(void *vc){
bb270c08
DB
1014 PPContext *c = (PPContext*)vc;
1015 int i;
115329f1 1016
6ab6c7c3
LB
1017 for(i=0; i<3; i++) av_free(c->tempBlured[i]);
1018 for(i=0; i<3; i++) av_free(c->tempBluredPast[i]);
115329f1 1019
6ab6c7c3
LB
1020 av_free(c->tempBlocks);
1021 av_free(c->yHistogram);
1022 av_free(c->tempDst);
1023 av_free(c->tempSrc);
1024 av_free(c->deintTemp);
1025 av_free(c->stdQPTable);
1026 av_free(c->nonBQPTable);
1027 av_free(c->forcedQPTable);
115329f1 1028
bb270c08 1029 memset(c, 0, sizeof(PPContext));
88c0bc7e 1030
6ab6c7c3 1031 av_free(c);
9c9e467d
MN
1032}
1033
9cb54f43 1034void pp_postprocess(uint8_t * src[3], int srcStride[3],
9c9e467d 1035 uint8_t * dst[3], int dstStride[3],
ec487e5d 1036 int width, int height,
9c9e467d 1037 QP_STORE_T *QP_store, int QPStride,
bb270c08 1038 pp_mode_t *vm, void *vc, int pict_type)
911879d1 1039{
bb270c08
DB
1040 int mbWidth = (width+15)>>4;
1041 int mbHeight= (height+15)>>4;
1042 PPMode *mode = (PPMode*)vm;
1043 PPContext *c = (PPContext*)vc;
8925915f 1044 int minStride= FFMAX(ABS(srcStride[0]), ABS(dstStride[0]));
bb270c08
DB
1045 int absQPStride = ABS(QPStride);
1046
1047 // c->stride and c->QPStride are always positive
1048 if(c->stride < minStride || c->qpStride < absQPStride)
1049 reallocBuffers(c, width, height,
8925915f
DB
1050 FFMAX(minStride, c->stride),
1051 FFMAX(c->qpStride, absQPStride));
bb270c08
DB
1052
1053 if(QP_store==NULL || (mode->lumMode & FORCE_QUANT))
1054 {
1055 int i;
1056 QP_store= c->forcedQPTable;
1057 absQPStride = QPStride = 0;
1058 if(mode->lumMode & FORCE_QUANT)
1059 for(i=0; i<mbWidth; i++) QP_store[i]= mode->forcedQuant;
1060 else
1061 for(i=0; i<mbWidth; i++) QP_store[i]= 1;
1062 }
0426af31
MN
1063//printf("pict_type:%d\n", pict_type);
1064
bb270c08
DB
1065 if(pict_type & PP_PICT_TYPE_QP2){
1066 int i;
1067 const int count= mbHeight * absQPStride;
1068 for(i=0; i<(count>>2); i++){
1069 ((uint32_t*)c->stdQPTable)[i] = (((uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1070 }
1071 for(i<<=2; i<count; i++){
1072 c->stdQPTable[i] = QP_store[i]>>1;
1073 }
0426af31 1074 QP_store= c->stdQPTable;
bb270c08
DB
1075 QPStride= absQPStride;
1076 }
0426af31 1077
ec487e5d
MN
1078if(0){
1079int x,y;
1080for(y=0; y<mbHeight; y++){
bb270c08
DB
1081 for(x=0; x<mbWidth; x++){
1082 printf("%2d ", QP_store[x + y*QPStride]);
1083 }
1084 printf("\n");
ec487e5d 1085}
bb270c08 1086 printf("\n");
ec487e5d 1087}
51e19dcc 1088
bb270c08
DB
1089 if((pict_type&7)!=3)
1090 {
1091 if (QPStride >= 0) {
1092 int i;
1093 const int count= mbHeight * QPStride;
1094 for(i=0; i<(count>>2); i++){
1095 ((uint32_t*)c->nonBQPTable)[i] = ((uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1096 }
1097 for(i<<=2; i<count; i++){
1098 c->nonBQPTable[i] = QP_store[i] & 0x3F;
1099 }
1100 } else {
1101 int i,j;
1102 for(i=0; i<mbHeight; i++) {
1103 for(j=0; j<absQPStride; j++) {
1104 c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1105 }
1106 }
1107 }
1108 }
1109
1110 if(verbose>2)
1111 {
1112 printf("using npp filters 0x%X/0x%X\n", mode->lumMode, mode->chromMode);
1113 }
1114
1115 postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1116 width, height, QP_store, QPStride, 0, mode, c);
1117
1118 width = (width )>>c->hChromaSubSample;
1119 height = (height)>>c->vChromaSubSample;
1120
1121 if(mode->chromMode)
1122 {
1123 postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1124 width, height, QP_store, QPStride, 1, mode, c);
1125 postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1126 width, height, QP_store, QPStride, 2, mode, c);
1127 }
1128 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2])
1129 {
1130 linecpy(dst[1], src[1], height, srcStride[1]);
1131 linecpy(dst[2], src[2], height, srcStride[2]);
1132 }
1133 else
1134 {
1135 int y;
1136 for(y=0; y<height; y++)
1137 {
1138 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1139 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
1140 }
1141 }
911879d1
MN
1142}
1143