Rename SIGN macro to FFSIGN to avoid clashes with system headers.
[libav.git] / libpostproc / postprocess.c
CommitLineData
3057fa66 1/*
b78e7197
DB
2 * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
3 *
4 * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
5 *
ef85972b 6 * This file is part of FFmpeg.
b78e7197
DB
7 *
8 * FFmpeg is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
3057fa66 22
b304569a
MN
23/**
24 * @file postprocess.c
25 * postprocessing.
26 */
115329f1 27
3057fa66 28/*
bb270c08
DB
29 C MMX MMX2 3DNow AltiVec
30isVertDC Ec Ec Ec
31isVertMinMaxOk Ec Ec Ec
32doVertLowPass E e e Ec
33doVertDefFilter Ec Ec e e Ec
34isHorizDC Ec Ec Ec
35isHorizMinMaxOk a E Ec
36doHorizLowPass E e e Ec
37doHorizDefFilter Ec Ec e e Ec
38do_a_deblock Ec E Ec E
39deRing E e e* Ecp
40Vertical RKAlgo1 E a a
41Horizontal RKAlgo1 a a
42Vertical X1# a E E
43Horizontal X1# a E E
44LinIpolDeinterlace e E E*
45CubicIpolDeinterlace a e e*
46LinBlendDeinterlace e E E*
47MedianDeinterlace# E Ec Ec
48TempDeNoiser# E e e Ec
d5a1a995 49
117e45b0
MN
50* I don't have a 3DNow! CPU -> it's untested, but no one said it doesn't work, so it seems to work
51# more or less self-invented filters, so the exactness isn't too meaningful
3057fa66 52E = Exact implementation
acced553 53e = almost exact implementation (slightly different rounding,...)
3057fa66
A
54a = alternative / approximate impl
55c = checked against the other implementations (-vo md5)
b0ac780a 56p = partially optimized, still some work to do
3057fa66
A
57*/
58
59/*
60TODO:
3057fa66 61reduce the time wasted on the mem transfer
3057fa66 62unroll stuff if instructions depend too much on the prior one
3057fa66 63move YScale thing to the end instead of fixing QP
13e00528 64write a faster and higher quality deblocking filter :)
d5a1a995 65make the mainloop more flexible (variable number of blocks at once
bb270c08 66 (the if/else stuff per block is slowing things down)
9f45d04d 67compare the quality & speed of all filters
9f45d04d 68split this huge file
8405b3fd 69optimize c versions
117e45b0 70try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
3057fa66 71...
13e00528
A
72*/
73
36b1b0bc 74//Changelog: use the Subversion log
3057fa66 75
9858f773 76#include "config.h"
6ab6c7c3 77#include "avutil.h"
3057fa66
A
78#include <inttypes.h>
79#include <stdio.h>
d5a1a995 80#include <stdlib.h>
911879d1 81#include <string.h>
dda87e9f
PL
82#ifdef HAVE_MALLOC_H
83#include <malloc.h>
84#endif
3057fa66 85//#undef HAVE_MMX2
13e00528 86//#define HAVE_3DNOW
3057fa66 87//#undef HAVE_MMX
cc9b0679 88//#undef ARCH_X86
7f16f6e6 89//#define DEBUG_BRIGHTNESS
bba9b16c 90#ifdef USE_FASTMEMCPY
f4bd289a 91#include "libvo/fastmemcpy.h"
70d4f2da 92#endif
13e00528 93#include "postprocess.h"
c41d972d 94#include "postprocess_internal.h"
bba9b16c
MN
95
96#include "mangle.h" //FIXME should be supressed
3057fa66 97
a7b2871c
RD
98#ifdef HAVE_ALTIVEC_H
99#include <altivec.h>
100#endif
101
911879d1
MN
102#define GET_MODE_BUFFER_SIZE 500
103#define OPTIONS_ARRAY_SIZE 10
9c9e467d
MN
104#define BLOCK_SIZE 8
105#define TEMP_STRIDE 8
106//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
911879d1 107
#if defined(ARCH_X86) || defined(ARCH_X86_64)
/*
 * 8-byte aligned 64-bit constants, only built on x86 / x86-64.
 * wNN holds the value NN replicated in each of the four 16-bit lanes,
 * bNN holds the value NN replicated in each of the eight bytes.
 * No C code in this file reads them.
 * NOTE(review): presumably they are referenced by name from the inline asm
 * in postprocess_template.c, which is why they carry attribute_used - confirm.
 */
static uint64_t __attribute__((aligned(8))) attribute_used w05 = 0x0005000500050005LL;
static uint64_t __attribute__((aligned(8))) attribute_used w04 = 0x0004000400040004LL;
static uint64_t __attribute__((aligned(8))) attribute_used w20 = 0x0020002000200020LL;
static uint64_t __attribute__((aligned(8))) attribute_used b00 = 0x0000000000000000LL;
static uint64_t __attribute__((aligned(8))) attribute_used b01 = 0x0101010101010101LL;
static uint64_t __attribute__((aligned(8))) attribute_used b02 = 0x0202020202020202LL;
static uint64_t __attribute__((aligned(8))) attribute_used b08 = 0x0808080808080808LL;
static uint64_t __attribute__((aligned(8))) attribute_used b80 = 0x8080808080808080LL;
#endif
3057fa66 118
134eb1e5
MN
119static uint8_t clip_table[3*256];
120static uint8_t * const clip_tab= clip_table + 256;
121
3f1d4e96 122static const int attribute_used deringThreshold= 20;
3057fa66 123
9c9e467d 124
911879d1
MN
125static struct PPFilter filters[]=
126{
bb270c08
DB
127 {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK},
128 {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK},
129/* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER},
130 {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/
131 {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER},
132 {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER},
133 {"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK},
134 {"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK},
135 {"dr", "dering", 1, 5, 6, DERING},
136 {"al", "autolevels", 0, 1, 2, LEVEL_FIX},
137 {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
138 {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
139 {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
140 {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER},
141 {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER},
142 {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER},
143 {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER},
144 {"fq", "forcequant", 1, 0, 0, FORCE_QUANT},
145 {NULL, NULL,0,0,0,0} //End Marker
911879d1
MN
146};
147
/*
 * Alias table used by pp_get_mode_by_name_and_quality(): entries come in
 * pairs (alias name, filter string substituted for it) and the list is
 * terminated by a single NULL.
 */
static const char *replaceTable[] =
{
    "default", "hdeblock:a,vdeblock:a,dering:a",
    "de",      "hdeblock:a,vdeblock:a,dering:a",
    "fast",    "x1hdeblock:a,x1vdeblock:a,dering:a",
    "fa",      "x1hdeblock:a,x1vdeblock:a,dering:a",
    "ac",      "ha:a:128:7,va:a,dering:a",
    NULL // end marker
};
157
3057fa66 158
053dea12 159#if defined(ARCH_X86) || defined(ARCH_X86_64)
/** Execute the x86 "prefetchnta" instruction on the address p. */
static inline void prefetchnta(void *p)
{
    __asm__ __volatile__(
        "prefetchnta (%0)\n\t"
        : /* no outputs */
        : "r" (p)
    );
}
166
/** Execute the x86 "prefetcht0" instruction on the address p. */
static inline void prefetcht0(void *p)
{
    __asm__ __volatile__(
        "prefetcht0 (%0)\n\t"
        : /* no outputs */
        : "r" (p)
    );
}
173
/** Execute the x86 "prefetcht1" instruction on the address p. */
static inline void prefetcht1(void *p)
{
    __asm__ __volatile__(
        "prefetcht1 (%0)\n\t"
        : /* no outputs */
        : "r" (p)
    );
}
180
/** Execute the x86 "prefetcht2" instruction on the address p. */
static inline void prefetcht2(void *p)
{
    __asm__ __volatile__(
        "prefetcht2 (%0)\n\t"
        : /* no outputs */
        : "r" (p)
    );
}
9a722af7 187#endif
3057fa66 188
cc9b0679 189// The horizontal functions exist only in C because the MMX code is faster with vertical filters and transposing
3057fa66 190
cf5ec61d
MN
191/**
192 * Check if the given 8x8 Block is mostly "flat"
193 */
b0ac780a 194static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
cf5ec61d 195{
bb270c08
DB
196 int numEq= 0;
197 int y;
198 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
199 const int dcThreshold= dcOffset*2 + 1;
200
201 for(y=0; y<BLOCK_SIZE; y++)
202 {
203 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
204 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
205 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
206 if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
207 if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
208 if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
209 if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
210 src+= stride;
211 }
212 return numEq > c->ppMode.flatnessThreshold;
9c9e467d
MN
213}
214
215/**
216 * Check if the middle 8x8 Block in the given 8x16 block is flat
217 */
218static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){
bb270c08
DB
219 int numEq= 0;
220 int y;
221 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
222 const int dcThreshold= dcOffset*2 + 1;
223
224 src+= stride*4; // src points to begin of the 8x8 Block
225 for(y=0; y<BLOCK_SIZE-1; y++)
226 {
227 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
228 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
229 if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
230 if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
231 if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
232 if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
233 if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
234 if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
235 src+= stride;
236 }
237 return numEq > c->ppMode.flatnessThreshold;
cf5ec61d
MN
238}
239
/**
 * Cheap horizontal range test on an 8x8 block.
 *
 * One pair of samples is probed per row (columns 0/5, 2/7, 4/1, 6/3, then the
 * same pattern again for rows 4..7). The block is rejected as soon as a probed
 * pair differs by more than 2*QP.
 *
 * @param src    first sample of the block
 * @param stride offset from one row to the next
 * @param QP     quantizer; the allowed difference is 2*QP
 * @return 1 if all probed pairs are within range, 0 otherwise
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{
    static const int probe[4][2]= { {0, 5}, {2, 7}, {4, 1}, {6, 3} };
    int row;

    for(row=0; row<8; row++){
        const int *cols= probe[row & 3];

        // unsigned compare: fails for differences outside [-2*QP, 2*QP]
        if((unsigned)(src[cols[0]] - src[cols[1]] + 2*QP) > 4*QP) return 0;
        src += stride;
    }
    return 1;
}
cf5ec61d 262
cb482d25
MN
263static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
264{
265#if 1
266#if 1
bb270c08
DB
267 int x;
268 src+= stride*4;
269 for(x=0; x<BLOCK_SIZE; x+=4)
270 {
271 if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0;
272 if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
273 if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
274 if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
275 }
cb482d25 276#else
bb270c08
DB
277 int x;
278 src+= stride*3;
279 for(x=0; x<BLOCK_SIZE; x++)
280 {
281 if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
282 }
cb482d25 283#endif
bb270c08 284 return 1;
cb482d25 285#else
bb270c08
DB
286 int x;
287 src+= stride*4;
288 for(x=0; x<BLOCK_SIZE; x++)
289 {
290 int min=255;
291 int max=0;
292 int y;
293 for(y=0; y<8; y++){
294 int v= src[x + y*stride];
295 if(v>max) max=v;
296 if(v<min) min=v;
297 }
298 if(max-min > 2*QP) return 0;
299 }
300 return 1;
cb482d25
MN
301#endif
302}
303
b0ac780a 304static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c){
bb270c08
DB
305 if( isHorizDC_C(src, stride, c) ){
306 if( isHorizMinMaxOk_C(src, stride, c->QP) )
307 return 1;
308 else
309 return 0;
310 }else{
311 return 2;
312 }
b0ac780a
MN
313}
314
cb482d25 315static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c){
bb270c08
DB
316 if( isVertDC_C(src, stride, c) ){
317 if( isVertMinMaxOk_C(src, stride, c->QP) )
318 return 1;
319 else
320 return 0;
321 }else{
322 return 2;
323 }
cf5ec61d
MN
324}
325
b0ac780a 326static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
cf5ec61d 327{
bb270c08
DB
328 int y;
329 for(y=0; y<BLOCK_SIZE; y++)
330 {
331 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
332
333 if(ABS(middleEnergy) < 8*c->QP)
334 {
335 const int q=(dst[3] - dst[4])/2;
336 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
337 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
338
8925915f
DB
339 int d= ABS(middleEnergy) - FFMIN( ABS(leftEnergy), ABS(rightEnergy) );
340 d= FFMAX(d, 0);
bb270c08
DB
341
342 d= (5*d + 32) >> 6;
02305ff3 343 d*= FFSIGN(-middleEnergy);
bb270c08
DB
344
345 if(q>0)
346 {
347 d= d<0 ? 0 : d;
348 d= d>q ? q : d;
349 }
350 else
351 {
352 d= d>0 ? 0 : d;
353 d= d<q ? q : d;
354 }
355
356 dst[3]-= d;
357 dst[4]+= d;
358 }
359 dst+= stride;
360 }
cf5ec61d
MN
361}
362
363/**
364 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
365 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
366 */
b0ac780a 367static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
cf5ec61d 368{
bb270c08
DB
369 int y;
370 for(y=0; y<BLOCK_SIZE; y++)
371 {
372 const int first= ABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
373 const int last= ABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
374
375 int sums[10];
376 sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
377 sums[1] = sums[0] - first + dst[3];
378 sums[2] = sums[1] - first + dst[4];
379 sums[3] = sums[2] - first + dst[5];
380 sums[4] = sums[3] - first + dst[6];
381 sums[5] = sums[4] - dst[0] + dst[7];
382 sums[6] = sums[5] - dst[1] + last;
383 sums[7] = sums[6] - dst[2] + last;
384 sums[8] = sums[7] - dst[3] + last;
385 sums[9] = sums[8] - dst[4] + last;
386
387 dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
388 dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
389 dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
390 dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
391 dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
392 dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
393 dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
394 dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
395
396 dst+= stride;
397 }
cf5ec61d
MN
398}
399
4e4dcbc5 400/**
cc9b0679
MN
401 * Experimental Filter 1 (Horizontal)
402 * will not damage linear gradients
403 * Flat blocks should look like they where passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
404 * can only smooth blocks at the expected locations (it cant smooth them if they did move)
405 * MMX2 version does correct clipping C version doesnt
406 * not identical with the vertical one
4e4dcbc5 407 */
cc9b0679
MN
408static inline void horizX1Filter(uint8_t *src, int stride, int QP)
409{
bb270c08
DB
410 int y;
411 static uint64_t *lut= NULL;
412 if(lut==NULL)
413 {
414 int i;
6ab6c7c3 415 lut = av_malloc(256*8);
bb270c08
DB
416 for(i=0; i<256; i++)
417 {
418 int v= i < 128 ? 2*i : 2*(i-256);
117e45b0 419/*
cc9b0679 420//Simulate 112242211 9-Tap filter
bb270c08
DB
421 uint64_t a= (v/16) & 0xFF;
422 uint64_t b= (v/8) & 0xFF;
423 uint64_t c= (v/4) & 0xFF;
424 uint64_t d= (3*v/8) & 0xFF;
117e45b0 425*/
cc9b0679 426//Simulate piecewise linear interpolation
bb270c08
DB
427 uint64_t a= (v/16) & 0xFF;
428 uint64_t b= (v*3/16) & 0xFF;
429 uint64_t c= (v*5/16) & 0xFF;
430 uint64_t d= (7*v/16) & 0xFF;
431 uint64_t A= (0x100 - a)&0xFF;
432 uint64_t B= (0x100 - b)&0xFF;
433 uint64_t C= (0x100 - c)&0xFF;
434 uint64_t D= (0x100 - c)&0xFF;
435
436 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
437 (D<<24) | (C<<16) | (B<<8) | (A);
438 //lut[i] = (v<<32) | (v<<24);
439 }
440 }
441
442 for(y=0; y<BLOCK_SIZE; y++)
443 {
444 int a= src[1] - src[2];
445 int b= src[3] - src[4];
446 int c= src[5] - src[6];
447
8925915f 448 int d= FFMAX(ABS(b) - (ABS(a) + ABS(c))/2, 0);
bb270c08
DB
449
450 if(d < QP)
451 {
02305ff3 452 int v = d * FFSIGN(-b);
bb270c08
DB
453
454 src[1] +=v/8;
455 src[2] +=v/4;
456 src[3] +=3*v/8;
457 src[4] -=3*v/8;
458 src[5] -=v/4;
459 src[6] -=v/8;
460
461 }
462 src+=stride;
463 }
cc9b0679
MN
464}
465
12eebd26
MN
466/**
467 * accurate deblock filter
468 */
792a5a7c 469static always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
bb270c08
DB
470 int y;
471 const int QP= c->QP;
472 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
473 const int dcThreshold= dcOffset*2 + 1;
12eebd26 474//START_TIMER
bb270c08
DB
475 src+= step*4; // src points to begin of the 8x8 Block
476 for(y=0; y<8; y++){
477 int numEq= 0;
478
479 if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
480 if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
481 if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
482 if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
483 if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
484 if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
485 if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
486 if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
487 if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
488 if(numEq > c->ppMode.flatnessThreshold){
489 int min, max, x;
490
491 if(src[0] > src[step]){
492 max= src[0];
493 min= src[step];
494 }else{
495 max= src[step];
496 min= src[0];
497 }
498 for(x=2; x<8; x+=2){
499 if(src[x*step] > src[(x+1)*step]){
500 if(src[x *step] > max) max= src[ x *step];
501 if(src[(x+1)*step] < min) min= src[(x+1)*step];
502 }else{
503 if(src[(x+1)*step] > max) max= src[(x+1)*step];
504 if(src[ x *step] < min) min= src[ x *step];
505 }
506 }
507 if(max-min < 2*QP){
508 const int first= ABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
509 const int last= ABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
510
511 int sums[10];
512 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
513 sums[1] = sums[0] - first + src[3*step];
514 sums[2] = sums[1] - first + src[4*step];
515 sums[3] = sums[2] - first + src[5*step];
516 sums[4] = sums[3] - first + src[6*step];
517 sums[5] = sums[4] - src[0*step] + src[7*step];
518 sums[6] = sums[5] - src[1*step] + last;
519 sums[7] = sums[6] - src[2*step] + last;
520 sums[8] = sums[7] - src[3*step] + last;
521 sums[9] = sums[8] - src[4*step] + last;
522
523 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
524 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
525 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
526 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
527 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
528 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
529 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
530 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
531 }
532 }else{
533 const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
534
535 if(ABS(middleEnergy) < 8*QP)
536 {
537 const int q=(src[3*step] - src[4*step])/2;
538 const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
539 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
540
8925915f
DB
541 int d= ABS(middleEnergy) - FFMIN( ABS(leftEnergy), ABS(rightEnergy) );
542 d= FFMAX(d, 0);
bb270c08
DB
543
544 d= (5*d + 32) >> 6;
02305ff3 545 d*= FFSIGN(-middleEnergy);
bb270c08
DB
546
547 if(q>0)
548 {
549 d= d<0 ? 0 : d;
550 d= d>q ? q : d;
551 }
552 else
553 {
554 d= d>0 ? 0 : d;
555 d= d<q ? q : d;
556 }
557
558 src[3*step]-= d;
559 src[4*step]+= d;
560 }
561 }
562
563 src += stride;
564 }
12eebd26
MN
565/*if(step==16){
566 STOP_TIMER("step16")
567}else{
568 STOP_TIMER("stepX")
569}*/
570}
cc9b0679 571
e89952aa 572//Note: we have C, MMX, MMX2 and 3DNOW versions; there is no combined 3DNOW+MMX2 one
cc9b0679 573//Plain C versions
e89952aa
MN
574#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
575#define COMPILE_C
576#endif
577
b0ac780a
MN
578#ifdef ARCH_POWERPC
579#ifdef HAVE_ALTIVEC
580#define COMPILE_ALTIVEC
b0ac780a
MN
581#endif //HAVE_ALTIVEC
582#endif //ARCH_POWERPC
583
053dea12 584#if defined(ARCH_X86) || defined(ARCH_X86_64)
e89952aa
MN
585
586#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
587#define COMPILE_MMX
588#endif
589
590#if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
591#define COMPILE_MMX2
592#endif
593
594#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
595#define COMPILE_3DNOW
596#endif
9c9e467d 597#endif //ARCH_X86
e89952aa
MN
598
599#undef HAVE_MMX
600#undef HAVE_MMX2
601#undef HAVE_3DNOW
b0ac780a 602#undef HAVE_ALTIVEC
e89952aa
MN
603
604#ifdef COMPILE_C
cc9b0679
MN
605#undef HAVE_MMX
606#undef HAVE_MMX2
607#undef HAVE_3DNOW
cc9b0679
MN
608#define RENAME(a) a ## _C
609#include "postprocess_template.c"
e89952aa 610#endif
cc9b0679 611
b0ac780a
MN
612#ifdef ARCH_POWERPC
613#ifdef COMPILE_ALTIVEC
614#undef RENAME
615#define HAVE_ALTIVEC
616#define RENAME(a) a ## _altivec
617#include "postprocess_altivec_template.c"
618#include "postprocess_template.c"
619#endif
620#endif //ARCH_POWERPC
621
cc9b0679 622//MMX versions
e89952aa 623#ifdef COMPILE_MMX
cc9b0679
MN
624#undef RENAME
625#define HAVE_MMX
626#undef HAVE_MMX2
627#undef HAVE_3DNOW
cc9b0679
MN
628#define RENAME(a) a ## _MMX
629#include "postprocess_template.c"
e89952aa 630#endif
cc9b0679
MN
631
632//MMX2 versions
e89952aa 633#ifdef COMPILE_MMX2
cc9b0679
MN
634#undef RENAME
635#define HAVE_MMX
636#define HAVE_MMX2
637#undef HAVE_3DNOW
cc9b0679
MN
638#define RENAME(a) a ## _MMX2
639#include "postprocess_template.c"
e89952aa 640#endif
cc9b0679
MN
641
642//3DNOW versions
e89952aa 643#ifdef COMPILE_3DNOW
cc9b0679
MN
644#undef RENAME
645#define HAVE_MMX
646#undef HAVE_MMX2
647#define HAVE_3DNOW
cc9b0679
MN
648#define RENAME(a) a ## _3DNow
649#include "postprocess_template.c"
e89952aa 650#endif
cc9b0679
MN
651
652// minor note: the HAVE_xyz macros are messed up after this line, so don't rely on them
653
654static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
bb270c08 655 QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc)
cc9b0679 656{
bb270c08
DB
657 PPContext *c= (PPContext *)vc;
658 PPMode *ppMode= (PPMode *)vm;
659 c->ppMode= *ppMode; //FIXME
9c9e467d 660
bb270c08
DB
661 // useing ifs here as they are faster than function pointers allthough the
662 // difference wouldnt be messureable here but its much better because
663 // someone might exchange the cpu whithout restarting mplayer ;)
e89952aa 664#ifdef RUNTIME_CPUDETECT
053dea12 665#if defined(ARCH_X86) || defined(ARCH_X86_64)
bb270c08
DB
666 // ordered per speed fasterst first
667 if(c->cpuCaps & PP_CPU_CAPS_MMX2)
668 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
669 else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
670 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
671 else if(c->cpuCaps & PP_CPU_CAPS_MMX)
672 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
673 else
674 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
cc9b0679 675#else
b0ac780a
MN
676#ifdef ARCH_POWERPC
677#ifdef HAVE_ALTIVEC
71487254 678 if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
bb270c08 679 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
b0ac780a
MN
680 else
681#endif
682#endif
bb270c08 683 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
be44a4d7 684#endif
e89952aa
MN
685#else //RUNTIME_CPUDETECT
686#ifdef HAVE_MMX2
bb270c08 687 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 688#elif defined (HAVE_3DNOW)
bb270c08 689 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 690#elif defined (HAVE_MMX)
bb270c08 691 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
b0ac780a 692#elif defined (HAVE_ALTIVEC)
bb270c08 693 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 694#else
bb270c08 695 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa
MN
696#endif
697#endif //!RUNTIME_CPUDETECT
117e45b0
MN
698}
699
cc9b0679 700//static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
bb270c08 701// QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
13e00528 702
/**
 * Help text for the -pp command line option.
 *
 * The filter names listed here must be spelled exactly as in the filters[]
 * table, because the parser compares them case-sensitively with strcmp().
 * The long names of "ha", "va" and "fq" are therefore listed as "ahdeblock",
 * "avdeblock" and "forcequant".
 */
char *pp_help=
"Available postprocessing filters:\n"
"Filters Options\n"
"short long name short long option Description\n"
"* * a autoq CPU power dependent enabler\n"
" c chrom chrominance filtering enabled\n"
" y nochrom chrominance filtering disabled\n"
" n noluma luma filtering disabled\n"
"hb hdeblock (2 threshold) horizontal deblocking filter\n"
" 1. difference factor: default=32, higher -> more deblocking\n"
" 2. flatness threshold: default=39, lower -> more deblocking\n"
" the h & v deblocking filters share these\n"
" so you can't set different thresholds for h / v\n"
"vb vdeblock (2 threshold) vertical deblocking filter\n"
"ha ahdeblock (2 threshold) horizontal deblocking filter\n"
"va avdeblock (2 threshold) vertical deblocking filter\n"
"h1 x1hdeblock experimental h deblock filter 1\n"
"v1 x1vdeblock experimental v deblock filter 1\n"
"dr dering deringing filter\n"
"al autolevels automatic brightness / contrast\n"
" f fullyrange stretch luminance to (0..255)\n"
"lb linblenddeint linear blend deinterlacer\n"
"li linipoldeint linear interpolating deinterlace\n"
"ci cubicipoldeint cubic interpolating deinterlacer\n"
"md mediandeint median deinterlacer\n"
"fd ffmpegdeint ffmpeg deinterlacer\n"
"l5 lowpass5 FIR lowpass deinterlacer\n"
"de default hb:a,vb:a,dr:a\n"
"fa fast h1:a,v1:a,dr:a\n"
"ac ha:a:128:7,va:a,dr:a\n"
"tn tmpnoise (3 threshold) temporal noise reducer\n"
" 1. <= 2. <= 3. larger -> stronger filtering\n"
"fq forcequant <quantizer> force quantizer\n"
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"\n"
;
911879d1 748
c41d972d 749pp_mode_t *pp_get_mode_by_name_and_quality(char *name, int quality)
911879d1 750{
bb270c08
DB
751 char temp[GET_MODE_BUFFER_SIZE];
752 char *p= temp;
7b49ce2e
SH
753 const char *filterDelimiters= ",/";
754 const char *optionDelimiters= ":";
bb270c08
DB
755 struct PPMode *ppMode;
756 char *filterToken;
757
6ab6c7c3 758 ppMode= av_malloc(sizeof(PPMode));
bb270c08
DB
759
760 ppMode->lumMode= 0;
761 ppMode->chromMode= 0;
762 ppMode->maxTmpNoise[0]= 700;
763 ppMode->maxTmpNoise[1]= 1500;
764 ppMode->maxTmpNoise[2]= 3000;
765 ppMode->maxAllowedY= 234;
766 ppMode->minAllowedY= 16;
767 ppMode->baseDcDiff= 256/8;
768 ppMode->flatnessThreshold= 56-16-1;
769 ppMode->maxClippedThreshold= 0.01;
770 ppMode->error=0;
771
772 strncpy(temp, name, GET_MODE_BUFFER_SIZE);
773
e7becfb2 774 av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
bb270c08
DB
775
776 for(;;){
777 char *filterName;
778 int q= 1000000; //PP_QUALITY_MAX;
779 int chrom=-1;
780 int luma=-1;
781 char *option;
782 char *options[OPTIONS_ARRAY_SIZE];
783 int i;
784 int filterNameOk=0;
785 int numOfUnknownOptions=0;
786 int enable=1; //does the user want us to enabled or disabled the filter
787
788 filterToken= strtok(p, filterDelimiters);
789 if(filterToken == NULL) break;
790 p+= strlen(filterToken) + 1; // p points to next filterToken
791 filterName= strtok(filterToken, optionDelimiters);
e7becfb2 792 av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
bb270c08
DB
793
794 if(*filterName == '-')
795 {
796 enable=0;
797 filterName++;
798 }
799
800 for(;;){ //for all options
801 option= strtok(NULL, optionDelimiters);
802 if(option == NULL) break;
803
e7becfb2 804 av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
bb270c08
DB
805 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
806 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
807 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
808 else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
809 else
810 {
811 options[numOfUnknownOptions] = option;
812 numOfUnknownOptions++;
813 }
814 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
815 }
816 options[numOfUnknownOptions] = NULL;
817
818 /* replace stuff from the replace Table */
819 for(i=0; replaceTable[2*i]!=NULL; i++)
820 {
821 if(!strcmp(replaceTable[2*i], filterName))
822 {
823 int newlen= strlen(replaceTable[2*i + 1]);
824 int plen;
825 int spaceLeft;
826
827 if(p==NULL) p= temp, *p=0; //last filter
828 else p--, *p=','; //not last filter
829
830 plen= strlen(p);
831 spaceLeft= p - temp + plen;
832 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE)
833 {
834 ppMode->error++;
835 break;
836 }
837 memmove(p + newlen, p, plen+1);
838 memcpy(p, replaceTable[2*i + 1], newlen);
839 filterNameOk=1;
840 }
841 }
842
843 for(i=0; filters[i].shortName!=NULL; i++)
844 {
bb270c08
DB
845 if( !strcmp(filters[i].longName, filterName)
846 || !strcmp(filters[i].shortName, filterName))
847 {
848 ppMode->lumMode &= ~filters[i].mask;
849 ppMode->chromMode &= ~filters[i].mask;
850
851 filterNameOk=1;
852 if(!enable) break; // user wants to disable it
853
854 if(q >= filters[i].minLumQuality && luma)
855 ppMode->lumMode|= filters[i].mask;
856 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
857 if(q >= filters[i].minChromQuality)
858 ppMode->chromMode|= filters[i].mask;
859
860 if(filters[i].mask == LEVEL_FIX)
861 {
862 int o;
863 ppMode->minAllowedY= 16;
864 ppMode->maxAllowedY= 234;
865 for(o=0; options[o]!=NULL; o++)
866 {
867 if( !strcmp(options[o],"fullyrange")
868 ||!strcmp(options[o],"f"))
869 {
870 ppMode->minAllowedY= 0;
871 ppMode->maxAllowedY= 255;
872 numOfUnknownOptions--;
873 }
874 }
875 }
876 else if(filters[i].mask == TEMP_NOISE_FILTER)
877 {
878 int o;
879 int numOfNoises=0;
880
881 for(o=0; options[o]!=NULL; o++)
882 {
883 char *tail;
884 ppMode->maxTmpNoise[numOfNoises]=
885 strtol(options[o], &tail, 0);
886 if(tail!=options[o])
887 {
888 numOfNoises++;
889 numOfUnknownOptions--;
890 if(numOfNoises >= 3) break;
891 }
892 }
893 }
894 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK
895 || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK)
896 {
897 int o;
898
899 for(o=0; options[o]!=NULL && o<2; o++)
900 {
901 char *tail;
902 int val= strtol(options[o], &tail, 0);
903 if(tail==options[o]) break;
904
905 numOfUnknownOptions--;
906 if(o==0) ppMode->baseDcDiff= val;
907 else ppMode->flatnessThreshold= val;
908 }
909 }
910 else if(filters[i].mask == FORCE_QUANT)
911 {
912 int o;
913 ppMode->forcedQuant= 15;
914
915 for(o=0; options[o]!=NULL && o<1; o++)
916 {
917 char *tail;
918 int val= strtol(options[o], &tail, 0);
919 if(tail==options[o]) break;
920
921 numOfUnknownOptions--;
922 ppMode->forcedQuant= val;
923 }
924 }
925 }
926 }
927 if(!filterNameOk) ppMode->error++;
928 ppMode->error += numOfUnknownOptions;
929 }
930
e7becfb2 931 av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
bb270c08
DB
932 if(ppMode->error)
933 {
e7becfb2 934 av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
6ab6c7c3 935 av_free(ppMode);
bb270c08
DB
936 return NULL;
937 }
938 return ppMode;
911879d1
MN
939}
940
c41d972d 941void pp_free_mode(pp_mode_t *mode){
6ab6c7c3 942 av_free(mode);
c41d972d
MN
943}
944
/**
 * Replace the buffer *p with a fresh, zero-filled one of the given size.
 * The old buffer is freed first; its contents are not preserved.
 *
 * @param p         address of the buffer pointer to replace
 * @param alignment currently unused
 * @param size      size of the new buffer in bytes
 */
static void reallocAlign(void **p, int alignment, int size){
    void *fresh;

    av_free(*p);
    fresh= av_mallocz(size);
    *p= fresh;
}
949
0426af31 950static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
bb270c08
DB
951 int mbWidth = (width+15)>>4;
952 int mbHeight= (height+15)>>4;
953 int i;
954
955 c->stride= stride;
956 c->qpStride= qpStride;
957
958 reallocAlign((void **)&c->tempDst, 8, stride*24);
959 reallocAlign((void **)&c->tempSrc, 8, stride*24);
960 reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
961 reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
962 for(i=0; i<256; i++)
963 c->yHistogram[i]= width*height/64*15/256;
964
965 for(i=0; i<3; i++)
966 {
967 //Note:the +17*1024 is just there so i dont have to worry about r/w over te end
968 reallocAlign((void **)&c->tempBlured[i], 8, stride*mbHeight*16 + 17*1024);
969 reallocAlign((void **)&c->tempBluredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
970 }
971
972 reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
973 reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
974 reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
975 reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
88c0bc7e
MN
976}
977
4cfbf61b 978static void global_init(void){
bb270c08
DB
979 int i;
980 memset(clip_table, 0, 256);
981 for(i=256; i<512; i++)
982 clip_table[i]= i;
983 memset(clip_table+512, 0, 256);
134eb1e5
MN
984}
985
e7becfb2
DB
/**
 * Name callback referenced by the AVClass initializer in this file.
 *
 * @param ptr ignored
 * @return the constant string "postproc"
 */
static const char * context_to_name(void * ptr)
{
        static const char label[] = "postproc";

        return label;
}
989
990static AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
991
88c0bc7e 992pp_context_t *pp_get_context(int width, int height, int cpuCaps){
6ab6c7c3 993 PPContext *c= av_malloc(sizeof(PPContext));
bb270c08
DB
994 int stride= (width+15)&(~15); //assumed / will realloc if needed
995 int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
115329f1 996
bb270c08 997 global_init();
134eb1e5 998
bb270c08 999 memset(c, 0, sizeof(PPContext));
e7becfb2 1000 c->av_class = &av_codec_context_class;
bb270c08
DB
1001 c->cpuCaps= cpuCaps;
1002 if(cpuCaps&PP_FORMAT){
1003 c->hChromaSubSample= cpuCaps&0x3;
1004 c->vChromaSubSample= (cpuCaps>>4)&0x3;
1005 }else{
1006 c->hChromaSubSample= 1;
1007 c->vChromaSubSample= 1;
1008 }
88c0bc7e 1009
bb270c08 1010 reallocBuffers(c, width, height, stride, qpStride);
115329f1 1011
bb270c08 1012 c->frameNum=-1;
45b4f285 1013
bb270c08 1014 return c;
45b4f285
MN
1015}
1016
9cb54f43 1017void pp_free_context(void *vc){
bb270c08
DB
1018 PPContext *c = (PPContext*)vc;
1019 int i;
115329f1 1020
6ab6c7c3
LB
1021 for(i=0; i<3; i++) av_free(c->tempBlured[i]);
1022 for(i=0; i<3; i++) av_free(c->tempBluredPast[i]);
115329f1 1023
6ab6c7c3
LB
1024 av_free(c->tempBlocks);
1025 av_free(c->yHistogram);
1026 av_free(c->tempDst);
1027 av_free(c->tempSrc);
1028 av_free(c->deintTemp);
1029 av_free(c->stdQPTable);
1030 av_free(c->nonBQPTable);
1031 av_free(c->forcedQPTable);
115329f1 1032
bb270c08 1033 memset(c, 0, sizeof(PPContext));
88c0bc7e 1034
6ab6c7c3 1035 av_free(c);
9c9e467d
MN
1036}
1037
9cb54f43 1038void pp_postprocess(uint8_t * src[3], int srcStride[3],
9c9e467d 1039 uint8_t * dst[3], int dstStride[3],
ec487e5d 1040 int width, int height,
9c9e467d 1041 QP_STORE_T *QP_store, int QPStride,
bb270c08 1042 pp_mode_t *vm, void *vc, int pict_type)
911879d1 1043{
bb270c08
DB
1044 int mbWidth = (width+15)>>4;
1045 int mbHeight= (height+15)>>4;
1046 PPMode *mode = (PPMode*)vm;
1047 PPContext *c = (PPContext*)vc;
8925915f 1048 int minStride= FFMAX(ABS(srcStride[0]), ABS(dstStride[0]));
bb270c08
DB
1049 int absQPStride = ABS(QPStride);
1050
1051 // c->stride and c->QPStride are always positive
1052 if(c->stride < minStride || c->qpStride < absQPStride)
1053 reallocBuffers(c, width, height,
8925915f
DB
1054 FFMAX(minStride, c->stride),
1055 FFMAX(c->qpStride, absQPStride));
bb270c08
DB
1056
1057 if(QP_store==NULL || (mode->lumMode & FORCE_QUANT))
1058 {
1059 int i;
1060 QP_store= c->forcedQPTable;
1061 absQPStride = QPStride = 0;
1062 if(mode->lumMode & FORCE_QUANT)
1063 for(i=0; i<mbWidth; i++) QP_store[i]= mode->forcedQuant;
1064 else
1065 for(i=0; i<mbWidth; i++) QP_store[i]= 1;
1066 }
0426af31 1067
bb270c08
DB
1068 if(pict_type & PP_PICT_TYPE_QP2){
1069 int i;
1070 const int count= mbHeight * absQPStride;
1071 for(i=0; i<(count>>2); i++){
1072 ((uint32_t*)c->stdQPTable)[i] = (((uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1073 }
1074 for(i<<=2; i<count; i++){
1075 c->stdQPTable[i] = QP_store[i]>>1;
1076 }
0426af31 1077 QP_store= c->stdQPTable;
bb270c08
DB
1078 QPStride= absQPStride;
1079 }
0426af31 1080
ec487e5d
MN
1081if(0){
1082int x,y;
1083for(y=0; y<mbHeight; y++){
bb270c08 1084 for(x=0; x<mbWidth; x++){
e7becfb2 1085 av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
bb270c08 1086 }
e7becfb2 1087 av_log(c, AV_LOG_INFO, "\n");
ec487e5d 1088}
e7becfb2 1089 av_log(c, AV_LOG_INFO, "\n");
ec487e5d 1090}
51e19dcc 1091
bb270c08
DB
1092 if((pict_type&7)!=3)
1093 {
1094 if (QPStride >= 0) {
1095 int i;
1096 const int count= mbHeight * QPStride;
1097 for(i=0; i<(count>>2); i++){
1098 ((uint32_t*)c->nonBQPTable)[i] = ((uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1099 }
1100 for(i<<=2; i<count; i++){
1101 c->nonBQPTable[i] = QP_store[i] & 0x3F;
1102 }
1103 } else {
1104 int i,j;
1105 for(i=0; i<mbHeight; i++) {
1106 for(j=0; j<absQPStride; j++) {
1107 c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1108 }
1109 }
1110 }
1111 }
1112
e7becfb2
DB
1113 av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
1114 mode->lumMode, mode->chromMode);
bb270c08
DB
1115
1116 postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1117 width, height, QP_store, QPStride, 0, mode, c);
1118
1119 width = (width )>>c->hChromaSubSample;
1120 height = (height)>>c->vChromaSubSample;
1121
1122 if(mode->chromMode)
1123 {
1124 postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1125 width, height, QP_store, QPStride, 1, mode, c);
1126 postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1127 width, height, QP_store, QPStride, 2, mode, c);
1128 }
1129 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2])
1130 {
1131 linecpy(dst[1], src[1], height, srcStride[1]);
1132 linecpy(dst[2], src[2], height, srcStride[2]);
1133 }
1134 else
1135 {
1136 int y;
1137 for(y=0; y<height; y++)
1138 {
1139 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1140 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
1141 }
1142 }
911879d1
MN
1143}
1144