replace FFMPEG with LIBAV in FFMPEG_CONFIGURATION
[libav.git] / libpostproc / postprocess.c
CommitLineData
3057fa66 1/*
b78e7197
DB
2 * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
3 *
4 * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
5 *
ef85972b 6 * This file is part of FFmpeg.
b78e7197
DB
7 *
8 * FFmpeg is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
3057fa66 22
b304569a 23/**
ba87f080 24 * @file
b304569a
MN
25 * postprocessing.
26 */
115329f1 27
3057fa66 28/*
bb270c08
DB
29 C MMX MMX2 3DNow AltiVec
30isVertDC Ec Ec Ec
31isVertMinMaxOk Ec Ec Ec
32doVertLowPass E e e Ec
33doVertDefFilter Ec Ec e e Ec
34isHorizDC Ec Ec Ec
35isHorizMinMaxOk a E Ec
36doHorizLowPass E e e Ec
37doHorizDefFilter Ec Ec e e Ec
38do_a_deblock Ec E Ec E
39deRing E e e* Ecp
40Vertical RKAlgo1 E a a
41Horizontal RKAlgo1 a a
42Vertical X1# a E E
43Horizontal X1# a E E
44LinIpolDeinterlace e E E*
45CubicIpolDeinterlace a e e*
46LinBlendDeinterlace e E E*
47MedianDeinterlace# E Ec Ec
48TempDeNoiser# E e e Ec
d5a1a995 49
2cab6401 50* I do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
755bfeab 51# more or less selfinvented filters so the exactness is not too meaningful
3057fa66 52E = Exact implementation
04932b0d 53e = almost exact implementation (slightly different rounding,...)
3057fa66
A
54a = alternative / approximate impl
55c = checked against the other implementations (-vo md5)
b0ac780a 56p = partially optimized, still some work to do
3057fa66
A
57*/
58
59/*
60TODO:
3057fa66 61reduce the time wasted on the mem transfer
3057fa66 62unroll stuff if instructions depend too much on the prior one
3057fa66 63move YScale thing to the end instead of fixing QP
13e00528 64write a faster and higher quality deblocking filter :)
make the mainloop more flexible (variable number of blocks at once;
 the if/else stuff per block is slowing things down)
9f45d04d 67compare the quality & speed of all filters
9f45d04d 68split this huge file
8405b3fd 69optimize c versions
117e45b0 70try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
3057fa66 71...
13e00528
A
72*/
73
//Changelog: use the git log
3057fa66 75
9858f773 76#include "config.h"
245976da 77#include "libavutil/avutil.h"
3057fa66
A
78#include <inttypes.h>
79#include <stdio.h>
d5a1a995 80#include <stdlib.h>
911879d1 81#include <string.h>
3057fa66 82//#undef HAVE_MMX2
ebc3209a 83//#define HAVE_AMD3DNOW
3057fa66 84//#undef HAVE_MMX
cc9b0679 85//#undef ARCH_X86
7f16f6e6 86//#define DEBUG_BRIGHTNESS
13e00528 87#include "postprocess.h"
c41d972d 88#include "postprocess_internal.h"
bba9b16c 89
2a4a62bf
SS
/**
 * Return the libpostproc version constant LIBPOSTPROC_VERSION_INT.
 */
unsigned postproc_version(void)
{
    return LIBPOSTPROC_VERSION_INT;
}
94
/**
 * Return the LIBAV_CONFIGURATION string.
 * NOTE(review): presumably the configure-time option string coming from
 * config.h - confirm against the build system.
 */
const char *postproc_configuration(void)
{
    return LIBAV_CONFIGURATION;
}
99
/**
 * Return the license text (FFMPEG_LICENSE).
 *
 * The string literal is stored as LICENSE_PREFIX immediately followed by
 * FFMPEG_LICENSE; the returned pointer is advanced past the prefix
 * (sizeof counts the terminating NUL, hence the "- 1"), so the caller only
 * sees the FFMPEG_LICENSE part.
 *
 * NOTE(review): postproc_configuration() uses LIBAV_CONFIGURATION while this
 * function still uses FFMPEG_LICENSE - confirm that the mix is intended.
 */
const char *postproc_license(void)
{
#define LICENSE_PREFIX "libpostproc license: "
    return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1;
}
105
b250f9c6 106#if HAVE_ALTIVEC_H
a7b2871c
RD
107#include <altivec.h>
108#endif
109
911879d1
MN
/* Size of the scratch buffer pp_get_mode_by_name_and_quality() parses in. */
#define GET_MODE_BUFFER_SIZE 500
/* Capacity of the per-filter array of not-yet-recognised option strings. */
#define OPTIONS_ARRAY_SIZE 10
/* Edge length, in pixels, of the blocks the filters below work on. */
#define BLOCK_SIZE 8
#define TEMP_STRIDE 8
//#define NUM_BLOCKS_AT_ONCE 16 //not used yet

#if ARCH_X86
/* 64-bit constants: wNN repeats the value in each 16-bit lane,
 * bNN repeats it in each byte.
 * NOTE(review): the first macro argument (8) is presumably the alignment. */
DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
#endif

/* Declared for every architecture, not only x86. */
DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
3057fa66 128
9c9e467d 129
911879d1
MN
/**
 * Table of all filters selectable through a mode string.
 *
 * Each entry holds the short name, the long name, three integers and the
 * mode bit mask. pp_get_mode_by_name_and_quality() matches a filter name
 * against both names and reads the chromDefault, minLumQuality and
 * minChromQuality fields.
 * NOTE(review): the order of the three integer fields is fixed by the
 * PPFilter declaration, which is not visible in this file - confirm there.
 *
 * The list is terminated by an entry whose shortName is NULL.
 */
static struct PPFilter filters[]=
{
    {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK},
    {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK},
/*  {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER},
    {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/
    {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER},
    {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER},
    {"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK},
    {"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK},
    {"dr", "dering", 1, 5, 6, DERING},
    {"al", "autolevels", 0, 1, 2, LEVEL_FIX},
    {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
    {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
    {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
    {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER},
    {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER},
    {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER},
    {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER},
    {"fq", "forcequant", 1, 0, 0, FORCE_QUANT},
    {NULL, NULL,0,0,0,0} //End Marker
};
152
/**
 * Alias table: a flat list of (alias, expansion) string pairs.
 *
 * pp_get_mode_by_name_and_quality() walks it two entries at a time; when a
 * filter name equals an alias, the expansion is spliced into the mode string
 * so that it gets parsed in place of the alias.
 * A single NULL in an alias slot ends the list.
 */
static const char *replaceTable[]=
{
    "default", "hb:a,vb:a,dr:a",
    "de", "hb:a,vb:a,dr:a",
    "fast", "h1:a,v1:a,dr:a",
    "fa", "h1:a,v1:a,dr:a",
    "ac", "ha:a:128:7,va:a,dr:a",
    NULL //End Marker
};
162
3057fa66 163
b250f9c6 164#if ARCH_X86
3057fa66
A
/**
 * Emit a `prefetchnta` instruction with the address p as its operand.
 * The asm statement has no outputs and no clobbers; `volatile` keeps the
 * compiler from discarding it.
 */
static inline void prefetchnta(void *p)
{
    __asm__ volatile(   "prefetchnta (%0)\n\t"
        : : "r" (p)
    );
}
171
/**
 * Emit a `prefetcht0` instruction with the address p as its operand.
 * The asm statement has no outputs and no clobbers; `volatile` keeps the
 * compiler from discarding it.
 */
static inline void prefetcht0(void *p)
{
    __asm__ volatile(   "prefetcht0 (%0)\n\t"
        : : "r" (p)
    );
}
178
/**
 * Emit a `prefetcht1` instruction with the address p as its operand.
 * The asm statement has no outputs and no clobbers; `volatile` keeps the
 * compiler from discarding it.
 */
static inline void prefetcht1(void *p)
{
    __asm__ volatile(   "prefetcht1 (%0)\n\t"
        : : "r" (p)
    );
}
185
/**
 * Emit a `prefetcht2` instruction with the address p as its operand.
 * The asm statement has no outputs and no clobbers; `volatile` keeps the
 * compiler from discarding it.
 */
static inline void prefetcht2(void *p)
{
    __asm__ volatile(   "prefetcht2 (%0)\n\t"
        : : "r" (p)
    );
}
9a722af7 192#endif
3057fa66 193
04932b0d
DB
194/* The horizontal functions exist only in C because the MMX
195 * code is faster with vertical filters and transposing. */
3057fa66 196
cf5ec61d
MN
197/**
198 * Check if the given 8x8 Block is mostly "flat"
199 */
b0ac780a 200static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
cf5ec61d 201{
16e0bf73
DB
202 int numEq= 0;
203 int y;
204 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
205 const int dcThreshold= dcOffset*2 + 1;
206
207 for(y=0; y<BLOCK_SIZE; y++){
208 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
209 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
210 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
211 if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
212 if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
213 if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
214 if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
215 src+= stride;
216 }
217 return numEq > c->ppMode.flatnessThreshold;
9c9e467d
MN
218}
219
220/**
221 * Check if the middle 8x8 Block in the given 8x16 block is flat
222 */
16e0bf73
DB
223static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c)
224{
225 int numEq= 0;
226 int y;
227 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
228 const int dcThreshold= dcOffset*2 + 1;
229
230 src+= stride*4; // src points to begin of the 8x8 Block
231 for(y=0; y<BLOCK_SIZE-1; y++){
232 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
233 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
234 if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
235 if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
236 if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
237 if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
238 if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
239 if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
240 src+= stride;
241 }
242 return numEq > c->ppMode.flatnessThreshold;
cf5ec61d
MN
243}
244
/**
 * Sparse range check over 8 rows.
 *
 * One pixel pair is sampled per row, with the column pairs cycling through
 * (0,5), (2,7), (4,1), (6,3). The check fails as soon as a sampled
 * difference lies outside [-2*QP, 2*QP] (for QP >= 0).
 *
 * @param src    first pixel of the first row
 * @param stride distance between two rows
 * @param QP     quantiser the allowed range is derived from
 * @return 1 if all 8 sampled pairs are within range, 0 otherwise
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{
    static const int firstCol[4] = {0, 2, 4, 6};
    static const int secondCol[4]= {5, 7, 1, 3};
    const uint8_t *row= src;
    int line;

    for(line=0; line<8; line++, row+= stride){
        const int a= row[firstCol [line & 3]];
        const int b= row[secondCol[line & 3]];

        /* negative sums wrap to huge unsigned values and fail as well */
        if((unsigned)(a - b + 2*QP) > 4*QP) return 0;
    }
    return 1;
}
cf5ec61d 267
cb482d25
MN
268static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
269{
270#if 1
271#if 1
16e0bf73
DB
272 int x;
273 src+= stride*4;
274 for(x=0; x<BLOCK_SIZE; x+=4){
275 if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0;
276 if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
277 if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
278 if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
279 }
cb482d25 280#else
16e0bf73
DB
281 int x;
282 src+= stride*3;
283 for(x=0; x<BLOCK_SIZE; x++){
284 if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
285 }
cb482d25 286#endif
16e0bf73 287 return 1;
cb482d25 288#else
16e0bf73
DB
289 int x;
290 src+= stride*4;
291 for(x=0; x<BLOCK_SIZE; x++){
292 int min=255;
293 int max=0;
294 int y;
295 for(y=0; y<8; y++){
296 int v= src[x + y*stride];
297 if(v>max) max=v;
298 if(v<min) min=v;
bb270c08 299 }
16e0bf73
DB
300 if(max-min > 2*QP) return 0;
301 }
302 return 1;
cb482d25
MN
303#endif
304}
305
16e0bf73
DB
306static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c)
307{
308 if( isHorizDC_C(src, stride, c) ){
309 if( isHorizMinMaxOk_C(src, stride, c->QP) )
310 return 1;
311 else
312 return 0;
313 }else{
314 return 2;
315 }
b0ac780a
MN
316}
317
16e0bf73
DB
318static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c)
319{
320 if( isVertDC_C(src, stride, c) ){
321 if( isVertMinMaxOk_C(src, stride, c->QP) )
322 return 1;
323 else
324 return 0;
325 }else{
326 return 2;
327 }
cf5ec61d
MN
328}
329
b0ac780a 330static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
cf5ec61d 331{
16e0bf73
DB
332 int y;
333 for(y=0; y<BLOCK_SIZE; y++){
334 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
335
336 if(FFABS(middleEnergy) < 8*c->QP){
337 const int q=(dst[3] - dst[4])/2;
338 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
339 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
340
341 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
342 d= FFMAX(d, 0);
343
344 d= (5*d + 32) >> 6;
345 d*= FFSIGN(-middleEnergy);
346
347 if(q>0)
348 {
349 d= d<0 ? 0 : d;
350 d= d>q ? q : d;
351 }
352 else
353 {
354 d= d>0 ? 0 : d;
355 d= d<q ? q : d;
356 }
357
358 dst[3]-= d;
359 dst[4]+= d;
bb270c08 360 }
16e0bf73
DB
361 dst+= stride;
362 }
cf5ec61d
MN
363}
364
365/**
366 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
367 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
368 */
b0ac780a 369static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
cf5ec61d 370{
16e0bf73
DB
371 int y;
372 for(y=0; y<BLOCK_SIZE; y++){
373 const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
374 const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
375
376 int sums[10];
377 sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
378 sums[1] = sums[0] - first + dst[3];
379 sums[2] = sums[1] - first + dst[4];
380 sums[3] = sums[2] - first + dst[5];
381 sums[4] = sums[3] - first + dst[6];
382 sums[5] = sums[4] - dst[0] + dst[7];
383 sums[6] = sums[5] - dst[1] + last;
384 sums[7] = sums[6] - dst[2] + last;
385 sums[8] = sums[7] - dst[3] + last;
386 sums[9] = sums[8] - dst[4] + last;
387
388 dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
389 dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
390 dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
391 dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
392 dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
393 dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
394 dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
395 dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
396
397 dst+= stride;
398 }
cf5ec61d
MN
399}
400
4e4dcbc5 401/**
cc9b0679
MN
402 * Experimental Filter 1 (Horizontal)
403 * will not damage linear gradients
bd107136 404 * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
755bfeab
DB
405 * can only smooth blocks at the expected locations (it cannot smooth them if they did move)
406 * MMX2 version does correct clipping C version does not
cc9b0679 407 * not identical with the vertical one
4e4dcbc5 408 */
cc9b0679
MN
409static inline void horizX1Filter(uint8_t *src, int stride, int QP)
410{
16e0bf73
DB
411 int y;
412 static uint64_t *lut= NULL;
413 if(lut==NULL)
414 {
415 int i;
416 lut = av_malloc(256*8);
417 for(i=0; i<256; i++)
bb270c08 418 {
16e0bf73 419 int v= i < 128 ? 2*i : 2*(i-256);
117e45b0 420/*
cc9b0679 421//Simulate 112242211 9-Tap filter
16e0bf73
DB
422 uint64_t a= (v/16) & 0xFF;
423 uint64_t b= (v/8) & 0xFF;
424 uint64_t c= (v/4) & 0xFF;
425 uint64_t d= (3*v/8) & 0xFF;
117e45b0 426*/
cc9b0679 427//Simulate piecewise linear interpolation
16e0bf73
DB
428 uint64_t a= (v/16) & 0xFF;
429 uint64_t b= (v*3/16) & 0xFF;
430 uint64_t c= (v*5/16) & 0xFF;
431 uint64_t d= (7*v/16) & 0xFF;
432 uint64_t A= (0x100 - a)&0xFF;
433 uint64_t B= (0x100 - b)&0xFF;
434 uint64_t C= (0x100 - c)&0xFF;
435 uint64_t D= (0x100 - c)&0xFF;
436
437 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
438 (D<<24) | (C<<16) | (B<<8) | (A);
439 //lut[i] = (v<<32) | (v<<24);
bb270c08 440 }
16e0bf73 441 }
bb270c08 442
16e0bf73
DB
443 for(y=0; y<BLOCK_SIZE; y++){
444 int a= src[1] - src[2];
445 int b= src[3] - src[4];
446 int c= src[5] - src[6];
bb270c08 447
16e0bf73 448 int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
bb270c08 449
16e0bf73
DB
450 if(d < QP){
451 int v = d * FFSIGN(-b);
bb270c08 452
16e0bf73
DB
453 src[1] +=v/8;
454 src[2] +=v/4;
455 src[3] +=3*v/8;
456 src[4] -=3*v/8;
457 src[5] -=v/4;
458 src[6] -=v/8;
bb270c08 459 }
16e0bf73
DB
460 src+=stride;
461 }
cc9b0679
MN
462}
463
12eebd26
MN
/**
 * accurate deblock filter
 *
 * Filters 8 independent lines in place. Within a line, consecutive samples
 * are `step` apart; consecutive lines are `stride` apart. src is advanced by
 * 4 samples first, after which samples -1..8 of each line are read and
 * samples 0..7 may be written.
 *
 * For every line:
 *  - the 9 neighbouring sample pairs (-1,0) .. (7,8) are put through the
 *    dcOffset/dcThreshold test and the passing ones are counted;
 *  - if the count exceeds c->ppMode.flatnessThreshold and the spread
 *    (max-min) of samples 0..7 is below 2*QP, the line is low-pass filtered
 *    with the same running-sum scheme as doHorizLowPass_C();
 *  - if the count does not exceed the threshold, the same correction as in
 *    doHorizDefFilter_C() is applied to samples 3 and 4.
 *
 * @param src    start of the first line, 4 samples before the filtered ones
 * @param step   distance between consecutive samples of one line
 * @param stride distance between consecutive lines
 * @param c      context; QP, nonBQP, ppMode.baseDcDiff and
 *               ppMode.flatnessThreshold are read
 */
static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
    int y;
    const int QP= c->QP;
    const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
    const int dcThreshold= dcOffset*2 + 1;
//START_TIMER
    src+= step*4; // src points to begin of the 8x8 Block
    for(y=0; y<8; y++){
        int numEq= 0;

        /* count neighbouring sample pairs that are "equal enough"; the
           unsigned cast turns the two-sided range test into one comparison */
        if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
        if(numEq > c->ppMode.flatnessThreshold){
            /* flat line: find min and max of samples 0..7, handling the
               samples in pairs so each pair needs only three comparisons */
            int min, max, x;

            if(src[0] > src[step]){
                max= src[0];
                min= src[step];
            }else{
                max= src[step];
                min= src[0];
            }
            for(x=2; x<8; x+=2){
                if(src[x*step] > src[(x+1)*step]){
                    if(src[x    *step] > max) max= src[ x   *step];
                    if(src[(x+1)*step] < min) min= src[(x+1)*step];
                }else{
                    if(src[(x+1)*step] > max) max= src[(x+1)*step];
                    if(src[ x   *step] < min) min= src[ x   *step];
                }
            }
            if(max-min < 2*QP){
                /* border values: the outer sample is used only if it is
                   closer than QP to its inner neighbour */
                const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
                const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];

                /* running window sums, all built from unfiltered samples */
                int sums[10];
                sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
                sums[1] = sums[0] - first + src[3*step];
                sums[2] = sums[1] - first + src[4*step];
                sums[3] = sums[2] - first + src[5*step];
                sums[4] = sums[3] - first + src[6*step];
                sums[5] = sums[4] - src[0*step] + src[7*step];
                sums[6] = sums[5] - src[1*step] + last;
                sums[7] = sums[6] - src[2*step] + last;
                sums[8] = sums[7] - src[3*step] + last;
                sums[9] = sums[8] - src[4*step] + last;

                /* each output only re-reads its own, still unfiltered, sample */
                src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
                src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
                src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
                src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
                src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
                src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
                src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
                src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
            }
        }else{
            /* non-flat line: default filter on the two middle samples */
            const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);

            if(FFABS(middleEnergy) < 8*QP){
                const int q=(src[3*step] - src[4*step])/2;
                const int leftEnergy=  5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
                const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);

                int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
                d= FFMAX(d, 0);

                d= (5*d + 32) >> 6;
                d*= FFSIGN(-middleEnergy);

                /* clamp d to the interval spanned by 0 and q */
                if(q>0){
                    d= d<0 ? 0 : d;
                    d= d>q ? q : d;
                }else{
                    d= d>0 ? 0 : d;
                    d= d<q ? q : d;
                }

                src[3*step]-= d;
                src[4*step]+= d;
            }
        }

        src += stride;
    }
/*if(step==16){
    STOP_TIMER("step16")
}else{
    STOP_TIMER("stepX")
}*/
}
cc9b0679 565
//Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one

/*
 * Step 1: decide which variants of postprocess_template.c get compiled.
 * With CONFIG_RUNTIME_CPUDETECT every variant available for the
 * architecture is built; otherwise the choice follows the HAVE_* macros.
 */
//Plain C versions
#if !(HAVE_MMX || HAVE_ALTIVEC) || CONFIG_RUNTIME_CPUDETECT
#define COMPILE_C
#endif

#if HAVE_ALTIVEC
#define COMPILE_ALTIVEC
#endif //HAVE_ALTIVEC

#if ARCH_X86

#if (HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
#define COMPILE_MMX
#endif

#if HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT
#define COMPILE_MMX2
#endif

#if (HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
#define COMPILE_3DNOW
#endif
#endif /* ARCH_X86 */

/*
 * Step 2: reset all feature macros to 0. Each section below re-defines the
 * ones its variant needs before including the template, and RENAME() gives
 * the functions of that variant a distinct suffix.
 */
#undef HAVE_MMX
#define HAVE_MMX 0
#undef HAVE_MMX2
#define HAVE_MMX2 0
#undef HAVE_AMD3DNOW
#define HAVE_AMD3DNOW 0
#undef HAVE_ALTIVEC
#define HAVE_ALTIVEC 0

/* Step 3: include the template once per selected variant. */
#ifdef COMPILE_C
#define RENAME(a) a ## _C
#include "postprocess_template.c"
#endif

#ifdef COMPILE_ALTIVEC
#undef RENAME
#undef HAVE_ALTIVEC
#define HAVE_ALTIVEC 1
#define RENAME(a) a ## _altivec
#include "postprocess_altivec_template.c"
#include "postprocess_template.c"
#endif

//MMX versions
#ifdef COMPILE_MMX
#undef RENAME
#undef HAVE_MMX
#define HAVE_MMX 1
#define RENAME(a) a ## _MMX
#include "postprocess_template.c"
#endif

//MMX2 versions
#ifdef COMPILE_MMX2
#undef RENAME
#undef HAVE_MMX
#undef HAVE_MMX2
#define HAVE_MMX 1
#define HAVE_MMX2 1
#define RENAME(a) a ## _MMX2
#include "postprocess_template.c"
#endif

//3DNOW versions
#ifdef COMPILE_3DNOW
#undef RENAME
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_AMD3DNOW
#define HAVE_MMX 1
#define HAVE_MMX2 0
#define HAVE_AMD3DNOW 1
#define RENAME(a) a ## _3DNow
#include "postprocess_template.c"
#endif

// minor note: the HAVE_xyz is messed up after that line so do not use it.
// (From here on the HAVE_* macros hold whatever the last compiled section
// above set, not the values that came from config.h.)
cc9b0679 648
6c51fd3f 649static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
7dfea342 650 const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
cc9b0679 651{
16e0bf73
DB
652 PPContext *c= (PPContext *)vc;
653 PPMode *ppMode= (PPMode *)vm;
654 c->ppMode= *ppMode; //FIXME
9c9e467d 655
16e0bf73
DB
656 // Using ifs here as they are faster than function pointers although the
657 // difference would not be measurable here but it is much better because
658 // someone might exchange the CPU whithout restarting MPlayer ;)
e90f5b5a 659#if CONFIG_RUNTIME_CPUDETECT
b250f9c6 660#if ARCH_X86
16e0bf73
DB
661 // ordered per speed fastest first
662 if(c->cpuCaps & PP_CPU_CAPS_MMX2)
663 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
664 else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
665 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
666 else if(c->cpuCaps & PP_CPU_CAPS_MMX)
667 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
668 else
669 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
cc9b0679 670#else
b250f9c6 671#if HAVE_ALTIVEC
16e0bf73
DB
672 if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
673 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
674 else
b0ac780a 675#endif
16e0bf73 676 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
be44a4d7 677#endif
e90f5b5a 678#else //CONFIG_RUNTIME_CPUDETECT
b250f9c6 679#if HAVE_MMX2
16e0bf73 680 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
ebc3209a 681#elif HAVE_AMD3DNOW
16e0bf73 682 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
b250f9c6 683#elif HAVE_MMX
16e0bf73 684 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
b250f9c6 685#elif HAVE_ALTIVEC
16e0bf73 686 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 687#else
16e0bf73 688 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 689#endif
e90f5b5a 690#endif //!CONFIG_RUNTIME_CPUDETECT
117e45b0
MN
691}
692
cc9b0679 693//static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
bb270c08 694// QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
13e00528 695
/* -pp Command line Help
 *
 * The filter names listed here must be spelled exactly as in the filters[]
 * and replaceTable[] tables, because the mode string parser compares names
 * with strcmp() (case sensitive).
*/
#if LIBPOSTPROC_VERSION_INT < (52<<16)
const char *const pp_help=
#else
const char pp_help[] =
#endif
"Available postprocessing filters:\n"
"Filters Options\n"
"short long name short long option Description\n"
"* * a autoq CPU power dependent enabler\n"
" c chrom chrominance filtering enabled\n"
" y nochrom chrominance filtering disabled\n"
" n noluma luma filtering disabled\n"
"hb hdeblock (2 threshold) horizontal deblocking filter\n"
" 1. difference factor: default=32, higher -> more deblocking\n"
" 2. flatness threshold: default=39, lower -> more deblocking\n"
" the h & v deblocking filters share these\n"
" so you can't set different thresholds for h / v\n"
"vb vdeblock (2 threshold) vertical deblocking filter\n"
"ha ahdeblock (2 threshold) horizontal deblocking filter\n"
"va avdeblock (2 threshold) vertical deblocking filter\n"
"h1 x1hdeblock experimental h deblock filter 1\n"
"v1 x1vdeblock experimental v deblock filter 1\n"
"dr dering deringing filter\n"
"al autolevels automatic brightness / contrast\n"
" f fullyrange stretch luminance to (0..255)\n"
"lb linblenddeint linear blend deinterlacer\n"
"li linipoldeint linear interpolating deinterlace\n"
"ci cubicipoldeint cubic interpolating deinterlacer\n"
"md mediandeint median deinterlacer\n"
"fd ffmpegdeint ffmpeg deinterlacer\n"
"l5 lowpass5 FIR lowpass deinterlacer\n"
"de default hb:a,vb:a,dr:a\n"
"fa fast h1:a,v1:a,dr:a\n"
"ac ha:a:128:7,va:a,dr:a\n"
"tn tmpnoise (3 threshold) temporal noise reducer\n"
" 1. <= 2. <= 3. larger -> stronger filtering\n"
"fq forcequant <quantizer> force quantizer\n"
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"\n"
;
911879d1 745
7dfea342 746pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality)
911879d1 747{
16e0bf73
DB
748 char temp[GET_MODE_BUFFER_SIZE];
749 char *p= temp;
750 static const char filterDelimiters[] = ",/";
751 static const char optionDelimiters[] = ":";
752 struct PPMode *ppMode;
753 char *filterToken;
754
755 ppMode= av_malloc(sizeof(PPMode));
756
757 ppMode->lumMode= 0;
758 ppMode->chromMode= 0;
759 ppMode->maxTmpNoise[0]= 700;
760 ppMode->maxTmpNoise[1]= 1500;
761 ppMode->maxTmpNoise[2]= 3000;
762 ppMode->maxAllowedY= 234;
763 ppMode->minAllowedY= 16;
764 ppMode->baseDcDiff= 256/8;
765 ppMode->flatnessThreshold= 56-16-1;
766 ppMode->maxClippedThreshold= 0.01;
767 ppMode->error=0;
768
769 strncpy(temp, name, GET_MODE_BUFFER_SIZE);
770
771 av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
772
773 for(;;){
774 char *filterName;
775 int q= 1000000; //PP_QUALITY_MAX;
776 int chrom=-1;
777 int luma=-1;
778 char *option;
779 char *options[OPTIONS_ARRAY_SIZE];
780 int i;
781 int filterNameOk=0;
782 int numOfUnknownOptions=0;
783 int enable=1; //does the user want us to enabled or disabled the filter
784
785 filterToken= strtok(p, filterDelimiters);
786 if(filterToken == NULL) break;
787 p+= strlen(filterToken) + 1; // p points to next filterToken
788 filterName= strtok(filterToken, optionDelimiters);
789 av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
790
791 if(*filterName == '-'){
792 enable=0;
793 filterName++;
794 }
bb270c08 795
16e0bf73
DB
796 for(;;){ //for all options
797 option= strtok(NULL, optionDelimiters);
798 if(option == NULL) break;
799
800 av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
801 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
802 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
803 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
804 else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
805 else{
806 options[numOfUnknownOptions] = option;
807 numOfUnknownOptions++;
808 }
809 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
810 }
811 options[numOfUnknownOptions] = NULL;
812
813 /* replace stuff from the replace Table */
814 for(i=0; replaceTable[2*i]!=NULL; i++){
815 if(!strcmp(replaceTable[2*i], filterName)){
816 int newlen= strlen(replaceTable[2*i + 1]);
817 int plen;
818 int spaceLeft;
819
820 if(p==NULL) p= temp, *p=0; //last filter
821 else p--, *p=','; //not last filter
822
823 plen= strlen(p);
824 spaceLeft= p - temp + plen;
825 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE){
826 ppMode->error++;
827 break;
bb270c08 828 }
16e0bf73
DB
829 memmove(p + newlen, p, plen+1);
830 memcpy(p, replaceTable[2*i + 1], newlen);
831 filterNameOk=1;
832 }
833 }
bb270c08 834
16e0bf73
DB
835 for(i=0; filters[i].shortName!=NULL; i++){
836 if( !strcmp(filters[i].longName, filterName)
837 || !strcmp(filters[i].shortName, filterName)){
838 ppMode->lumMode &= ~filters[i].mask;
839 ppMode->chromMode &= ~filters[i].mask;
840
841 filterNameOk=1;
842 if(!enable) break; // user wants to disable it
843
844 if(q >= filters[i].minLumQuality && luma)
845 ppMode->lumMode|= filters[i].mask;
846 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
847 if(q >= filters[i].minChromQuality)
848 ppMode->chromMode|= filters[i].mask;
849
850 if(filters[i].mask == LEVEL_FIX){
851 int o;
852 ppMode->minAllowedY= 16;
853 ppMode->maxAllowedY= 234;
854 for(o=0; options[o]!=NULL; o++){
855 if( !strcmp(options[o],"fullyrange")
856 ||!strcmp(options[o],"f")){
857 ppMode->minAllowedY= 0;
858 ppMode->maxAllowedY= 255;
859 numOfUnknownOptions--;
bb270c08 860 }
16e0bf73 861 }
bb270c08 862 }
16e0bf73 863 else if(filters[i].mask == TEMP_NOISE_FILTER)
bb270c08 864 {
16e0bf73
DB
865 int o;
866 int numOfNoises=0;
867
868 for(o=0; options[o]!=NULL; o++){
869 char *tail;
870 ppMode->maxTmpNoise[numOfNoises]=
871 strtol(options[o], &tail, 0);
872 if(tail!=options[o]){
873 numOfNoises++;
874 numOfUnknownOptions--;
875 if(numOfNoises >= 3) break;
bb270c08 876 }
16e0bf73 877 }
bb270c08 878 }
16e0bf73
DB
879 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK
880 || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
881 int o;
882
883 for(o=0; options[o]!=NULL && o<2; o++){
884 char *tail;
885 int val= strtol(options[o], &tail, 0);
886 if(tail==options[o]) break;
887
888 numOfUnknownOptions--;
889 if(o==0) ppMode->baseDcDiff= val;
890 else ppMode->flatnessThreshold= val;
891 }
892 }
893 else if(filters[i].mask == FORCE_QUANT){
894 int o;
895 ppMode->forcedQuant= 15;
896
897 for(o=0; options[o]!=NULL && o<1; o++){
898 char *tail;
899 int val= strtol(options[o], &tail, 0);
900 if(tail==options[o]) break;
901
902 numOfUnknownOptions--;
903 ppMode->forcedQuant= val;
904 }
905 }
906 }
bb270c08 907 }
16e0bf73
DB
908 if(!filterNameOk) ppMode->error++;
909 ppMode->error += numOfUnknownOptions;
910 }
911
912 av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
913 if(ppMode->error){
914 av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
915 av_free(ppMode);
916 return NULL;
917 }
918 return ppMode;
911879d1
MN
919}
920
/**
 * Release a mode returned by pp_get_mode_by_name_and_quality().
 * The pointer is handed straight to av_free().
 */
void pp_free_mode(pp_mode *mode){
    av_free(mode);
}
924
/**
 * Replace the buffer *p by a new, zero-filled one of the given size.
 *
 * The old buffer is freed first; its contents are not carried over.
 * The alignment argument is not used by this implementation, and *p is set
 * to whatever av_mallocz() returns without being checked.
 */
static void reallocAlign(void **p, int alignment, int size){
    void *fresh;

    av_free(*p);
    fresh= av_mallocz(size);
    *p= fresh;
}
929
0426af31 930static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
16e0bf73
DB
931 int mbWidth = (width+15)>>4;
932 int mbHeight= (height+15)>>4;
933 int i;
934
935 c->stride= stride;
936 c->qpStride= qpStride;
937
938 reallocAlign((void **)&c->tempDst, 8, stride*24);
939 reallocAlign((void **)&c->tempSrc, 8, stride*24);
940 reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
941 reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
942 for(i=0; i<256; i++)
943 c->yHistogram[i]= width*height/64*15/256;
944
945 for(i=0; i<3; i++){
946 //Note: The +17*1024 is just there so i do not have to worry about r/w over the end.
aa089f6c
DB
947 reallocAlign((void **)&c->tempBlurred[i], 8, stride*mbHeight*16 + 17*1024);
948 reallocAlign((void **)&c->tempBlurredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
16e0bf73
DB
949 }
950
951 reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
952 reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
953 reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
954 reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
88c0bc7e
MN
955}
956
e7becfb2
DB
/**
 * Name callback referenced by av_codec_context_class.
 *
 * @param ptr ignored
 * @return the constant string "postproc"
 */
static const char *context_to_name(void *ptr)
{
    static const char contextName[] = "postproc";

    (void)ptr;
    return contextName;
}
960
31bfd6f3 961static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
e7becfb2 962
7dfea342 963pp_context *pp_get_context(int width, int height, int cpuCaps){
16e0bf73 964 PPContext *c= av_malloc(sizeof(PPContext));
ef516f73 965 int stride= FFALIGN(width, 16); //assumed / will realloc if needed
16e0bf73
DB
966 int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
967
968 memset(c, 0, sizeof(PPContext));
969 c->av_class = &av_codec_context_class;
970 c->cpuCaps= cpuCaps;
971 if(cpuCaps&PP_FORMAT){
972 c->hChromaSubSample= cpuCaps&0x3;
973 c->vChromaSubSample= (cpuCaps>>4)&0x3;
974 }else{
975 c->hChromaSubSample= 1;
976 c->vChromaSubSample= 1;
977 }
978
979 reallocBuffers(c, width, height, stride, qpStride);
980
981 c->frameNum=-1;
982
983 return c;
45b4f285
MN
984}
985
9cb54f43 986void pp_free_context(void *vc){
16e0bf73
DB
987 PPContext *c = (PPContext*)vc;
988 int i;
115329f1 989
aa089f6c
DB
990 for(i=0; i<3; i++) av_free(c->tempBlurred[i]);
991 for(i=0; i<3; i++) av_free(c->tempBlurredPast[i]);
115329f1 992
16e0bf73
DB
993 av_free(c->tempBlocks);
994 av_free(c->yHistogram);
995 av_free(c->tempDst);
996 av_free(c->tempSrc);
997 av_free(c->deintTemp);
998 av_free(c->stdQPTable);
999 av_free(c->nonBQPTable);
1000 av_free(c->forcedQPTable);
115329f1 1001
16e0bf73 1002 memset(c, 0, sizeof(PPContext));
88c0bc7e 1003
16e0bf73 1004 av_free(c);
9c9e467d
MN
1005}
1006
6c51fd3f 1007void pp_postprocess(const uint8_t * src[3], const int srcStride[3],
16e0bf73
DB
1008 uint8_t * dst[3], const int dstStride[3],
1009 int width, int height,
1010 const QP_STORE_T *QP_store, int QPStride,
7dfea342 1011 pp_mode *vm, void *vc, int pict_type)
911879d1 1012{
16e0bf73
DB
1013 int mbWidth = (width+15)>>4;
1014 int mbHeight= (height+15)>>4;
1015 PPMode *mode = (PPMode*)vm;
1016 PPContext *c = (PPContext*)vc;
1017 int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
1018 int absQPStride = FFABS(QPStride);
1019
1020 // c->stride and c->QPStride are always positive
1021 if(c->stride < minStride || c->qpStride < absQPStride)
1022 reallocBuffers(c, width, height,
1023 FFMAX(minStride, c->stride),
1024 FFMAX(c->qpStride, absQPStride));
1025
1026 if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)){
1027 int i;
1028 QP_store= c->forcedQPTable;
1029 absQPStride = QPStride = 0;
1030 if(mode->lumMode & FORCE_QUANT)
1031 for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
1032 else
1033 for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
1034 }
0426af31 1035
16e0bf73
DB
1036 if(pict_type & PP_PICT_TYPE_QP2){
1037 int i;
1038 const int count= mbHeight * absQPStride;
1039 for(i=0; i<(count>>2); i++){
1040 ((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
bb270c08 1041 }
16e0bf73
DB
1042 for(i<<=2; i<count; i++){
1043 c->stdQPTable[i] = QP_store[i]>>1;
1044 }
1045 QP_store= c->stdQPTable;
1046 QPStride= absQPStride;
1047 }
1048
1049 if(0){
1050 int x,y;
1051 for(y=0; y<mbHeight; y++){
1052 for(x=0; x<mbWidth; x++){
e7becfb2 1053 av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
16e0bf73
DB
1054 }
1055 av_log(c, AV_LOG_INFO, "\n");
bb270c08 1056 }
e7becfb2 1057 av_log(c, AV_LOG_INFO, "\n");
16e0bf73
DB
1058 }
1059
1060 if((pict_type&7)!=3){
1061 if (QPStride >= 0){
1062 int i;
1063 const int count= mbHeight * QPStride;
1064 for(i=0; i<(count>>2); i++){
1065 ((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1066 }
1067 for(i<<=2; i<count; i++){
1068 c->nonBQPTable[i] = QP_store[i] & 0x3F;
1069 }
1070 } else {
1071 int i,j;
1072 for(i=0; i<mbHeight; i++) {
1073 for(j=0; j<absQPStride; j++) {
1074 c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
bb270c08 1075 }
16e0bf73 1076 }
bb270c08 1077 }
16e0bf73 1078 }
bb270c08 1079
16e0bf73
DB
1080 av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
1081 mode->lumMode, mode->chromMode);
bb270c08 1082
16e0bf73 1083 postProcess(src[0], srcStride[0], dst[0], dstStride[0],
bb270c08
DB
1084 width, height, QP_store, QPStride, 0, mode, c);
1085
16e0bf73
DB
1086 width = (width )>>c->hChromaSubSample;
1087 height = (height)>>c->vChromaSubSample;
1088
1089 if(mode->chromMode){
1090 postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1091 width, height, QP_store, QPStride, 1, mode, c);
1092 postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1093 width, height, QP_store, QPStride, 2, mode, c);
1094 }
1095 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
1096 linecpy(dst[1], src[1], height, srcStride[1]);
1097 linecpy(dst[2], src[2], height, srcStride[2]);
1098 }else{
1099 int y;
1100 for(y=0; y<height; y++){
1101 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1102 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
bb270c08 1103 }
16e0bf73 1104 }
911879d1
MN
1105}
1106