avcodec: add a public function, avcodec_fill_audio_frame().
[libav.git] / libpostproc / postprocess.c
CommitLineData
3057fa66 1/*
b78e7197
DB
2 * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
3 *
4 * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
5 *
2912e87a 6 * This file is part of Libav.
b78e7197 7 *
2912e87a 8 * Libav is free software; you can redistribute it and/or modify
b78e7197
DB
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
2912e87a 13 * Libav is distributed in the hope that it will be useful,
b78e7197
DB
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
2912e87a 19 * along with Libav; if not, write to the Free Software
b78e7197
DB
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
3057fa66 22
b304569a 23/**
ba87f080 24 * @file
b304569a
MN
25 * postprocessing.
26 */
115329f1 27
3057fa66 28/*
bb270c08
DB
29 C MMX MMX2 3DNow AltiVec
30isVertDC Ec Ec Ec
31isVertMinMaxOk Ec Ec Ec
32doVertLowPass E e e Ec
33doVertDefFilter Ec Ec e e Ec
34isHorizDC Ec Ec Ec
35isHorizMinMaxOk a E Ec
36doHorizLowPass E e e Ec
37doHorizDefFilter Ec Ec e e Ec
38do_a_deblock Ec E Ec E
39deRing E e e* Ecp
40Vertical RKAlgo1 E a a
41Horizontal RKAlgo1 a a
42Vertical X1# a E E
43Horizontal X1# a E E
44LinIpolDeinterlace e E E*
45CubicIpolDeinterlace a e e*
46LinBlendDeinterlace e E E*
47MedianDeinterlace# E Ec Ec
48TempDeNoiser# E e e Ec
d5a1a995 49
2cab6401 50* I do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
755bfeab 51# more or less selfinvented filters so the exactness is not too meaningful
3057fa66 52E = Exact implementation
04932b0d 53e = almost exact implementation (slightly different rounding,...)
3057fa66
A
54a = alternative / approximate impl
55c = checked against the other implementations (-vo md5)
b0ac780a 56p = partially optimized, still some work to do
3057fa66
A
57*/
58
59/*
60TODO:
3057fa66 61reduce the time wasted on the mem transfer
3057fa66 62unroll stuff if instructions depend too much on the prior one
3057fa66 63move YScale thing to the end instead of fixing QP
13e00528 64write a faster and higher quality deblocking filter :)
d5a1a995 65make the mainloop more flexible (variable number of blocks at once
bb270c08 66 (the if/else stuff per block is slowing things down)
9f45d04d 67compare the quality & speed of all filters
9f45d04d 68split this huge file
8405b3fd 69optimize c versions
117e45b0 70try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
3057fa66 71...
13e00528
A
72*/
73
faa6f1c3 74//Changelog: use git log
3057fa66 75
9858f773 76#include "config.h"
245976da 77#include "libavutil/avutil.h"
3057fa66
A
78#include <inttypes.h>
79#include <stdio.h>
d5a1a995 80#include <stdlib.h>
911879d1 81#include <string.h>
3057fa66 82//#undef HAVE_MMX2
ebc3209a 83//#define HAVE_AMD3DNOW
3057fa66 84//#undef HAVE_MMX
cc9b0679 85//#undef ARCH_X86
7f16f6e6 86//#define DEBUG_BRIGHTNESS
13e00528 87#include "postprocess.h"
c41d972d 88#include "postprocess_internal.h"
1a5e4fd8 89#include "libavutil/avstring.h"
bba9b16c 90
2a4a62bf
SS
91unsigned postproc_version(void)
92{
93 return LIBPOSTPROC_VERSION_INT;
94}
95
41600690 96const char *postproc_configuration(void)
c1736936 97{
29ba0911 98 return LIBAV_CONFIGURATION;
c1736936
DB
99}
100
41600690 101const char *postproc_license(void)
c1736936
DB
102{
103#define LICENSE_PREFIX "libpostproc license: "
a03be6e1 104 return LICENSE_PREFIX LIBAV_LICENSE + sizeof(LICENSE_PREFIX) - 1;
c1736936
DB
105}
106
b250f9c6 107#if HAVE_ALTIVEC_H
a7b2871c
RD
108#include <altivec.h>
109#endif
110
911879d1
MN
111#define GET_MODE_BUFFER_SIZE 500
112#define OPTIONS_ARRAY_SIZE 10
9c9e467d
MN
113#define BLOCK_SIZE 8
114#define TEMP_STRIDE 8
115//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
911879d1 116
b250f9c6 117#if ARCH_X86
2b858d0b
RD
118DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
119DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
120DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
121DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
122DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
123DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
124DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
125DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
b28daef8 126#endif
3057fa66 127
2722e362 128DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
3057fa66 129
9c9e467d 130
911879d1
MN
131static struct PPFilter filters[]=
132{
16e0bf73
DB
133 {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK},
134 {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK},
135/* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER},
136 {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/
137 {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER},
138 {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER},
139 {"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK},
140 {"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK},
141 {"dr", "dering", 1, 5, 6, DERING},
142 {"al", "autolevels", 0, 1, 2, LEVEL_FIX},
143 {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
144 {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
145 {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
146 {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER},
147 {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER},
148 {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER},
149 {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER},
150 {"fq", "forcequant", 1, 0, 0, FORCE_QUANT},
151 {NULL, NULL,0,0,0,0} //End Marker
911879d1
MN
152};
153
/*
 * Alias table used by the mode-string parser: entries come in pairs of
 * (alias name, filter string it expands to). A single NULL ends the list.
 */
static const char *replaceTable[]=
{
    "default",  "hb:a,vb:a,dr:a",
    "de",       "hb:a,vb:a,dr:a",
    "fast",     "h1:a,v1:a,dr:a",
    "fa",       "h1:a,v1:a,dr:a",
    "ac",       "ha:a:128:7,va:a,dr:a",
    NULL /* end marker */
};
163
3057fa66 164
b250f9c6 165#if ARCH_X86
3057fa66
A
/** Issue the x86 "prefetchnta" instruction for the address p. */
static inline void prefetchnta(void *p)
{
    __asm__ volatile(
        "prefetchnta (%0)\n\t"
        : /* no outputs */
        : "r" (p)
    );
}
172
/** Issue the x86 "prefetcht0" instruction for the address p. */
static inline void prefetcht0(void *p)
{
    __asm__ volatile(
        "prefetcht0 (%0)\n\t"
        : /* no outputs */
        : "r" (p)
    );
}
179
/** Issue the x86 "prefetcht1" instruction for the address p. */
static inline void prefetcht1(void *p)
{
    __asm__ volatile(
        "prefetcht1 (%0)\n\t"
        : /* no outputs */
        : "r" (p)
    );
}
186
/** Issue the x86 "prefetcht2" instruction for the address p. */
static inline void prefetcht2(void *p)
{
    __asm__ volatile(
        "prefetcht2 (%0)\n\t"
        : /* no outputs */
        : "r" (p)
    );
}
9a722af7 193#endif
3057fa66 194
04932b0d
DB
195/* The horizontal functions exist only in C because the MMX
196 * code is faster with vertical filters and transposing. */
3057fa66 197
cf5ec61d
MN
198/**
199 * Check if the given 8x8 Block is mostly "flat"
200 */
b0ac780a 201static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
cf5ec61d 202{
16e0bf73
DB
203 int numEq= 0;
204 int y;
205 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
206 const int dcThreshold= dcOffset*2 + 1;
207
208 for(y=0; y<BLOCK_SIZE; y++){
209 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
210 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
211 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
212 if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
213 if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
214 if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
215 if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
216 src+= stride;
217 }
218 return numEq > c->ppMode.flatnessThreshold;
9c9e467d
MN
219}
220
221/**
222 * Check if the middle 8x8 Block in the given 8x16 block is flat
223 */
16e0bf73
DB
224static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c)
225{
226 int numEq= 0;
227 int y;
228 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
229 const int dcThreshold= dcOffset*2 + 1;
230
231 src+= stride*4; // src points to begin of the 8x8 Block
232 for(y=0; y<BLOCK_SIZE-1; y++){
233 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
234 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
235 if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
236 if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
237 if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
238 if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
239 if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
240 if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
241 src+= stride;
242 }
243 return numEq > c->ppMode.flatnessThreshold;
cf5ec61d
MN
244}
245
/**
 * Sample one pixel pair per row over 8 rows and check that none of them
 * differs by more than 2*QP.
 *
 * The sampled column pairs repeat every four rows:
 * (0,5), (2,7), (4,1), (6,3).
 *
 * @return 1 if every sampled difference is within [-2*QP, 2*QP], else 0
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{
    static const unsigned char pairs[4][2]= { {0, 5}, {2, 7}, {4, 1}, {6, 3} };
    int row;

    for(row=0; row<8; row++, src+= stride){
        const unsigned char *pair= pairs[row & 3];

        /* unsigned compare rejects both too-large and negative sums */
        if((unsigned)(src[pair[0]] - src[pair[1]] + 2*QP) > 4*QP)
            return 0;
    }
    return 1;
}
cf5ec61d 261
cb482d25
MN
262static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
263{
16e0bf73
DB
264 int x;
265 src+= stride*4;
266 for(x=0; x<BLOCK_SIZE; x+=4){
267 if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0;
268 if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
269 if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
270 if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
271 }
16e0bf73 272 return 1;
cb482d25
MN
273}
274
16e0bf73
DB
275static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c)
276{
277 if( isHorizDC_C(src, stride, c) ){
278 if( isHorizMinMaxOk_C(src, stride, c->QP) )
279 return 1;
280 else
281 return 0;
282 }else{
283 return 2;
284 }
b0ac780a
MN
285}
286
16e0bf73
DB
287static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c)
288{
289 if( isVertDC_C(src, stride, c) ){
290 if( isVertMinMaxOk_C(src, stride, c->QP) )
291 return 1;
292 else
293 return 0;
294 }else{
295 return 2;
296 }
cf5ec61d
MN
297}
298
b0ac780a 299static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
cf5ec61d 300{
16e0bf73
DB
301 int y;
302 for(y=0; y<BLOCK_SIZE; y++){
303 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
304
305 if(FFABS(middleEnergy) < 8*c->QP){
306 const int q=(dst[3] - dst[4])/2;
307 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
308 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
309
310 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
311 d= FFMAX(d, 0);
312
313 d= (5*d + 32) >> 6;
314 d*= FFSIGN(-middleEnergy);
315
316 if(q>0)
317 {
318 d= d<0 ? 0 : d;
319 d= d>q ? q : d;
320 }
321 else
322 {
323 d= d>0 ? 0 : d;
324 d= d<q ? q : d;
325 }
326
327 dst[3]-= d;
328 dst[4]+= d;
bb270c08 329 }
16e0bf73
DB
330 dst+= stride;
331 }
cf5ec61d
MN
332}
333
334/**
335 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
336 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
337 */
b0ac780a 338static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
cf5ec61d 339{
16e0bf73
DB
340 int y;
341 for(y=0; y<BLOCK_SIZE; y++){
342 const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
343 const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
344
345 int sums[10];
346 sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
347 sums[1] = sums[0] - first + dst[3];
348 sums[2] = sums[1] - first + dst[4];
349 sums[3] = sums[2] - first + dst[5];
350 sums[4] = sums[3] - first + dst[6];
351 sums[5] = sums[4] - dst[0] + dst[7];
352 sums[6] = sums[5] - dst[1] + last;
353 sums[7] = sums[6] - dst[2] + last;
354 sums[8] = sums[7] - dst[3] + last;
355 sums[9] = sums[8] - dst[4] + last;
356
357 dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
358 dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
359 dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
360 dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
361 dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
362 dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
363 dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
364 dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
365
366 dst+= stride;
367 }
cf5ec61d
MN
368}
369
4e4dcbc5 370/**
cc9b0679
MN
371 * Experimental Filter 1 (Horizontal)
372 * will not damage linear gradients
bd107136 373 * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
755bfeab
DB
374 * can only smooth blocks at the expected locations (it cannot smooth them if they did move)
375 * MMX2 version does correct clipping C version does not
cc9b0679 376 * not identical with the vertical one
4e4dcbc5 377 */
cc9b0679
MN
378static inline void horizX1Filter(uint8_t *src, int stride, int QP)
379{
16e0bf73
DB
380 int y;
381 static uint64_t *lut= NULL;
382 if(lut==NULL)
383 {
384 int i;
385 lut = av_malloc(256*8);
386 for(i=0; i<256; i++)
bb270c08 387 {
16e0bf73 388 int v= i < 128 ? 2*i : 2*(i-256);
117e45b0 389/*
cc9b0679 390//Simulate 112242211 9-Tap filter
16e0bf73
DB
391 uint64_t a= (v/16) & 0xFF;
392 uint64_t b= (v/8) & 0xFF;
393 uint64_t c= (v/4) & 0xFF;
394 uint64_t d= (3*v/8) & 0xFF;
117e45b0 395*/
cc9b0679 396//Simulate piecewise linear interpolation
16e0bf73
DB
397 uint64_t a= (v/16) & 0xFF;
398 uint64_t b= (v*3/16) & 0xFF;
399 uint64_t c= (v*5/16) & 0xFF;
400 uint64_t d= (7*v/16) & 0xFF;
401 uint64_t A= (0x100 - a)&0xFF;
402 uint64_t B= (0x100 - b)&0xFF;
403 uint64_t C= (0x100 - c)&0xFF;
404 uint64_t D= (0x100 - c)&0xFF;
405
406 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
407 (D<<24) | (C<<16) | (B<<8) | (A);
408 //lut[i] = (v<<32) | (v<<24);
bb270c08 409 }
16e0bf73 410 }
bb270c08 411
16e0bf73
DB
412 for(y=0; y<BLOCK_SIZE; y++){
413 int a= src[1] - src[2];
414 int b= src[3] - src[4];
415 int c= src[5] - src[6];
bb270c08 416
16e0bf73 417 int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
bb270c08 418
16e0bf73
DB
419 if(d < QP){
420 int v = d * FFSIGN(-b);
bb270c08 421
16e0bf73
DB
422 src[1] +=v/8;
423 src[2] +=v/4;
424 src[3] +=3*v/8;
425 src[4] -=3*v/8;
426 src[5] -=v/4;
427 src[6] -=v/8;
bb270c08 428 }
16e0bf73
DB
429 src+=stride;
430 }
cc9b0679
MN
431}
432
12eebd26
MN
433/**
434 * accurate deblock filter
435 */
849f1035 436static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
16e0bf73
DB
437 int y;
438 const int QP= c->QP;
439 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
440 const int dcThreshold= dcOffset*2 + 1;
12eebd26 441//START_TIMER
16e0bf73
DB
442 src+= step*4; // src points to begin of the 8x8 Block
443 for(y=0; y<8; y++){
444 int numEq= 0;
445
446 if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
447 if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
448 if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
449 if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
450 if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
451 if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
452 if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
453 if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
454 if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
455 if(numEq > c->ppMode.flatnessThreshold){
456 int min, max, x;
457
458 if(src[0] > src[step]){
459 max= src[0];
460 min= src[step];
461 }else{
462 max= src[step];
463 min= src[0];
464 }
465 for(x=2; x<8; x+=2){
466 if(src[x*step] > src[(x+1)*step]){
467 if(src[x *step] > max) max= src[ x *step];
468 if(src[(x+1)*step] < min) min= src[(x+1)*step];
bb270c08 469 }else{
16e0bf73
DB
470 if(src[(x+1)*step] > max) max= src[(x+1)*step];
471 if(src[ x *step] < min) min= src[ x *step];
472 }
473 }
474 if(max-min < 2*QP){
475 const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
476 const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
477
478 int sums[10];
479 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
480 sums[1] = sums[0] - first + src[3*step];
481 sums[2] = sums[1] - first + src[4*step];
482 sums[3] = sums[2] - first + src[5*step];
483 sums[4] = sums[3] - first + src[6*step];
484 sums[5] = sums[4] - src[0*step] + src[7*step];
485 sums[6] = sums[5] - src[1*step] + last;
486 sums[7] = sums[6] - src[2*step] + last;
487 sums[8] = sums[7] - src[3*step] + last;
488 sums[9] = sums[8] - src[4*step] + last;
489
490 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
491 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
492 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
493 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
494 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
495 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
496 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
497 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
498 }
499 }else{
500 const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
501
502 if(FFABS(middleEnergy) < 8*QP){
503 const int q=(src[3*step] - src[4*step])/2;
504 const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
505 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
506
507 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
508 d= FFMAX(d, 0);
509
510 d= (5*d + 32) >> 6;
511 d*= FFSIGN(-middleEnergy);
512
513 if(q>0){
514 d= d<0 ? 0 : d;
515 d= d>q ? q : d;
516 }else{
517 d= d>0 ? 0 : d;
518 d= d<q ? q : d;
bb270c08
DB
519 }
520
16e0bf73
DB
521 src[3*step]-= d;
522 src[4*step]+= d;
523 }
bb270c08 524 }
16e0bf73
DB
525
526 src += stride;
527 }
12eebd26
MN
528/*if(step==16){
529 STOP_TIMER("step16")
530}else{
531 STOP_TIMER("stepX")
532}*/
533}
cc9b0679 534
e89952aa 535//Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
cc9b0679 536//Plain C versions
e90f5b5a 537#if !(HAVE_MMX || HAVE_ALTIVEC) || CONFIG_RUNTIME_CPUDETECT
e89952aa
MN
538#define COMPILE_C
539#endif
540
b250f9c6 541#if HAVE_ALTIVEC
b0ac780a 542#define COMPILE_ALTIVEC
b0ac780a 543#endif //HAVE_ALTIVEC
b0ac780a 544
b250f9c6 545#if ARCH_X86
e89952aa 546
e90f5b5a 547#if (HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
e89952aa
MN
548#define COMPILE_MMX
549#endif
550
e90f5b5a 551#if HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT
e89952aa
MN
552#define COMPILE_MMX2
553#endif
554
e90f5b5a 555#if (HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
e89952aa
MN
556#define COMPILE_3DNOW
557#endif
b250f9c6 558#endif /* ARCH_X86 */
e89952aa
MN
559
560#undef HAVE_MMX
b250f9c6 561#define HAVE_MMX 0
e89952aa 562#undef HAVE_MMX2
b250f9c6 563#define HAVE_MMX2 0
ebc3209a
DB
564#undef HAVE_AMD3DNOW
565#define HAVE_AMD3DNOW 0
b0ac780a 566#undef HAVE_ALTIVEC
b250f9c6 567#define HAVE_ALTIVEC 0
e89952aa
MN
568
569#ifdef COMPILE_C
cc9b0679
MN
570#define RENAME(a) a ## _C
571#include "postprocess_template.c"
e89952aa 572#endif
cc9b0679 573
b0ac780a
MN
574#ifdef COMPILE_ALTIVEC
575#undef RENAME
b250f9c6
AJ
576#undef HAVE_ALTIVEC
577#define HAVE_ALTIVEC 1
b0ac780a
MN
578#define RENAME(a) a ## _altivec
579#include "postprocess_altivec_template.c"
580#include "postprocess_template.c"
581#endif
b0ac780a 582
cc9b0679 583//MMX versions
e89952aa 584#ifdef COMPILE_MMX
cc9b0679 585#undef RENAME
b250f9c6
AJ
586#undef HAVE_MMX
587#define HAVE_MMX 1
cc9b0679
MN
588#define RENAME(a) a ## _MMX
589#include "postprocess_template.c"
e89952aa 590#endif
cc9b0679
MN
591
592//MMX2 versions
e89952aa 593#ifdef COMPILE_MMX2
cc9b0679 594#undef RENAME
b250f9c6
AJ
595#undef HAVE_MMX
596#undef HAVE_MMX2
597#define HAVE_MMX 1
598#define HAVE_MMX2 1
cc9b0679
MN
599#define RENAME(a) a ## _MMX2
600#include "postprocess_template.c"
e89952aa 601#endif
cc9b0679
MN
602
603//3DNOW versions
e89952aa 604#ifdef COMPILE_3DNOW
cc9b0679 605#undef RENAME
b250f9c6 606#undef HAVE_MMX
cc9b0679 607#undef HAVE_MMX2
ebc3209a 608#undef HAVE_AMD3DNOW
b250f9c6
AJ
609#define HAVE_MMX 1
610#define HAVE_MMX2 0
ebc3209a 611#define HAVE_AMD3DNOW 1
cc9b0679
MN
612#define RENAME(a) a ## _3DNow
613#include "postprocess_template.c"
e89952aa 614#endif
cc9b0679 615
755bfeab 616// minor note: the HAVE_xyz is messed up after that line so do not use it.
cc9b0679 617
6c51fd3f 618static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
7dfea342 619 const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
cc9b0679 620{
16e0bf73
DB
621 PPContext *c= (PPContext *)vc;
622 PPMode *ppMode= (PPMode *)vm;
623 c->ppMode= *ppMode; //FIXME
9c9e467d 624
16e0bf73
DB
625 // Using ifs here as they are faster than function pointers although the
626 // difference would not be measurable here but it is much better because
627 // someone might exchange the CPU whithout restarting MPlayer ;)
e90f5b5a 628#if CONFIG_RUNTIME_CPUDETECT
b250f9c6 629#if ARCH_X86
16e0bf73
DB
630 // ordered per speed fastest first
631 if(c->cpuCaps & PP_CPU_CAPS_MMX2)
632 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
633 else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
634 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
635 else if(c->cpuCaps & PP_CPU_CAPS_MMX)
636 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
637 else
638 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
cc9b0679 639#else
b250f9c6 640#if HAVE_ALTIVEC
16e0bf73
DB
641 if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
642 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
643 else
b0ac780a 644#endif
16e0bf73 645 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
be44a4d7 646#endif
c68fafe0 647#else /* CONFIG_RUNTIME_CPUDETECT */
b250f9c6 648#if HAVE_MMX2
16e0bf73 649 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
ebc3209a 650#elif HAVE_AMD3DNOW
16e0bf73 651 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
b250f9c6 652#elif HAVE_MMX
16e0bf73 653 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
b250f9c6 654#elif HAVE_ALTIVEC
16e0bf73 655 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 656#else
16e0bf73 657 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 658#endif
c68fafe0 659#endif /* !CONFIG_RUNTIME_CPUDETECT */
117e45b0
MN
660}
661
cc9b0679 662//static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
bb270c08 663// QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
13e00528 664
911879d1 665/* -pp Command line Help
911879d1 666*/
69fdc40d 667const char pp_help[] =
bf69c4e5 668"Available postprocessing filters:\n"
bb270c08
DB
669"Filters Options\n"
670"short long name short long option Description\n"
671"* * a autoq CPU power dependent enabler\n"
672" c chrom chrominance filtering enabled\n"
673" y nochrom chrominance filtering disabled\n"
674" n noluma luma filtering disabled\n"
675"hb hdeblock (2 threshold) horizontal deblocking filter\n"
676" 1. difference factor: default=32, higher -> more deblocking\n"
677" 2. flatness threshold: default=39, lower -> more deblocking\n"
678" the h & v deblocking filters share these\n"
679" so you can't set different thresholds for h / v\n"
680"vb vdeblock (2 threshold) vertical deblocking filter\n"
681"ha hadeblock (2 threshold) horizontal deblocking filter\n"
682"va vadeblock (2 threshold) vertical deblocking filter\n"
683"h1 x1hdeblock experimental h deblock filter 1\n"
684"v1 x1vdeblock experimental v deblock filter 1\n"
685"dr dering deringing filter\n"
686"al autolevels automatic brightness / contrast\n"
687" f fullyrange stretch luminance to (0..255)\n"
688"lb linblenddeint linear blend deinterlacer\n"
689"li linipoldeint linear interpolating deinterlace\n"
690"ci cubicipoldeint cubic interpolating deinterlacer\n"
691"md mediandeint median deinterlacer\n"
692"fd ffmpegdeint ffmpeg deinterlacer\n"
693"l5 lowpass5 FIR lowpass deinterlacer\n"
694"de default hb:a,vb:a,dr:a\n"
695"fa fast h1:a,v1:a,dr:a\n"
696"ac ha:a:128:7,va:a,dr:a\n"
697"tn tmpnoise (3 threshold) temporal noise reducer\n"
698" 1. <= 2. <= 3. larger -> stronger filtering\n"
699"fq forceQuant <quantizer> force quantizer\n"
bf69c4e5
DB
700"Usage:\n"
701"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
702"long form example:\n"
bb270c08 703"vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
bf69c4e5 704"short form example:\n"
bb270c08 705"vb:a/hb:a/lb de,-vb\n"
bf69c4e5
DB
706"more examples:\n"
707"tn:64:128:256\n"
14b005d0 708"\n"
4b001a13 709;
911879d1 710
7dfea342 711pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality)
911879d1 712{
16e0bf73
DB
713 char temp[GET_MODE_BUFFER_SIZE];
714 char *p= temp;
715 static const char filterDelimiters[] = ",/";
716 static const char optionDelimiters[] = ":";
717 struct PPMode *ppMode;
718 char *filterToken;
719
720 ppMode= av_malloc(sizeof(PPMode));
721
722 ppMode->lumMode= 0;
723 ppMode->chromMode= 0;
724 ppMode->maxTmpNoise[0]= 700;
725 ppMode->maxTmpNoise[1]= 1500;
726 ppMode->maxTmpNoise[2]= 3000;
727 ppMode->maxAllowedY= 234;
728 ppMode->minAllowedY= 16;
729 ppMode->baseDcDiff= 256/8;
730 ppMode->flatnessThreshold= 56-16-1;
731 ppMode->maxClippedThreshold= 0.01;
732 ppMode->error=0;
733
f4f3300c
PK
734 memset(temp, 0, GET_MODE_BUFFER_SIZE);
735 av_strlcpy(temp, name, GET_MODE_BUFFER_SIZE - 1);
16e0bf73
DB
736
737 av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
738
739 for(;;){
740 char *filterName;
741 int q= 1000000; //PP_QUALITY_MAX;
742 int chrom=-1;
743 int luma=-1;
744 char *option;
745 char *options[OPTIONS_ARRAY_SIZE];
746 int i;
747 int filterNameOk=0;
748 int numOfUnknownOptions=0;
749 int enable=1; //does the user want us to enabled or disabled the filter
750
751 filterToken= strtok(p, filterDelimiters);
752 if(filterToken == NULL) break;
753 p+= strlen(filterToken) + 1; // p points to next filterToken
754 filterName= strtok(filterToken, optionDelimiters);
755 av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
756
757 if(*filterName == '-'){
758 enable=0;
759 filterName++;
760 }
bb270c08 761
16e0bf73
DB
762 for(;;){ //for all options
763 option= strtok(NULL, optionDelimiters);
764 if(option == NULL) break;
765
766 av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
767 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
768 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
769 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
770 else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
771 else{
772 options[numOfUnknownOptions] = option;
773 numOfUnknownOptions++;
774 }
775 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
776 }
777 options[numOfUnknownOptions] = NULL;
778
779 /* replace stuff from the replace Table */
780 for(i=0; replaceTable[2*i]!=NULL; i++){
781 if(!strcmp(replaceTable[2*i], filterName)){
782 int newlen= strlen(replaceTable[2*i + 1]);
783 int plen;
784 int spaceLeft;
785
786 if(p==NULL) p= temp, *p=0; //last filter
787 else p--, *p=','; //not last filter
788
789 plen= strlen(p);
790 spaceLeft= p - temp + plen;
f4f3300c 791 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE - 1){
16e0bf73
DB
792 ppMode->error++;
793 break;
bb270c08 794 }
16e0bf73
DB
795 memmove(p + newlen, p, plen+1);
796 memcpy(p, replaceTable[2*i + 1], newlen);
797 filterNameOk=1;
798 }
799 }
bb270c08 800
16e0bf73
DB
801 for(i=0; filters[i].shortName!=NULL; i++){
802 if( !strcmp(filters[i].longName, filterName)
803 || !strcmp(filters[i].shortName, filterName)){
804 ppMode->lumMode &= ~filters[i].mask;
805 ppMode->chromMode &= ~filters[i].mask;
806
807 filterNameOk=1;
808 if(!enable) break; // user wants to disable it
809
810 if(q >= filters[i].minLumQuality && luma)
811 ppMode->lumMode|= filters[i].mask;
812 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
813 if(q >= filters[i].minChromQuality)
814 ppMode->chromMode|= filters[i].mask;
815
816 if(filters[i].mask == LEVEL_FIX){
817 int o;
818 ppMode->minAllowedY= 16;
819 ppMode->maxAllowedY= 234;
820 for(o=0; options[o]!=NULL; o++){
821 if( !strcmp(options[o],"fullyrange")
822 ||!strcmp(options[o],"f")){
823 ppMode->minAllowedY= 0;
824 ppMode->maxAllowedY= 255;
825 numOfUnknownOptions--;
bb270c08 826 }
16e0bf73 827 }
bb270c08 828 }
16e0bf73 829 else if(filters[i].mask == TEMP_NOISE_FILTER)
bb270c08 830 {
16e0bf73
DB
831 int o;
832 int numOfNoises=0;
833
834 for(o=0; options[o]!=NULL; o++){
835 char *tail;
836 ppMode->maxTmpNoise[numOfNoises]=
837 strtol(options[o], &tail, 0);
838 if(tail!=options[o]){
839 numOfNoises++;
840 numOfUnknownOptions--;
841 if(numOfNoises >= 3) break;
bb270c08 842 }
16e0bf73 843 }
bb270c08 844 }
16e0bf73
DB
845 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK
846 || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
847 int o;
848
849 for(o=0; options[o]!=NULL && o<2; o++){
850 char *tail;
851 int val= strtol(options[o], &tail, 0);
852 if(tail==options[o]) break;
853
854 numOfUnknownOptions--;
855 if(o==0) ppMode->baseDcDiff= val;
856 else ppMode->flatnessThreshold= val;
857 }
858 }
859 else if(filters[i].mask == FORCE_QUANT){
860 int o;
861 ppMode->forcedQuant= 15;
862
863 for(o=0; options[o]!=NULL && o<1; o++){
864 char *tail;
865 int val= strtol(options[o], &tail, 0);
866 if(tail==options[o]) break;
867
868 numOfUnknownOptions--;
869 ppMode->forcedQuant= val;
870 }
871 }
872 }
bb270c08 873 }
16e0bf73
DB
874 if(!filterNameOk) ppMode->error++;
875 ppMode->error += numOfUnknownOptions;
876 }
877
878 av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
879 if(ppMode->error){
880 av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
881 av_free(ppMode);
882 return NULL;
883 }
884 return ppMode;
911879d1
MN
885}
886
7dfea342 887void pp_free_mode(pp_mode *mode){
6ab6c7c3 888 av_free(mode);
c41d972d
MN
889}
890
/**
 * Replace the buffer *p with a new zero-filled buffer of "size" bytes.
 *
 * The old buffer is freed first and its contents are not preserved.
 * The alignment argument is currently not used; the result of av_mallocz()
 * is stored as is, so *p may be NULL afterwards.
 */
static void reallocAlign(void **p, int alignment, int size)
{
    void *fresh;

    av_free(*p);
    fresh= av_mallocz(size);
    *p= fresh;
}
895
0426af31 896static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
16e0bf73
DB
897 int mbWidth = (width+15)>>4;
898 int mbHeight= (height+15)>>4;
899 int i;
900
901 c->stride= stride;
902 c->qpStride= qpStride;
903
904 reallocAlign((void **)&c->tempDst, 8, stride*24);
905 reallocAlign((void **)&c->tempSrc, 8, stride*24);
906 reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
907 reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
908 for(i=0; i<256; i++)
909 c->yHistogram[i]= width*height/64*15/256;
910
911 for(i=0; i<3; i++){
da9cea77 912 //Note: The +17*1024 is just there so I do not have to worry about r/w over the end.
aa089f6c
DB
913 reallocAlign((void **)&c->tempBlurred[i], 8, stride*mbHeight*16 + 17*1024);
914 reallocAlign((void **)&c->tempBlurredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
16e0bf73
DB
915 }
916
917 reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
918 reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
919 reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
920 reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
88c0bc7e
MN
921}
922
e7becfb2
DB
/**
 * Name callback stored in the AVClass of postprocessing contexts.
 *
 * @param ptr ignored
 * @return the constant string "postproc"
 */
static const char *context_to_name(void *ptr)
{
    static const char name[] = "postproc";

    return name;
}
926
31bfd6f3 927static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
e7becfb2 928
7dfea342 929pp_context *pp_get_context(int width, int height, int cpuCaps){
16e0bf73 930 PPContext *c= av_malloc(sizeof(PPContext));
ef516f73 931 int stride= FFALIGN(width, 16); //assumed / will realloc if needed
16e0bf73
DB
932 int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
933
934 memset(c, 0, sizeof(PPContext));
935 c->av_class = &av_codec_context_class;
936 c->cpuCaps= cpuCaps;
937 if(cpuCaps&PP_FORMAT){
938 c->hChromaSubSample= cpuCaps&0x3;
939 c->vChromaSubSample= (cpuCaps>>4)&0x3;
940 }else{
941 c->hChromaSubSample= 1;
942 c->vChromaSubSample= 1;
943 }
944
945 reallocBuffers(c, width, height, stride, qpStride);
946
947 c->frameNum=-1;
948
949 return c;
45b4f285
MN
950}
951
9cb54f43 952void pp_free_context(void *vc){
16e0bf73
DB
953 PPContext *c = (PPContext*)vc;
954 int i;
115329f1 955
aa089f6c
DB
956 for(i=0; i<3; i++) av_free(c->tempBlurred[i]);
957 for(i=0; i<3; i++) av_free(c->tempBlurredPast[i]);
115329f1 958
16e0bf73
DB
959 av_free(c->tempBlocks);
960 av_free(c->yHistogram);
961 av_free(c->tempDst);
962 av_free(c->tempSrc);
963 av_free(c->deintTemp);
964 av_free(c->stdQPTable);
965 av_free(c->nonBQPTable);
966 av_free(c->forcedQPTable);
115329f1 967
16e0bf73 968 memset(c, 0, sizeof(PPContext));
88c0bc7e 969
16e0bf73 970 av_free(c);
9c9e467d
MN
971}
972
6c51fd3f 973void pp_postprocess(const uint8_t * src[3], const int srcStride[3],
16e0bf73
DB
974 uint8_t * dst[3], const int dstStride[3],
975 int width, int height,
976 const QP_STORE_T *QP_store, int QPStride,
7dfea342 977 pp_mode *vm, void *vc, int pict_type)
911879d1 978{
16e0bf73
DB
979 int mbWidth = (width+15)>>4;
980 int mbHeight= (height+15)>>4;
981 PPMode *mode = (PPMode*)vm;
982 PPContext *c = (PPContext*)vc;
983 int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
984 int absQPStride = FFABS(QPStride);
985
986 // c->stride and c->QPStride are always positive
987 if(c->stride < minStride || c->qpStride < absQPStride)
988 reallocBuffers(c, width, height,
989 FFMAX(minStride, c->stride),
990 FFMAX(c->qpStride, absQPStride));
991
992 if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)){
993 int i;
994 QP_store= c->forcedQPTable;
995 absQPStride = QPStride = 0;
996 if(mode->lumMode & FORCE_QUANT)
997 for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
998 else
999 for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
1000 }
0426af31 1001
16e0bf73
DB
1002 if(pict_type & PP_PICT_TYPE_QP2){
1003 int i;
1004 const int count= mbHeight * absQPStride;
1005 for(i=0; i<(count>>2); i++){
1006 ((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
bb270c08 1007 }
16e0bf73
DB
1008 for(i<<=2; i<count; i++){
1009 c->stdQPTable[i] = QP_store[i]>>1;
1010 }
1011 QP_store= c->stdQPTable;
1012 QPStride= absQPStride;
1013 }
1014
1015 if(0){
1016 int x,y;
1017 for(y=0; y<mbHeight; y++){
1018 for(x=0; x<mbWidth; x++){
e7becfb2 1019 av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
16e0bf73
DB
1020 }
1021 av_log(c, AV_LOG_INFO, "\n");
bb270c08 1022 }
e7becfb2 1023 av_log(c, AV_LOG_INFO, "\n");
16e0bf73
DB
1024 }
1025
1026 if((pict_type&7)!=3){
1027 if (QPStride >= 0){
1028 int i;
1029 const int count= mbHeight * QPStride;
1030 for(i=0; i<(count>>2); i++){
1031 ((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1032 }
1033 for(i<<=2; i<count; i++){
1034 c->nonBQPTable[i] = QP_store[i] & 0x3F;
1035 }
1036 } else {
1037 int i,j;
1038 for(i=0; i<mbHeight; i++) {
1039 for(j=0; j<absQPStride; j++) {
1040 c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
bb270c08 1041 }
16e0bf73 1042 }
bb270c08 1043 }
16e0bf73 1044 }
bb270c08 1045
16e0bf73
DB
1046 av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
1047 mode->lumMode, mode->chromMode);
bb270c08 1048
16e0bf73 1049 postProcess(src[0], srcStride[0], dst[0], dstStride[0],
bb270c08
DB
1050 width, height, QP_store, QPStride, 0, mode, c);
1051
16e0bf73
DB
1052 width = (width )>>c->hChromaSubSample;
1053 height = (height)>>c->vChromaSubSample;
1054
1055 if(mode->chromMode){
1056 postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1057 width, height, QP_store, QPStride, 1, mode, c);
1058 postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1059 width, height, QP_store, QPStride, 2, mode, c);
1060 }
1061 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
1062 linecpy(dst[1], src[1], height, srcStride[1]);
1063 linecpy(dst[2], src[2], height, srcStride[2]);
1064 }else{
1065 int y;
1066 for(y=0; y<height; y++){
1067 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1068 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
bb270c08 1069 }
16e0bf73 1070 }
911879d1 1071}