Include libavformat/avformat.h before all the other libav* headers.
[libav.git] / libpostproc / postprocess.c
CommitLineData
3057fa66 1/*
b78e7197
DB
2 * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
3 *
4 * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
5 *
ef85972b 6 * This file is part of FFmpeg.
b78e7197
DB
7 *
8 * FFmpeg is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
3057fa66 22
b304569a
MN
23/**
24 * @file postprocess.c
25 * postprocessing.
26 */
115329f1 27
3057fa66 28/*
bb270c08
DB
29 C MMX MMX2 3DNow AltiVec
30isVertDC Ec Ec Ec
31isVertMinMaxOk Ec Ec Ec
32doVertLowPass E e e Ec
33doVertDefFilter Ec Ec e e Ec
34isHorizDC Ec Ec Ec
35isHorizMinMaxOk a E Ec
36doHorizLowPass E e e Ec
37doHorizDefFilter Ec Ec e e Ec
38do_a_deblock Ec E Ec E
39deRing E e e* Ecp
40Vertical RKAlgo1 E a a
41Horizontal RKAlgo1 a a
42Vertical X1# a E E
43Horizontal X1# a E E
44LinIpolDeinterlace e E E*
45CubicIpolDeinterlace a e e*
46LinBlendDeinterlace e E E*
47MedianDeinterlace# E Ec Ec
48TempDeNoiser# E e e Ec
d5a1a995 49
2cab6401 50* I do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
755bfeab 51# more or less self-invented filters, so the exactness is not too meaningful
3057fa66 52E = Exact implementation
04932b0d 53e = almost exact implementation (slightly different rounding,...)
3057fa66
A
54a = alternative / approximate impl
55c = checked against the other implementations (-vo md5)
b0ac780a 56p = partially optimized, still some work to do
3057fa66
A
57*/
58
59/*
60TODO:
3057fa66 61reduce the time wasted on the mem transfer
3057fa66 62unroll stuff if instructions depend too much on the prior one
3057fa66 63move YScale thing to the end instead of fixing QP
13e00528 64write a faster and higher quality deblocking filter :)
d5a1a995 65make the mainloop more flexible (variable number of blocks at once
bb270c08 66 (the if/else stuff per block is slowing things down)
9f45d04d 67compare the quality & speed of all filters
9f45d04d 68split this huge file
8405b3fd 69optimize c versions
117e45b0 70try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
3057fa66 71...
13e00528
A
72*/
73
36b1b0bc 74//Changelog: use the Subversion log
3057fa66 75
9858f773 76#include "config.h"
245976da 77#include "libavutil/avutil.h"
3057fa66
A
78#include <inttypes.h>
79#include <stdio.h>
d5a1a995 80#include <stdlib.h>
911879d1 81#include <string.h>
3057fa66 82//#undef HAVE_MMX2
13e00528 83//#define HAVE_3DNOW
3057fa66 84//#undef HAVE_MMX
cc9b0679 85//#undef ARCH_X86
7f16f6e6 86//#define DEBUG_BRIGHTNESS
13e00528 87#include "postprocess.h"
c41d972d 88#include "postprocess_internal.h"
bba9b16c 89
2a4a62bf
SS
90unsigned postproc_version(void)
91{
92 return LIBPOSTPROC_VERSION_INT;
93}
94
a7b2871c
RD
95#ifdef HAVE_ALTIVEC_H
96#include <altivec.h>
97#endif
98
911879d1
MN
99#define GET_MODE_BUFFER_SIZE 500
100#define OPTIONS_ARRAY_SIZE 10
9c9e467d
MN
101#define BLOCK_SIZE 8
102#define TEMP_STRIDE 8
103//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
911879d1 104
3cd52279 105#if defined(ARCH_X86)
2b858d0b
RD
106DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
107DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
108DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
109DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
110DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
111DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
112DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
113DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
b28daef8 114#endif
3057fa66 115
2722e362 116DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
3057fa66 117
9c9e467d 118
911879d1
MN
119static struct PPFilter filters[]=
120{
16e0bf73
DB
121 {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK},
122 {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK},
123/* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER},
124 {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/
125 {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER},
126 {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER},
127 {"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK},
128 {"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK},
129 {"dr", "dering", 1, 5, 6, DERING},
130 {"al", "autolevels", 0, 1, 2, LEVEL_FIX},
131 {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
132 {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
133 {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
134 {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER},
135 {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER},
136 {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER},
137 {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER},
138 {"fq", "forcequant", 1, 0, 0, FORCE_QUANT},
139 {NULL, NULL,0,0,0,0} //End Marker
911879d1
MN
140};
141
/**
 * Aliases that expand to a list of filters.
 *
 * Entries come in pairs: the alias name at an even index and its replacement
 * text at the following odd index. A single NULL at an even index ends the
 * table. Neither the strings nor the pointers are ever modified, so both are
 * const.
 */
static const char * const replaceTable[]=
{
    "default",      "hb:a,vb:a,dr:a",
    "de",           "hb:a,vb:a,dr:a",
    "fast",         "h1:a,v1:a,dr:a",
    "fa",           "h1:a,v1:a,dr:a",
    "ac",           "ha:a:128:7,va:a,dr:a",
    NULL //End Marker
};
151
3057fa66 152
3cd52279 153#if defined(ARCH_X86)
3057fa66
A
/**
 * Execute the x86 prefetchnta instruction on the address p.
 */
static inline void prefetchnta(void *p)
{
    __asm__ volatile(
        "prefetchnta (%[addr])\n\t"
        :
        : [addr] "r" (p)
    );
}
160
/**
 * Execute the x86 prefetcht0 instruction on the address p.
 */
static inline void prefetcht0(void *p)
{
    __asm__ volatile(
        "prefetcht0 (%[addr])\n\t"
        :
        : [addr] "r" (p)
    );
}
167
/**
 * Execute the x86 prefetcht1 instruction on the address p.
 */
static inline void prefetcht1(void *p)
{
    __asm__ volatile(
        "prefetcht1 (%[addr])\n\t"
        :
        : [addr] "r" (p)
    );
}
174
/**
 * Execute the x86 prefetcht2 instruction on the address p.
 */
static inline void prefetcht2(void *p)
{
    __asm__ volatile(
        "prefetcht2 (%[addr])\n\t"
        :
        : [addr] "r" (p)
    );
}
9a722af7 181#endif
3057fa66 182
04932b0d
DB
183/* The horizontal functions exist only in C because the MMX
184 * code is faster with vertical filters and transposing. */
3057fa66 185
cf5ec61d
MN
186/**
187 * Check if the given 8x8 Block is mostly "flat"
188 */
b0ac780a 189static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
cf5ec61d 190{
16e0bf73
DB
191 int numEq= 0;
192 int y;
193 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
194 const int dcThreshold= dcOffset*2 + 1;
195
196 for(y=0; y<BLOCK_SIZE; y++){
197 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
198 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
199 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
200 if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
201 if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
202 if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
203 if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
204 src+= stride;
205 }
206 return numEq > c->ppMode.flatnessThreshold;
9c9e467d
MN
207}
208
209/**
210 * Check if the middle 8x8 Block in the given 8x16 block is flat
211 */
16e0bf73
DB
212static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c)
213{
214 int numEq= 0;
215 int y;
216 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
217 const int dcThreshold= dcOffset*2 + 1;
218
219 src+= stride*4; // src points to begin of the 8x8 Block
220 for(y=0; y<BLOCK_SIZE-1; y++){
221 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
222 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
223 if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
224 if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
225 if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
226 if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
227 if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
228 if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
229 src+= stride;
230 }
231 return numEq > c->ppMode.flatnessThreshold;
cf5ec61d
MN
232}
233
/**
 * Sparse check that the horizontal variation inside an 8x8 block is small.
 *
 * One pair of pixels, 5 columns apart (modulo 8), is sampled on each of the
 * 8 lines: columns (0,5), (2,7), (4,1), (6,3), repeated for lines 4..7.
 *
 * @param src    top-left pixel of the block
 * @param stride distance between two lines
 * @param QP     quantizer; a sampled pair may differ by at most 2*QP
 * @return 1 if every sampled pair is within the limit, 0 otherwise
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{
    static const int colA[4]= {0, 2, 4, 6};
    static const int colB[4]= {5, 7, 1, 3};
    int line;

    for(line=0; line<8; line++, src+= stride){
        const int k= line & 3;
        // the unsigned cast turns the -2*QP..2*QP range test into one compare
        if((unsigned)(src[colA[k]] - src[colB[k]] + 2*QP) > 4*QP)
            return 0;
    }
    return 1;
}
cf5ec61d 256
cb482d25
MN
257static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
258{
259#if 1
260#if 1
16e0bf73
DB
261 int x;
262 src+= stride*4;
263 for(x=0; x<BLOCK_SIZE; x+=4){
264 if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0;
265 if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
266 if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
267 if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
268 }
cb482d25 269#else
16e0bf73
DB
270 int x;
271 src+= stride*3;
272 for(x=0; x<BLOCK_SIZE; x++){
273 if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
274 }
cb482d25 275#endif
16e0bf73 276 return 1;
cb482d25 277#else
16e0bf73
DB
278 int x;
279 src+= stride*4;
280 for(x=0; x<BLOCK_SIZE; x++){
281 int min=255;
282 int max=0;
283 int y;
284 for(y=0; y<8; y++){
285 int v= src[x + y*stride];
286 if(v>max) max=v;
287 if(v<min) min=v;
bb270c08 288 }
16e0bf73
DB
289 if(max-min > 2*QP) return 0;
290 }
291 return 1;
cb482d25
MN
292#endif
293}
294
16e0bf73
DB
295static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c)
296{
297 if( isHorizDC_C(src, stride, c) ){
298 if( isHorizMinMaxOk_C(src, stride, c->QP) )
299 return 1;
300 else
301 return 0;
302 }else{
303 return 2;
304 }
b0ac780a
MN
305}
306
16e0bf73
DB
307static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c)
308{
309 if( isVertDC_C(src, stride, c) ){
310 if( isVertMinMaxOk_C(src, stride, c->QP) )
311 return 1;
312 else
313 return 0;
314 }else{
315 return 2;
316 }
cf5ec61d
MN
317}
318
b0ac780a 319static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
cf5ec61d 320{
16e0bf73
DB
321 int y;
322 for(y=0; y<BLOCK_SIZE; y++){
323 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
324
325 if(FFABS(middleEnergy) < 8*c->QP){
326 const int q=(dst[3] - dst[4])/2;
327 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
328 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
329
330 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
331 d= FFMAX(d, 0);
332
333 d= (5*d + 32) >> 6;
334 d*= FFSIGN(-middleEnergy);
335
336 if(q>0)
337 {
338 d= d<0 ? 0 : d;
339 d= d>q ? q : d;
340 }
341 else
342 {
343 d= d>0 ? 0 : d;
344 d= d<q ? q : d;
345 }
346
347 dst[3]-= d;
348 dst[4]+= d;
bb270c08 349 }
16e0bf73
DB
350 dst+= stride;
351 }
cf5ec61d
MN
352}
353
354/**
355 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
356 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
357 */
b0ac780a 358static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
cf5ec61d 359{
16e0bf73
DB
360 int y;
361 for(y=0; y<BLOCK_SIZE; y++){
362 const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
363 const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
364
365 int sums[10];
366 sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
367 sums[1] = sums[0] - first + dst[3];
368 sums[2] = sums[1] - first + dst[4];
369 sums[3] = sums[2] - first + dst[5];
370 sums[4] = sums[3] - first + dst[6];
371 sums[5] = sums[4] - dst[0] + dst[7];
372 sums[6] = sums[5] - dst[1] + last;
373 sums[7] = sums[6] - dst[2] + last;
374 sums[8] = sums[7] - dst[3] + last;
375 sums[9] = sums[8] - dst[4] + last;
376
377 dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
378 dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
379 dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
380 dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
381 dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
382 dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
383 dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
384 dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
385
386 dst+= stride;
387 }
cf5ec61d
MN
388}
389
4e4dcbc5 390/**
cc9b0679
MN
391 * Experimental Filter 1 (Horizontal)
392 * will not damage linear gradients
bd107136 393 * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
755bfeab
DB
394 * can only smooth blocks at the expected locations (it cannot smooth them if they did move)
395 * MMX2 version does correct clipping C version does not
cc9b0679 396 * not identical with the vertical one
4e4dcbc5 397 */
cc9b0679
MN
398static inline void horizX1Filter(uint8_t *src, int stride, int QP)
399{
16e0bf73
DB
400 int y;
401 static uint64_t *lut= NULL;
402 if(lut==NULL)
403 {
404 int i;
405 lut = av_malloc(256*8);
406 for(i=0; i<256; i++)
bb270c08 407 {
16e0bf73 408 int v= i < 128 ? 2*i : 2*(i-256);
117e45b0 409/*
cc9b0679 410//Simulate 112242211 9-Tap filter
16e0bf73
DB
411 uint64_t a= (v/16) & 0xFF;
412 uint64_t b= (v/8) & 0xFF;
413 uint64_t c= (v/4) & 0xFF;
414 uint64_t d= (3*v/8) & 0xFF;
117e45b0 415*/
cc9b0679 416//Simulate piecewise linear interpolation
16e0bf73
DB
417 uint64_t a= (v/16) & 0xFF;
418 uint64_t b= (v*3/16) & 0xFF;
419 uint64_t c= (v*5/16) & 0xFF;
420 uint64_t d= (7*v/16) & 0xFF;
421 uint64_t A= (0x100 - a)&0xFF;
422 uint64_t B= (0x100 - b)&0xFF;
423 uint64_t C= (0x100 - c)&0xFF;
424 uint64_t D= (0x100 - c)&0xFF;
425
426 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
427 (D<<24) | (C<<16) | (B<<8) | (A);
428 //lut[i] = (v<<32) | (v<<24);
bb270c08 429 }
16e0bf73 430 }
bb270c08 431
16e0bf73
DB
432 for(y=0; y<BLOCK_SIZE; y++){
433 int a= src[1] - src[2];
434 int b= src[3] - src[4];
435 int c= src[5] - src[6];
bb270c08 436
16e0bf73 437 int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
bb270c08 438
16e0bf73
DB
439 if(d < QP){
440 int v = d * FFSIGN(-b);
bb270c08 441
16e0bf73
DB
442 src[1] +=v/8;
443 src[2] +=v/4;
444 src[3] +=3*v/8;
445 src[4] -=3*v/8;
446 src[5] -=v/4;
447 src[6] -=v/8;
bb270c08 448 }
16e0bf73
DB
449 src+=stride;
450 }
cc9b0679
MN
451}
452
12eebd26
MN
453/**
454 * accurate deblock filter
455 */
849f1035 456static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
16e0bf73
DB
457 int y;
458 const int QP= c->QP;
459 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
460 const int dcThreshold= dcOffset*2 + 1;
12eebd26 461//START_TIMER
16e0bf73
DB
462 src+= step*4; // src points to begin of the 8x8 Block
463 for(y=0; y<8; y++){
464 int numEq= 0;
465
466 if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
467 if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
468 if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
469 if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
470 if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
471 if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
472 if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
473 if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
474 if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
475 if(numEq > c->ppMode.flatnessThreshold){
476 int min, max, x;
477
478 if(src[0] > src[step]){
479 max= src[0];
480 min= src[step];
481 }else{
482 max= src[step];
483 min= src[0];
484 }
485 for(x=2; x<8; x+=2){
486 if(src[x*step] > src[(x+1)*step]){
487 if(src[x *step] > max) max= src[ x *step];
488 if(src[(x+1)*step] < min) min= src[(x+1)*step];
bb270c08 489 }else{
16e0bf73
DB
490 if(src[(x+1)*step] > max) max= src[(x+1)*step];
491 if(src[ x *step] < min) min= src[ x *step];
492 }
493 }
494 if(max-min < 2*QP){
495 const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
496 const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
497
498 int sums[10];
499 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
500 sums[1] = sums[0] - first + src[3*step];
501 sums[2] = sums[1] - first + src[4*step];
502 sums[3] = sums[2] - first + src[5*step];
503 sums[4] = sums[3] - first + src[6*step];
504 sums[5] = sums[4] - src[0*step] + src[7*step];
505 sums[6] = sums[5] - src[1*step] + last;
506 sums[7] = sums[6] - src[2*step] + last;
507 sums[8] = sums[7] - src[3*step] + last;
508 sums[9] = sums[8] - src[4*step] + last;
509
510 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
511 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
512 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
513 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
514 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
515 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
516 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
517 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
518 }
519 }else{
520 const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
521
522 if(FFABS(middleEnergy) < 8*QP){
523 const int q=(src[3*step] - src[4*step])/2;
524 const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
525 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
526
527 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
528 d= FFMAX(d, 0);
529
530 d= (5*d + 32) >> 6;
531 d*= FFSIGN(-middleEnergy);
532
533 if(q>0){
534 d= d<0 ? 0 : d;
535 d= d>q ? q : d;
536 }else{
537 d= d>0 ? 0 : d;
538 d= d<q ? q : d;
bb270c08
DB
539 }
540
16e0bf73
DB
541 src[3*step]-= d;
542 src[4*step]+= d;
543 }
bb270c08 544 }
16e0bf73
DB
545
546 src += stride;
547 }
12eebd26
MN
548/*if(step==16){
549 STOP_TIMER("step16")
550}else{
551 STOP_TIMER("stepX")
552}*/
553}
cc9b0679 554
e89952aa 555//Note: we have C, MMX, MMX2 and 3DNOW versions; there is no 3DNOW+MMX2 one
cc9b0679 556//Plain C versions
fe9e9d60 557#if !(defined (HAVE_MMX) || defined (HAVE_ALTIVEC)) || defined (RUNTIME_CPUDETECT)
e89952aa
MN
558#define COMPILE_C
559#endif
560
b0ac780a
MN
561#ifdef HAVE_ALTIVEC
562#define COMPILE_ALTIVEC
b0ac780a 563#endif //HAVE_ALTIVEC
b0ac780a 564
3cd52279 565#if defined(ARCH_X86)
e89952aa
MN
566
567#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
568#define COMPILE_MMX
569#endif
570
571#if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
572#define COMPILE_MMX2
573#endif
574
575#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
576#define COMPILE_3DNOW
577#endif
3cd52279 578#endif /* defined(ARCH_X86) */
e89952aa
MN
579
580#undef HAVE_MMX
581#undef HAVE_MMX2
582#undef HAVE_3DNOW
b0ac780a 583#undef HAVE_ALTIVEC
e89952aa
MN
584
585#ifdef COMPILE_C
cc9b0679
MN
586#undef HAVE_MMX
587#undef HAVE_MMX2
588#undef HAVE_3DNOW
cc9b0679
MN
589#define RENAME(a) a ## _C
590#include "postprocess_template.c"
e89952aa 591#endif
cc9b0679 592
b0ac780a
MN
593#ifdef COMPILE_ALTIVEC
594#undef RENAME
595#define HAVE_ALTIVEC
596#define RENAME(a) a ## _altivec
597#include "postprocess_altivec_template.c"
598#include "postprocess_template.c"
599#endif
b0ac780a 600
cc9b0679 601//MMX versions
e89952aa 602#ifdef COMPILE_MMX
cc9b0679
MN
603#undef RENAME
604#define HAVE_MMX
605#undef HAVE_MMX2
606#undef HAVE_3DNOW
cc9b0679
MN
607#define RENAME(a) a ## _MMX
608#include "postprocess_template.c"
e89952aa 609#endif
cc9b0679
MN
610
611//MMX2 versions
e89952aa 612#ifdef COMPILE_MMX2
cc9b0679
MN
613#undef RENAME
614#define HAVE_MMX
615#define HAVE_MMX2
616#undef HAVE_3DNOW
cc9b0679
MN
617#define RENAME(a) a ## _MMX2
618#include "postprocess_template.c"
e89952aa 619#endif
cc9b0679
MN
620
621//3DNOW versions
e89952aa 622#ifdef COMPILE_3DNOW
cc9b0679
MN
623#undef RENAME
624#define HAVE_MMX
625#undef HAVE_MMX2
626#define HAVE_3DNOW
cc9b0679
MN
627#define RENAME(a) a ## _3DNow
628#include "postprocess_template.c"
e89952aa 629#endif
cc9b0679 630
755bfeab 631// minor note: the HAVE_xyz macros are messed up after this point, so do not rely on them.
cc9b0679 632
6c51fd3f 633static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
7dfea342 634 const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
cc9b0679 635{
16e0bf73
DB
636 PPContext *c= (PPContext *)vc;
637 PPMode *ppMode= (PPMode *)vm;
638 c->ppMode= *ppMode; //FIXME
9c9e467d 639
16e0bf73
DB
640 // Using ifs here as they are faster than function pointers although the
641 // difference would not be measurable here but it is much better because
642 // someone might exchange the CPU whithout restarting MPlayer ;)
e89952aa 643#ifdef RUNTIME_CPUDETECT
3cd52279 644#if defined(ARCH_X86)
16e0bf73
DB
645 // ordered per speed fastest first
646 if(c->cpuCaps & PP_CPU_CAPS_MMX2)
647 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
648 else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
649 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
650 else if(c->cpuCaps & PP_CPU_CAPS_MMX)
651 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
652 else
653 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
cc9b0679 654#else
b0ac780a 655#ifdef HAVE_ALTIVEC
16e0bf73
DB
656 if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
657 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
658 else
b0ac780a 659#endif
16e0bf73 660 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
be44a4d7 661#endif
e89952aa
MN
662#else //RUNTIME_CPUDETECT
663#ifdef HAVE_MMX2
16e0bf73 664 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 665#elif defined (HAVE_3DNOW)
16e0bf73 666 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 667#elif defined (HAVE_MMX)
16e0bf73 668 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
b0ac780a 669#elif defined (HAVE_ALTIVEC)
16e0bf73 670 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 671#else
16e0bf73 672 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa
MN
673#endif
674#endif //!RUNTIME_CPUDETECT
117e45b0
MN
675}
676
cc9b0679 677//static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
bb270c08 678// QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
13e00528 679
/* -pp Command line Help
 *
 * The long names listed here must match the longName column of filters[]
 * exactly, because the parser compares them with a case-sensitive strcmp().
 * The entries for ha, va and fq used to advertise "hadeblock", "vadeblock"
 * and "forceQuant", none of which the parser accepts.
 */
#if LIBPOSTPROC_VERSION_INT < (52<<16)
const char *const pp_help=
#else
const char pp_help[] =
#endif
"Available postprocessing filters:\n"
"Filters Options\n"
"short long name short long option Description\n"
"* * a autoq CPU power dependent enabler\n"
" c chrom chrominance filtering enabled\n"
" y nochrom chrominance filtering disabled\n"
" n noluma luma filtering disabled\n"
"hb hdeblock (2 threshold) horizontal deblocking filter\n"
" 1. difference factor: default=32, higher -> more deblocking\n"
" 2. flatness threshold: default=39, lower -> more deblocking\n"
" the h & v deblocking filters share these\n"
" so you can't set different thresholds for h / v\n"
"vb vdeblock (2 threshold) vertical deblocking filter\n"
"ha ahdeblock (2 threshold) horizontal deblocking filter\n"
"va avdeblock (2 threshold) vertical deblocking filter\n"
"h1 x1hdeblock experimental h deblock filter 1\n"
"v1 x1vdeblock experimental v deblock filter 1\n"
"dr dering deringing filter\n"
"al autolevels automatic brightness / contrast\n"
" f fullyrange stretch luminance to (0..255)\n"
"lb linblenddeint linear blend deinterlacer\n"
"li linipoldeint linear interpolating deinterlace\n"
"ci cubicipoldeint cubic interpolating deinterlacer\n"
"md mediandeint median deinterlacer\n"
"fd ffmpegdeint ffmpeg deinterlacer\n"
"l5 lowpass5 FIR lowpass deinterlacer\n"
"de default hb:a,vb:a,dr:a\n"
"fa fast h1:a,v1:a,dr:a\n"
"ac ha:a:128:7,va:a,dr:a\n"
"tn tmpnoise (3 threshold) temporal noise reducer\n"
" 1. <= 2. <= 3. larger -> stronger filtering\n"
"fq forcequant <quantizer> force quantizer\n"
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"\n"
;
911879d1 729
7dfea342 730pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality)
911879d1 731{
16e0bf73
DB
732 char temp[GET_MODE_BUFFER_SIZE];
733 char *p= temp;
734 static const char filterDelimiters[] = ",/";
735 static const char optionDelimiters[] = ":";
736 struct PPMode *ppMode;
737 char *filterToken;
738
739 ppMode= av_malloc(sizeof(PPMode));
740
741 ppMode->lumMode= 0;
742 ppMode->chromMode= 0;
743 ppMode->maxTmpNoise[0]= 700;
744 ppMode->maxTmpNoise[1]= 1500;
745 ppMode->maxTmpNoise[2]= 3000;
746 ppMode->maxAllowedY= 234;
747 ppMode->minAllowedY= 16;
748 ppMode->baseDcDiff= 256/8;
749 ppMode->flatnessThreshold= 56-16-1;
750 ppMode->maxClippedThreshold= 0.01;
751 ppMode->error=0;
752
753 strncpy(temp, name, GET_MODE_BUFFER_SIZE);
754
755 av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
756
757 for(;;){
758 char *filterName;
759 int q= 1000000; //PP_QUALITY_MAX;
760 int chrom=-1;
761 int luma=-1;
762 char *option;
763 char *options[OPTIONS_ARRAY_SIZE];
764 int i;
765 int filterNameOk=0;
766 int numOfUnknownOptions=0;
767 int enable=1; //does the user want us to enabled or disabled the filter
768
769 filterToken= strtok(p, filterDelimiters);
770 if(filterToken == NULL) break;
771 p+= strlen(filterToken) + 1; // p points to next filterToken
772 filterName= strtok(filterToken, optionDelimiters);
773 av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
774
775 if(*filterName == '-'){
776 enable=0;
777 filterName++;
778 }
bb270c08 779
16e0bf73
DB
780 for(;;){ //for all options
781 option= strtok(NULL, optionDelimiters);
782 if(option == NULL) break;
783
784 av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
785 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
786 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
787 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
788 else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
789 else{
790 options[numOfUnknownOptions] = option;
791 numOfUnknownOptions++;
792 }
793 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
794 }
795 options[numOfUnknownOptions] = NULL;
796
797 /* replace stuff from the replace Table */
798 for(i=0; replaceTable[2*i]!=NULL; i++){
799 if(!strcmp(replaceTable[2*i], filterName)){
800 int newlen= strlen(replaceTable[2*i + 1]);
801 int plen;
802 int spaceLeft;
803
804 if(p==NULL) p= temp, *p=0; //last filter
805 else p--, *p=','; //not last filter
806
807 plen= strlen(p);
808 spaceLeft= p - temp + plen;
809 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE){
810 ppMode->error++;
811 break;
bb270c08 812 }
16e0bf73
DB
813 memmove(p + newlen, p, plen+1);
814 memcpy(p, replaceTable[2*i + 1], newlen);
815 filterNameOk=1;
816 }
817 }
bb270c08 818
16e0bf73
DB
819 for(i=0; filters[i].shortName!=NULL; i++){
820 if( !strcmp(filters[i].longName, filterName)
821 || !strcmp(filters[i].shortName, filterName)){
822 ppMode->lumMode &= ~filters[i].mask;
823 ppMode->chromMode &= ~filters[i].mask;
824
825 filterNameOk=1;
826 if(!enable) break; // user wants to disable it
827
828 if(q >= filters[i].minLumQuality && luma)
829 ppMode->lumMode|= filters[i].mask;
830 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
831 if(q >= filters[i].minChromQuality)
832 ppMode->chromMode|= filters[i].mask;
833
834 if(filters[i].mask == LEVEL_FIX){
835 int o;
836 ppMode->minAllowedY= 16;
837 ppMode->maxAllowedY= 234;
838 for(o=0; options[o]!=NULL; o++){
839 if( !strcmp(options[o],"fullyrange")
840 ||!strcmp(options[o],"f")){
841 ppMode->minAllowedY= 0;
842 ppMode->maxAllowedY= 255;
843 numOfUnknownOptions--;
bb270c08 844 }
16e0bf73 845 }
bb270c08 846 }
16e0bf73 847 else if(filters[i].mask == TEMP_NOISE_FILTER)
bb270c08 848 {
16e0bf73
DB
849 int o;
850 int numOfNoises=0;
851
852 for(o=0; options[o]!=NULL; o++){
853 char *tail;
854 ppMode->maxTmpNoise[numOfNoises]=
855 strtol(options[o], &tail, 0);
856 if(tail!=options[o]){
857 numOfNoises++;
858 numOfUnknownOptions--;
859 if(numOfNoises >= 3) break;
bb270c08 860 }
16e0bf73 861 }
bb270c08 862 }
16e0bf73
DB
863 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK
864 || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
865 int o;
866
867 for(o=0; options[o]!=NULL && o<2; o++){
868 char *tail;
869 int val= strtol(options[o], &tail, 0);
870 if(tail==options[o]) break;
871
872 numOfUnknownOptions--;
873 if(o==0) ppMode->baseDcDiff= val;
874 else ppMode->flatnessThreshold= val;
875 }
876 }
877 else if(filters[i].mask == FORCE_QUANT){
878 int o;
879 ppMode->forcedQuant= 15;
880
881 for(o=0; options[o]!=NULL && o<1; o++){
882 char *tail;
883 int val= strtol(options[o], &tail, 0);
884 if(tail==options[o]) break;
885
886 numOfUnknownOptions--;
887 ppMode->forcedQuant= val;
888 }
889 }
890 }
bb270c08 891 }
16e0bf73
DB
892 if(!filterNameOk) ppMode->error++;
893 ppMode->error += numOfUnknownOptions;
894 }
895
896 av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
897 if(ppMode->error){
898 av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
899 av_free(ppMode);
900 return NULL;
901 }
902 return ppMode;
911879d1
MN
903}
904
7dfea342 905void pp_free_mode(pp_mode *mode){
6ab6c7c3 906 av_free(mode);
c41d972d
MN
907}
908
/**
 * Replace the buffer *p by a fresh, zeroed one of the given size.
 * The previous contents are not preserved.
 *
 * @param p         address of the buffer pointer; the old buffer is freed
 * @param alignment not used by this implementation
 * @param size      size in bytes of the new buffer
 */
static void reallocAlign(void **p, int alignment, int size)
{
    void **slot= p;

    av_free(*slot);
    *slot= av_mallocz(size);
}
913
0426af31 914static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
16e0bf73
DB
915 int mbWidth = (width+15)>>4;
916 int mbHeight= (height+15)>>4;
917 int i;
918
919 c->stride= stride;
920 c->qpStride= qpStride;
921
922 reallocAlign((void **)&c->tempDst, 8, stride*24);
923 reallocAlign((void **)&c->tempSrc, 8, stride*24);
924 reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
925 reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
926 for(i=0; i<256; i++)
927 c->yHistogram[i]= width*height/64*15/256;
928
929 for(i=0; i<3; i++){
930 //Note: The +17*1024 is just there so i do not have to worry about r/w over the end.
aa089f6c
DB
931 reallocAlign((void **)&c->tempBlurred[i], 8, stride*mbHeight*16 + 17*1024);
932 reallocAlign((void **)&c->tempBlurredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
16e0bf73
DB
933 }
934
935 reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
936 reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
937 reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
938 reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
88c0bc7e
MN
939}
940
e7becfb2
DB
/**
 * Name callback referenced by the logging class descriptor of this file.
 *
 * @param ptr ignored
 * @return the constant string "postproc"
 */
static const char *context_to_name(void *ptr)
{
    static const char name[] = "postproc";

    (void)ptr;
    return name;
}
944
31bfd6f3 945static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
e7becfb2 946
7dfea342 947pp_context *pp_get_context(int width, int height, int cpuCaps){
16e0bf73
DB
948 PPContext *c= av_malloc(sizeof(PPContext));
949 int stride= (width+15)&(~15); //assumed / will realloc if needed
950 int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
951
952 memset(c, 0, sizeof(PPContext));
953 c->av_class = &av_codec_context_class;
954 c->cpuCaps= cpuCaps;
955 if(cpuCaps&PP_FORMAT){
956 c->hChromaSubSample= cpuCaps&0x3;
957 c->vChromaSubSample= (cpuCaps>>4)&0x3;
958 }else{
959 c->hChromaSubSample= 1;
960 c->vChromaSubSample= 1;
961 }
962
963 reallocBuffers(c, width, height, stride, qpStride);
964
965 c->frameNum=-1;
966
967 return c;
45b4f285
MN
968}
969
9cb54f43 970void pp_free_context(void *vc){
16e0bf73
DB
971 PPContext *c = (PPContext*)vc;
972 int i;
115329f1 973
aa089f6c
DB
974 for(i=0; i<3; i++) av_free(c->tempBlurred[i]);
975 for(i=0; i<3; i++) av_free(c->tempBlurredPast[i]);
115329f1 976
16e0bf73
DB
977 av_free(c->tempBlocks);
978 av_free(c->yHistogram);
979 av_free(c->tempDst);
980 av_free(c->tempSrc);
981 av_free(c->deintTemp);
982 av_free(c->stdQPTable);
983 av_free(c->nonBQPTable);
984 av_free(c->forcedQPTable);
115329f1 985
16e0bf73 986 memset(c, 0, sizeof(PPContext));
88c0bc7e 987
16e0bf73 988 av_free(c);
9c9e467d
MN
989}
990
6c51fd3f 991void pp_postprocess(const uint8_t * src[3], const int srcStride[3],
16e0bf73
DB
992 uint8_t * dst[3], const int dstStride[3],
993 int width, int height,
994 const QP_STORE_T *QP_store, int QPStride,
7dfea342 995 pp_mode *vm, void *vc, int pict_type)
911879d1 996{
16e0bf73
DB
997 int mbWidth = (width+15)>>4;
998 int mbHeight= (height+15)>>4;
999 PPMode *mode = (PPMode*)vm;
1000 PPContext *c = (PPContext*)vc;
1001 int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
1002 int absQPStride = FFABS(QPStride);
1003
1004 // c->stride and c->QPStride are always positive
1005 if(c->stride < minStride || c->qpStride < absQPStride)
1006 reallocBuffers(c, width, height,
1007 FFMAX(minStride, c->stride),
1008 FFMAX(c->qpStride, absQPStride));
1009
1010 if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)){
1011 int i;
1012 QP_store= c->forcedQPTable;
1013 absQPStride = QPStride = 0;
1014 if(mode->lumMode & FORCE_QUANT)
1015 for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
1016 else
1017 for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
1018 }
0426af31 1019
16e0bf73
DB
1020 if(pict_type & PP_PICT_TYPE_QP2){
1021 int i;
1022 const int count= mbHeight * absQPStride;
1023 for(i=0; i<(count>>2); i++){
1024 ((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
bb270c08 1025 }
16e0bf73
DB
1026 for(i<<=2; i<count; i++){
1027 c->stdQPTable[i] = QP_store[i]>>1;
1028 }
1029 QP_store= c->stdQPTable;
1030 QPStride= absQPStride;
1031 }
1032
1033 if(0){
1034 int x,y;
1035 for(y=0; y<mbHeight; y++){
1036 for(x=0; x<mbWidth; x++){
e7becfb2 1037 av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
16e0bf73
DB
1038 }
1039 av_log(c, AV_LOG_INFO, "\n");
bb270c08 1040 }
e7becfb2 1041 av_log(c, AV_LOG_INFO, "\n");
16e0bf73
DB
1042 }
1043
1044 if((pict_type&7)!=3){
1045 if (QPStride >= 0){
1046 int i;
1047 const int count= mbHeight * QPStride;
1048 for(i=0; i<(count>>2); i++){
1049 ((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1050 }
1051 for(i<<=2; i<count; i++){
1052 c->nonBQPTable[i] = QP_store[i] & 0x3F;
1053 }
1054 } else {
1055 int i,j;
1056 for(i=0; i<mbHeight; i++) {
1057 for(j=0; j<absQPStride; j++) {
1058 c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
bb270c08 1059 }
16e0bf73 1060 }
bb270c08 1061 }
16e0bf73 1062 }
bb270c08 1063
16e0bf73
DB
1064 av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
1065 mode->lumMode, mode->chromMode);
bb270c08 1066
16e0bf73 1067 postProcess(src[0], srcStride[0], dst[0], dstStride[0],
bb270c08
DB
1068 width, height, QP_store, QPStride, 0, mode, c);
1069
16e0bf73
DB
1070 width = (width )>>c->hChromaSubSample;
1071 height = (height)>>c->vChromaSubSample;
1072
1073 if(mode->chromMode){
1074 postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1075 width, height, QP_store, QPStride, 1, mode, c);
1076 postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1077 width, height, QP_store, QPStride, 2, mode, c);
1078 }
1079 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
1080 linecpy(dst[1], src[1], height, srcStride[1]);
1081 linecpy(dst[2], src[2], height, srcStride[2]);
1082 }else{
1083 int y;
1084 for(y=0; y<height; y++){
1085 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1086 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
bb270c08 1087 }
16e0bf73 1088 }
911879d1
MN
1089}
1090