Implement postproc_version().
[libav.git] / libpostproc / postprocess.c
CommitLineData
3057fa66 1/*
b78e7197
DB
2 * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
3 *
4 * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
5 *
ef85972b 6 * This file is part of FFmpeg.
b78e7197
DB
7 *
8 * FFmpeg is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
3057fa66 22
b304569a
MN
23/**
24 * @file postprocess.c
25 * postprocessing.
26 */
115329f1 27
3057fa66 28/*
bb270c08
DB
29 C MMX MMX2 3DNow AltiVec
30isVertDC Ec Ec Ec
31isVertMinMaxOk Ec Ec Ec
32doVertLowPass E e e Ec
33doVertDefFilter Ec Ec e e Ec
34isHorizDC Ec Ec Ec
35isHorizMinMaxOk a E Ec
36doHorizLowPass E e e Ec
37doHorizDefFilter Ec Ec e e Ec
38do_a_deblock Ec E Ec E
39deRing E e e* Ecp
40Vertical RKAlgo1 E a a
41Horizontal RKAlgo1 a a
42Vertical X1# a E E
43Horizontal X1# a E E
44LinIpolDeinterlace e E E*
45CubicIpolDeinterlace a e e*
46LinBlendDeinterlace e E E*
47MedianDeinterlace# E Ec Ec
48TempDeNoiser# E e e Ec
d5a1a995 49
2cab6401 50* I do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
755bfeab 51# more or less self-invented filters, so the exactness is not too meaningful
3057fa66 52E = Exact implementation
04932b0d 53e = almost exact implementation (slightly different rounding,...)
3057fa66
A
54a = alternative / approximate impl
55c = checked against the other implementations (-vo md5)
b0ac780a 56p = partially optimized, still some work to do
3057fa66
A
57*/
58
59/*
60TODO:
3057fa66 61reduce the time wasted on the mem transfer
3057fa66 62unroll stuff if instructions depend too much on the prior one
3057fa66 63move YScale thing to the end instead of fixing QP
13e00528 64write a faster and higher quality deblocking filter :)
d5a1a995 65make the mainloop more flexible (variable number of blocks at once
bb270c08 66 (the if/else stuff per block is slowing things down)
9f45d04d 67compare the quality & speed of all filters
9f45d04d 68split this huge file
8405b3fd 69optimize c versions
117e45b0 70try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
3057fa66 71...
13e00528
A
72*/
73
36b1b0bc 74//Changelog: use the Subversion log
3057fa66 75
9858f773 76#include "config.h"
245976da 77#include "libavutil/avutil.h"
3057fa66
A
78#include <inttypes.h>
79#include <stdio.h>
d5a1a995 80#include <stdlib.h>
911879d1 81#include <string.h>
dda87e9f
PL
82#ifdef HAVE_MALLOC_H
83#include <malloc.h>
84#endif
3057fa66 85//#undef HAVE_MMX2
13e00528 86//#define HAVE_3DNOW
3057fa66 87//#undef HAVE_MMX
cc9b0679 88//#undef ARCH_X86
7f16f6e6 89//#define DEBUG_BRIGHTNESS
13e00528 90#include "postprocess.h"
c41d972d 91#include "postprocess_internal.h"
bba9b16c 92
2a4a62bf
SS
93unsigned postproc_version(void)
94{
95 return LIBPOSTPROC_VERSION_INT;
96}
97
a7b2871c
RD
98#ifdef HAVE_ALTIVEC_H
99#include <altivec.h>
100#endif
101
911879d1
MN
102#define GET_MODE_BUFFER_SIZE 500
103#define OPTIONS_ARRAY_SIZE 10
9c9e467d
MN
104#define BLOCK_SIZE 8
105#define TEMP_STRIDE 8
106//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
911879d1 107
3cd52279 108#if defined(ARCH_X86)
2b858d0b
RD
109DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
110DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
111DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
112DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
113DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
114DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
115DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
116DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
b28daef8 117#endif
3057fa66 118
2722e362 119DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
3057fa66 120
9c9e467d 121
911879d1
MN
122static struct PPFilter filters[]=
123{
16e0bf73
DB
124 {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK},
125 {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK},
126/* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER},
127 {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/
128 {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER},
129 {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER},
130 {"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK},
131 {"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK},
132 {"dr", "dering", 1, 5, 6, DERING},
133 {"al", "autolevels", 0, 1, 2, LEVEL_FIX},
134 {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
135 {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
136 {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
137 {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER},
138 {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER},
139 {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER},
140 {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER},
141 {"fq", "forcequant", 1, 0, 0, FORCE_QUANT},
142 {NULL, NULL,0,0,0,0} //End Marker
911879d1
MN
143};
144
7b49ce2e 145static const char *replaceTable[]=
911879d1 146{
16e0bf73
DB
147 "default", "hb:a,vb:a,dr:a",
148 "de", "hb:a,vb:a,dr:a",
149 "fast", "h1:a,v1:a,dr:a",
150 "fa", "h1:a,v1:a,dr:a",
151 "ac", "ha:a:128:7,va:a,dr:a",
152 NULL //End Marker
911879d1
MN
153};
154
3057fa66 155
3cd52279 156#if defined(ARCH_X86)
3057fa66
A
157static inline void prefetchnta(void *p)
158{
16e0bf73
DB
159 asm volatile( "prefetchnta (%0)\n\t"
160 : : "r" (p)
161 );
3057fa66
A
162}
163
164static inline void prefetcht0(void *p)
165{
16e0bf73
DB
166 asm volatile( "prefetcht0 (%0)\n\t"
167 : : "r" (p)
168 );
3057fa66
A
169}
170
171static inline void prefetcht1(void *p)
172{
16e0bf73
DB
173 asm volatile( "prefetcht1 (%0)\n\t"
174 : : "r" (p)
175 );
3057fa66
A
176}
177
178static inline void prefetcht2(void *p)
179{
16e0bf73
DB
180 asm volatile( "prefetcht2 (%0)\n\t"
181 : : "r" (p)
182 );
3057fa66 183}
9a722af7 184#endif
3057fa66 185
04932b0d
DB
186/* The horizontal functions exist only in C because the MMX
187 * code is faster with vertical filters and transposing. */
3057fa66 188
cf5ec61d
MN
189/**
190 * Check if the given 8x8 Block is mostly "flat"
191 */
b0ac780a 192static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
cf5ec61d 193{
16e0bf73
DB
194 int numEq= 0;
195 int y;
196 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
197 const int dcThreshold= dcOffset*2 + 1;
198
199 for(y=0; y<BLOCK_SIZE; y++){
200 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
201 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
202 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
203 if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
204 if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
205 if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
206 if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
207 src+= stride;
208 }
209 return numEq > c->ppMode.flatnessThreshold;
9c9e467d
MN
210}
211
212/**
213 * Check if the middle 8x8 Block in the given 8x16 block is flat
214 */
16e0bf73
DB
215static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c)
216{
217 int numEq= 0;
218 int y;
219 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
220 const int dcThreshold= dcOffset*2 + 1;
221
222 src+= stride*4; // src points to begin of the 8x8 Block
223 for(y=0; y<BLOCK_SIZE-1; y++){
224 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
225 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
226 if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
227 if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
228 if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
229 if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
230 if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
231 if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
232 src+= stride;
233 }
234 return numEq > c->ppMode.flatnessThreshold;
cf5ec61d
MN
235}
236
/**
 * Cheap horizontal range test on an 8-row block.
 *
 * On each row one pair of columns is sampled; the pairs cycle through
 * (0,5), (2,7), (4,1), (6,3) every four rows. The block fails as soon as a
 * sampled pair differs by more than 2*QP.
 *
 * @param src    first pixel of the block
 * @param stride distance between two rows, in bytes
 * @param QP     quantizer used to scale the tolerance
 * @return 1 if every sampled pair is within tolerance, 0 otherwise
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{
    static const uint8_t colPair[4][2] = { {0, 5}, {2, 7}, {4, 1}, {6, 3} };
    const uint8_t *line = src;
    int row;

    for (row = 0; row < 8; row++) {
        const int a = line[colPair[row & 3][0]];
        const int b = line[colPair[row & 3][1]];

        /* one unsigned compare tests -2*QP <= a-b <= 2*QP */
        if ((unsigned)(a - b + 2*QP) > 4*QP)
            return 0;
        line += stride;
    }
    return 1;
}
cf5ec61d 259
cb482d25
MN
260static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
261{
262#if 1
263#if 1
16e0bf73
DB
264 int x;
265 src+= stride*4;
266 for(x=0; x<BLOCK_SIZE; x+=4){
267 if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0;
268 if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
269 if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
270 if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
271 }
cb482d25 272#else
16e0bf73
DB
273 int x;
274 src+= stride*3;
275 for(x=0; x<BLOCK_SIZE; x++){
276 if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
277 }
cb482d25 278#endif
16e0bf73 279 return 1;
cb482d25 280#else
16e0bf73
DB
281 int x;
282 src+= stride*4;
283 for(x=0; x<BLOCK_SIZE; x++){
284 int min=255;
285 int max=0;
286 int y;
287 for(y=0; y<8; y++){
288 int v= src[x + y*stride];
289 if(v>max) max=v;
290 if(v<min) min=v;
bb270c08 291 }
16e0bf73
DB
292 if(max-min > 2*QP) return 0;
293 }
294 return 1;
cb482d25
MN
295#endif
296}
297
16e0bf73
DB
298static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c)
299{
300 if( isHorizDC_C(src, stride, c) ){
301 if( isHorizMinMaxOk_C(src, stride, c->QP) )
302 return 1;
303 else
304 return 0;
305 }else{
306 return 2;
307 }
b0ac780a
MN
308}
309
16e0bf73
DB
310static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c)
311{
312 if( isVertDC_C(src, stride, c) ){
313 if( isVertMinMaxOk_C(src, stride, c->QP) )
314 return 1;
315 else
316 return 0;
317 }else{
318 return 2;
319 }
cf5ec61d
MN
320}
321
b0ac780a 322static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
cf5ec61d 323{
16e0bf73
DB
324 int y;
325 for(y=0; y<BLOCK_SIZE; y++){
326 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
327
328 if(FFABS(middleEnergy) < 8*c->QP){
329 const int q=(dst[3] - dst[4])/2;
330 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
331 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
332
333 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
334 d= FFMAX(d, 0);
335
336 d= (5*d + 32) >> 6;
337 d*= FFSIGN(-middleEnergy);
338
339 if(q>0)
340 {
341 d= d<0 ? 0 : d;
342 d= d>q ? q : d;
343 }
344 else
345 {
346 d= d>0 ? 0 : d;
347 d= d<q ? q : d;
348 }
349
350 dst[3]-= d;
351 dst[4]+= d;
bb270c08 352 }
16e0bf73
DB
353 dst+= stride;
354 }
cf5ec61d
MN
355}
356
357/**
358 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
359 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
360 */
b0ac780a 361static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
cf5ec61d 362{
16e0bf73
DB
363 int y;
364 for(y=0; y<BLOCK_SIZE; y++){
365 const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
366 const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
367
368 int sums[10];
369 sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
370 sums[1] = sums[0] - first + dst[3];
371 sums[2] = sums[1] - first + dst[4];
372 sums[3] = sums[2] - first + dst[5];
373 sums[4] = sums[3] - first + dst[6];
374 sums[5] = sums[4] - dst[0] + dst[7];
375 sums[6] = sums[5] - dst[1] + last;
376 sums[7] = sums[6] - dst[2] + last;
377 sums[8] = sums[7] - dst[3] + last;
378 sums[9] = sums[8] - dst[4] + last;
379
380 dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
381 dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
382 dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
383 dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
384 dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
385 dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
386 dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
387 dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
388
389 dst+= stride;
390 }
cf5ec61d
MN
391}
392
4e4dcbc5 393/**
cc9b0679
MN
394 * Experimental Filter 1 (Horizontal)
395 * will not damage linear gradients
bd107136 396 * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
755bfeab
DB
397 * can only smooth blocks at the expected locations (it cannot smooth them if they did move)
398 * MMX2 version does correct clipping C version does not
cc9b0679 399 * not identical with the vertical one
4e4dcbc5 400 */
cc9b0679
MN
401static inline void horizX1Filter(uint8_t *src, int stride, int QP)
402{
16e0bf73
DB
403 int y;
404 static uint64_t *lut= NULL;
405 if(lut==NULL)
406 {
407 int i;
408 lut = av_malloc(256*8);
409 for(i=0; i<256; i++)
bb270c08 410 {
16e0bf73 411 int v= i < 128 ? 2*i : 2*(i-256);
117e45b0 412/*
cc9b0679 413//Simulate 112242211 9-Tap filter
16e0bf73
DB
414 uint64_t a= (v/16) & 0xFF;
415 uint64_t b= (v/8) & 0xFF;
416 uint64_t c= (v/4) & 0xFF;
417 uint64_t d= (3*v/8) & 0xFF;
117e45b0 418*/
cc9b0679 419//Simulate piecewise linear interpolation
16e0bf73
DB
420 uint64_t a= (v/16) & 0xFF;
421 uint64_t b= (v*3/16) & 0xFF;
422 uint64_t c= (v*5/16) & 0xFF;
423 uint64_t d= (7*v/16) & 0xFF;
424 uint64_t A= (0x100 - a)&0xFF;
425 uint64_t B= (0x100 - b)&0xFF;
426 uint64_t C= (0x100 - c)&0xFF;
427 uint64_t D= (0x100 - c)&0xFF;
428
429 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
430 (D<<24) | (C<<16) | (B<<8) | (A);
431 //lut[i] = (v<<32) | (v<<24);
bb270c08 432 }
16e0bf73 433 }
bb270c08 434
16e0bf73
DB
435 for(y=0; y<BLOCK_SIZE; y++){
436 int a= src[1] - src[2];
437 int b= src[3] - src[4];
438 int c= src[5] - src[6];
bb270c08 439
16e0bf73 440 int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
bb270c08 441
16e0bf73
DB
442 if(d < QP){
443 int v = d * FFSIGN(-b);
bb270c08 444
16e0bf73
DB
445 src[1] +=v/8;
446 src[2] +=v/4;
447 src[3] +=3*v/8;
448 src[4] -=3*v/8;
449 src[5] -=v/4;
450 src[6] -=v/8;
bb270c08 451 }
16e0bf73
DB
452 src+=stride;
453 }
cc9b0679
MN
454}
455
12eebd26
MN
456/**
457 * accurate deblock filter
458 */
849f1035 459static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
16e0bf73
DB
460 int y;
461 const int QP= c->QP;
462 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
463 const int dcThreshold= dcOffset*2 + 1;
12eebd26 464//START_TIMER
16e0bf73
DB
465 src+= step*4; // src points to begin of the 8x8 Block
466 for(y=0; y<8; y++){
467 int numEq= 0;
468
469 if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
470 if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
471 if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
472 if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
473 if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
474 if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
475 if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
476 if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
477 if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
478 if(numEq > c->ppMode.flatnessThreshold){
479 int min, max, x;
480
481 if(src[0] > src[step]){
482 max= src[0];
483 min= src[step];
484 }else{
485 max= src[step];
486 min= src[0];
487 }
488 for(x=2; x<8; x+=2){
489 if(src[x*step] > src[(x+1)*step]){
490 if(src[x *step] > max) max= src[ x *step];
491 if(src[(x+1)*step] < min) min= src[(x+1)*step];
bb270c08 492 }else{
16e0bf73
DB
493 if(src[(x+1)*step] > max) max= src[(x+1)*step];
494 if(src[ x *step] < min) min= src[ x *step];
495 }
496 }
497 if(max-min < 2*QP){
498 const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
499 const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
500
501 int sums[10];
502 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
503 sums[1] = sums[0] - first + src[3*step];
504 sums[2] = sums[1] - first + src[4*step];
505 sums[3] = sums[2] - first + src[5*step];
506 sums[4] = sums[3] - first + src[6*step];
507 sums[5] = sums[4] - src[0*step] + src[7*step];
508 sums[6] = sums[5] - src[1*step] + last;
509 sums[7] = sums[6] - src[2*step] + last;
510 sums[8] = sums[7] - src[3*step] + last;
511 sums[9] = sums[8] - src[4*step] + last;
512
513 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
514 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
515 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
516 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
517 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
518 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
519 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
520 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
521 }
522 }else{
523 const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
524
525 if(FFABS(middleEnergy) < 8*QP){
526 const int q=(src[3*step] - src[4*step])/2;
527 const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
528 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
529
530 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
531 d= FFMAX(d, 0);
532
533 d= (5*d + 32) >> 6;
534 d*= FFSIGN(-middleEnergy);
535
536 if(q>0){
537 d= d<0 ? 0 : d;
538 d= d>q ? q : d;
539 }else{
540 d= d>0 ? 0 : d;
541 d= d<q ? q : d;
bb270c08
DB
542 }
543
16e0bf73
DB
544 src[3*step]-= d;
545 src[4*step]+= d;
546 }
bb270c08 547 }
16e0bf73
DB
548
549 src += stride;
550 }
12eebd26
MN
551/*if(step==16){
552 STOP_TIMER("step16")
553}else{
554 STOP_TIMER("stepX")
555}*/
556}
cc9b0679 557
e89952aa 558//Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
cc9b0679 559//Plain C versions
e89952aa
MN
560#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
561#define COMPILE_C
562#endif
563
b0ac780a
MN
564#ifdef HAVE_ALTIVEC
565#define COMPILE_ALTIVEC
b0ac780a 566#endif //HAVE_ALTIVEC
b0ac780a 567
3cd52279 568#if defined(ARCH_X86)
e89952aa
MN
569
570#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
571#define COMPILE_MMX
572#endif
573
574#if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
575#define COMPILE_MMX2
576#endif
577
578#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
579#define COMPILE_3DNOW
580#endif
3cd52279 581#endif /* defined(ARCH_X86) */
e89952aa
MN
582
583#undef HAVE_MMX
584#undef HAVE_MMX2
585#undef HAVE_3DNOW
b0ac780a 586#undef HAVE_ALTIVEC
e89952aa
MN
587
588#ifdef COMPILE_C
cc9b0679
MN
589#undef HAVE_MMX
590#undef HAVE_MMX2
591#undef HAVE_3DNOW
cc9b0679
MN
592#define RENAME(a) a ## _C
593#include "postprocess_template.c"
e89952aa 594#endif
cc9b0679 595
b0ac780a
MN
596#ifdef COMPILE_ALTIVEC
597#undef RENAME
598#define HAVE_ALTIVEC
599#define RENAME(a) a ## _altivec
600#include "postprocess_altivec_template.c"
601#include "postprocess_template.c"
602#endif
b0ac780a 603
cc9b0679 604//MMX versions
e89952aa 605#ifdef COMPILE_MMX
cc9b0679
MN
606#undef RENAME
607#define HAVE_MMX
608#undef HAVE_MMX2
609#undef HAVE_3DNOW
cc9b0679
MN
610#define RENAME(a) a ## _MMX
611#include "postprocess_template.c"
e89952aa 612#endif
cc9b0679
MN
613
614//MMX2 versions
e89952aa 615#ifdef COMPILE_MMX2
cc9b0679
MN
616#undef RENAME
617#define HAVE_MMX
618#define HAVE_MMX2
619#undef HAVE_3DNOW
cc9b0679
MN
620#define RENAME(a) a ## _MMX2
621#include "postprocess_template.c"
e89952aa 622#endif
cc9b0679
MN
623
624//3DNOW versions
e89952aa 625#ifdef COMPILE_3DNOW
cc9b0679
MN
626#undef RENAME
627#define HAVE_MMX
628#undef HAVE_MMX2
629#define HAVE_3DNOW
cc9b0679
MN
630#define RENAME(a) a ## _3DNow
631#include "postprocess_template.c"
e89952aa 632#endif
cc9b0679 633
755bfeab 634// minor note: the HAVE_xyz is messed up after that line so do not use it.
cc9b0679 635
6c51fd3f
MN
636static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
637 const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc)
cc9b0679 638{
16e0bf73
DB
639 PPContext *c= (PPContext *)vc;
640 PPMode *ppMode= (PPMode *)vm;
641 c->ppMode= *ppMode; //FIXME
9c9e467d 642
16e0bf73
DB
643 // Using ifs here as they are faster than function pointers although the
644 // difference would not be measurable here but it is much better because
645 // someone might exchange the CPU whithout restarting MPlayer ;)
e89952aa 646#ifdef RUNTIME_CPUDETECT
3cd52279 647#if defined(ARCH_X86)
16e0bf73
DB
648 // ordered per speed fastest first
649 if(c->cpuCaps & PP_CPU_CAPS_MMX2)
650 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
651 else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
652 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
653 else if(c->cpuCaps & PP_CPU_CAPS_MMX)
654 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
655 else
656 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
cc9b0679 657#else
b0ac780a 658#ifdef HAVE_ALTIVEC
16e0bf73
DB
659 if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
660 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
661 else
b0ac780a 662#endif
16e0bf73 663 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
be44a4d7 664#endif
e89952aa
MN
665#else //RUNTIME_CPUDETECT
666#ifdef HAVE_MMX2
16e0bf73 667 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 668#elif defined (HAVE_3DNOW)
16e0bf73 669 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 670#elif defined (HAVE_MMX)
16e0bf73 671 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
b0ac780a 672#elif defined (HAVE_ALTIVEC)
16e0bf73 673 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 674#else
16e0bf73 675 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa
MN
676#endif
677#endif //!RUNTIME_CPUDETECT
117e45b0
MN
678}
679
cc9b0679 680//static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
bb270c08 681// QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
13e00528 682
911879d1 683/* -pp Command line Help
911879d1 684*/
/* Help text for the -pp option. The long names listed here must match the
 * longName entries of filters[], because the parser compares names with a
 * case-sensitive strcmp(). */
#if LIBPOSTPROC_VERSION_INT < (52<<16)
const char *const pp_help=
#else
const char pp_help[] =
#endif
"Available postprocessing filters:\n"
"Filters Options\n"
"short long name short long option Description\n"
"* * a autoq CPU power dependent enabler\n"
" c chrom chrominance filtering enabled\n"
" y nochrom chrominance filtering disabled\n"
" n noluma luma filtering disabled\n"
"hb hdeblock (2 threshold) horizontal deblocking filter\n"
" 1. difference factor: default=32, higher -> more deblocking\n"
" 2. flatness threshold: default=39, lower -> more deblocking\n"
" the h & v deblocking filters share these\n"
" so you can't set different thresholds for h / v\n"
"vb vdeblock (2 threshold) vertical deblocking filter\n"
"ha ahdeblock (2 threshold) horizontal deblocking filter\n"
"va avdeblock (2 threshold) vertical deblocking filter\n"
"h1 x1hdeblock experimental h deblock filter 1\n"
"v1 x1vdeblock experimental v deblock filter 1\n"
"dr dering deringing filter\n"
"al autolevels automatic brightness / contrast\n"
" f fullyrange stretch luminance to (0..255)\n"
"lb linblenddeint linear blend deinterlacer\n"
"li linipoldeint linear interpolating deinterlace\n"
"ci cubicipoldeint cubic interpolating deinterlacer\n"
"md mediandeint median deinterlacer\n"
"fd ffmpegdeint ffmpeg deinterlacer\n"
"l5 lowpass5 FIR lowpass deinterlacer\n"
"de default hb:a,vb:a,dr:a\n"
"fa fast h1:a,v1:a,dr:a\n"
"ac ha:a:128:7,va:a,dr:a\n"
"tn tmpnoise (3 threshold) temporal noise reducer\n"
" 1. <= 2. <= 3. larger -> stronger filtering\n"
"fq forcequant <quantizer> force quantizer\n"
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"\n"
;
911879d1 732
ca325343 733pp_mode_t *pp_get_mode_by_name_and_quality(const char *name, int quality)
911879d1 734{
16e0bf73
DB
735 char temp[GET_MODE_BUFFER_SIZE];
736 char *p= temp;
737 static const char filterDelimiters[] = ",/";
738 static const char optionDelimiters[] = ":";
739 struct PPMode *ppMode;
740 char *filterToken;
741
742 ppMode= av_malloc(sizeof(PPMode));
743
744 ppMode->lumMode= 0;
745 ppMode->chromMode= 0;
746 ppMode->maxTmpNoise[0]= 700;
747 ppMode->maxTmpNoise[1]= 1500;
748 ppMode->maxTmpNoise[2]= 3000;
749 ppMode->maxAllowedY= 234;
750 ppMode->minAllowedY= 16;
751 ppMode->baseDcDiff= 256/8;
752 ppMode->flatnessThreshold= 56-16-1;
753 ppMode->maxClippedThreshold= 0.01;
754 ppMode->error=0;
755
756 strncpy(temp, name, GET_MODE_BUFFER_SIZE);
757
758 av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
759
760 for(;;){
761 char *filterName;
762 int q= 1000000; //PP_QUALITY_MAX;
763 int chrom=-1;
764 int luma=-1;
765 char *option;
766 char *options[OPTIONS_ARRAY_SIZE];
767 int i;
768 int filterNameOk=0;
769 int numOfUnknownOptions=0;
770 int enable=1; //does the user want us to enabled or disabled the filter
771
772 filterToken= strtok(p, filterDelimiters);
773 if(filterToken == NULL) break;
774 p+= strlen(filterToken) + 1; // p points to next filterToken
775 filterName= strtok(filterToken, optionDelimiters);
776 av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
777
778 if(*filterName == '-'){
779 enable=0;
780 filterName++;
781 }
bb270c08 782
16e0bf73
DB
783 for(;;){ //for all options
784 option= strtok(NULL, optionDelimiters);
785 if(option == NULL) break;
786
787 av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
788 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
789 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
790 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
791 else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
792 else{
793 options[numOfUnknownOptions] = option;
794 numOfUnknownOptions++;
795 }
796 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
797 }
798 options[numOfUnknownOptions] = NULL;
799
800 /* replace stuff from the replace Table */
801 for(i=0; replaceTable[2*i]!=NULL; i++){
802 if(!strcmp(replaceTable[2*i], filterName)){
803 int newlen= strlen(replaceTable[2*i + 1]);
804 int plen;
805 int spaceLeft;
806
807 if(p==NULL) p= temp, *p=0; //last filter
808 else p--, *p=','; //not last filter
809
810 plen= strlen(p);
811 spaceLeft= p - temp + plen;
812 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE){
813 ppMode->error++;
814 break;
bb270c08 815 }
16e0bf73
DB
816 memmove(p + newlen, p, plen+1);
817 memcpy(p, replaceTable[2*i + 1], newlen);
818 filterNameOk=1;
819 }
820 }
bb270c08 821
16e0bf73
DB
822 for(i=0; filters[i].shortName!=NULL; i++){
823 if( !strcmp(filters[i].longName, filterName)
824 || !strcmp(filters[i].shortName, filterName)){
825 ppMode->lumMode &= ~filters[i].mask;
826 ppMode->chromMode &= ~filters[i].mask;
827
828 filterNameOk=1;
829 if(!enable) break; // user wants to disable it
830
831 if(q >= filters[i].minLumQuality && luma)
832 ppMode->lumMode|= filters[i].mask;
833 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
834 if(q >= filters[i].minChromQuality)
835 ppMode->chromMode|= filters[i].mask;
836
837 if(filters[i].mask == LEVEL_FIX){
838 int o;
839 ppMode->minAllowedY= 16;
840 ppMode->maxAllowedY= 234;
841 for(o=0; options[o]!=NULL; o++){
842 if( !strcmp(options[o],"fullyrange")
843 ||!strcmp(options[o],"f")){
844 ppMode->minAllowedY= 0;
845 ppMode->maxAllowedY= 255;
846 numOfUnknownOptions--;
bb270c08 847 }
16e0bf73 848 }
bb270c08 849 }
16e0bf73 850 else if(filters[i].mask == TEMP_NOISE_FILTER)
bb270c08 851 {
16e0bf73
DB
852 int o;
853 int numOfNoises=0;
854
855 for(o=0; options[o]!=NULL; o++){
856 char *tail;
857 ppMode->maxTmpNoise[numOfNoises]=
858 strtol(options[o], &tail, 0);
859 if(tail!=options[o]){
860 numOfNoises++;
861 numOfUnknownOptions--;
862 if(numOfNoises >= 3) break;
bb270c08 863 }
16e0bf73 864 }
bb270c08 865 }
16e0bf73
DB
866 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK
867 || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
868 int o;
869
870 for(o=0; options[o]!=NULL && o<2; o++){
871 char *tail;
872 int val= strtol(options[o], &tail, 0);
873 if(tail==options[o]) break;
874
875 numOfUnknownOptions--;
876 if(o==0) ppMode->baseDcDiff= val;
877 else ppMode->flatnessThreshold= val;
878 }
879 }
880 else if(filters[i].mask == FORCE_QUANT){
881 int o;
882 ppMode->forcedQuant= 15;
883
884 for(o=0; options[o]!=NULL && o<1; o++){
885 char *tail;
886 int val= strtol(options[o], &tail, 0);
887 if(tail==options[o]) break;
888
889 numOfUnknownOptions--;
890 ppMode->forcedQuant= val;
891 }
892 }
893 }
bb270c08 894 }
16e0bf73
DB
895 if(!filterNameOk) ppMode->error++;
896 ppMode->error += numOfUnknownOptions;
897 }
898
899 av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
900 if(ppMode->error){
901 av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
902 av_free(ppMode);
903 return NULL;
904 }
905 return ppMode;
911879d1
MN
906}
907
c41d972d 908void pp_free_mode(pp_mode_t *mode){
6ab6c7c3 909 av_free(mode);
c41d972d
MN
910}
911
/**
 * Replace the buffer *p by a new one obtained from av_mallocz().
 *
 * The old buffer is freed first; its contents are not carried over.
 *
 * @param p         address of the buffer pointer to replace
 * @param alignment accepted for the callers' benefit, not used here
 * @param size      size of the new buffer in bytes
 */
static void reallocAlign(void **p, int alignment, int size)
{
    void *fresh;

    av_free(*p);
    fresh = av_mallocz(size);
    *p = fresh;
}
916
0426af31 917static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
16e0bf73
DB
918 int mbWidth = (width+15)>>4;
919 int mbHeight= (height+15)>>4;
920 int i;
921
922 c->stride= stride;
923 c->qpStride= qpStride;
924
925 reallocAlign((void **)&c->tempDst, 8, stride*24);
926 reallocAlign((void **)&c->tempSrc, 8, stride*24);
927 reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
928 reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
929 for(i=0; i<256; i++)
930 c->yHistogram[i]= width*height/64*15/256;
931
932 for(i=0; i<3; i++){
933 //Note: The +17*1024 is just there so i do not have to worry about r/w over the end.
aa089f6c
DB
934 reallocAlign((void **)&c->tempBlurred[i], 8, stride*mbHeight*16 + 17*1024);
935 reallocAlign((void **)&c->tempBlurredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
16e0bf73
DB
936 }
937
938 reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
939 reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
940 reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
941 reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
88c0bc7e
MN
942}
943
/**
 * Name callback referenced by the logging class of this file.
 *
 * @param ptr ignored
 * @return the constant string "postproc"
 */
static const char *context_to_name(void *ptr)
{
    const char *name = "postproc";

    (void)ptr;
    return name;
}
947
31bfd6f3 948static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
e7becfb2 949
88c0bc7e 950pp_context_t *pp_get_context(int width, int height, int cpuCaps){
16e0bf73
DB
951 PPContext *c= av_malloc(sizeof(PPContext));
952 int stride= (width+15)&(~15); //assumed / will realloc if needed
953 int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
954
955 memset(c, 0, sizeof(PPContext));
956 c->av_class = &av_codec_context_class;
957 c->cpuCaps= cpuCaps;
958 if(cpuCaps&PP_FORMAT){
959 c->hChromaSubSample= cpuCaps&0x3;
960 c->vChromaSubSample= (cpuCaps>>4)&0x3;
961 }else{
962 c->hChromaSubSample= 1;
963 c->vChromaSubSample= 1;
964 }
965
966 reallocBuffers(c, width, height, stride, qpStride);
967
968 c->frameNum=-1;
969
970 return c;
45b4f285
MN
971}
972
9cb54f43 973void pp_free_context(void *vc){
16e0bf73
DB
974 PPContext *c = (PPContext*)vc;
975 int i;
115329f1 976
aa089f6c
DB
977 for(i=0; i<3; i++) av_free(c->tempBlurred[i]);
978 for(i=0; i<3; i++) av_free(c->tempBlurredPast[i]);
115329f1 979
16e0bf73
DB
980 av_free(c->tempBlocks);
981 av_free(c->yHistogram);
982 av_free(c->tempDst);
983 av_free(c->tempSrc);
984 av_free(c->deintTemp);
985 av_free(c->stdQPTable);
986 av_free(c->nonBQPTable);
987 av_free(c->forcedQPTable);
115329f1 988
16e0bf73 989 memset(c, 0, sizeof(PPContext));
88c0bc7e 990
16e0bf73 991 av_free(c);
9c9e467d
MN
992}
993
6c51fd3f 994void pp_postprocess(const uint8_t * src[3], const int srcStride[3],
16e0bf73
DB
995 uint8_t * dst[3], const int dstStride[3],
996 int width, int height,
997 const QP_STORE_T *QP_store, int QPStride,
998 pp_mode_t *vm, void *vc, int pict_type)
911879d1 999{
16e0bf73
DB
1000 int mbWidth = (width+15)>>4;
1001 int mbHeight= (height+15)>>4;
1002 PPMode *mode = (PPMode*)vm;
1003 PPContext *c = (PPContext*)vc;
1004 int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
1005 int absQPStride = FFABS(QPStride);
1006
1007 // c->stride and c->QPStride are always positive
1008 if(c->stride < minStride || c->qpStride < absQPStride)
1009 reallocBuffers(c, width, height,
1010 FFMAX(minStride, c->stride),
1011 FFMAX(c->qpStride, absQPStride));
1012
1013 if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)){
1014 int i;
1015 QP_store= c->forcedQPTable;
1016 absQPStride = QPStride = 0;
1017 if(mode->lumMode & FORCE_QUANT)
1018 for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
1019 else
1020 for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
1021 }
0426af31 1022
16e0bf73
DB
1023 if(pict_type & PP_PICT_TYPE_QP2){
1024 int i;
1025 const int count= mbHeight * absQPStride;
1026 for(i=0; i<(count>>2); i++){
1027 ((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
bb270c08 1028 }
16e0bf73
DB
1029 for(i<<=2; i<count; i++){
1030 c->stdQPTable[i] = QP_store[i]>>1;
1031 }
1032 QP_store= c->stdQPTable;
1033 QPStride= absQPStride;
1034 }
1035
1036 if(0){
1037 int x,y;
1038 for(y=0; y<mbHeight; y++){
1039 for(x=0; x<mbWidth; x++){
e7becfb2 1040 av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
16e0bf73
DB
1041 }
1042 av_log(c, AV_LOG_INFO, "\n");
bb270c08 1043 }
e7becfb2 1044 av_log(c, AV_LOG_INFO, "\n");
16e0bf73
DB
1045 }
1046
1047 if((pict_type&7)!=3){
1048 if (QPStride >= 0){
1049 int i;
1050 const int count= mbHeight * QPStride;
1051 for(i=0; i<(count>>2); i++){
1052 ((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1053 }
1054 for(i<<=2; i<count; i++){
1055 c->nonBQPTable[i] = QP_store[i] & 0x3F;
1056 }
1057 } else {
1058 int i,j;
1059 for(i=0; i<mbHeight; i++) {
1060 for(j=0; j<absQPStride; j++) {
1061 c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
bb270c08 1062 }
16e0bf73 1063 }
bb270c08 1064 }
16e0bf73 1065 }
bb270c08 1066
16e0bf73
DB
1067 av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
1068 mode->lumMode, mode->chromMode);
bb270c08 1069
16e0bf73 1070 postProcess(src[0], srcStride[0], dst[0], dstStride[0],
bb270c08
DB
1071 width, height, QP_store, QPStride, 0, mode, c);
1072
16e0bf73
DB
1073 width = (width )>>c->hChromaSubSample;
1074 height = (height)>>c->vChromaSubSample;
1075
1076 if(mode->chromMode){
1077 postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1078 width, height, QP_store, QPStride, 1, mode, c);
1079 postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1080 width, height, QP_store, QPStride, 2, mode, c);
1081 }
1082 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
1083 linecpy(dst[1], src[1], height, srcStride[1]);
1084 linecpy(dst[2], src[2], height, srcStride[2]);
1085 }else{
1086 int y;
1087 for(y=0; y<height; y++){
1088 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1089 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
bb270c08 1090 }
16e0bf73 1091 }
911879d1
MN
1092}
1093