Make option time parsing functions take argument from options
[libav.git] / libpostproc / postprocess.c
CommitLineData
3057fa66 1/*
b78e7197
DB
2 * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
3 *
4 * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
5 *
ef85972b 6 * This file is part of FFmpeg.
b78e7197
DB
7 *
8 * FFmpeg is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
3057fa66 22
b304569a
MN
23/**
24 * @file postprocess.c
25 * postprocessing.
26 */
115329f1 27
3057fa66 28/*
bb270c08
DB
29 C MMX MMX2 3DNow AltiVec
30isVertDC Ec Ec Ec
31isVertMinMaxOk Ec Ec Ec
32doVertLowPass E e e Ec
33doVertDefFilter Ec Ec e e Ec
34isHorizDC Ec Ec Ec
35isHorizMinMaxOk a E Ec
36doHorizLowPass E e e Ec
37doHorizDefFilter Ec Ec e e Ec
38do_a_deblock Ec E Ec E
39deRing E e e* Ecp
40Vertical RKAlgo1 E a a
41Horizontal RKAlgo1 a a
42Vertical X1# a E E
43Horizontal X1# a E E
44LinIpolDeinterlace e E E*
45CubicIpolDeinterlace a e e*
46LinBlendDeinterlace e E E*
47MedianDeinterlace# E Ec Ec
48TempDeNoiser# E e e Ec
d5a1a995 49
2cab6401 50* I do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
755bfeab 51# more or less selfinvented filters so the exactness is not too meaningful
3057fa66 52E = Exact implementation
acced553 53e = almost exact implementation (slightly different rounding,...)
3057fa66
A
54a = alternative / approximate impl
55c = checked against the other implementations (-vo md5)
b0ac780a 56p = partially optimized, still some work to do
3057fa66
A
57*/
58
59/*
60TODO:
3057fa66 61reduce the time wasted on the mem transfer
3057fa66 62unroll stuff if instructions depend too much on the prior one
3057fa66 63move YScale thing to the end instead of fixing QP
13e00528 64write a faster and higher quality deblocking filter :)
d5a1a995 65make the mainloop more flexible (variable number of blocks at once
bb270c08 66 (the if/else stuff per block is slowing things down)
9f45d04d 67compare the quality & speed of all filters
9f45d04d 68split this huge file
8405b3fd 69optimize c versions
117e45b0 70try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
3057fa66 71...
13e00528
A
72*/
73
36b1b0bc 74//Changelog: use the Subversion log
3057fa66 75
9858f773 76#include "config.h"
6ab6c7c3 77#include "avutil.h"
3057fa66
A
78#include <inttypes.h>
79#include <stdio.h>
d5a1a995 80#include <stdlib.h>
911879d1 81#include <string.h>
dda87e9f
PL
82#ifdef HAVE_MALLOC_H
83#include <malloc.h>
84#endif
3057fa66 85//#undef HAVE_MMX2
13e00528 86//#define HAVE_3DNOW
3057fa66 87//#undef HAVE_MMX
cc9b0679 88//#undef ARCH_X86
7f16f6e6 89//#define DEBUG_BRIGHTNESS
13e00528 90#include "postprocess.h"
c41d972d 91#include "postprocess_internal.h"
bba9b16c 92
a7b2871c
RD
93#ifdef HAVE_ALTIVEC_H
94#include <altivec.h>
95#endif
96
911879d1
MN
97#define GET_MODE_BUFFER_SIZE 500
98#define OPTIONS_ARRAY_SIZE 10
9c9e467d
MN
99#define BLOCK_SIZE 8
100#define TEMP_STRIDE 8
101//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
911879d1 102
3cd52279 103#if defined(ARCH_X86)
2b858d0b
RD
104DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
105DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
106DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
107DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
108DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
109DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
110DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
111DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
b28daef8 112#endif
3057fa66 113
2722e362 114DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
3057fa66 115
9c9e467d 116
911879d1
MN
117static struct PPFilter filters[]=
118{
bb270c08
DB
119 {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK},
120 {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK},
121/* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER},
122 {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/
123 {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER},
124 {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER},
125 {"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK},
126 {"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK},
127 {"dr", "dering", 1, 5, 6, DERING},
128 {"al", "autolevels", 0, 1, 2, LEVEL_FIX},
129 {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
130 {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
131 {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
132 {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER},
133 {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER},
134 {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER},
135 {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER},
136 {"fq", "forcequant", 1, 0, 0, FORCE_QUANT},
137 {NULL, NULL,0,0,0,0} //End Marker
911879d1
MN
138};
139
/**
 * Alias table used by the mode-string parser: entries come in pairs
 * (alias name, filter string it expands to) and the list ends with NULL.
 * Both the strings and the pointer array are read-only.
 */
static const char * const replaceTable[]=
{
    "default",      "hb:a,vb:a,dr:a",
    "de",           "hb:a,vb:a,dr:a",
    "fast",         "h1:a,v1:a,dr:a",
    "fa",           "h1:a,v1:a,dr:a",
    "ac",           "ha:a:128:7,va:a,dr:a",
    NULL //End Marker
};
149
3057fa66 150
3cd52279 151#if defined(ARCH_X86)
3057fa66
A
152static inline void prefetchnta(void *p)
153{
bb270c08
DB
154 asm volatile( "prefetchnta (%0)\n\t"
155 : : "r" (p)
156 );
3057fa66
A
157}
158
159static inline void prefetcht0(void *p)
160{
bb270c08
DB
161 asm volatile( "prefetcht0 (%0)\n\t"
162 : : "r" (p)
163 );
3057fa66
A
164}
165
166static inline void prefetcht1(void *p)
167{
bb270c08
DB
168 asm volatile( "prefetcht1 (%0)\n\t"
169 : : "r" (p)
170 );
3057fa66
A
171}
172
173static inline void prefetcht2(void *p)
174{
bb270c08
DB
175 asm volatile( "prefetcht2 (%0)\n\t"
176 : : "r" (p)
177 );
3057fa66 178}
9a722af7 179#endif
3057fa66 180
cc9b0679 181// The horizontal Functions exist only in C cuz the MMX code is faster with vertical filters and transposing
3057fa66 182
cf5ec61d
MN
183/**
184 * Check if the given 8x8 Block is mostly "flat"
185 */
b0ac780a 186static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
cf5ec61d 187{
bb270c08
DB
188 int numEq= 0;
189 int y;
190 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
191 const int dcThreshold= dcOffset*2 + 1;
192
193 for(y=0; y<BLOCK_SIZE; y++)
194 {
195 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
196 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
197 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
198 if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
199 if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
200 if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
201 if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
202 src+= stride;
203 }
204 return numEq > c->ppMode.flatnessThreshold;
9c9e467d
MN
205}
206
207/**
208 * Check if the middle 8x8 Block in the given 8x16 block is flat
209 */
210static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){
bb270c08
DB
211 int numEq= 0;
212 int y;
213 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
214 const int dcThreshold= dcOffset*2 + 1;
215
216 src+= stride*4; // src points to begin of the 8x8 Block
217 for(y=0; y<BLOCK_SIZE-1; y++)
218 {
219 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
220 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
221 if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
222 if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
223 if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
224 if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
225 if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
226 if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
227 src+= stride;
228 }
229 return numEq > c->ppMode.flatnessThreshold;
cf5ec61d
MN
230}
231
/**
 * Cheap horizontal range check of an 8x8 block.
 * On every row one pair of pixels is sampled and the block is rejected as
 * soon as a pair differs by more than 2*QP. The sampled column pairs are
 * (0,5), (2,7), (4,1), (6,3) for rows 0..3 and repeat for rows 4..7.
 * @return 1 if all sampled pairs are within range, 0 otherwise
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{
    static const int colA[4]= {0, 2, 4, 6};
    static const int colB[4]= {5, 7, 1, 3};
    int row;

    for(row=0; row<8; row++)
    {
        const int k= row & 3;
        // single unsigned compare == (difference < -2*QP || difference > 2*QP)
        if((unsigned)(src[colA[k]] - src[colB[k]] + 2*QP) > 4*QP) return 0;
        src += stride;
    }
    return 1;
}
cf5ec61d 254
cb482d25
MN
255static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
256{
257#if 1
258#if 1
bb270c08
DB
259 int x;
260 src+= stride*4;
261 for(x=0; x<BLOCK_SIZE; x+=4)
262 {
263 if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0;
264 if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
265 if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
266 if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
267 }
cb482d25 268#else
bb270c08
DB
269 int x;
270 src+= stride*3;
271 for(x=0; x<BLOCK_SIZE; x++)
272 {
273 if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
274 }
cb482d25 275#endif
bb270c08 276 return 1;
cb482d25 277#else
bb270c08
DB
278 int x;
279 src+= stride*4;
280 for(x=0; x<BLOCK_SIZE; x++)
281 {
282 int min=255;
283 int max=0;
284 int y;
285 for(y=0; y<8; y++){
286 int v= src[x + y*stride];
287 if(v>max) max=v;
288 if(v<min) min=v;
289 }
290 if(max-min > 2*QP) return 0;
291 }
292 return 1;
cb482d25
MN
293#endif
294}
295
b0ac780a 296static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c){
bb270c08
DB
297 if( isHorizDC_C(src, stride, c) ){
298 if( isHorizMinMaxOk_C(src, stride, c->QP) )
299 return 1;
300 else
301 return 0;
302 }else{
303 return 2;
304 }
b0ac780a
MN
305}
306
cb482d25 307static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c){
bb270c08
DB
308 if( isVertDC_C(src, stride, c) ){
309 if( isVertMinMaxOk_C(src, stride, c->QP) )
310 return 1;
311 else
312 return 0;
313 }else{
314 return 2;
315 }
cf5ec61d
MN
316}
317
b0ac780a 318static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
cf5ec61d 319{
bb270c08
DB
320 int y;
321 for(y=0; y<BLOCK_SIZE; y++)
322 {
323 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
324
c26abfa5 325 if(FFABS(middleEnergy) < 8*c->QP)
bb270c08
DB
326 {
327 const int q=(dst[3] - dst[4])/2;
328 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
329 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
330
c26abfa5 331 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
8925915f 332 d= FFMAX(d, 0);
bb270c08
DB
333
334 d= (5*d + 32) >> 6;
02305ff3 335 d*= FFSIGN(-middleEnergy);
bb270c08
DB
336
337 if(q>0)
338 {
339 d= d<0 ? 0 : d;
340 d= d>q ? q : d;
341 }
342 else
343 {
344 d= d>0 ? 0 : d;
345 d= d<q ? q : d;
346 }
347
348 dst[3]-= d;
349 dst[4]+= d;
350 }
351 dst+= stride;
352 }
cf5ec61d
MN
353}
354
355/**
356 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
357 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
358 */
b0ac780a 359static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
cf5ec61d 360{
bb270c08
DB
361 int y;
362 for(y=0; y<BLOCK_SIZE; y++)
363 {
c26abfa5
DB
364 const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
365 const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
bb270c08
DB
366
367 int sums[10];
368 sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
369 sums[1] = sums[0] - first + dst[3];
370 sums[2] = sums[1] - first + dst[4];
371 sums[3] = sums[2] - first + dst[5];
372 sums[4] = sums[3] - first + dst[6];
373 sums[5] = sums[4] - dst[0] + dst[7];
374 sums[6] = sums[5] - dst[1] + last;
375 sums[7] = sums[6] - dst[2] + last;
376 sums[8] = sums[7] - dst[3] + last;
377 sums[9] = sums[8] - dst[4] + last;
378
379 dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
380 dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
381 dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
382 dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
383 dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
384 dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
385 dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
386 dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
387
388 dst+= stride;
389 }
cf5ec61d
MN
390}
391
4e4dcbc5 392/**
cc9b0679
MN
393 * Experimental Filter 1 (Horizontal)
394 * will not damage linear gradients
bd107136 395 * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
755bfeab
DB
396 * can only smooth blocks at the expected locations (it cannot smooth them if they did move)
397 * MMX2 version does correct clipping C version does not
cc9b0679 398 * not identical with the vertical one
4e4dcbc5 399 */
cc9b0679
MN
400static inline void horizX1Filter(uint8_t *src, int stride, int QP)
401{
bb270c08
DB
402 int y;
403 static uint64_t *lut= NULL;
404 if(lut==NULL)
405 {
406 int i;
6ab6c7c3 407 lut = av_malloc(256*8);
bb270c08
DB
408 for(i=0; i<256; i++)
409 {
410 int v= i < 128 ? 2*i : 2*(i-256);
117e45b0 411/*
cc9b0679 412//Simulate 112242211 9-Tap filter
bb270c08
DB
413 uint64_t a= (v/16) & 0xFF;
414 uint64_t b= (v/8) & 0xFF;
415 uint64_t c= (v/4) & 0xFF;
416 uint64_t d= (3*v/8) & 0xFF;
117e45b0 417*/
cc9b0679 418//Simulate piecewise linear interpolation
bb270c08
DB
419 uint64_t a= (v/16) & 0xFF;
420 uint64_t b= (v*3/16) & 0xFF;
421 uint64_t c= (v*5/16) & 0xFF;
422 uint64_t d= (7*v/16) & 0xFF;
423 uint64_t A= (0x100 - a)&0xFF;
424 uint64_t B= (0x100 - b)&0xFF;
425 uint64_t C= (0x100 - c)&0xFF;
426 uint64_t D= (0x100 - c)&0xFF;
427
428 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
429 (D<<24) | (C<<16) | (B<<8) | (A);
430 //lut[i] = (v<<32) | (v<<24);
431 }
432 }
433
434 for(y=0; y<BLOCK_SIZE; y++)
435 {
436 int a= src[1] - src[2];
437 int b= src[3] - src[4];
438 int c= src[5] - src[6];
439
c26abfa5 440 int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
bb270c08
DB
441
442 if(d < QP)
443 {
02305ff3 444 int v = d * FFSIGN(-b);
bb270c08
DB
445
446 src[1] +=v/8;
447 src[2] +=v/4;
448 src[3] +=3*v/8;
449 src[4] -=3*v/8;
450 src[5] -=v/4;
451 src[6] -=v/8;
452
453 }
454 src+=stride;
455 }
cc9b0679
MN
456}
457
12eebd26
MN
458/**
459 * accurate deblock filter
460 */
849f1035 461static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
bb270c08
DB
462 int y;
463 const int QP= c->QP;
464 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
465 const int dcThreshold= dcOffset*2 + 1;
12eebd26 466//START_TIMER
bb270c08
DB
467 src+= step*4; // src points to begin of the 8x8 Block
468 for(y=0; y<8; y++){
469 int numEq= 0;
470
471 if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
472 if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
473 if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
474 if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
475 if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
476 if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
477 if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
478 if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
479 if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
480 if(numEq > c->ppMode.flatnessThreshold){
481 int min, max, x;
482
483 if(src[0] > src[step]){
484 max= src[0];
485 min= src[step];
486 }else{
487 max= src[step];
488 min= src[0];
489 }
490 for(x=2; x<8; x+=2){
491 if(src[x*step] > src[(x+1)*step]){
492 if(src[x *step] > max) max= src[ x *step];
493 if(src[(x+1)*step] < min) min= src[(x+1)*step];
494 }else{
495 if(src[(x+1)*step] > max) max= src[(x+1)*step];
496 if(src[ x *step] < min) min= src[ x *step];
497 }
498 }
499 if(max-min < 2*QP){
c26abfa5
DB
500 const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
501 const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
bb270c08
DB
502
503 int sums[10];
504 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
505 sums[1] = sums[0] - first + src[3*step];
506 sums[2] = sums[1] - first + src[4*step];
507 sums[3] = sums[2] - first + src[5*step];
508 sums[4] = sums[3] - first + src[6*step];
509 sums[5] = sums[4] - src[0*step] + src[7*step];
510 sums[6] = sums[5] - src[1*step] + last;
511 sums[7] = sums[6] - src[2*step] + last;
512 sums[8] = sums[7] - src[3*step] + last;
513 sums[9] = sums[8] - src[4*step] + last;
514
515 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
516 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
517 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
518 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
519 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
520 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
521 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
522 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
523 }
524 }else{
525 const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
526
c26abfa5 527 if(FFABS(middleEnergy) < 8*QP)
bb270c08
DB
528 {
529 const int q=(src[3*step] - src[4*step])/2;
530 const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
531 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
532
c26abfa5 533 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
8925915f 534 d= FFMAX(d, 0);
bb270c08
DB
535
536 d= (5*d + 32) >> 6;
02305ff3 537 d*= FFSIGN(-middleEnergy);
bb270c08
DB
538
539 if(q>0)
540 {
541 d= d<0 ? 0 : d;
542 d= d>q ? q : d;
543 }
544 else
545 {
546 d= d>0 ? 0 : d;
547 d= d<q ? q : d;
548 }
549
550 src[3*step]-= d;
551 src[4*step]+= d;
552 }
553 }
554
555 src += stride;
556 }
12eebd26
MN
557/*if(step==16){
558 STOP_TIMER("step16")
559}else{
560 STOP_TIMER("stepX")
561}*/
562}
cc9b0679 563
e89952aa 564//Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
cc9b0679 565//Plain C versions
e89952aa
MN
566#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
567#define COMPILE_C
568#endif
569
b0ac780a
MN
570#ifdef HAVE_ALTIVEC
571#define COMPILE_ALTIVEC
b0ac780a 572#endif //HAVE_ALTIVEC
b0ac780a 573
3cd52279 574#if defined(ARCH_X86)
e89952aa
MN
575
576#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
577#define COMPILE_MMX
578#endif
579
580#if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
581#define COMPILE_MMX2
582#endif
583
584#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
585#define COMPILE_3DNOW
586#endif
3cd52279 587#endif /* defined(ARCH_X86) */
e89952aa
MN
588
589#undef HAVE_MMX
590#undef HAVE_MMX2
591#undef HAVE_3DNOW
b0ac780a 592#undef HAVE_ALTIVEC
e89952aa
MN
593
594#ifdef COMPILE_C
cc9b0679
MN
595#undef HAVE_MMX
596#undef HAVE_MMX2
597#undef HAVE_3DNOW
cc9b0679
MN
598#define RENAME(a) a ## _C
599#include "postprocess_template.c"
e89952aa 600#endif
cc9b0679 601
b0ac780a
MN
602#ifdef COMPILE_ALTIVEC
603#undef RENAME
604#define HAVE_ALTIVEC
605#define RENAME(a) a ## _altivec
606#include "postprocess_altivec_template.c"
607#include "postprocess_template.c"
608#endif
b0ac780a 609
cc9b0679 610//MMX versions
e89952aa 611#ifdef COMPILE_MMX
cc9b0679
MN
612#undef RENAME
613#define HAVE_MMX
614#undef HAVE_MMX2
615#undef HAVE_3DNOW
cc9b0679
MN
616#define RENAME(a) a ## _MMX
617#include "postprocess_template.c"
e89952aa 618#endif
cc9b0679
MN
619
620//MMX2 versions
e89952aa 621#ifdef COMPILE_MMX2
cc9b0679
MN
622#undef RENAME
623#define HAVE_MMX
624#define HAVE_MMX2
625#undef HAVE_3DNOW
cc9b0679
MN
626#define RENAME(a) a ## _MMX2
627#include "postprocess_template.c"
e89952aa 628#endif
cc9b0679
MN
629
630//3DNOW versions
e89952aa 631#ifdef COMPILE_3DNOW
cc9b0679
MN
632#undef RENAME
633#define HAVE_MMX
634#undef HAVE_MMX2
635#define HAVE_3DNOW
cc9b0679
MN
636#define RENAME(a) a ## _3DNow
637#include "postprocess_template.c"
e89952aa 638#endif
cc9b0679 639
755bfeab 640// minor note: the HAVE_xyz is messed up after that line so do not use it.
cc9b0679 641
6c51fd3f
MN
642static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
643 const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc)
cc9b0679 644{
bb270c08
DB
645 PPContext *c= (PPContext *)vc;
646 PPMode *ppMode= (PPMode *)vm;
647 c->ppMode= *ppMode; //FIXME
9c9e467d 648
755bfeab 649 // Using ifs here as they are faster than function pointers although the
216d513c 650 // difference would not be measurable here but it is much better because
755bfeab 651 // someone might exchange the CPU whithout restarting MPlayer ;)
e89952aa 652#ifdef RUNTIME_CPUDETECT
3cd52279 653#if defined(ARCH_X86)
216d513c 654 // ordered per speed fastest first
bb270c08
DB
655 if(c->cpuCaps & PP_CPU_CAPS_MMX2)
656 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
657 else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
658 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
659 else if(c->cpuCaps & PP_CPU_CAPS_MMX)
660 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
661 else
662 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
cc9b0679 663#else
b0ac780a 664#ifdef HAVE_ALTIVEC
71487254 665 if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
bb270c08 666 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
b0ac780a
MN
667 else
668#endif
bb270c08 669 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
be44a4d7 670#endif
e89952aa
MN
671#else //RUNTIME_CPUDETECT
672#ifdef HAVE_MMX2
bb270c08 673 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 674#elif defined (HAVE_3DNOW)
bb270c08 675 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 676#elif defined (HAVE_MMX)
bb270c08 677 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
b0ac780a 678#elif defined (HAVE_ALTIVEC)
bb270c08 679 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 680#else
bb270c08 681 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa
MN
682#endif
683#endif //!RUNTIME_CPUDETECT
117e45b0
MN
684}
685
cc9b0679 686//static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
bb270c08 687// QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
13e00528 688
/* -pp Command line Help
 * The long filter names listed here must match the longName entries of the
 * filters[] table exactly: the parser compares them case-sensitively.
 */
#if LIBPOSTPROC_VERSION_INT < (52<<16)
const char *const pp_help=
#else
const char pp_help[] =
#endif
"Available postprocessing filters:\n"
"Filters Options\n"
"short long name short long option Description\n"
"* * a autoq CPU power dependent enabler\n"
" c chrom chrominance filtering enabled\n"
" y nochrom chrominance filtering disabled\n"
" n noluma luma filtering disabled\n"
"hb hdeblock (2 threshold) horizontal deblocking filter\n"
" 1. difference factor: default=32, higher -> more deblocking\n"
" 2. flatness threshold: default=39, lower -> more deblocking\n"
" the h & v deblocking filters share these\n"
" so you can't set different thresholds for h / v\n"
"vb vdeblock (2 threshold) vertical deblocking filter\n"
"ha ahdeblock (2 threshold) horizontal deblocking filter\n"
"va avdeblock (2 threshold) vertical deblocking filter\n"
"h1 x1hdeblock experimental h deblock filter 1\n"
"v1 x1vdeblock experimental v deblock filter 1\n"
"dr dering deringing filter\n"
"al autolevels automatic brightness / contrast\n"
" f fullyrange stretch luminance to (0..255)\n"
"lb linblenddeint linear blend deinterlacer\n"
"li linipoldeint linear interpolating deinterlace\n"
"ci cubicipoldeint cubic interpolating deinterlacer\n"
"md mediandeint median deinterlacer\n"
"fd ffmpegdeint ffmpeg deinterlacer\n"
"l5 lowpass5 FIR lowpass deinterlacer\n"
"de default hb:a,vb:a,dr:a\n"
"fa fast h1:a,v1:a,dr:a\n"
"ac ha:a:128:7,va:a,dr:a\n"
"tn tmpnoise (3 threshold) temporal noise reducer\n"
" 1. <= 2. <= 3. larger -> stronger filtering\n"
"fq forcequant <quantizer> force quantizer\n"
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"\n"
;
911879d1 738
ca325343 739pp_mode_t *pp_get_mode_by_name_and_quality(const char *name, int quality)
911879d1 740{
bb270c08
DB
741 char temp[GET_MODE_BUFFER_SIZE];
742 char *p= temp;
a8988916
DP
743 static const char filterDelimiters[] = ",/";
744 static const char optionDelimiters[] = ":";
bb270c08
DB
745 struct PPMode *ppMode;
746 char *filterToken;
747
6ab6c7c3 748 ppMode= av_malloc(sizeof(PPMode));
bb270c08
DB
749
750 ppMode->lumMode= 0;
751 ppMode->chromMode= 0;
752 ppMode->maxTmpNoise[0]= 700;
753 ppMode->maxTmpNoise[1]= 1500;
754 ppMode->maxTmpNoise[2]= 3000;
755 ppMode->maxAllowedY= 234;
756 ppMode->minAllowedY= 16;
757 ppMode->baseDcDiff= 256/8;
758 ppMode->flatnessThreshold= 56-16-1;
759 ppMode->maxClippedThreshold= 0.01;
760 ppMode->error=0;
761
762 strncpy(temp, name, GET_MODE_BUFFER_SIZE);
763
e7becfb2 764 av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
bb270c08
DB
765
766 for(;;){
767 char *filterName;
768 int q= 1000000; //PP_QUALITY_MAX;
769 int chrom=-1;
770 int luma=-1;
771 char *option;
772 char *options[OPTIONS_ARRAY_SIZE];
773 int i;
774 int filterNameOk=0;
775 int numOfUnknownOptions=0;
776 int enable=1; //does the user want us to enabled or disabled the filter
777
778 filterToken= strtok(p, filterDelimiters);
779 if(filterToken == NULL) break;
780 p+= strlen(filterToken) + 1; // p points to next filterToken
781 filterName= strtok(filterToken, optionDelimiters);
e7becfb2 782 av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
bb270c08
DB
783
784 if(*filterName == '-')
785 {
786 enable=0;
787 filterName++;
788 }
789
790 for(;;){ //for all options
791 option= strtok(NULL, optionDelimiters);
792 if(option == NULL) break;
793
e7becfb2 794 av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
bb270c08
DB
795 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
796 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
797 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
798 else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
799 else
800 {
801 options[numOfUnknownOptions] = option;
802 numOfUnknownOptions++;
803 }
804 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
805 }
806 options[numOfUnknownOptions] = NULL;
807
808 /* replace stuff from the replace Table */
809 for(i=0; replaceTable[2*i]!=NULL; i++)
810 {
811 if(!strcmp(replaceTable[2*i], filterName))
812 {
813 int newlen= strlen(replaceTable[2*i + 1]);
814 int plen;
815 int spaceLeft;
816
817 if(p==NULL) p= temp, *p=0; //last filter
818 else p--, *p=','; //not last filter
819
820 plen= strlen(p);
821 spaceLeft= p - temp + plen;
822 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE)
823 {
824 ppMode->error++;
825 break;
826 }
827 memmove(p + newlen, p, plen+1);
828 memcpy(p, replaceTable[2*i + 1], newlen);
829 filterNameOk=1;
830 }
831 }
832
833 for(i=0; filters[i].shortName!=NULL; i++)
834 {
bb270c08
DB
835 if( !strcmp(filters[i].longName, filterName)
836 || !strcmp(filters[i].shortName, filterName))
837 {
838 ppMode->lumMode &= ~filters[i].mask;
839 ppMode->chromMode &= ~filters[i].mask;
840
841 filterNameOk=1;
842 if(!enable) break; // user wants to disable it
843
844 if(q >= filters[i].minLumQuality && luma)
845 ppMode->lumMode|= filters[i].mask;
846 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
847 if(q >= filters[i].minChromQuality)
848 ppMode->chromMode|= filters[i].mask;
849
850 if(filters[i].mask == LEVEL_FIX)
851 {
852 int o;
853 ppMode->minAllowedY= 16;
854 ppMode->maxAllowedY= 234;
855 for(o=0; options[o]!=NULL; o++)
856 {
857 if( !strcmp(options[o],"fullyrange")
858 ||!strcmp(options[o],"f"))
859 {
860 ppMode->minAllowedY= 0;
861 ppMode->maxAllowedY= 255;
862 numOfUnknownOptions--;
863 }
864 }
865 }
866 else if(filters[i].mask == TEMP_NOISE_FILTER)
867 {
868 int o;
869 int numOfNoises=0;
870
871 for(o=0; options[o]!=NULL; o++)
872 {
873 char *tail;
874 ppMode->maxTmpNoise[numOfNoises]=
875 strtol(options[o], &tail, 0);
876 if(tail!=options[o])
877 {
878 numOfNoises++;
879 numOfUnknownOptions--;
880 if(numOfNoises >= 3) break;
881 }
882 }
883 }
884 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK
885 || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK)
886 {
887 int o;
888
889 for(o=0; options[o]!=NULL && o<2; o++)
890 {
891 char *tail;
892 int val= strtol(options[o], &tail, 0);
893 if(tail==options[o]) break;
894
895 numOfUnknownOptions--;
896 if(o==0) ppMode->baseDcDiff= val;
897 else ppMode->flatnessThreshold= val;
898 }
899 }
900 else if(filters[i].mask == FORCE_QUANT)
901 {
902 int o;
903 ppMode->forcedQuant= 15;
904
905 for(o=0; options[o]!=NULL && o<1; o++)
906 {
907 char *tail;
908 int val= strtol(options[o], &tail, 0);
909 if(tail==options[o]) break;
910
911 numOfUnknownOptions--;
912 ppMode->forcedQuant= val;
913 }
914 }
915 }
916 }
917 if(!filterNameOk) ppMode->error++;
918 ppMode->error += numOfUnknownOptions;
919 }
920
e7becfb2 921 av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
bb270c08
DB
922 if(ppMode->error)
923 {
e7becfb2 924 av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
6ab6c7c3 925 av_free(ppMode);
bb270c08
DB
926 return NULL;
927 }
928 return ppMode;
911879d1
MN
929}
930
c41d972d 931void pp_free_mode(pp_mode_t *mode){
6ab6c7c3 932 av_free(mode);
c41d972d
MN
933}
934
/**
 * Replace the buffer *p by a freshly allocated one of 'size' bytes obtained
 * from av_mallocz(). The old buffer is freed first; its contents are not
 * carried over.
 * @param alignment not used by this implementation
 */
static void reallocAlign(void **p, int alignment, int size)
{
    void *fresh;

    av_free(*p);
    fresh= av_mallocz(size);
    *p= fresh;
}
939
0426af31 940static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
bb270c08
DB
941 int mbWidth = (width+15)>>4;
942 int mbHeight= (height+15)>>4;
943 int i;
944
945 c->stride= stride;
946 c->qpStride= qpStride;
947
948 reallocAlign((void **)&c->tempDst, 8, stride*24);
949 reallocAlign((void **)&c->tempSrc, 8, stride*24);
950 reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
951 reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
952 for(i=0; i<256; i++)
953 c->yHistogram[i]= width*height/64*15/256;
954
955 for(i=0; i<3; i++)
956 {
755bfeab 957 //Note: The +17*1024 is just there so i do not have to worry about r/w over the end.
bb270c08
DB
958 reallocAlign((void **)&c->tempBlured[i], 8, stride*mbHeight*16 + 17*1024);
959 reallocAlign((void **)&c->tempBluredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
960 }
961
962 reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
963 reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
964 reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
965 reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
88c0bc7e
MN
966}
967
e7becfb2
DB
/**
 * Name callback installed in the logging class of the postprocessing
 * context.
 *
 * @param ptr ignored
 * @return the constant string "postproc"
 */
static const char * context_to_name(void * ptr)
{
    static const char name[] = "postproc";

    (void)ptr; /* the name does not depend on the object */
    return name;
}
971
31bfd6f3 972static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
e7becfb2 973
88c0bc7e 974pp_context_t *pp_get_context(int width, int height, int cpuCaps){
6ab6c7c3 975 PPContext *c= av_malloc(sizeof(PPContext));
bb270c08
DB
976 int stride= (width+15)&(~15); //assumed / will realloc if needed
977 int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
115329f1 978
bb270c08 979 memset(c, 0, sizeof(PPContext));
e7becfb2 980 c->av_class = &av_codec_context_class;
bb270c08
DB
981 c->cpuCaps= cpuCaps;
982 if(cpuCaps&PP_FORMAT){
983 c->hChromaSubSample= cpuCaps&0x3;
984 c->vChromaSubSample= (cpuCaps>>4)&0x3;
985 }else{
986 c->hChromaSubSample= 1;
987 c->vChromaSubSample= 1;
988 }
88c0bc7e 989
bb270c08 990 reallocBuffers(c, width, height, stride, qpStride);
115329f1 991
bb270c08 992 c->frameNum=-1;
45b4f285 993
bb270c08 994 return c;
45b4f285
MN
995}
996
9cb54f43 997void pp_free_context(void *vc){
bb270c08
DB
998 PPContext *c = (PPContext*)vc;
999 int i;
115329f1 1000
6ab6c7c3
LB
1001 for(i=0; i<3; i++) av_free(c->tempBlured[i]);
1002 for(i=0; i<3; i++) av_free(c->tempBluredPast[i]);
115329f1 1003
6ab6c7c3
LB
1004 av_free(c->tempBlocks);
1005 av_free(c->yHistogram);
1006 av_free(c->tempDst);
1007 av_free(c->tempSrc);
1008 av_free(c->deintTemp);
1009 av_free(c->stdQPTable);
1010 av_free(c->nonBQPTable);
1011 av_free(c->forcedQPTable);
115329f1 1012
bb270c08 1013 memset(c, 0, sizeof(PPContext));
88c0bc7e 1014
6ab6c7c3 1015 av_free(c);
9c9e467d
MN
1016}
1017
6c51fd3f
MN
1018void pp_postprocess(const uint8_t * src[3], const int srcStride[3],
1019 uint8_t * dst[3], const int dstStride[3],
ec487e5d 1020 int width, int height,
6c51fd3f 1021 const QP_STORE_T *QP_store, int QPStride,
bb270c08 1022 pp_mode_t *vm, void *vc, int pict_type)
911879d1 1023{
bb270c08
DB
1024 int mbWidth = (width+15)>>4;
1025 int mbHeight= (height+15)>>4;
1026 PPMode *mode = (PPMode*)vm;
1027 PPContext *c = (PPContext*)vc;
c26abfa5
DB
1028 int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
1029 int absQPStride = FFABS(QPStride);
bb270c08
DB
1030
1031 // c->stride and c->QPStride are always positive
1032 if(c->stride < minStride || c->qpStride < absQPStride)
1033 reallocBuffers(c, width, height,
8925915f
DB
1034 FFMAX(minStride, c->stride),
1035 FFMAX(c->qpStride, absQPStride));
bb270c08
DB
1036
1037 if(QP_store==NULL || (mode->lumMode & FORCE_QUANT))
1038 {
1039 int i;
1040 QP_store= c->forcedQPTable;
1041 absQPStride = QPStride = 0;
1042 if(mode->lumMode & FORCE_QUANT)
6c51fd3f 1043 for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
bb270c08 1044 else
6c51fd3f 1045 for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
bb270c08 1046 }
0426af31 1047
bb270c08
DB
1048 if(pict_type & PP_PICT_TYPE_QP2){
1049 int i;
1050 const int count= mbHeight * absQPStride;
1051 for(i=0; i<(count>>2); i++){
6c51fd3f 1052 ((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
bb270c08
DB
1053 }
1054 for(i<<=2; i<count; i++){
1055 c->stdQPTable[i] = QP_store[i]>>1;
1056 }
0426af31 1057 QP_store= c->stdQPTable;
bb270c08
DB
1058 QPStride= absQPStride;
1059 }
0426af31 1060
ec487e5d
MN
1061if(0){
1062int x,y;
1063for(y=0; y<mbHeight; y++){
bb270c08 1064 for(x=0; x<mbWidth; x++){
e7becfb2 1065 av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
bb270c08 1066 }
e7becfb2 1067 av_log(c, AV_LOG_INFO, "\n");
ec487e5d 1068}
e7becfb2 1069 av_log(c, AV_LOG_INFO, "\n");
ec487e5d 1070}
51e19dcc 1071
bb270c08
DB
1072 if((pict_type&7)!=3)
1073 {
1074 if (QPStride >= 0) {
1075 int i;
1076 const int count= mbHeight * QPStride;
1077 for(i=0; i<(count>>2); i++){
6c51fd3f 1078 ((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
bb270c08
DB
1079 }
1080 for(i<<=2; i<count; i++){
1081 c->nonBQPTable[i] = QP_store[i] & 0x3F;
1082 }
1083 } else {
1084 int i,j;
1085 for(i=0; i<mbHeight; i++) {
1086 for(j=0; j<absQPStride; j++) {
1087 c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1088 }
1089 }
1090 }
1091 }
1092
e7becfb2
DB
1093 av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
1094 mode->lumMode, mode->chromMode);
bb270c08
DB
1095
1096 postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1097 width, height, QP_store, QPStride, 0, mode, c);
1098
1099 width = (width )>>c->hChromaSubSample;
1100 height = (height)>>c->vChromaSubSample;
1101
1102 if(mode->chromMode)
1103 {
1104 postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1105 width, height, QP_store, QPStride, 1, mode, c);
1106 postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1107 width, height, QP_store, QPStride, 2, mode, c);
1108 }
1109 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2])
1110 {
1111 linecpy(dst[1], src[1], height, srcStride[1]);
1112 linecpy(dst[2], src[2], height, srcStride[2]);
1113 }
1114 else
1115 {
1116 int y;
1117 for(y=0; y<height; y++)
1118 {
1119 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1120 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
1121 }
1122 }
911879d1
MN
1123}
1124