spelling
[libav.git] / libpostproc / postprocess.c
CommitLineData
3057fa66 1/*
b78e7197
DB
2 * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
3 *
4 * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
5 *
ef85972b 6 * This file is part of FFmpeg.
b78e7197
DB
7 *
8 * FFmpeg is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
3057fa66 22
b304569a
MN
23/**
24 * @file postprocess.c
25 * postprocessing.
26 */
115329f1 27
3057fa66 28/*
bb270c08
DB
29 C MMX MMX2 3DNow AltiVec
30isVertDC Ec Ec Ec
31isVertMinMaxOk Ec Ec Ec
32doVertLowPass E e e Ec
33doVertDefFilter Ec Ec e e Ec
34isHorizDC Ec Ec Ec
35isHorizMinMaxOk a E Ec
36doHorizLowPass E e e Ec
37doHorizDefFilter Ec Ec e e Ec
38do_a_deblock Ec E Ec E
39deRing E e e* Ecp
40Vertical RKAlgo1 E a a
41Horizontal RKAlgo1 a a
42Vertical X1# a E E
43Horizontal X1# a E E
44LinIpolDeinterlace e E E*
45CubicIpolDeinterlace a e e*
46LinBlendDeinterlace e E E*
47MedianDeinterlace# E Ec Ec
48TempDeNoiser# E e e Ec
d5a1a995 49
755bfeab
DB
50* i do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
51# more or less selfinvented filters so the exactness is not too meaningful
3057fa66 52E = Exact implementation
acced553 53e = almost exact implementation (slightly different rounding,...)
3057fa66
A
54a = alternative / approximate impl
55c = checked against the other implementations (-vo md5)
b0ac780a 56p = partially optimized, still some work to do
3057fa66
A
57*/
58
59/*
60TODO:
3057fa66 61reduce the time wasted on the mem transfer
3057fa66 62unroll stuff if instructions depend too much on the prior one
3057fa66 63move YScale thing to the end instead of fixing QP
13e00528 64write a faster and higher quality deblocking filter :)
d5a1a995 65make the mainloop more flexible (variable number of blocks at once
bb270c08 66 (the if/else stuff per block is slowing things down)
9f45d04d 67compare the quality & speed of all filters
9f45d04d 68split this huge file
8405b3fd 69optimize c versions
117e45b0 70try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
3057fa66 71...
13e00528
A
72*/
73
36b1b0bc 74//Changelog: use the Subversion log
3057fa66 75
9858f773 76#include "config.h"
6ab6c7c3 77#include "avutil.h"
3057fa66
A
78#include <inttypes.h>
79#include <stdio.h>
d5a1a995 80#include <stdlib.h>
911879d1 81#include <string.h>
dda87e9f
PL
82#ifdef HAVE_MALLOC_H
83#include <malloc.h>
84#endif
3057fa66 85//#undef HAVE_MMX2
13e00528 86//#define HAVE_3DNOW
3057fa66 87//#undef HAVE_MMX
cc9b0679 88//#undef ARCH_X86
7f16f6e6 89//#define DEBUG_BRIGHTNESS
13e00528 90#include "postprocess.h"
c41d972d 91#include "postprocess_internal.h"
bba9b16c 92
a7b2871c
RD
93#ifdef HAVE_ALTIVEC_H
94#include <altivec.h>
95#endif
96
911879d1
MN
97#define GET_MODE_BUFFER_SIZE 500
98#define OPTIONS_ARRAY_SIZE 10
9c9e467d
MN
99#define BLOCK_SIZE 8
100#define TEMP_STRIDE 8
101//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
911879d1 102
3cd52279 103#if defined(ARCH_X86)
0bda7817
RD
104static DECLARE_ALIGNED(8, uint64_t attribute_used, w05)= 0x0005000500050005LL;
105static DECLARE_ALIGNED(8, uint64_t attribute_used, w04)= 0x0004000400040004LL;
106static DECLARE_ALIGNED(8, uint64_t attribute_used, w20)= 0x0020002000200020LL;
107static DECLARE_ALIGNED(8, uint64_t attribute_used, b00)= 0x0000000000000000LL;
108static DECLARE_ALIGNED(8, uint64_t attribute_used, b01)= 0x0101010101010101LL;
109static DECLARE_ALIGNED(8, uint64_t attribute_used, b02)= 0x0202020202020202LL;
110static DECLARE_ALIGNED(8, uint64_t attribute_used, b08)= 0x0808080808080808LL;
111static DECLARE_ALIGNED(8, uint64_t attribute_used, b80)= 0x8080808080808080LL;
b28daef8 112#endif
3057fa66 113
134eb1e5
MN
114static uint8_t clip_table[3*256];
115static uint8_t * const clip_tab= clip_table + 256;
116
3f1d4e96 117static const int attribute_used deringThreshold= 20;
3057fa66 118
9c9e467d 119
911879d1
MN
120static struct PPFilter filters[]=
121{
bb270c08
DB
122 {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK},
123 {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK},
124/* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER},
125 {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/
126 {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER},
127 {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER},
128 {"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK},
129 {"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK},
130 {"dr", "dering", 1, 5, 6, DERING},
131 {"al", "autolevels", 0, 1, 2, LEVEL_FIX},
132 {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
133 {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
134 {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
135 {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER},
136 {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER},
137 {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER},
138 {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER},
139 {"fq", "forcequant", 1, 0, 0, FORCE_QUANT},
140 {NULL, NULL,0,0,0,0} //End Marker
911879d1
MN
141};
142
/**
 * Alias table used by the mode parser: entries come in pairs
 * (alias name, filter string it is replaced with); a single NULL ends the list.
 */
static const char *replaceTable[]=
{
    "default", "hdeblock:a,vdeblock:a,dering:a",
    "de",      "hdeblock:a,vdeblock:a,dering:a",
    "fast",    "x1hdeblock:a,x1vdeblock:a,dering:a",
    "fa",      "x1hdeblock:a,x1vdeblock:a,dering:a",
    "ac",      "ha:a:128:7,va:a,dering:a",
    NULL //End Marker
};
152
3057fa66 153
3cd52279 154#if defined(ARCH_X86)
3057fa66
A
/**
 * Emit the x86 "prefetchnta" instruction for the address p.
 */
static inline void prefetchnta(void *p)
{
    __asm__ __volatile__("prefetchnta (%0)\n\t" : : "r" (p));
}
161
/**
 * Emit the x86 "prefetcht0" instruction for the address p.
 */
static inline void prefetcht0(void *p)
{
    __asm__ __volatile__("prefetcht0 (%0)\n\t" : : "r" (p));
}
168
/**
 * Emit the x86 "prefetcht1" instruction for the address p.
 */
static inline void prefetcht1(void *p)
{
    __asm__ __volatile__("prefetcht1 (%0)\n\t" : : "r" (p));
}
175
/**
 * Emit the x86 "prefetcht2" instruction for the address p.
 */
static inline void prefetcht2(void *p)
{
    __asm__ __volatile__("prefetcht2 (%0)\n\t" : : "r" (p));
}
9a722af7 182#endif
3057fa66 183
cc9b0679 184// The horizontal functions exist only in C because the MMX code is faster with vertical filters and transposing
3057fa66 185
cf5ec61d
MN
186/**
187 * Check if the given 8x8 Block is mostly "flat"
188 */
b0ac780a 189static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
cf5ec61d 190{
bb270c08
DB
191 int numEq= 0;
192 int y;
193 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
194 const int dcThreshold= dcOffset*2 + 1;
195
196 for(y=0; y<BLOCK_SIZE; y++)
197 {
198 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
199 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
200 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
201 if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
202 if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
203 if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
204 if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
205 src+= stride;
206 }
207 return numEq > c->ppMode.flatnessThreshold;
9c9e467d
MN
208}
209
210/**
211 * Check if the middle 8x8 Block in the given 8x16 block is flat
212 */
213static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){
bb270c08
DB
214 int numEq= 0;
215 int y;
216 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
217 const int dcThreshold= dcOffset*2 + 1;
218
219 src+= stride*4; // src points to begin of the 8x8 Block
220 for(y=0; y<BLOCK_SIZE-1; y++)
221 {
222 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
223 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
224 if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
225 if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
226 if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
227 if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
228 if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
229 if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
230 src+= stride;
231 }
232 return numEq > c->ppMode.flatnessThreshold;
cf5ec61d
MN
233}
234
/**
 * Cheap horizontal range test on an 8x8 block.
 *
 * One pixel pair is sampled per row (the column pattern repeats every four
 * rows); the test fails as soon as a sampled pair differs by more than 2*QP.
 *
 * @param src    first pixel of the block
 * @param stride distance between two rows
 * @param QP     quantizer the threshold is derived from
 * @return 1 if all sampled pairs are within range, 0 otherwise
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{
    static const uint8_t firstCol[4]  = { 0, 2, 4, 6 };
    static const uint8_t secondCol[4] = { 5, 7, 1, 3 };
    int row;

    for (row = 0; row < 8; row++) {
        const int k = row & 3;

        // unsigned compare: true when the difference is outside [-2*QP, 2*QP]
        if ((unsigned)(src[firstCol[k]] - src[secondCol[k]] + 2*QP) > 4*QP)
            return 0;
        src += stride;
    }
    return 1;
}
cf5ec61d 257
cb482d25
MN
258static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
259{
260#if 1
261#if 1
bb270c08
DB
262 int x;
263 src+= stride*4;
264 for(x=0; x<BLOCK_SIZE; x+=4)
265 {
266 if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0;
267 if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
268 if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
269 if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
270 }
cb482d25 271#else
bb270c08
DB
272 int x;
273 src+= stride*3;
274 for(x=0; x<BLOCK_SIZE; x++)
275 {
276 if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
277 }
cb482d25 278#endif
bb270c08 279 return 1;
cb482d25 280#else
bb270c08
DB
281 int x;
282 src+= stride*4;
283 for(x=0; x<BLOCK_SIZE; x++)
284 {
285 int min=255;
286 int max=0;
287 int y;
288 for(y=0; y<8; y++){
289 int v= src[x + y*stride];
290 if(v>max) max=v;
291 if(v<min) min=v;
292 }
293 if(max-min > 2*QP) return 0;
294 }
295 return 1;
cb482d25
MN
296#endif
297}
298
b0ac780a 299static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c){
bb270c08
DB
300 if( isHorizDC_C(src, stride, c) ){
301 if( isHorizMinMaxOk_C(src, stride, c->QP) )
302 return 1;
303 else
304 return 0;
305 }else{
306 return 2;
307 }
b0ac780a
MN
308}
309
cb482d25 310static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c){
bb270c08
DB
311 if( isVertDC_C(src, stride, c) ){
312 if( isVertMinMaxOk_C(src, stride, c->QP) )
313 return 1;
314 else
315 return 0;
316 }else{
317 return 2;
318 }
cf5ec61d
MN
319}
320
b0ac780a 321static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
cf5ec61d 322{
bb270c08
DB
323 int y;
324 for(y=0; y<BLOCK_SIZE; y++)
325 {
326 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
327
c26abfa5 328 if(FFABS(middleEnergy) < 8*c->QP)
bb270c08
DB
329 {
330 const int q=(dst[3] - dst[4])/2;
331 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
332 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
333
c26abfa5 334 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
8925915f 335 d= FFMAX(d, 0);
bb270c08
DB
336
337 d= (5*d + 32) >> 6;
02305ff3 338 d*= FFSIGN(-middleEnergy);
bb270c08
DB
339
340 if(q>0)
341 {
342 d= d<0 ? 0 : d;
343 d= d>q ? q : d;
344 }
345 else
346 {
347 d= d>0 ? 0 : d;
348 d= d<q ? q : d;
349 }
350
351 dst[3]-= d;
352 dst[4]+= d;
353 }
354 dst+= stride;
355 }
cf5ec61d
MN
356}
357
358/**
359 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
360 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
361 */
b0ac780a 362static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
cf5ec61d 363{
bb270c08
DB
364 int y;
365 for(y=0; y<BLOCK_SIZE; y++)
366 {
c26abfa5
DB
367 const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
368 const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
bb270c08
DB
369
370 int sums[10];
371 sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
372 sums[1] = sums[0] - first + dst[3];
373 sums[2] = sums[1] - first + dst[4];
374 sums[3] = sums[2] - first + dst[5];
375 sums[4] = sums[3] - first + dst[6];
376 sums[5] = sums[4] - dst[0] + dst[7];
377 sums[6] = sums[5] - dst[1] + last;
378 sums[7] = sums[6] - dst[2] + last;
379 sums[8] = sums[7] - dst[3] + last;
380 sums[9] = sums[8] - dst[4] + last;
381
382 dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
383 dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
384 dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
385 dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
386 dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
387 dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
388 dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
389 dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
390
391 dst+= stride;
392 }
cf5ec61d
MN
393}
394
4e4dcbc5 395/**
cc9b0679
MN
396 * Experimental Filter 1 (Horizontal)
397 * will not damage linear gradients
398 * Flat blocks should look like they where passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
755bfeab
DB
399 * can only smooth blocks at the expected locations (it cannot smooth them if they did move)
400 * MMX2 version does correct clipping C version does not
cc9b0679 401 * not identical with the vertical one
4e4dcbc5 402 */
cc9b0679
MN
403static inline void horizX1Filter(uint8_t *src, int stride, int QP)
404{
bb270c08
DB
405 int y;
406 static uint64_t *lut= NULL;
407 if(lut==NULL)
408 {
409 int i;
6ab6c7c3 410 lut = av_malloc(256*8);
bb270c08
DB
411 for(i=0; i<256; i++)
412 {
413 int v= i < 128 ? 2*i : 2*(i-256);
117e45b0 414/*
cc9b0679 415//Simulate 112242211 9-Tap filter
bb270c08
DB
416 uint64_t a= (v/16) & 0xFF;
417 uint64_t b= (v/8) & 0xFF;
418 uint64_t c= (v/4) & 0xFF;
419 uint64_t d= (3*v/8) & 0xFF;
117e45b0 420*/
cc9b0679 421//Simulate piecewise linear interpolation
bb270c08
DB
422 uint64_t a= (v/16) & 0xFF;
423 uint64_t b= (v*3/16) & 0xFF;
424 uint64_t c= (v*5/16) & 0xFF;
425 uint64_t d= (7*v/16) & 0xFF;
426 uint64_t A= (0x100 - a)&0xFF;
427 uint64_t B= (0x100 - b)&0xFF;
428 uint64_t C= (0x100 - c)&0xFF;
429 uint64_t D= (0x100 - c)&0xFF;
430
431 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
432 (D<<24) | (C<<16) | (B<<8) | (A);
433 //lut[i] = (v<<32) | (v<<24);
434 }
435 }
436
437 for(y=0; y<BLOCK_SIZE; y++)
438 {
439 int a= src[1] - src[2];
440 int b= src[3] - src[4];
441 int c= src[5] - src[6];
442
c26abfa5 443 int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
bb270c08
DB
444
445 if(d < QP)
446 {
02305ff3 447 int v = d * FFSIGN(-b);
bb270c08
DB
448
449 src[1] +=v/8;
450 src[2] +=v/4;
451 src[3] +=3*v/8;
452 src[4] -=3*v/8;
453 src[5] -=v/4;
454 src[6] -=v/8;
455
456 }
457 src+=stride;
458 }
cc9b0679
MN
459}
460
12eebd26
MN
461/**
462 * accurate deblock filter
463 */
849f1035 464static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
bb270c08
DB
465 int y;
466 const int QP= c->QP;
467 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
468 const int dcThreshold= dcOffset*2 + 1;
12eebd26 469//START_TIMER
bb270c08
DB
470 src+= step*4; // src points to begin of the 8x8 Block
471 for(y=0; y<8; y++){
472 int numEq= 0;
473
474 if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
475 if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
476 if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
477 if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
478 if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
479 if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
480 if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
481 if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
482 if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
483 if(numEq > c->ppMode.flatnessThreshold){
484 int min, max, x;
485
486 if(src[0] > src[step]){
487 max= src[0];
488 min= src[step];
489 }else{
490 max= src[step];
491 min= src[0];
492 }
493 for(x=2; x<8; x+=2){
494 if(src[x*step] > src[(x+1)*step]){
495 if(src[x *step] > max) max= src[ x *step];
496 if(src[(x+1)*step] < min) min= src[(x+1)*step];
497 }else{
498 if(src[(x+1)*step] > max) max= src[(x+1)*step];
499 if(src[ x *step] < min) min= src[ x *step];
500 }
501 }
502 if(max-min < 2*QP){
c26abfa5
DB
503 const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
504 const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
bb270c08
DB
505
506 int sums[10];
507 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
508 sums[1] = sums[0] - first + src[3*step];
509 sums[2] = sums[1] - first + src[4*step];
510 sums[3] = sums[2] - first + src[5*step];
511 sums[4] = sums[3] - first + src[6*step];
512 sums[5] = sums[4] - src[0*step] + src[7*step];
513 sums[6] = sums[5] - src[1*step] + last;
514 sums[7] = sums[6] - src[2*step] + last;
515 sums[8] = sums[7] - src[3*step] + last;
516 sums[9] = sums[8] - src[4*step] + last;
517
518 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
519 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
520 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
521 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
522 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
523 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
524 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
525 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
526 }
527 }else{
528 const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
529
c26abfa5 530 if(FFABS(middleEnergy) < 8*QP)
bb270c08
DB
531 {
532 const int q=(src[3*step] - src[4*step])/2;
533 const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
534 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
535
c26abfa5 536 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
8925915f 537 d= FFMAX(d, 0);
bb270c08
DB
538
539 d= (5*d + 32) >> 6;
02305ff3 540 d*= FFSIGN(-middleEnergy);
bb270c08
DB
541
542 if(q>0)
543 {
544 d= d<0 ? 0 : d;
545 d= d>q ? q : d;
546 }
547 else
548 {
549 d= d>0 ? 0 : d;
550 d= d<q ? q : d;
551 }
552
553 src[3*step]-= d;
554 src[4*step]+= d;
555 }
556 }
557
558 src += stride;
559 }
12eebd26
MN
560/*if(step==16){
561 STOP_TIMER("step16")
562}else{
563 STOP_TIMER("stepX")
564}*/
565}
cc9b0679 566
e89952aa 567//Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
cc9b0679 568//Plain C versions
e89952aa
MN
569#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
570#define COMPILE_C
571#endif
572
b0ac780a
MN
573#ifdef ARCH_POWERPC
574#ifdef HAVE_ALTIVEC
575#define COMPILE_ALTIVEC
b0ac780a
MN
576#endif //HAVE_ALTIVEC
577#endif //ARCH_POWERPC
578
3cd52279 579#if defined(ARCH_X86)
e89952aa
MN
580
581#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
582#define COMPILE_MMX
583#endif
584
585#if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
586#define COMPILE_MMX2
587#endif
588
589#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
590#define COMPILE_3DNOW
591#endif
3cd52279 592#endif /* defined(ARCH_X86) */
e89952aa
MN
593
594#undef HAVE_MMX
595#undef HAVE_MMX2
596#undef HAVE_3DNOW
b0ac780a 597#undef HAVE_ALTIVEC
e89952aa
MN
598
599#ifdef COMPILE_C
cc9b0679
MN
600#undef HAVE_MMX
601#undef HAVE_MMX2
602#undef HAVE_3DNOW
cc9b0679
MN
603#define RENAME(a) a ## _C
604#include "postprocess_template.c"
e89952aa 605#endif
cc9b0679 606
b0ac780a
MN
607#ifdef ARCH_POWERPC
608#ifdef COMPILE_ALTIVEC
609#undef RENAME
610#define HAVE_ALTIVEC
611#define RENAME(a) a ## _altivec
612#include "postprocess_altivec_template.c"
613#include "postprocess_template.c"
614#endif
615#endif //ARCH_POWERPC
616
cc9b0679 617//MMX versions
e89952aa 618#ifdef COMPILE_MMX
cc9b0679
MN
619#undef RENAME
620#define HAVE_MMX
621#undef HAVE_MMX2
622#undef HAVE_3DNOW
cc9b0679
MN
623#define RENAME(a) a ## _MMX
624#include "postprocess_template.c"
e89952aa 625#endif
cc9b0679
MN
626
627//MMX2 versions
e89952aa 628#ifdef COMPILE_MMX2
cc9b0679
MN
629#undef RENAME
630#define HAVE_MMX
631#define HAVE_MMX2
632#undef HAVE_3DNOW
cc9b0679
MN
633#define RENAME(a) a ## _MMX2
634#include "postprocess_template.c"
e89952aa 635#endif
cc9b0679
MN
636
637//3DNOW versions
e89952aa 638#ifdef COMPILE_3DNOW
cc9b0679
MN
639#undef RENAME
640#define HAVE_MMX
641#undef HAVE_MMX2
642#define HAVE_3DNOW
cc9b0679
MN
643#define RENAME(a) a ## _3DNow
644#include "postprocess_template.c"
e89952aa 645#endif
cc9b0679 646
755bfeab 647// minor note: the HAVE_xyz is messed up after that line so do not use it.
cc9b0679
MN
648
649static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
bb270c08 650 QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc)
cc9b0679 651{
bb270c08
DB
652 PPContext *c= (PPContext *)vc;
653 PPMode *ppMode= (PPMode *)vm;
654 c->ppMode= *ppMode; //FIXME
9c9e467d 655
755bfeab
DB
656 // Using ifs here as they are faster than function pointers although the
657 // difference would not be measureable here but it is much better because
658 // someone might exchange the CPU whithout restarting MPlayer ;)
e89952aa 659#ifdef RUNTIME_CPUDETECT
3cd52279 660#if defined(ARCH_X86)
bb270c08
DB
661 // ordered per speed fasterst first
662 if(c->cpuCaps & PP_CPU_CAPS_MMX2)
663 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
664 else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
665 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
666 else if(c->cpuCaps & PP_CPU_CAPS_MMX)
667 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
668 else
669 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
cc9b0679 670#else
b0ac780a
MN
671#ifdef ARCH_POWERPC
672#ifdef HAVE_ALTIVEC
71487254 673 if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
bb270c08 674 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
b0ac780a
MN
675 else
676#endif
677#endif
bb270c08 678 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
be44a4d7 679#endif
e89952aa
MN
680#else //RUNTIME_CPUDETECT
681#ifdef HAVE_MMX2
bb270c08 682 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 683#elif defined (HAVE_3DNOW)
bb270c08 684 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 685#elif defined (HAVE_MMX)
bb270c08 686 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
b0ac780a 687#elif defined (HAVE_ALTIVEC)
bb270c08 688 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 689#else
bb270c08 690 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa
MN
691#endif
692#endif //!RUNTIME_CPUDETECT
117e45b0
MN
693}
694
cc9b0679 695//static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
bb270c08 696// QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
13e00528 697
/* -pp Command line Help
 *
 * The filter and option names listed here must match what
 * pp_get_mode_by_name_and_quality() accepts (it compares case-sensitively
 * against the filters[] table): the long names of "ha"/"va" are
 * "ahdeblock"/"avdeblock" and the long name of "fq" is "forcequant".
 */
char *pp_help=
"Available postprocessing filters:\n"
"Filters Options\n"
"short long name short long option Description\n"
"* * a autoq CPU power dependent enabler\n"
" c chrom chrominance filtering enabled\n"
" y nochrom chrominance filtering disabled\n"
" n noluma luma filtering disabled\n"
"hb hdeblock (2 threshold) horizontal deblocking filter\n"
" 1. difference factor: default=32, higher -> more deblocking\n"
" 2. flatness threshold: default=39, lower -> more deblocking\n"
" the h & v deblocking filters share these\n"
" so you can't set different thresholds for h / v\n"
"vb vdeblock (2 threshold) vertical deblocking filter\n"
"ha ahdeblock (2 threshold) horizontal deblocking filter\n"
"va avdeblock (2 threshold) vertical deblocking filter\n"
"h1 x1hdeblock experimental h deblock filter 1\n"
"v1 x1vdeblock experimental v deblock filter 1\n"
"dr dering deringing filter\n"
"al autolevels automatic brightness / contrast\n"
" f fullyrange stretch luminance to (0..255)\n"
"lb linblenddeint linear blend deinterlacer\n"
"li linipoldeint linear interpolating deinterlace\n"
"ci cubicipoldeint cubic interpolating deinterlacer\n"
"md mediandeint median deinterlacer\n"
"fd ffmpegdeint ffmpeg deinterlacer\n"
"l5 lowpass5 FIR lowpass deinterlacer\n"
"de default hb:a,vb:a,dr:a\n"
"fa fast h1:a,v1:a,dr:a\n"
"ac ha:a:128:7,va:a,dr:a\n"
"tn tmpnoise (3 threshold) temporal noise reducer\n"
" 1. <= 2. <= 3. larger -> stronger filtering\n"
"fq forcequant <quantizer> force quantizer\n"
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"\n"
;
911879d1 743
c41d972d 744pp_mode_t *pp_get_mode_by_name_and_quality(char *name, int quality)
911879d1 745{
bb270c08
DB
746 char temp[GET_MODE_BUFFER_SIZE];
747 char *p= temp;
7b49ce2e
SH
748 const char *filterDelimiters= ",/";
749 const char *optionDelimiters= ":";
bb270c08
DB
750 struct PPMode *ppMode;
751 char *filterToken;
752
6ab6c7c3 753 ppMode= av_malloc(sizeof(PPMode));
bb270c08
DB
754
755 ppMode->lumMode= 0;
756 ppMode->chromMode= 0;
757 ppMode->maxTmpNoise[0]= 700;
758 ppMode->maxTmpNoise[1]= 1500;
759 ppMode->maxTmpNoise[2]= 3000;
760 ppMode->maxAllowedY= 234;
761 ppMode->minAllowedY= 16;
762 ppMode->baseDcDiff= 256/8;
763 ppMode->flatnessThreshold= 56-16-1;
764 ppMode->maxClippedThreshold= 0.01;
765 ppMode->error=0;
766
767 strncpy(temp, name, GET_MODE_BUFFER_SIZE);
768
e7becfb2 769 av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
bb270c08
DB
770
771 for(;;){
772 char *filterName;
773 int q= 1000000; //PP_QUALITY_MAX;
774 int chrom=-1;
775 int luma=-1;
776 char *option;
777 char *options[OPTIONS_ARRAY_SIZE];
778 int i;
779 int filterNameOk=0;
780 int numOfUnknownOptions=0;
781 int enable=1; //does the user want us to enabled or disabled the filter
782
783 filterToken= strtok(p, filterDelimiters);
784 if(filterToken == NULL) break;
785 p+= strlen(filterToken) + 1; // p points to next filterToken
786 filterName= strtok(filterToken, optionDelimiters);
e7becfb2 787 av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
bb270c08
DB
788
789 if(*filterName == '-')
790 {
791 enable=0;
792 filterName++;
793 }
794
795 for(;;){ //for all options
796 option= strtok(NULL, optionDelimiters);
797 if(option == NULL) break;
798
e7becfb2 799 av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
bb270c08
DB
800 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
801 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
802 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
803 else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
804 else
805 {
806 options[numOfUnknownOptions] = option;
807 numOfUnknownOptions++;
808 }
809 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
810 }
811 options[numOfUnknownOptions] = NULL;
812
813 /* replace stuff from the replace Table */
814 for(i=0; replaceTable[2*i]!=NULL; i++)
815 {
816 if(!strcmp(replaceTable[2*i], filterName))
817 {
818 int newlen= strlen(replaceTable[2*i + 1]);
819 int plen;
820 int spaceLeft;
821
822 if(p==NULL) p= temp, *p=0; //last filter
823 else p--, *p=','; //not last filter
824
825 plen= strlen(p);
826 spaceLeft= p - temp + plen;
827 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE)
828 {
829 ppMode->error++;
830 break;
831 }
832 memmove(p + newlen, p, plen+1);
833 memcpy(p, replaceTable[2*i + 1], newlen);
834 filterNameOk=1;
835 }
836 }
837
838 for(i=0; filters[i].shortName!=NULL; i++)
839 {
bb270c08
DB
840 if( !strcmp(filters[i].longName, filterName)
841 || !strcmp(filters[i].shortName, filterName))
842 {
843 ppMode->lumMode &= ~filters[i].mask;
844 ppMode->chromMode &= ~filters[i].mask;
845
846 filterNameOk=1;
847 if(!enable) break; // user wants to disable it
848
849 if(q >= filters[i].minLumQuality && luma)
850 ppMode->lumMode|= filters[i].mask;
851 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
852 if(q >= filters[i].minChromQuality)
853 ppMode->chromMode|= filters[i].mask;
854
855 if(filters[i].mask == LEVEL_FIX)
856 {
857 int o;
858 ppMode->minAllowedY= 16;
859 ppMode->maxAllowedY= 234;
860 for(o=0; options[o]!=NULL; o++)
861 {
862 if( !strcmp(options[o],"fullyrange")
863 ||!strcmp(options[o],"f"))
864 {
865 ppMode->minAllowedY= 0;
866 ppMode->maxAllowedY= 255;
867 numOfUnknownOptions--;
868 }
869 }
870 }
871 else if(filters[i].mask == TEMP_NOISE_FILTER)
872 {
873 int o;
874 int numOfNoises=0;
875
876 for(o=0; options[o]!=NULL; o++)
877 {
878 char *tail;
879 ppMode->maxTmpNoise[numOfNoises]=
880 strtol(options[o], &tail, 0);
881 if(tail!=options[o])
882 {
883 numOfNoises++;
884 numOfUnknownOptions--;
885 if(numOfNoises >= 3) break;
886 }
887 }
888 }
889 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK
890 || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK)
891 {
892 int o;
893
894 for(o=0; options[o]!=NULL && o<2; o++)
895 {
896 char *tail;
897 int val= strtol(options[o], &tail, 0);
898 if(tail==options[o]) break;
899
900 numOfUnknownOptions--;
901 if(o==0) ppMode->baseDcDiff= val;
902 else ppMode->flatnessThreshold= val;
903 }
904 }
905 else if(filters[i].mask == FORCE_QUANT)
906 {
907 int o;
908 ppMode->forcedQuant= 15;
909
910 for(o=0; options[o]!=NULL && o<1; o++)
911 {
912 char *tail;
913 int val= strtol(options[o], &tail, 0);
914 if(tail==options[o]) break;
915
916 numOfUnknownOptions--;
917 ppMode->forcedQuant= val;
918 }
919 }
920 }
921 }
922 if(!filterNameOk) ppMode->error++;
923 ppMode->error += numOfUnknownOptions;
924 }
925
e7becfb2 926 av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
bb270c08
DB
927 if(ppMode->error)
928 {
e7becfb2 929 av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
6ab6c7c3 930 av_free(ppMode);
bb270c08
DB
931 return NULL;
932 }
933 return ppMode;
911879d1
MN
934}
935
c41d972d 936void pp_free_mode(pp_mode_t *mode){
6ab6c7c3 937 av_free(mode);
c41d972d
MN
938}
939
/**
 * Replace the buffer *p by a fresh one of size bytes obtained from
 * av_mallocz(). The old buffer is freed first, its contents are not kept.
 *
 * @param p         address of the buffer pointer to replace
 * @param alignment currently ignored
 * @param size      size of the new buffer in bytes
 */
static void reallocAlign(void **p, int alignment, int size)
{
    void *fresh;

    av_free(*p);
    fresh = av_mallocz(size);
    *p = fresh;
}
944
0426af31 945static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
bb270c08
DB
946 int mbWidth = (width+15)>>4;
947 int mbHeight= (height+15)>>4;
948 int i;
949
950 c->stride= stride;
951 c->qpStride= qpStride;
952
953 reallocAlign((void **)&c->tempDst, 8, stride*24);
954 reallocAlign((void **)&c->tempSrc, 8, stride*24);
955 reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
956 reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
957 for(i=0; i<256; i++)
958 c->yHistogram[i]= width*height/64*15/256;
959
960 for(i=0; i<3; i++)
961 {
755bfeab 962 //Note: The +17*1024 is just there so i do not have to worry about r/w over the end.
bb270c08
DB
963 reallocAlign((void **)&c->tempBlured[i], 8, stride*mbHeight*16 + 17*1024);
964 reallocAlign((void **)&c->tempBluredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
965 }
966
967 reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
968 reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
969 reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
970 reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
88c0bc7e
MN
971}
972
4cfbf61b 973static void global_init(void){
bb270c08
DB
974 int i;
975 memset(clip_table, 0, 256);
976 for(i=256; i<512; i++)
977 clip_table[i]= i;
978 memset(clip_table+512, 0, 256);
134eb1e5
MN
979}
980
e7becfb2
DB
/**
 * Name callback stored in av_codec_context_class.
 *
 * @param ptr ignored
 * @return the constant string "postproc"
 */
static const char *context_to_name(void *ptr)
{
    static const char name[] = "postproc";

    return name;
}
984
985static AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
986
88c0bc7e 987pp_context_t *pp_get_context(int width, int height, int cpuCaps){
6ab6c7c3 988 PPContext *c= av_malloc(sizeof(PPContext));
bb270c08
DB
989 int stride= (width+15)&(~15); //assumed / will realloc if needed
990 int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
115329f1 991
bb270c08 992 global_init();
134eb1e5 993
bb270c08 994 memset(c, 0, sizeof(PPContext));
e7becfb2 995 c->av_class = &av_codec_context_class;
bb270c08
DB
996 c->cpuCaps= cpuCaps;
997 if(cpuCaps&PP_FORMAT){
998 c->hChromaSubSample= cpuCaps&0x3;
999 c->vChromaSubSample= (cpuCaps>>4)&0x3;
1000 }else{
1001 c->hChromaSubSample= 1;
1002 c->vChromaSubSample= 1;
1003 }
88c0bc7e 1004
bb270c08 1005 reallocBuffers(c, width, height, stride, qpStride);
115329f1 1006
bb270c08 1007 c->frameNum=-1;
45b4f285 1008
bb270c08 1009 return c;
45b4f285
MN
1010}
1011
9cb54f43 1012void pp_free_context(void *vc){
bb270c08
DB
1013 PPContext *c = (PPContext*)vc;
1014 int i;
115329f1 1015
6ab6c7c3
LB
1016 for(i=0; i<3; i++) av_free(c->tempBlured[i]);
1017 for(i=0; i<3; i++) av_free(c->tempBluredPast[i]);
115329f1 1018
6ab6c7c3
LB
1019 av_free(c->tempBlocks);
1020 av_free(c->yHistogram);
1021 av_free(c->tempDst);
1022 av_free(c->tempSrc);
1023 av_free(c->deintTemp);
1024 av_free(c->stdQPTable);
1025 av_free(c->nonBQPTable);
1026 av_free(c->forcedQPTable);
115329f1 1027
bb270c08 1028 memset(c, 0, sizeof(PPContext));
88c0bc7e 1029
6ab6c7c3 1030 av_free(c);
9c9e467d
MN
1031}
1032
9cb54f43 1033void pp_postprocess(uint8_t * src[3], int srcStride[3],
9c9e467d 1034 uint8_t * dst[3], int dstStride[3],
ec487e5d 1035 int width, int height,
9c9e467d 1036 QP_STORE_T *QP_store, int QPStride,
bb270c08 1037 pp_mode_t *vm, void *vc, int pict_type)
911879d1 1038{
bb270c08
DB
1039 int mbWidth = (width+15)>>4;
1040 int mbHeight= (height+15)>>4;
1041 PPMode *mode = (PPMode*)vm;
1042 PPContext *c = (PPContext*)vc;
c26abfa5
DB
1043 int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
1044 int absQPStride = FFABS(QPStride);
bb270c08
DB
1045
1046 // c->stride and c->QPStride are always positive
1047 if(c->stride < minStride || c->qpStride < absQPStride)
1048 reallocBuffers(c, width, height,
8925915f
DB
1049 FFMAX(minStride, c->stride),
1050 FFMAX(c->qpStride, absQPStride));
bb270c08
DB
1051
1052 if(QP_store==NULL || (mode->lumMode & FORCE_QUANT))
1053 {
1054 int i;
1055 QP_store= c->forcedQPTable;
1056 absQPStride = QPStride = 0;
1057 if(mode->lumMode & FORCE_QUANT)
1058 for(i=0; i<mbWidth; i++) QP_store[i]= mode->forcedQuant;
1059 else
1060 for(i=0; i<mbWidth; i++) QP_store[i]= 1;
1061 }
0426af31 1062
bb270c08
DB
1063 if(pict_type & PP_PICT_TYPE_QP2){
1064 int i;
1065 const int count= mbHeight * absQPStride;
1066 for(i=0; i<(count>>2); i++){
1067 ((uint32_t*)c->stdQPTable)[i] = (((uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1068 }
1069 for(i<<=2; i<count; i++){
1070 c->stdQPTable[i] = QP_store[i]>>1;
1071 }
0426af31 1072 QP_store= c->stdQPTable;
bb270c08
DB
1073 QPStride= absQPStride;
1074 }
0426af31 1075
ec487e5d
MN
1076if(0){
1077int x,y;
1078for(y=0; y<mbHeight; y++){
bb270c08 1079 for(x=0; x<mbWidth; x++){
e7becfb2 1080 av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
bb270c08 1081 }
e7becfb2 1082 av_log(c, AV_LOG_INFO, "\n");
ec487e5d 1083}
e7becfb2 1084 av_log(c, AV_LOG_INFO, "\n");
ec487e5d 1085}
51e19dcc 1086
bb270c08
DB
1087 if((pict_type&7)!=3)
1088 {
1089 if (QPStride >= 0) {
1090 int i;
1091 const int count= mbHeight * QPStride;
1092 for(i=0; i<(count>>2); i++){
1093 ((uint32_t*)c->nonBQPTable)[i] = ((uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1094 }
1095 for(i<<=2; i<count; i++){
1096 c->nonBQPTable[i] = QP_store[i] & 0x3F;
1097 }
1098 } else {
1099 int i,j;
1100 for(i=0; i<mbHeight; i++) {
1101 for(j=0; j<absQPStride; j++) {
1102 c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1103 }
1104 }
1105 }
1106 }
1107
e7becfb2
DB
1108 av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
1109 mode->lumMode, mode->chromMode);
bb270c08
DB
1110
1111 postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1112 width, height, QP_store, QPStride, 0, mode, c);
1113
1114 width = (width )>>c->hChromaSubSample;
1115 height = (height)>>c->vChromaSubSample;
1116
1117 if(mode->chromMode)
1118 {
1119 postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1120 width, height, QP_store, QPStride, 1, mode, c);
1121 postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1122 width, height, QP_store, QPStride, 2, mode, c);
1123 }
1124 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2])
1125 {
1126 linecpy(dst[1], src[1], height, srcStride[1]);
1127 linecpy(dst[2], src[2], height, srcStride[2]);
1128 }
1129 else
1130 {
1131 int y;
1132 for(y=0; y<height; y++)
1133 {
1134 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1135 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
1136 }
1137 }
911879d1
MN
1138}
1139