Clarify comment about parity nibble.
[libav.git] / libpostproc / postprocess.c
CommitLineData
3057fa66 1/*
b78e7197
DB
2 * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
3 *
4 * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
5 *
ef85972b 6 * This file is part of FFmpeg.
b78e7197
DB
7 *
8 * FFmpeg is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
3057fa66 22
b304569a
MN
23/**
24 * @file postprocess.c
25 * postprocessing.
26 */
115329f1 27
3057fa66 28/*
bb270c08
DB
29 C MMX MMX2 3DNow AltiVec
30isVertDC Ec Ec Ec
31isVertMinMaxOk Ec Ec Ec
32doVertLowPass E e e Ec
33doVertDefFilter Ec Ec e e Ec
34isHorizDC Ec Ec Ec
35isHorizMinMaxOk a E Ec
36doHorizLowPass E e e Ec
37doHorizDefFilter Ec Ec e e Ec
38do_a_deblock Ec E Ec E
39deRing E e e* Ecp
40Vertical RKAlgo1 E a a
41Horizontal RKAlgo1 a a
42Vertical X1# a E E
43Horizontal X1# a E E
44LinIpolDeinterlace e E E*
45CubicIpolDeinterlace a e e*
46LinBlendDeinterlace e E E*
47MedianDeinterlace# E Ec Ec
48TempDeNoiser# E e e Ec
d5a1a995 49
2cab6401 50* I do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
755bfeab 51# more or less self-invented filters, so their exactness is not too meaningful
3057fa66 52E = Exact implementation
04932b0d 53e = almost exact implementation (slightly different rounding,...)
3057fa66
A
54a = alternative / approximate impl
55c = checked against the other implementations (-vo md5)
b0ac780a 56p = partially optimized, still some work to do
3057fa66
A
57*/
58
59/*
60TODO:
3057fa66 61reduce the time wasted on the mem transfer
3057fa66 62unroll stuff if instructions depend too much on the prior one
3057fa66 63move YScale thing to the end instead of fixing QP
13e00528 64write a faster and higher quality deblocking filter :)
d5a1a995 65make the mainloop more flexible (variable number of blocks at once
bb270c08 66 (the if/else stuff per block is slowing things down)
9f45d04d 67compare the quality & speed of all filters
9f45d04d 68split this huge file
8405b3fd 69optimize c versions
117e45b0 70try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
3057fa66 71...
13e00528
A
72*/
73
36b1b0bc 74//Changelog: use the Subversion log
3057fa66 75
9858f773 76#include "config.h"
245976da 77#include "libavutil/avutil.h"
3057fa66
A
78#include <inttypes.h>
79#include <stdio.h>
d5a1a995 80#include <stdlib.h>
911879d1 81#include <string.h>
dda87e9f
PL
82#ifdef HAVE_MALLOC_H
83#include <malloc.h>
84#endif
3057fa66 85//#undef HAVE_MMX2
13e00528 86//#define HAVE_3DNOW
3057fa66 87//#undef HAVE_MMX
cc9b0679 88//#undef ARCH_X86
7f16f6e6 89//#define DEBUG_BRIGHTNESS
13e00528 90#include "postprocess.h"
c41d972d 91#include "postprocess_internal.h"
bba9b16c 92
a7b2871c
RD
93#ifdef HAVE_ALTIVEC_H
94#include <altivec.h>
95#endif
96
911879d1
MN
/* Size in bytes of the scratch buffer the mode string is copied into for parsing. */
#define GET_MODE_BUFFER_SIZE    500
/* Capacity of the per-filter array of unrecognized option tokens (incl. NULL end marker). */
#define OPTIONS_ARRAY_SIZE      10
/* Number of rows/columns processed per block by the C filters below. */
#define BLOCK_SIZE              8
/* NOTE(review): not referenced in the visible part of this file -- presumably used by the templates. */
#define TEMP_STRIDE             8
//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
911879d1 102
3cd52279 103#if defined(ARCH_X86)
2b858d0b
RD
104DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
105DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
106DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
107DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
108DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
109DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
110DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
111DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
b28daef8 112#endif
3057fa66 113
2722e362 114DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
3057fa66 115
9c9e467d 116
911879d1
MN
117static struct PPFilter filters[]=
118{
16e0bf73
DB
119 {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK},
120 {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK},
121/* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER},
122 {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/
123 {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER},
124 {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER},
125 {"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK},
126 {"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK},
127 {"dr", "dering", 1, 5, 6, DERING},
128 {"al", "autolevels", 0, 1, 2, LEVEL_FIX},
129 {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
130 {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
131 {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
132 {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER},
133 {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER},
134 {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER},
135 {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER},
136 {"fq", "forcequant", 1, 0, 0, FORCE_QUANT},
137 {NULL, NULL,0,0,0,0} //End Marker
911879d1
MN
138};
139
/*
 * Alias table: entries come in pairs, an alias name followed by the filter
 * string it expands to.  A single NULL terminates the list.
 */
static const char *replaceTable[]=
{
    /* alias */    /* expansion */
    "default",     "hb:a,vb:a,dr:a",
    "de",          "hb:a,vb:a,dr:a",
    "fast",        "h1:a,v1:a,dr:a",
    "fa",          "h1:a,v1:a,dr:a",
    "ac",          "ha:a:128:7,va:a,dr:a",
    NULL //End Marker
};
149
3057fa66 150
#if defined(ARCH_X86)
/*
 * Thin wrappers around the x86 prefetch instructions.  Each helper issues
 * the instruction of the same name with p as its memory operand.
 */

/** Issue "prefetchnta" on the address p. */
static inline void prefetchnta(void *p)
{
    __asm__ volatile(
        "prefetchnta (%0)\n\t"
        :
        : "r" (p)
    );
}

/** Issue "prefetcht0" on the address p. */
static inline void prefetcht0(void *p)
{
    __asm__ volatile(
        "prefetcht0 (%0)\n\t"
        :
        : "r" (p)
    );
}

/** Issue "prefetcht1" on the address p. */
static inline void prefetcht1(void *p)
{
    __asm__ volatile(
        "prefetcht1 (%0)\n\t"
        :
        : "r" (p)
    );
}

/** Issue "prefetcht2" on the address p. */
static inline void prefetcht2(void *p)
{
    __asm__ volatile(
        "prefetcht2 (%0)\n\t"
        :
        : "r" (p)
    );
}
#endif
3057fa66 180
04932b0d
DB
181/* The horizontal functions exist only in C because the MMX
182 * code is faster with vertical filters and transposing. */
3057fa66 183
cf5ec61d
MN
184/**
185 * Check if the given 8x8 Block is mostly "flat"
186 */
b0ac780a 187static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
cf5ec61d 188{
16e0bf73
DB
189 int numEq= 0;
190 int y;
191 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
192 const int dcThreshold= dcOffset*2 + 1;
193
194 for(y=0; y<BLOCK_SIZE; y++){
195 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
196 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
197 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
198 if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
199 if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
200 if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
201 if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
202 src+= stride;
203 }
204 return numEq > c->ppMode.flatnessThreshold;
9c9e467d
MN
205}
206
207/**
208 * Check if the middle 8x8 Block in the given 8x16 block is flat
209 */
16e0bf73
DB
210static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c)
211{
212 int numEq= 0;
213 int y;
214 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
215 const int dcThreshold= dcOffset*2 + 1;
216
217 src+= stride*4; // src points to begin of the 8x8 Block
218 for(y=0; y<BLOCK_SIZE-1; y++){
219 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
220 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
221 if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
222 if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
223 if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
224 if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
225 if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
226 if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
227 src+= stride;
228 }
229 return numEq > c->ppMode.flatnessThreshold;
cf5ec61d
MN
230}
231
/**
 * Sampled range check for an 8x8 block (horizontal direction).
 *
 * One pixel pair is tested per row; the column pairs cycle through
 * (0,5), (2,7), (4,1), (6,3).  A pair passes when its difference lies in
 * [-2*QP, 2*QP].
 *
 * @param src    top-left pixel of the block
 * @param stride distance in bytes between two rows
 * @param QP     quantizer that scales the allowed difference
 * @return 1 if all eight sampled pairs pass, 0 as soon as one fails
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{
    static const uint8_t columnPair[4][2] = {
        {0, 5}, {2, 7}, {4, 1}, {6, 3},
    };
    int row;

    for (row = 0; row < 8; row++, src += stride) {
        const uint8_t *pair = columnPair[row & 3];

        /* the unsigned compare rejects both diff < -2*QP and diff > 2*QP */
        if ((unsigned)(src[pair[0]] - src[pair[1]] + 2*QP) > 4*QP)
            return 0;
    }
    return 1;
}
cf5ec61d 254
cb482d25
MN
255static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
256{
257#if 1
258#if 1
16e0bf73
DB
259 int x;
260 src+= stride*4;
261 for(x=0; x<BLOCK_SIZE; x+=4){
262 if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0;
263 if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
264 if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
265 if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
266 }
cb482d25 267#else
16e0bf73
DB
268 int x;
269 src+= stride*3;
270 for(x=0; x<BLOCK_SIZE; x++){
271 if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
272 }
cb482d25 273#endif
16e0bf73 274 return 1;
cb482d25 275#else
16e0bf73
DB
276 int x;
277 src+= stride*4;
278 for(x=0; x<BLOCK_SIZE; x++){
279 int min=255;
280 int max=0;
281 int y;
282 for(y=0; y<8; y++){
283 int v= src[x + y*stride];
284 if(v>max) max=v;
285 if(v<min) min=v;
bb270c08 286 }
16e0bf73
DB
287 if(max-min > 2*QP) return 0;
288 }
289 return 1;
cb482d25
MN
290#endif
291}
292
16e0bf73
DB
293static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c)
294{
295 if( isHorizDC_C(src, stride, c) ){
296 if( isHorizMinMaxOk_C(src, stride, c->QP) )
297 return 1;
298 else
299 return 0;
300 }else{
301 return 2;
302 }
b0ac780a
MN
303}
304
16e0bf73
DB
305static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c)
306{
307 if( isVertDC_C(src, stride, c) ){
308 if( isVertMinMaxOk_C(src, stride, c->QP) )
309 return 1;
310 else
311 return 0;
312 }else{
313 return 2;
314 }
cf5ec61d
MN
315}
316
b0ac780a 317static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
cf5ec61d 318{
16e0bf73
DB
319 int y;
320 for(y=0; y<BLOCK_SIZE; y++){
321 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
322
323 if(FFABS(middleEnergy) < 8*c->QP){
324 const int q=(dst[3] - dst[4])/2;
325 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
326 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
327
328 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
329 d= FFMAX(d, 0);
330
331 d= (5*d + 32) >> 6;
332 d*= FFSIGN(-middleEnergy);
333
334 if(q>0)
335 {
336 d= d<0 ? 0 : d;
337 d= d>q ? q : d;
338 }
339 else
340 {
341 d= d>0 ? 0 : d;
342 d= d<q ? q : d;
343 }
344
345 dst[3]-= d;
346 dst[4]+= d;
bb270c08 347 }
16e0bf73
DB
348 dst+= stride;
349 }
cf5ec61d
MN
350}
351
352/**
353 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
354 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
355 */
b0ac780a 356static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
cf5ec61d 357{
16e0bf73
DB
358 int y;
359 for(y=0; y<BLOCK_SIZE; y++){
360 const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
361 const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
362
363 int sums[10];
364 sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
365 sums[1] = sums[0] - first + dst[3];
366 sums[2] = sums[1] - first + dst[4];
367 sums[3] = sums[2] - first + dst[5];
368 sums[4] = sums[3] - first + dst[6];
369 sums[5] = sums[4] - dst[0] + dst[7];
370 sums[6] = sums[5] - dst[1] + last;
371 sums[7] = sums[6] - dst[2] + last;
372 sums[8] = sums[7] - dst[3] + last;
373 sums[9] = sums[8] - dst[4] + last;
374
375 dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
376 dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
377 dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
378 dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
379 dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
380 dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
381 dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
382 dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
383
384 dst+= stride;
385 }
cf5ec61d
MN
386}
387
4e4dcbc5 388/**
cc9b0679
MN
389 * Experimental Filter 1 (Horizontal)
390 * will not damage linear gradients
bd107136 391 * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
755bfeab
DB
392 * can only smooth blocks at the expected locations (it cannot smooth them if they did move)
393 * MMX2 version does correct clipping C version does not
cc9b0679 394 * not identical with the vertical one
4e4dcbc5 395 */
cc9b0679
MN
396static inline void horizX1Filter(uint8_t *src, int stride, int QP)
397{
16e0bf73
DB
398 int y;
399 static uint64_t *lut= NULL;
400 if(lut==NULL)
401 {
402 int i;
403 lut = av_malloc(256*8);
404 for(i=0; i<256; i++)
bb270c08 405 {
16e0bf73 406 int v= i < 128 ? 2*i : 2*(i-256);
117e45b0 407/*
cc9b0679 408//Simulate 112242211 9-Tap filter
16e0bf73
DB
409 uint64_t a= (v/16) & 0xFF;
410 uint64_t b= (v/8) & 0xFF;
411 uint64_t c= (v/4) & 0xFF;
412 uint64_t d= (3*v/8) & 0xFF;
117e45b0 413*/
cc9b0679 414//Simulate piecewise linear interpolation
16e0bf73
DB
415 uint64_t a= (v/16) & 0xFF;
416 uint64_t b= (v*3/16) & 0xFF;
417 uint64_t c= (v*5/16) & 0xFF;
418 uint64_t d= (7*v/16) & 0xFF;
419 uint64_t A= (0x100 - a)&0xFF;
420 uint64_t B= (0x100 - b)&0xFF;
421 uint64_t C= (0x100 - c)&0xFF;
422 uint64_t D= (0x100 - c)&0xFF;
423
424 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
425 (D<<24) | (C<<16) | (B<<8) | (A);
426 //lut[i] = (v<<32) | (v<<24);
bb270c08 427 }
16e0bf73 428 }
bb270c08 429
16e0bf73
DB
430 for(y=0; y<BLOCK_SIZE; y++){
431 int a= src[1] - src[2];
432 int b= src[3] - src[4];
433 int c= src[5] - src[6];
bb270c08 434
16e0bf73 435 int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
bb270c08 436
16e0bf73
DB
437 if(d < QP){
438 int v = d * FFSIGN(-b);
bb270c08 439
16e0bf73
DB
440 src[1] +=v/8;
441 src[2] +=v/4;
442 src[3] +=3*v/8;
443 src[4] -=3*v/8;
444 src[5] -=v/4;
445 src[6] -=v/8;
bb270c08 446 }
16e0bf73
DB
447 src+=stride;
448 }
cc9b0679
MN
449}
450
12eebd26
MN
451/**
452 * accurate deblock filter
453 */
849f1035 454static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
16e0bf73
DB
455 int y;
456 const int QP= c->QP;
457 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
458 const int dcThreshold= dcOffset*2 + 1;
12eebd26 459//START_TIMER
16e0bf73
DB
460 src+= step*4; // src points to begin of the 8x8 Block
461 for(y=0; y<8; y++){
462 int numEq= 0;
463
464 if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
465 if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
466 if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
467 if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
468 if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
469 if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
470 if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
471 if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
472 if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
473 if(numEq > c->ppMode.flatnessThreshold){
474 int min, max, x;
475
476 if(src[0] > src[step]){
477 max= src[0];
478 min= src[step];
479 }else{
480 max= src[step];
481 min= src[0];
482 }
483 for(x=2; x<8; x+=2){
484 if(src[x*step] > src[(x+1)*step]){
485 if(src[x *step] > max) max= src[ x *step];
486 if(src[(x+1)*step] < min) min= src[(x+1)*step];
bb270c08 487 }else{
16e0bf73
DB
488 if(src[(x+1)*step] > max) max= src[(x+1)*step];
489 if(src[ x *step] < min) min= src[ x *step];
490 }
491 }
492 if(max-min < 2*QP){
493 const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
494 const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
495
496 int sums[10];
497 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
498 sums[1] = sums[0] - first + src[3*step];
499 sums[2] = sums[1] - first + src[4*step];
500 sums[3] = sums[2] - first + src[5*step];
501 sums[4] = sums[3] - first + src[6*step];
502 sums[5] = sums[4] - src[0*step] + src[7*step];
503 sums[6] = sums[5] - src[1*step] + last;
504 sums[7] = sums[6] - src[2*step] + last;
505 sums[8] = sums[7] - src[3*step] + last;
506 sums[9] = sums[8] - src[4*step] + last;
507
508 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
509 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
510 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
511 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
512 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
513 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
514 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
515 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
516 }
517 }else{
518 const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
519
520 if(FFABS(middleEnergy) < 8*QP){
521 const int q=(src[3*step] - src[4*step])/2;
522 const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
523 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
524
525 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
526 d= FFMAX(d, 0);
527
528 d= (5*d + 32) >> 6;
529 d*= FFSIGN(-middleEnergy);
530
531 if(q>0){
532 d= d<0 ? 0 : d;
533 d= d>q ? q : d;
534 }else{
535 d= d>0 ? 0 : d;
536 d= d<q ? q : d;
bb270c08
DB
537 }
538
16e0bf73
DB
539 src[3*step]-= d;
540 src[4*step]+= d;
541 }
bb270c08 542 }
16e0bf73
DB
543
544 src += stride;
545 }
12eebd26
MN
546/*if(step==16){
547 STOP_TIMER("step16")
548}else{
549 STOP_TIMER("stepX")
550}*/
551}
cc9b0679 552
e89952aa 553//Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
cc9b0679 554//Plain C versions
e89952aa
MN
555#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
556#define COMPILE_C
557#endif
558
b0ac780a
MN
559#ifdef HAVE_ALTIVEC
560#define COMPILE_ALTIVEC
b0ac780a 561#endif //HAVE_ALTIVEC
b0ac780a 562
3cd52279 563#if defined(ARCH_X86)
e89952aa
MN
564
565#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
566#define COMPILE_MMX
567#endif
568
569#if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
570#define COMPILE_MMX2
571#endif
572
573#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
574#define COMPILE_3DNOW
575#endif
3cd52279 576#endif /* defined(ARCH_X86) */
e89952aa
MN
577
578#undef HAVE_MMX
579#undef HAVE_MMX2
580#undef HAVE_3DNOW
b0ac780a 581#undef HAVE_ALTIVEC
e89952aa
MN
582
583#ifdef COMPILE_C
cc9b0679
MN
584#undef HAVE_MMX
585#undef HAVE_MMX2
586#undef HAVE_3DNOW
cc9b0679
MN
587#define RENAME(a) a ## _C
588#include "postprocess_template.c"
e89952aa 589#endif
cc9b0679 590
b0ac780a
MN
591#ifdef COMPILE_ALTIVEC
592#undef RENAME
593#define HAVE_ALTIVEC
594#define RENAME(a) a ## _altivec
595#include "postprocess_altivec_template.c"
596#include "postprocess_template.c"
597#endif
b0ac780a 598
cc9b0679 599//MMX versions
e89952aa 600#ifdef COMPILE_MMX
cc9b0679
MN
601#undef RENAME
602#define HAVE_MMX
603#undef HAVE_MMX2
604#undef HAVE_3DNOW
cc9b0679
MN
605#define RENAME(a) a ## _MMX
606#include "postprocess_template.c"
e89952aa 607#endif
cc9b0679
MN
608
609//MMX2 versions
e89952aa 610#ifdef COMPILE_MMX2
cc9b0679
MN
611#undef RENAME
612#define HAVE_MMX
613#define HAVE_MMX2
614#undef HAVE_3DNOW
cc9b0679
MN
615#define RENAME(a) a ## _MMX2
616#include "postprocess_template.c"
e89952aa 617#endif
cc9b0679
MN
618
619//3DNOW versions
e89952aa 620#ifdef COMPILE_3DNOW
cc9b0679
MN
621#undef RENAME
622#define HAVE_MMX
623#undef HAVE_MMX2
624#define HAVE_3DNOW
cc9b0679
MN
625#define RENAME(a) a ## _3DNow
626#include "postprocess_template.c"
e89952aa 627#endif
cc9b0679 628
755bfeab 629// Minor note: the HAVE_xyz macros are left in an inconsistent state after this point, so do not rely on them.
cc9b0679 630
6c51fd3f
MN
631static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
632 const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc)
cc9b0679 633{
16e0bf73
DB
634 PPContext *c= (PPContext *)vc;
635 PPMode *ppMode= (PPMode *)vm;
636 c->ppMode= *ppMode; //FIXME
9c9e467d 637
16e0bf73
DB
638 // Using ifs here as they are faster than function pointers although the
639 // difference would not be measurable here but it is much better because
640 // someone might exchange the CPU whithout restarting MPlayer ;)
e89952aa 641#ifdef RUNTIME_CPUDETECT
3cd52279 642#if defined(ARCH_X86)
16e0bf73
DB
643 // ordered per speed fastest first
644 if(c->cpuCaps & PP_CPU_CAPS_MMX2)
645 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
646 else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
647 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
648 else if(c->cpuCaps & PP_CPU_CAPS_MMX)
649 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
650 else
651 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
cc9b0679 652#else
b0ac780a 653#ifdef HAVE_ALTIVEC
16e0bf73
DB
654 if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
655 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
656 else
b0ac780a 657#endif
16e0bf73 658 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
be44a4d7 659#endif
e89952aa
MN
660#else //RUNTIME_CPUDETECT
661#ifdef HAVE_MMX2
16e0bf73 662 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 663#elif defined (HAVE_3DNOW)
16e0bf73 664 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 665#elif defined (HAVE_MMX)
16e0bf73 666 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
b0ac780a 667#elif defined (HAVE_ALTIVEC)
16e0bf73 668 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 669#else
16e0bf73 670 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa
MN
671#endif
672#endif //!RUNTIME_CPUDETECT
117e45b0
MN
673}
674
cc9b0679 675//static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
bb270c08 676// QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
13e00528 677
/* -pp Command line Help
 *
 * The filter names printed here must match the entries of filters[] exactly,
 * because the parser compares them with a case-sensitive strcmp():
 * the long names are "ahdeblock", "avdeblock" and "forcequant".
 */
#if LIBPOSTPROC_VERSION_INT < (52<<16)
const char *const pp_help=
#else
const char pp_help[] =
#endif
"Available postprocessing filters:\n"
"Filters Options\n"
"short long name short long option Description\n"
"* * a autoq CPU power dependent enabler\n"
" c chrom chrominance filtering enabled\n"
" y nochrom chrominance filtering disabled\n"
" n noluma luma filtering disabled\n"
"hb hdeblock (2 threshold) horizontal deblocking filter\n"
" 1. difference factor: default=32, higher -> more deblocking\n"
" 2. flatness threshold: default=39, lower -> more deblocking\n"
" the h & v deblocking filters share these\n"
" so you can't set different thresholds for h / v\n"
"vb vdeblock (2 threshold) vertical deblocking filter\n"
"ha ahdeblock (2 threshold) horizontal deblocking filter\n"
"va avdeblock (2 threshold) vertical deblocking filter\n"
"h1 x1hdeblock experimental h deblock filter 1\n"
"v1 x1vdeblock experimental v deblock filter 1\n"
"dr dering deringing filter\n"
"al autolevels automatic brightness / contrast\n"
" f fullyrange stretch luminance to (0..255)\n"
"lb linblenddeint linear blend deinterlacer\n"
"li linipoldeint linear interpolating deinterlace\n"
"ci cubicipoldeint cubic interpolating deinterlacer\n"
"md mediandeint median deinterlacer\n"
"fd ffmpegdeint ffmpeg deinterlacer\n"
"l5 lowpass5 FIR lowpass deinterlacer\n"
"de default hb:a,vb:a,dr:a\n"
"fa fast h1:a,v1:a,dr:a\n"
"ac ha:a:128:7,va:a,dr:a\n"
"tn tmpnoise (3 threshold) temporal noise reducer\n"
" 1. <= 2. <= 3. larger -> stronger filtering\n"
"fq forcequant <quantizer> force quantizer\n"
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"\n"
;
911879d1 727
ca325343 728pp_mode_t *pp_get_mode_by_name_and_quality(const char *name, int quality)
911879d1 729{
16e0bf73
DB
730 char temp[GET_MODE_BUFFER_SIZE];
731 char *p= temp;
732 static const char filterDelimiters[] = ",/";
733 static const char optionDelimiters[] = ":";
734 struct PPMode *ppMode;
735 char *filterToken;
736
737 ppMode= av_malloc(sizeof(PPMode));
738
739 ppMode->lumMode= 0;
740 ppMode->chromMode= 0;
741 ppMode->maxTmpNoise[0]= 700;
742 ppMode->maxTmpNoise[1]= 1500;
743 ppMode->maxTmpNoise[2]= 3000;
744 ppMode->maxAllowedY= 234;
745 ppMode->minAllowedY= 16;
746 ppMode->baseDcDiff= 256/8;
747 ppMode->flatnessThreshold= 56-16-1;
748 ppMode->maxClippedThreshold= 0.01;
749 ppMode->error=0;
750
751 strncpy(temp, name, GET_MODE_BUFFER_SIZE);
752
753 av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
754
755 for(;;){
756 char *filterName;
757 int q= 1000000; //PP_QUALITY_MAX;
758 int chrom=-1;
759 int luma=-1;
760 char *option;
761 char *options[OPTIONS_ARRAY_SIZE];
762 int i;
763 int filterNameOk=0;
764 int numOfUnknownOptions=0;
765 int enable=1; //does the user want us to enabled or disabled the filter
766
767 filterToken= strtok(p, filterDelimiters);
768 if(filterToken == NULL) break;
769 p+= strlen(filterToken) + 1; // p points to next filterToken
770 filterName= strtok(filterToken, optionDelimiters);
771 av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
772
773 if(*filterName == '-'){
774 enable=0;
775 filterName++;
776 }
bb270c08 777
16e0bf73
DB
778 for(;;){ //for all options
779 option= strtok(NULL, optionDelimiters);
780 if(option == NULL) break;
781
782 av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
783 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
784 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
785 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
786 else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
787 else{
788 options[numOfUnknownOptions] = option;
789 numOfUnknownOptions++;
790 }
791 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
792 }
793 options[numOfUnknownOptions] = NULL;
794
795 /* replace stuff from the replace Table */
796 for(i=0; replaceTable[2*i]!=NULL; i++){
797 if(!strcmp(replaceTable[2*i], filterName)){
798 int newlen= strlen(replaceTable[2*i + 1]);
799 int plen;
800 int spaceLeft;
801
802 if(p==NULL) p= temp, *p=0; //last filter
803 else p--, *p=','; //not last filter
804
805 plen= strlen(p);
806 spaceLeft= p - temp + plen;
807 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE){
808 ppMode->error++;
809 break;
bb270c08 810 }
16e0bf73
DB
811 memmove(p + newlen, p, plen+1);
812 memcpy(p, replaceTable[2*i + 1], newlen);
813 filterNameOk=1;
814 }
815 }
bb270c08 816
16e0bf73
DB
817 for(i=0; filters[i].shortName!=NULL; i++){
818 if( !strcmp(filters[i].longName, filterName)
819 || !strcmp(filters[i].shortName, filterName)){
820 ppMode->lumMode &= ~filters[i].mask;
821 ppMode->chromMode &= ~filters[i].mask;
822
823 filterNameOk=1;
824 if(!enable) break; // user wants to disable it
825
826 if(q >= filters[i].minLumQuality && luma)
827 ppMode->lumMode|= filters[i].mask;
828 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
829 if(q >= filters[i].minChromQuality)
830 ppMode->chromMode|= filters[i].mask;
831
832 if(filters[i].mask == LEVEL_FIX){
833 int o;
834 ppMode->minAllowedY= 16;
835 ppMode->maxAllowedY= 234;
836 for(o=0; options[o]!=NULL; o++){
837 if( !strcmp(options[o],"fullyrange")
838 ||!strcmp(options[o],"f")){
839 ppMode->minAllowedY= 0;
840 ppMode->maxAllowedY= 255;
841 numOfUnknownOptions--;
bb270c08 842 }
16e0bf73 843 }
bb270c08 844 }
16e0bf73 845 else if(filters[i].mask == TEMP_NOISE_FILTER)
bb270c08 846 {
16e0bf73
DB
847 int o;
848 int numOfNoises=0;
849
850 for(o=0; options[o]!=NULL; o++){
851 char *tail;
852 ppMode->maxTmpNoise[numOfNoises]=
853 strtol(options[o], &tail, 0);
854 if(tail!=options[o]){
855 numOfNoises++;
856 numOfUnknownOptions--;
857 if(numOfNoises >= 3) break;
bb270c08 858 }
16e0bf73 859 }
bb270c08 860 }
16e0bf73
DB
861 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK
862 || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
863 int o;
864
865 for(o=0; options[o]!=NULL && o<2; o++){
866 char *tail;
867 int val= strtol(options[o], &tail, 0);
868 if(tail==options[o]) break;
869
870 numOfUnknownOptions--;
871 if(o==0) ppMode->baseDcDiff= val;
872 else ppMode->flatnessThreshold= val;
873 }
874 }
875 else if(filters[i].mask == FORCE_QUANT){
876 int o;
877 ppMode->forcedQuant= 15;
878
879 for(o=0; options[o]!=NULL && o<1; o++){
880 char *tail;
881 int val= strtol(options[o], &tail, 0);
882 if(tail==options[o]) break;
883
884 numOfUnknownOptions--;
885 ppMode->forcedQuant= val;
886 }
887 }
888 }
bb270c08 889 }
16e0bf73
DB
890 if(!filterNameOk) ppMode->error++;
891 ppMode->error += numOfUnknownOptions;
892 }
893
894 av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
895 if(ppMode->error){
896 av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
897 av_free(ppMode);
898 return NULL;
899 }
900 return ppMode;
911879d1
MN
901}
902
c41d972d 903void pp_free_mode(pp_mode_t *mode){
6ab6c7c3 904 av_free(mode);
c41d972d
MN
905}
906
/**
 * Replace the buffer *p by a fresh zero-filled one of the given size.
 *
 * The previous buffer is freed first, so its contents are not preserved.
 * The alignment argument is currently ignored; the buffer gets whatever
 * alignment av_mallocz() provides.  *p is set to whatever av_mallocz()
 * returns, without any failure check.
 *
 * @param p         address of the buffer pointer to replace
 * @param alignment requested alignment in bytes (unused)
 * @param size      size of the new buffer in bytes
 */
static void reallocAlign(void **p, int alignment, int size)
{
    void *fresh;

    av_free(*p);
    fresh = av_mallocz(size);
    *p    = fresh;
}
911
0426af31 912static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
16e0bf73
DB
913 int mbWidth = (width+15)>>4;
914 int mbHeight= (height+15)>>4;
915 int i;
916
917 c->stride= stride;
918 c->qpStride= qpStride;
919
920 reallocAlign((void **)&c->tempDst, 8, stride*24);
921 reallocAlign((void **)&c->tempSrc, 8, stride*24);
922 reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
923 reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
924 for(i=0; i<256; i++)
925 c->yHistogram[i]= width*height/64*15/256;
926
927 for(i=0; i<3; i++){
928 //Note: The +17*1024 is just there so i do not have to worry about r/w over the end.
aa089f6c
DB
929 reallocAlign((void **)&c->tempBlurred[i], 8, stride*mbHeight*16 + 17*1024);
930 reallocAlign((void **)&c->tempBlurredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
16e0bf73
DB
931 }
932
933 reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
934 reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
935 reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
936 reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
88c0bc7e
MN
937}
938
e7becfb2
DB
/**
 * Name callback referenced by av_codec_context_class.
 *
 * @param ptr ignored
 * @return the constant string "postproc"
 */
static const char * context_to_name(void * ptr)
{
    static const char name[] = "postproc";

    (void)ptr;
    return name;
}
942
31bfd6f3 943static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
e7becfb2 944
88c0bc7e 945pp_context_t *pp_get_context(int width, int height, int cpuCaps){
16e0bf73
DB
946 PPContext *c= av_malloc(sizeof(PPContext));
947 int stride= (width+15)&(~15); //assumed / will realloc if needed
948 int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
949
950 memset(c, 0, sizeof(PPContext));
951 c->av_class = &av_codec_context_class;
952 c->cpuCaps= cpuCaps;
953 if(cpuCaps&PP_FORMAT){
954 c->hChromaSubSample= cpuCaps&0x3;
955 c->vChromaSubSample= (cpuCaps>>4)&0x3;
956 }else{
957 c->hChromaSubSample= 1;
958 c->vChromaSubSample= 1;
959 }
960
961 reallocBuffers(c, width, height, stride, qpStride);
962
963 c->frameNum=-1;
964
965 return c;
45b4f285
MN
966}
967
9cb54f43 968void pp_free_context(void *vc){
16e0bf73
DB
969 PPContext *c = (PPContext*)vc;
970 int i;
115329f1 971
aa089f6c
DB
972 for(i=0; i<3; i++) av_free(c->tempBlurred[i]);
973 for(i=0; i<3; i++) av_free(c->tempBlurredPast[i]);
115329f1 974
16e0bf73
DB
975 av_free(c->tempBlocks);
976 av_free(c->yHistogram);
977 av_free(c->tempDst);
978 av_free(c->tempSrc);
979 av_free(c->deintTemp);
980 av_free(c->stdQPTable);
981 av_free(c->nonBQPTable);
982 av_free(c->forcedQPTable);
115329f1 983
16e0bf73 984 memset(c, 0, sizeof(PPContext));
88c0bc7e 985
16e0bf73 986 av_free(c);
9c9e467d
MN
987}
988
6c51fd3f 989void pp_postprocess(const uint8_t * src[3], const int srcStride[3],
16e0bf73
DB
990 uint8_t * dst[3], const int dstStride[3],
991 int width, int height,
992 const QP_STORE_T *QP_store, int QPStride,
993 pp_mode_t *vm, void *vc, int pict_type)
911879d1 994{
16e0bf73
DB
995 int mbWidth = (width+15)>>4;
996 int mbHeight= (height+15)>>4;
997 PPMode *mode = (PPMode*)vm;
998 PPContext *c = (PPContext*)vc;
999 int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
1000 int absQPStride = FFABS(QPStride);
1001
1002 // c->stride and c->QPStride are always positive
1003 if(c->stride < minStride || c->qpStride < absQPStride)
1004 reallocBuffers(c, width, height,
1005 FFMAX(minStride, c->stride),
1006 FFMAX(c->qpStride, absQPStride));
1007
1008 if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)){
1009 int i;
1010 QP_store= c->forcedQPTable;
1011 absQPStride = QPStride = 0;
1012 if(mode->lumMode & FORCE_QUANT)
1013 for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
1014 else
1015 for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
1016 }
0426af31 1017
16e0bf73
DB
1018 if(pict_type & PP_PICT_TYPE_QP2){
1019 int i;
1020 const int count= mbHeight * absQPStride;
1021 for(i=0; i<(count>>2); i++){
1022 ((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
bb270c08 1023 }
16e0bf73
DB
1024 for(i<<=2; i<count; i++){
1025 c->stdQPTable[i] = QP_store[i]>>1;
1026 }
1027 QP_store= c->stdQPTable;
1028 QPStride= absQPStride;
1029 }
1030
1031 if(0){
1032 int x,y;
1033 for(y=0; y<mbHeight; y++){
1034 for(x=0; x<mbWidth; x++){
e7becfb2 1035 av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
16e0bf73
DB
1036 }
1037 av_log(c, AV_LOG_INFO, "\n");
bb270c08 1038 }
e7becfb2 1039 av_log(c, AV_LOG_INFO, "\n");
16e0bf73
DB
1040 }
1041
1042 if((pict_type&7)!=3){
1043 if (QPStride >= 0){
1044 int i;
1045 const int count= mbHeight * QPStride;
1046 for(i=0; i<(count>>2); i++){
1047 ((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1048 }
1049 for(i<<=2; i<count; i++){
1050 c->nonBQPTable[i] = QP_store[i] & 0x3F;
1051 }
1052 } else {
1053 int i,j;
1054 for(i=0; i<mbHeight; i++) {
1055 for(j=0; j<absQPStride; j++) {
1056 c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
bb270c08 1057 }
16e0bf73 1058 }
bb270c08 1059 }
16e0bf73 1060 }
bb270c08 1061
16e0bf73
DB
1062 av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
1063 mode->lumMode, mode->chromMode);
bb270c08 1064
16e0bf73 1065 postProcess(src[0], srcStride[0], dst[0], dstStride[0],
bb270c08
DB
1066 width, height, QP_store, QPStride, 0, mode, c);
1067
16e0bf73
DB
1068 width = (width )>>c->hChromaSubSample;
1069 height = (height)>>c->vChromaSubSample;
1070
1071 if(mode->chromMode){
1072 postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1073 width, height, QP_store, QPStride, 1, mode, c);
1074 postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1075 width, height, QP_store, QPStride, 2, mode, c);
1076 }
1077 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
1078 linecpy(dst[1], src[1], height, srcStride[1]);
1079 linecpy(dst[2], src[2], height, srcStride[2]);
1080 }else{
1081 int y;
1082 for(y=0; y<height; y++){
1083 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1084 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
bb270c08 1085 }
16e0bf73 1086 }
911879d1
MN
1087}
1088