Move the video size and rate abbreviations system from libavformat to libavcodec
[libav.git] / libpostproc / postprocess.c
CommitLineData
3057fa66 1/*
b78e7197
DB
2 * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
3 *
4 * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
5 *
ef85972b 6 * This file is part of FFmpeg.
b78e7197
DB
7 *
8 * FFmpeg is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
3057fa66 22
b304569a
MN
23/**
24 * @file postprocess.c
25 * postprocessing.
26 */
115329f1 27
3057fa66 28/*
bb270c08
DB
29 C MMX MMX2 3DNow AltiVec
30isVertDC Ec Ec Ec
31isVertMinMaxOk Ec Ec Ec
32doVertLowPass E e e Ec
33doVertDefFilter Ec Ec e e Ec
34isHorizDC Ec Ec Ec
35isHorizMinMaxOk a E Ec
36doHorizLowPass E e e Ec
37doHorizDefFilter Ec Ec e e Ec
38do_a_deblock Ec E Ec E
39deRing E e e* Ecp
40Vertical RKAlgo1 E a a
41Horizontal RKAlgo1 a a
42Vertical X1# a E E
43Horizontal X1# a E E
44LinIpolDeinterlace e E E*
45CubicIpolDeinterlace a e e*
46LinBlendDeinterlace e E E*
47MedianDeinterlace# E Ec Ec
48TempDeNoiser# E e e Ec
d5a1a995 49
117e45b0
MN
50* I don't have a 3DNow CPU -> it's untested, but no one said it doesn't work, so it seems to work
51# more or less self-invented filters, so the exactness isn't too meaningful
3057fa66 52E = Exact implementation
acced553 53e = almost exact implementation (slightly different rounding,...)
3057fa66
A
54a = alternative / approximate impl
55c = checked against the other implementations (-vo md5)
b0ac780a 56p = partially optimized, still some work to do
3057fa66
A
57*/
58
59/*
60TODO:
3057fa66 61reduce the time wasted on the mem transfer
3057fa66 62unroll stuff if instructions depend too much on the prior one
3057fa66 63move YScale thing to the end instead of fixing QP
13e00528 64write a faster and higher quality deblocking filter :)
d5a1a995 65make the mainloop more flexible (variable number of blocks at once
bb270c08 66 (the if/else stuff per block is slowing things down)
9f45d04d 67compare the quality & speed of all filters
9f45d04d 68split this huge file
8405b3fd 69optimize c versions
117e45b0 70try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
3057fa66 71...
13e00528
A
72*/
73
36b1b0bc 74//Changelog: use the Subversion log
3057fa66 75
9858f773 76#include "config.h"
6ab6c7c3 77#include "avutil.h"
3057fa66
A
78#include <inttypes.h>
79#include <stdio.h>
d5a1a995 80#include <stdlib.h>
911879d1 81#include <string.h>
dda87e9f
PL
82#ifdef HAVE_MALLOC_H
83#include <malloc.h>
84#endif
3057fa66 85//#undef HAVE_MMX2
13e00528 86//#define HAVE_3DNOW
3057fa66 87//#undef HAVE_MMX
cc9b0679 88//#undef ARCH_X86
7f16f6e6 89//#define DEBUG_BRIGHTNESS
13e00528 90#include "postprocess.h"
c41d972d 91#include "postprocess_internal.h"
bba9b16c
MN
92
93#include "mangle.h" //FIXME should be supressed
3057fa66 94
a7b2871c
RD
95#ifdef HAVE_ALTIVEC_H
96#include <altivec.h>
97#endif
98
911879d1
MN
99#define GET_MODE_BUFFER_SIZE 500
100#define OPTIONS_ARRAY_SIZE 10
9c9e467d
MN
101#define BLOCK_SIZE 8
102#define TEMP_STRIDE 8
103//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
911879d1 104
3cd52279 105#if defined(ARCH_X86)
0bda7817
RD
106static DECLARE_ALIGNED(8, uint64_t attribute_used, w05)= 0x0005000500050005LL;
107static DECLARE_ALIGNED(8, uint64_t attribute_used, w04)= 0x0004000400040004LL;
108static DECLARE_ALIGNED(8, uint64_t attribute_used, w20)= 0x0020002000200020LL;
109static DECLARE_ALIGNED(8, uint64_t attribute_used, b00)= 0x0000000000000000LL;
110static DECLARE_ALIGNED(8, uint64_t attribute_used, b01)= 0x0101010101010101LL;
111static DECLARE_ALIGNED(8, uint64_t attribute_used, b02)= 0x0202020202020202LL;
112static DECLARE_ALIGNED(8, uint64_t attribute_used, b08)= 0x0808080808080808LL;
113static DECLARE_ALIGNED(8, uint64_t attribute_used, b80)= 0x8080808080808080LL;
b28daef8 114#endif
3057fa66 115
134eb1e5
MN
116static uint8_t clip_table[3*256];
117static uint8_t * const clip_tab= clip_table + 256;
118
3f1d4e96 119static const int attribute_used deringThreshold= 20;
3057fa66 120
9c9e467d 121
911879d1
MN
122static struct PPFilter filters[]=
123{
bb270c08
DB
124 {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK},
125 {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK},
126/* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER},
127 {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/
128 {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER},
129 {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER},
130 {"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK},
131 {"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK},
132 {"dr", "dering", 1, 5, 6, DERING},
133 {"al", "autolevels", 0, 1, 2, LEVEL_FIX},
134 {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
135 {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
136 {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
137 {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER},
138 {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER},
139 {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER},
140 {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER},
141 {"fq", "forcequant", 1, 0, 0, FORCE_QUANT},
142 {NULL, NULL,0,0,0,0} //End Marker
911879d1
MN
143};
144
/*
 * Aliases understood by pp_get_mode_by_name_and_quality().
 * The entries come in pairs: even indices hold the alias, the following odd
 * index holds the filter string that is substituted for it. A NULL alias
 * terminates the list.
 */
static const char *replaceTable[]=
{
    "default", "hdeblock:a,vdeblock:a,dering:a",
    "de",      "hdeblock:a,vdeblock:a,dering:a",
    "fast",    "x1hdeblock:a,x1vdeblock:a,dering:a",
    "fa",      "x1hdeblock:a,x1vdeblock:a,dering:a",
    "ac",      "ha:a:128:7,va:a,dering:a",
    NULL //End Marker
};
154
3057fa66 155
#if defined(ARCH_X86)
/*
 * Thin wrappers around the x86 prefetch instructions. Each function issues
 * exactly the instruction it is named after, using p as the address operand,
 * and has no other effect visible in C.
 */
static inline void prefetchnta(void *p)
{
    asm volatile("prefetchnta (%0)\n\t" : : "r" (p));
}

static inline void prefetcht0(void *p)
{
    asm volatile("prefetcht0 (%0)\n\t" : : "r" (p));
}

static inline void prefetcht1(void *p)
{
    asm volatile("prefetcht1 (%0)\n\t" : : "r" (p));
}

static inline void prefetcht2(void *p)
{
    asm volatile("prefetcht2 (%0)\n\t" : : "r" (p));
}
#endif
3057fa66 185
cc9b0679 186// The horizontal functions exist only in C because the MMX code is faster with vertical filters and transposing
3057fa66 187
cf5ec61d
MN
188/**
189 * Check if the given 8x8 Block is mostly "flat"
190 */
b0ac780a 191static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
cf5ec61d 192{
bb270c08
DB
193 int numEq= 0;
194 int y;
195 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
196 const int dcThreshold= dcOffset*2 + 1;
197
198 for(y=0; y<BLOCK_SIZE; y++)
199 {
200 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
201 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
202 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
203 if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
204 if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
205 if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
206 if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
207 src+= stride;
208 }
209 return numEq > c->ppMode.flatnessThreshold;
9c9e467d
MN
210}
211
212/**
213 * Check if the middle 8x8 Block in the given 8x16 block is flat
214 */
215static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){
bb270c08
DB
216 int numEq= 0;
217 int y;
218 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
219 const int dcThreshold= dcOffset*2 + 1;
220
221 src+= stride*4; // src points to begin of the 8x8 Block
222 for(y=0; y<BLOCK_SIZE-1; y++)
223 {
224 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
225 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
226 if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
227 if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
228 if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
229 if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
230 if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
231 if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
232 src+= stride;
233 }
234 return numEq > c->ppMode.flatnessThreshold;
cf5ec61d
MN
235}
236
/**
 * Cheap range check over 8 rows of an 8 pixel wide block.
 * One pixel pair is probed per row -- columns (0,5), (2,7), (4,1), (6,3),
 * repeating every four rows -- and the check fails as soon as a probed pair
 * differs by more than 2*QP.
 * The preprocessor-disabled variant of the original (columns 0 and 7 of every
 * row) is not reproduced here.
 * @return 1 if every probed pair is within 2*QP, 0 otherwise
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{
    static const int probe[4][2]= { {0, 5}, {2, 7}, {4, 1}, {6, 3} };
    int row;

    for(row=0; row<8; row++, src+= stride){
        const int *pair= probe[row & 3];

        // a negative sum wraps to a huge unsigned value, so both bounds are tested at once
        if((unsigned)(src[pair[0]] - src[pair[1]] + 2*QP) > 4*QP) return 0;
    }
    return 1;
}
cf5ec61d 259
cb482d25
MN
260static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
261{
262#if 1
263#if 1
bb270c08
DB
264 int x;
265 src+= stride*4;
266 for(x=0; x<BLOCK_SIZE; x+=4)
267 {
268 if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0;
269 if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
270 if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
271 if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
272 }
cb482d25 273#else
bb270c08
DB
274 int x;
275 src+= stride*3;
276 for(x=0; x<BLOCK_SIZE; x++)
277 {
278 if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
279 }
cb482d25 280#endif
bb270c08 281 return 1;
cb482d25 282#else
bb270c08
DB
283 int x;
284 src+= stride*4;
285 for(x=0; x<BLOCK_SIZE; x++)
286 {
287 int min=255;
288 int max=0;
289 int y;
290 for(y=0; y<8; y++){
291 int v= src[x + y*stride];
292 if(v>max) max=v;
293 if(v<min) min=v;
294 }
295 if(max-min > 2*QP) return 0;
296 }
297 return 1;
cb482d25
MN
298#endif
299}
300
b0ac780a 301static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c){
bb270c08
DB
302 if( isHorizDC_C(src, stride, c) ){
303 if( isHorizMinMaxOk_C(src, stride, c->QP) )
304 return 1;
305 else
306 return 0;
307 }else{
308 return 2;
309 }
b0ac780a
MN
310}
311
cb482d25 312static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c){
bb270c08
DB
313 if( isVertDC_C(src, stride, c) ){
314 if( isVertMinMaxOk_C(src, stride, c->QP) )
315 return 1;
316 else
317 return 0;
318 }else{
319 return 2;
320 }
cf5ec61d
MN
321}
322
b0ac780a 323static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
cf5ec61d 324{
bb270c08
DB
325 int y;
326 for(y=0; y<BLOCK_SIZE; y++)
327 {
328 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
329
c26abfa5 330 if(FFABS(middleEnergy) < 8*c->QP)
bb270c08
DB
331 {
332 const int q=(dst[3] - dst[4])/2;
333 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
334 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
335
c26abfa5 336 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
8925915f 337 d= FFMAX(d, 0);
bb270c08
DB
338
339 d= (5*d + 32) >> 6;
02305ff3 340 d*= FFSIGN(-middleEnergy);
bb270c08
DB
341
342 if(q>0)
343 {
344 d= d<0 ? 0 : d;
345 d= d>q ? q : d;
346 }
347 else
348 {
349 d= d>0 ? 0 : d;
350 d= d<q ? q : d;
351 }
352
353 dst[3]-= d;
354 dst[4]+= d;
355 }
356 dst+= stride;
357 }
cf5ec61d
MN
358}
359
360/**
361 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
362 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
363 */
b0ac780a 364static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
cf5ec61d 365{
bb270c08
DB
366 int y;
367 for(y=0; y<BLOCK_SIZE; y++)
368 {
c26abfa5
DB
369 const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
370 const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
bb270c08
DB
371
372 int sums[10];
373 sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
374 sums[1] = sums[0] - first + dst[3];
375 sums[2] = sums[1] - first + dst[4];
376 sums[3] = sums[2] - first + dst[5];
377 sums[4] = sums[3] - first + dst[6];
378 sums[5] = sums[4] - dst[0] + dst[7];
379 sums[6] = sums[5] - dst[1] + last;
380 sums[7] = sums[6] - dst[2] + last;
381 sums[8] = sums[7] - dst[3] + last;
382 sums[9] = sums[8] - dst[4] + last;
383
384 dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
385 dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
386 dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
387 dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
388 dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
389 dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
390 dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
391 dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
392
393 dst+= stride;
394 }
cf5ec61d
MN
395}
396
4e4dcbc5 397/**
cc9b0679
MN
398 * Experimental Filter 1 (Horizontal)
399 * will not damage linear gradients
400 * Flat blocks should look like they where passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
401 * can only smooth blocks at the expected locations (it cant smooth them if they did move)
402 * MMX2 version does correct clipping C version doesnt
403 * not identical with the vertical one
4e4dcbc5 404 */
cc9b0679
MN
405static inline void horizX1Filter(uint8_t *src, int stride, int QP)
406{
bb270c08
DB
407 int y;
408 static uint64_t *lut= NULL;
409 if(lut==NULL)
410 {
411 int i;
6ab6c7c3 412 lut = av_malloc(256*8);
bb270c08
DB
413 for(i=0; i<256; i++)
414 {
415 int v= i < 128 ? 2*i : 2*(i-256);
117e45b0 416/*
cc9b0679 417//Simulate 112242211 9-Tap filter
bb270c08
DB
418 uint64_t a= (v/16) & 0xFF;
419 uint64_t b= (v/8) & 0xFF;
420 uint64_t c= (v/4) & 0xFF;
421 uint64_t d= (3*v/8) & 0xFF;
117e45b0 422*/
cc9b0679 423//Simulate piecewise linear interpolation
bb270c08
DB
424 uint64_t a= (v/16) & 0xFF;
425 uint64_t b= (v*3/16) & 0xFF;
426 uint64_t c= (v*5/16) & 0xFF;
427 uint64_t d= (7*v/16) & 0xFF;
428 uint64_t A= (0x100 - a)&0xFF;
429 uint64_t B= (0x100 - b)&0xFF;
430 uint64_t C= (0x100 - c)&0xFF;
431 uint64_t D= (0x100 - c)&0xFF;
432
433 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
434 (D<<24) | (C<<16) | (B<<8) | (A);
435 //lut[i] = (v<<32) | (v<<24);
436 }
437 }
438
439 for(y=0; y<BLOCK_SIZE; y++)
440 {
441 int a= src[1] - src[2];
442 int b= src[3] - src[4];
443 int c= src[5] - src[6];
444
c26abfa5 445 int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
bb270c08
DB
446
447 if(d < QP)
448 {
02305ff3 449 int v = d * FFSIGN(-b);
bb270c08
DB
450
451 src[1] +=v/8;
452 src[2] +=v/4;
453 src[3] +=3*v/8;
454 src[4] -=3*v/8;
455 src[5] -=v/4;
456 src[6] -=v/8;
457
458 }
459 src+=stride;
460 }
cc9b0679
MN
461}
462
12eebd26
MN
463/**
464 * accurate deblock filter
465 */
849f1035 466static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
bb270c08
DB
467 int y;
468 const int QP= c->QP;
469 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
470 const int dcThreshold= dcOffset*2 + 1;
12eebd26 471//START_TIMER
bb270c08
DB
472 src+= step*4; // src points to begin of the 8x8 Block
473 for(y=0; y<8; y++){
474 int numEq= 0;
475
476 if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
477 if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
478 if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
479 if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
480 if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
481 if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
482 if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
483 if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
484 if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
485 if(numEq > c->ppMode.flatnessThreshold){
486 int min, max, x;
487
488 if(src[0] > src[step]){
489 max= src[0];
490 min= src[step];
491 }else{
492 max= src[step];
493 min= src[0];
494 }
495 for(x=2; x<8; x+=2){
496 if(src[x*step] > src[(x+1)*step]){
497 if(src[x *step] > max) max= src[ x *step];
498 if(src[(x+1)*step] < min) min= src[(x+1)*step];
499 }else{
500 if(src[(x+1)*step] > max) max= src[(x+1)*step];
501 if(src[ x *step] < min) min= src[ x *step];
502 }
503 }
504 if(max-min < 2*QP){
c26abfa5
DB
505 const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
506 const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
bb270c08
DB
507
508 int sums[10];
509 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
510 sums[1] = sums[0] - first + src[3*step];
511 sums[2] = sums[1] - first + src[4*step];
512 sums[3] = sums[2] - first + src[5*step];
513 sums[4] = sums[3] - first + src[6*step];
514 sums[5] = sums[4] - src[0*step] + src[7*step];
515 sums[6] = sums[5] - src[1*step] + last;
516 sums[7] = sums[6] - src[2*step] + last;
517 sums[8] = sums[7] - src[3*step] + last;
518 sums[9] = sums[8] - src[4*step] + last;
519
520 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
521 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
522 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
523 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
524 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
525 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
526 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
527 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
528 }
529 }else{
530 const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
531
c26abfa5 532 if(FFABS(middleEnergy) < 8*QP)
bb270c08
DB
533 {
534 const int q=(src[3*step] - src[4*step])/2;
535 const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
536 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
537
c26abfa5 538 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
8925915f 539 d= FFMAX(d, 0);
bb270c08
DB
540
541 d= (5*d + 32) >> 6;
02305ff3 542 d*= FFSIGN(-middleEnergy);
bb270c08
DB
543
544 if(q>0)
545 {
546 d= d<0 ? 0 : d;
547 d= d>q ? q : d;
548 }
549 else
550 {
551 d= d>0 ? 0 : d;
552 d= d<q ? q : d;
553 }
554
555 src[3*step]-= d;
556 src[4*step]+= d;
557 }
558 }
559
560 src += stride;
561 }
12eebd26
MN
562/*if(step==16){
563 STOP_TIMER("step16")
564}else{
565 STOP_TIMER("stepX")
566}*/
567}
cc9b0679 568
e89952aa 569//Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
cc9b0679 570//Plain C versions
e89952aa
MN
571#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
572#define COMPILE_C
573#endif
574
b0ac780a
MN
575#ifdef ARCH_POWERPC
576#ifdef HAVE_ALTIVEC
577#define COMPILE_ALTIVEC
b0ac780a
MN
578#endif //HAVE_ALTIVEC
579#endif //ARCH_POWERPC
580
3cd52279 581#if defined(ARCH_X86)
e89952aa
MN
582
583#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
584#define COMPILE_MMX
585#endif
586
587#if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
588#define COMPILE_MMX2
589#endif
590
591#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
592#define COMPILE_3DNOW
593#endif
3cd52279 594#endif /* defined(ARCH_X86) */
e89952aa
MN
595
596#undef HAVE_MMX
597#undef HAVE_MMX2
598#undef HAVE_3DNOW
b0ac780a 599#undef HAVE_ALTIVEC
e89952aa
MN
600
601#ifdef COMPILE_C
cc9b0679
MN
602#undef HAVE_MMX
603#undef HAVE_MMX2
604#undef HAVE_3DNOW
cc9b0679
MN
605#define RENAME(a) a ## _C
606#include "postprocess_template.c"
e89952aa 607#endif
cc9b0679 608
b0ac780a
MN
609#ifdef ARCH_POWERPC
610#ifdef COMPILE_ALTIVEC
611#undef RENAME
612#define HAVE_ALTIVEC
613#define RENAME(a) a ## _altivec
614#include "postprocess_altivec_template.c"
615#include "postprocess_template.c"
616#endif
617#endif //ARCH_POWERPC
618
cc9b0679 619//MMX versions
e89952aa 620#ifdef COMPILE_MMX
cc9b0679
MN
621#undef RENAME
622#define HAVE_MMX
623#undef HAVE_MMX2
624#undef HAVE_3DNOW
cc9b0679
MN
625#define RENAME(a) a ## _MMX
626#include "postprocess_template.c"
e89952aa 627#endif
cc9b0679
MN
628
629//MMX2 versions
e89952aa 630#ifdef COMPILE_MMX2
cc9b0679
MN
631#undef RENAME
632#define HAVE_MMX
633#define HAVE_MMX2
634#undef HAVE_3DNOW
cc9b0679
MN
635#define RENAME(a) a ## _MMX2
636#include "postprocess_template.c"
e89952aa 637#endif
cc9b0679
MN
638
639//3DNOW versions
e89952aa 640#ifdef COMPILE_3DNOW
cc9b0679
MN
641#undef RENAME
642#define HAVE_MMX
643#undef HAVE_MMX2
644#define HAVE_3DNOW
cc9b0679
MN
645#define RENAME(a) a ## _3DNow
646#include "postprocess_template.c"
e89952aa 647#endif
cc9b0679
MN
648
649// minor note: the HAVE_xyz is messed up after that line so dont use it
650
651static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
bb270c08 652 QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc)
cc9b0679 653{
bb270c08
DB
654 PPContext *c= (PPContext *)vc;
655 PPMode *ppMode= (PPMode *)vm;
656 c->ppMode= *ppMode; //FIXME
9c9e467d 657
bb270c08
DB
658 // useing ifs here as they are faster than function pointers allthough the
659 // difference wouldnt be messureable here but its much better because
660 // someone might exchange the cpu whithout restarting mplayer ;)
e89952aa 661#ifdef RUNTIME_CPUDETECT
3cd52279 662#if defined(ARCH_X86)
bb270c08
DB
663 // ordered per speed fasterst first
664 if(c->cpuCaps & PP_CPU_CAPS_MMX2)
665 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
666 else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
667 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
668 else if(c->cpuCaps & PP_CPU_CAPS_MMX)
669 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
670 else
671 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
cc9b0679 672#else
b0ac780a
MN
673#ifdef ARCH_POWERPC
674#ifdef HAVE_ALTIVEC
71487254 675 if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
bb270c08 676 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
b0ac780a
MN
677 else
678#endif
679#endif
bb270c08 680 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
be44a4d7 681#endif
e89952aa
MN
682#else //RUNTIME_CPUDETECT
683#ifdef HAVE_MMX2
bb270c08 684 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 685#elif defined (HAVE_3DNOW)
bb270c08 686 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 687#elif defined (HAVE_MMX)
bb270c08 688 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
b0ac780a 689#elif defined (HAVE_ALTIVEC)
bb270c08 690 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa 691#else
bb270c08 692 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
e89952aa
MN
693#endif
694#endif //!RUNTIME_CPUDETECT
117e45b0
MN
695}
696
cc9b0679 697//static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
bb270c08 698// QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
13e00528 699
/* -pp Command line Help
 *
 * The filter names listed here are the ones pp_get_mode_by_name_and_quality()
 * actually accepts: the accurate deblockers are spelled "ahdeblock" and
 * "avdeblock" and the quantizer override "forcequant" (the name comparison is
 * case sensitive), exactly as in the filters[] table.
 */
char *pp_help=
"Available postprocessing filters:\n"
"Filters Options\n"
"short long name short long option Description\n"
"* * a autoq CPU power dependent enabler\n"
" c chrom chrominance filtering enabled\n"
" y nochrom chrominance filtering disabled\n"
" n noluma luma filtering disabled\n"
"hb hdeblock (2 threshold) horizontal deblocking filter\n"
" 1. difference factor: default=32, higher -> more deblocking\n"
" 2. flatness threshold: default=39, lower -> more deblocking\n"
" the h & v deblocking filters share these\n"
" so you can't set different thresholds for h / v\n"
"vb vdeblock (2 threshold) vertical deblocking filter\n"
"ha ahdeblock (2 threshold) horizontal deblocking filter\n"
"va avdeblock (2 threshold) vertical deblocking filter\n"
"h1 x1hdeblock experimental h deblock filter 1\n"
"v1 x1vdeblock experimental v deblock filter 1\n"
"dr dering deringing filter\n"
"al autolevels automatic brightness / contrast\n"
" f fullyrange stretch luminance to (0..255)\n"
"lb linblenddeint linear blend deinterlacer\n"
"li linipoldeint linear interpolating deinterlace\n"
"ci cubicipoldeint cubic interpolating deinterlacer\n"
"md mediandeint median deinterlacer\n"
"fd ffmpegdeint ffmpeg deinterlacer\n"
"l5 lowpass5 FIR lowpass deinterlacer\n"
"de default hb:a,vb:a,dr:a\n"
"fa fast h1:a,v1:a,dr:a\n"
"ac ha:a:128:7,va:a,dr:a\n"
"tn tmpnoise (3 threshold) temporal noise reducer\n"
" 1. <= 2. <= 3. larger -> stronger filtering\n"
"fq forcequant <quantizer> force quantizer\n"
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"\n"
;
911879d1 745
c41d972d 746pp_mode_t *pp_get_mode_by_name_and_quality(char *name, int quality)
911879d1 747{
bb270c08
DB
748 char temp[GET_MODE_BUFFER_SIZE];
749 char *p= temp;
7b49ce2e
SH
750 const char *filterDelimiters= ",/";
751 const char *optionDelimiters= ":";
bb270c08
DB
752 struct PPMode *ppMode;
753 char *filterToken;
754
6ab6c7c3 755 ppMode= av_malloc(sizeof(PPMode));
bb270c08
DB
756
757 ppMode->lumMode= 0;
758 ppMode->chromMode= 0;
759 ppMode->maxTmpNoise[0]= 700;
760 ppMode->maxTmpNoise[1]= 1500;
761 ppMode->maxTmpNoise[2]= 3000;
762 ppMode->maxAllowedY= 234;
763 ppMode->minAllowedY= 16;
764 ppMode->baseDcDiff= 256/8;
765 ppMode->flatnessThreshold= 56-16-1;
766 ppMode->maxClippedThreshold= 0.01;
767 ppMode->error=0;
768
769 strncpy(temp, name, GET_MODE_BUFFER_SIZE);
770
e7becfb2 771 av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
bb270c08
DB
772
773 for(;;){
774 char *filterName;
775 int q= 1000000; //PP_QUALITY_MAX;
776 int chrom=-1;
777 int luma=-1;
778 char *option;
779 char *options[OPTIONS_ARRAY_SIZE];
780 int i;
781 int filterNameOk=0;
782 int numOfUnknownOptions=0;
783 int enable=1; //does the user want us to enabled or disabled the filter
784
785 filterToken= strtok(p, filterDelimiters);
786 if(filterToken == NULL) break;
787 p+= strlen(filterToken) + 1; // p points to next filterToken
788 filterName= strtok(filterToken, optionDelimiters);
e7becfb2 789 av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
bb270c08
DB
790
791 if(*filterName == '-')
792 {
793 enable=0;
794 filterName++;
795 }
796
797 for(;;){ //for all options
798 option= strtok(NULL, optionDelimiters);
799 if(option == NULL) break;
800
e7becfb2 801 av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
bb270c08
DB
802 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
803 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
804 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
805 else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
806 else
807 {
808 options[numOfUnknownOptions] = option;
809 numOfUnknownOptions++;
810 }
811 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
812 }
813 options[numOfUnknownOptions] = NULL;
814
815 /* replace stuff from the replace Table */
816 for(i=0; replaceTable[2*i]!=NULL; i++)
817 {
818 if(!strcmp(replaceTable[2*i], filterName))
819 {
820 int newlen= strlen(replaceTable[2*i + 1]);
821 int plen;
822 int spaceLeft;
823
824 if(p==NULL) p= temp, *p=0; //last filter
825 else p--, *p=','; //not last filter
826
827 plen= strlen(p);
828 spaceLeft= p - temp + plen;
829 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE)
830 {
831 ppMode->error++;
832 break;
833 }
834 memmove(p + newlen, p, plen+1);
835 memcpy(p, replaceTable[2*i + 1], newlen);
836 filterNameOk=1;
837 }
838 }
839
840 for(i=0; filters[i].shortName!=NULL; i++)
841 {
bb270c08
DB
842 if( !strcmp(filters[i].longName, filterName)
843 || !strcmp(filters[i].shortName, filterName))
844 {
845 ppMode->lumMode &= ~filters[i].mask;
846 ppMode->chromMode &= ~filters[i].mask;
847
848 filterNameOk=1;
849 if(!enable) break; // user wants to disable it
850
851 if(q >= filters[i].minLumQuality && luma)
852 ppMode->lumMode|= filters[i].mask;
853 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
854 if(q >= filters[i].minChromQuality)
855 ppMode->chromMode|= filters[i].mask;
856
857 if(filters[i].mask == LEVEL_FIX)
858 {
859 int o;
860 ppMode->minAllowedY= 16;
861 ppMode->maxAllowedY= 234;
862 for(o=0; options[o]!=NULL; o++)
863 {
864 if( !strcmp(options[o],"fullyrange")
865 ||!strcmp(options[o],"f"))
866 {
867 ppMode->minAllowedY= 0;
868 ppMode->maxAllowedY= 255;
869 numOfUnknownOptions--;
870 }
871 }
872 }
873 else if(filters[i].mask == TEMP_NOISE_FILTER)
874 {
875 int o;
876 int numOfNoises=0;
877
878 for(o=0; options[o]!=NULL; o++)
879 {
880 char *tail;
881 ppMode->maxTmpNoise[numOfNoises]=
882 strtol(options[o], &tail, 0);
883 if(tail!=options[o])
884 {
885 numOfNoises++;
886 numOfUnknownOptions--;
887 if(numOfNoises >= 3) break;
888 }
889 }
890 }
891 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK
892 || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK)
893 {
894 int o;
895
896 for(o=0; options[o]!=NULL && o<2; o++)
897 {
898 char *tail;
899 int val= strtol(options[o], &tail, 0);
900 if(tail==options[o]) break;
901
902 numOfUnknownOptions--;
903 if(o==0) ppMode->baseDcDiff= val;
904 else ppMode->flatnessThreshold= val;
905 }
906 }
907 else if(filters[i].mask == FORCE_QUANT)
908 {
909 int o;
910 ppMode->forcedQuant= 15;
911
912 for(o=0; options[o]!=NULL && o<1; o++)
913 {
914 char *tail;
915 int val= strtol(options[o], &tail, 0);
916 if(tail==options[o]) break;
917
918 numOfUnknownOptions--;
919 ppMode->forcedQuant= val;
920 }
921 }
922 }
923 }
924 if(!filterNameOk) ppMode->error++;
925 ppMode->error += numOfUnknownOptions;
926 }
927
e7becfb2 928 av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
bb270c08
DB
929 if(ppMode->error)
930 {
e7becfb2 931 av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
6ab6c7c3 932 av_free(ppMode);
bb270c08
DB
933 return NULL;
934 }
935 return ppMode;
911879d1
MN
936}
937
c41d972d 938void pp_free_mode(pp_mode_t *mode){
6ab6c7c3 939 av_free(mode);
c41d972d
MN
940}
941
/**
 * Replace the buffer *p by a new, zero filled one of size bytes.
 * The old contents are not kept. The alignment argument is not used by this
 * function. If the allocation fails *p ends up being whatever av_mallocz()
 * returned.
 */
static void reallocAlign(void **p, int alignment, int size)
{
    void *fresh;

    av_free(*p);
    fresh= av_mallocz(size);
    *p= fresh;
}
946
0426af31 947static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
bb270c08
DB
948 int mbWidth = (width+15)>>4;
949 int mbHeight= (height+15)>>4;
950 int i;
951
952 c->stride= stride;
953 c->qpStride= qpStride;
954
955 reallocAlign((void **)&c->tempDst, 8, stride*24);
956 reallocAlign((void **)&c->tempSrc, 8, stride*24);
957 reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
958 reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
959 for(i=0; i<256; i++)
960 c->yHistogram[i]= width*height/64*15/256;
961
962 for(i=0; i<3; i++)
963 {
964 //Note:the +17*1024 is just there so i dont have to worry about r/w over te end
965 reallocAlign((void **)&c->tempBlured[i], 8, stride*mbHeight*16 + 17*1024);
966 reallocAlign((void **)&c->tempBluredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
967 }
968
969 reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
970 reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
971 reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
972 reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
88c0bc7e
MN
973}
974
4cfbf61b 975static void global_init(void){
bb270c08
DB
976 int i;
977 memset(clip_table, 0, 256);
978 for(i=256; i<512; i++)
979 clip_table[i]= i;
980 memset(clip_table+512, 0, 256);
134eb1e5
MN
981}
982
e7becfb2
DB
/**
 * Name callback referenced by av_codec_context_class.
 *
 * @param ptr ignored
 * @return the constant string "postproc"
 */
static const char * context_to_name(void * ptr)
{
    const char *label= "postproc";

    return label;
}
986
987static AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
988
88c0bc7e 989pp_context_t *pp_get_context(int width, int height, int cpuCaps){
6ab6c7c3 990 PPContext *c= av_malloc(sizeof(PPContext));
bb270c08
DB
991 int stride= (width+15)&(~15); //assumed / will realloc if needed
992 int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
115329f1 993
bb270c08 994 global_init();
134eb1e5 995
bb270c08 996 memset(c, 0, sizeof(PPContext));
e7becfb2 997 c->av_class = &av_codec_context_class;
bb270c08
DB
998 c->cpuCaps= cpuCaps;
999 if(cpuCaps&PP_FORMAT){
1000 c->hChromaSubSample= cpuCaps&0x3;
1001 c->vChromaSubSample= (cpuCaps>>4)&0x3;
1002 }else{
1003 c->hChromaSubSample= 1;
1004 c->vChromaSubSample= 1;
1005 }
88c0bc7e 1006
bb270c08 1007 reallocBuffers(c, width, height, stride, qpStride);
115329f1 1008
bb270c08 1009 c->frameNum=-1;
45b4f285 1010
bb270c08 1011 return c;
45b4f285
MN
1012}
1013
9cb54f43 1014void pp_free_context(void *vc){
bb270c08
DB
1015 PPContext *c = (PPContext*)vc;
1016 int i;
115329f1 1017
6ab6c7c3
LB
1018 for(i=0; i<3; i++) av_free(c->tempBlured[i]);
1019 for(i=0; i<3; i++) av_free(c->tempBluredPast[i]);
115329f1 1020
6ab6c7c3
LB
1021 av_free(c->tempBlocks);
1022 av_free(c->yHistogram);
1023 av_free(c->tempDst);
1024 av_free(c->tempSrc);
1025 av_free(c->deintTemp);
1026 av_free(c->stdQPTable);
1027 av_free(c->nonBQPTable);
1028 av_free(c->forcedQPTable);
115329f1 1029
bb270c08 1030 memset(c, 0, sizeof(PPContext));
88c0bc7e 1031
6ab6c7c3 1032 av_free(c);
9c9e467d
MN
1033}
1034
9cb54f43 1035void pp_postprocess(uint8_t * src[3], int srcStride[3],
9c9e467d 1036 uint8_t * dst[3], int dstStride[3],
ec487e5d 1037 int width, int height,
9c9e467d 1038 QP_STORE_T *QP_store, int QPStride,
bb270c08 1039 pp_mode_t *vm, void *vc, int pict_type)
911879d1 1040{
bb270c08
DB
1041 int mbWidth = (width+15)>>4;
1042 int mbHeight= (height+15)>>4;
1043 PPMode *mode = (PPMode*)vm;
1044 PPContext *c = (PPContext*)vc;
c26abfa5
DB
1045 int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
1046 int absQPStride = FFABS(QPStride);
bb270c08
DB
1047
1048 // c->stride and c->QPStride are always positive
1049 if(c->stride < minStride || c->qpStride < absQPStride)
1050 reallocBuffers(c, width, height,
8925915f
DB
1051 FFMAX(minStride, c->stride),
1052 FFMAX(c->qpStride, absQPStride));
bb270c08
DB
1053
1054 if(QP_store==NULL || (mode->lumMode & FORCE_QUANT))
1055 {
1056 int i;
1057 QP_store= c->forcedQPTable;
1058 absQPStride = QPStride = 0;
1059 if(mode->lumMode & FORCE_QUANT)
1060 for(i=0; i<mbWidth; i++) QP_store[i]= mode->forcedQuant;
1061 else
1062 for(i=0; i<mbWidth; i++) QP_store[i]= 1;
1063 }
0426af31 1064
bb270c08
DB
1065 if(pict_type & PP_PICT_TYPE_QP2){
1066 int i;
1067 const int count= mbHeight * absQPStride;
1068 for(i=0; i<(count>>2); i++){
1069 ((uint32_t*)c->stdQPTable)[i] = (((uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1070 }
1071 for(i<<=2; i<count; i++){
1072 c->stdQPTable[i] = QP_store[i]>>1;
1073 }
0426af31 1074 QP_store= c->stdQPTable;
bb270c08
DB
1075 QPStride= absQPStride;
1076 }
0426af31 1077
ec487e5d
MN
1078if(0){
1079int x,y;
1080for(y=0; y<mbHeight; y++){
bb270c08 1081 for(x=0; x<mbWidth; x++){
e7becfb2 1082 av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
bb270c08 1083 }
e7becfb2 1084 av_log(c, AV_LOG_INFO, "\n");
ec487e5d 1085}
e7becfb2 1086 av_log(c, AV_LOG_INFO, "\n");
ec487e5d 1087}
51e19dcc 1088
bb270c08
DB
1089 if((pict_type&7)!=3)
1090 {
1091 if (QPStride >= 0) {
1092 int i;
1093 const int count= mbHeight * QPStride;
1094 for(i=0; i<(count>>2); i++){
1095 ((uint32_t*)c->nonBQPTable)[i] = ((uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1096 }
1097 for(i<<=2; i<count; i++){
1098 c->nonBQPTable[i] = QP_store[i] & 0x3F;
1099 }
1100 } else {
1101 int i,j;
1102 for(i=0; i<mbHeight; i++) {
1103 for(j=0; j<absQPStride; j++) {
1104 c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1105 }
1106 }
1107 }
1108 }
1109
e7becfb2
DB
1110 av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
1111 mode->lumMode, mode->chromMode);
bb270c08
DB
1112
1113 postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1114 width, height, QP_store, QPStride, 0, mode, c);
1115
1116 width = (width )>>c->hChromaSubSample;
1117 height = (height)>>c->vChromaSubSample;
1118
1119 if(mode->chromMode)
1120 {
1121 postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1122 width, height, QP_store, QPStride, 1, mode, c);
1123 postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1124 width, height, QP_store, QPStride, 2, mode, c);
1125 }
1126 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2])
1127 {
1128 linecpy(dst[1], src[1], height, srcStride[1]);
1129 linecpy(dst[2], src[2], height, srcStride[2]);
1130 }
1131 else
1132 {
1133 int y;
1134 for(y=0; y<height; y++)
1135 {
1136 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1137 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
1138 }
1139 }
911879d1
MN
1140}
1141