header for swscale.c
[libav.git] / postproc / postprocess.c
CommitLineData
3057fa66
A
1/*
2 Copyright (C) 2001 Michael Niedermayer (michaelni@gmx.at)
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17*/
18
19/*
3b58b885 20 C MMX MMX2 3DNow
3057fa66
A
21isVertDC Ec Ec
22isVertMinMaxOk Ec Ec
3b58b885 23doVertLowPass E e e
3057fa66
A
24doVertDefFilter Ec Ec Ec
25isHorizDC Ec Ec
26isHorizMinMaxOk a
3b58b885 27doHorizLowPass E a a
13e00528 28doHorizDefFilter E ac ac
3057fa66 29deRing
3b58b885
MN
30Vertical RKAlgo1 E a a
31Vertical X1 a E E
32Horizontal X1 a E E
33LinIpolDeinterlace a E E*
34LinBlendDeinterlace a E E*
35MedianDeinterlace a E
d5a1a995 36
3057fa66 37
13e00528 38* I don't have a 3DNow CPU -> it's untested
3057fa66
A
39E = Exact implementation
40e = almost exact implementation
41a = alternative / approximate impl
42c = checked against the other implementations (-vo md5)
43*/
44
45/*
46TODO:
9f45d04d 47verify that everything works as it should (how?)
3057fa66
A
48reduce the time wasted on the mem transfer
49implement dering
13e00528 50implement everything in C at least (done at the moment but ...)
3057fa66
A
51unroll stuff if instructions depend too much on the prior one
52we use 8x8 blocks for the horizontal filters, opendivx seems to use 8x4?
53move YScale thing to the end instead of fixing QP
13e00528 54write a faster and higher quality deblocking filter :)
d5a1a995
MN
55do something about the speed of the horizontal filters
56make the mainloop more flexible (variable number of blocks at once)
57 (the if/else stuff per block is slowing things down)
9f45d04d
MN
58compare the quality & speed of all filters
59implement a few simple deinterlacing filters
60split this huge file
3b58b885 61fix warnings (unused vars, ...)
3057fa66
A
62...
63
64Notes:
65
66*/
67
13e00528 68/*
9f45d04d 69Changelog: use the CVS log
3b58b885
MN
70rewrote the horizontal lowpass filter to fix a bug which caused a blocky look
71added deinterlace filters (linear interpolate, linear blend, median)
72minor cleanups (removed some commented-out stuff)
d5a1a995
MN
730.1.3
74 bugfixes: last 3 lines not brightness/contrast corrected
75 brightness statistics messed up with initial black pic
76 changed initial values of the brightness statistics
77 C++ -> C conversion
78 QP range question solved (very likely 1<=QP<=32 according to arpi)
79 new experimental vertical deblocking filter
80 RK filter has 3dNow support now (untested)
13e00528
A
810.1.2
82 fixed a bug in the horizontal default filter
83 3dnow version of the Horizontal & Vertical Lowpass filters
84 mmx version of the Horizontal Default filter
85 mmx2 & C versions of a simple filter described in a paper from ramkishor & karandikar
86 added mode flags & quality2mode function
870.1.1
88*/
89
3057fa66
A
90
91#include <inttypes.h>
92#include <stdio.h>
d5a1a995 93#include <stdlib.h>
3057fa66 94#include "../config.h"
3057fa66 95//#undef HAVE_MMX2
13e00528 96//#define HAVE_3DNOW
3057fa66 97//#undef HAVE_MMX
13e00528 98#include "postprocess.h"
3057fa66 99
e939e1c3
A
// Generic arithmetic helper macros.
// Every macro below expands its argument(s) more than once, so arguments with
// side effects (e.g. MIN(i++, j)) are evaluated repeatedly and must be avoided.
100#define MIN(a,b) ((a) > (b) ? (b) : (a))
101#define MAX(a,b) ((a) < (b) ? (b) : (a))
// ABS negates every value that is not strictly positive, so ABS(0) is 0.
102#define ABS(a) ((a) > 0 ? (a) : (-(a)))
// SIGN is two-valued: 1 for a > 0, and -1 otherwise (SIGN(0) is -1, not 0).
103#define SIGN(a) ((a) > 0 ? 1 : -1)
104
// PAVGB(a,b) stringifies its two operands into one assembler line terminated
// by " \n\t": it uses the "pavgb" mnemonic when HAVE_MMX2 is defined, and
// "pavgusb" when only HAVE_3DNOW is defined.
// If neither feature macro is defined, PAVGB is left undefined, so any use
// must itself be guarded by HAVE_MMX2 / HAVE_3DNOW.
// NOTE(review): presumably pasted into inline-asm templates elsewhere in the
// file; the users are not visible in this section.
105#ifdef HAVE_MMX2
106#define PAVGB(a,b) "pavgb " #a ", " #b " \n\t"
107#elif defined (HAVE_3DNOW)
108#define PAVGB(a,b) "pavgusb " #a ", " #b " \n\t"
109#endif
3057fa66
A
110
// 64-bit values holding four identical 16-bit lanes.
// packedYOffset starts as all zeros and packedYScale as 0x0100 in every lane.
// Neither is const, so other code in the file may overwrite them.
// NOTE(review): the names suggest a luma offset/scale pair with 0x0100 acting
// as a neutral scale; confirm against the code that uses them.
111static uint64_t packedYOffset= 0x0000000000000000LL;
112static uint64_t packedYScale= 0x0100010001000100LL;
// wXX: the 16-bit value 0xXX (hexadecimal) replicated into all four lanes.
113static uint64_t w05= 0x0005000500050005LL;
114static uint64_t w20= 0x0020002000200020LL;
115static uint64_t w1400= 0x1400140014001400LL;
// Byte-select masks. The eight digits after "bm" describe the eight bytes of
// the 64-bit value from most significant (leftmost digit) to least
// significant (rightmost digit): a 1 means that byte is 0xFF, a 0 means 0x00.
116static uint64_t bm00000001= 0x00000000000000FFLL;
117static uint64_t bm00010000= 0x000000FF00000000LL;
118static uint64_t bm00001000= 0x00000000FF000000LL;
119static uint64_t bm10000000= 0xFF00000000000000LL;
120static uint64_t bm10000001= 0xFF000000000000FFLL;
121static uint64_t bm11000011= 0xFFFF00000000FFFFLL;
13e00528 122static uint64_t bm00000011= 0x000000000000FFFFLL;
9f45d04d 123static uint64_t bm11111110= 0xFFFFFFFFFFFFFF00LL;
13e00528 124static uint64_t bm11000000= 0xFFFF000000000000LL;
3057fa66
A
125static uint64_t bm00011000= 0x000000FFFF000000LL;
126static uint64_t bm00110011= 0x0000FFFF0000FFFFLL;
127static uint64_t bm11001100= 0xFFFF0000FFFF0000LL;
// bXX: the byte value 0xXX (hexadecimal) replicated into all eight bytes of a
// 64-bit value.
128static uint64_t b00= 0x0000000000000000LL;
9f45d04d 129static uint64_t b01= 0x0101010101010101LL;
3057fa66
A
130static uint64_t b02= 0x0202020202020202LL;
131static uint64_t b0F= 0x0F0F0F0F0F0F0F0FLL;
132static uint64_t bFF= 0xFFFFFFFFFFFFFFFFLL;
13e00528
A
133static uint64_t b20= 0x2020202020202020LL;
134static uint64_t b80= 0x8080808080808080LL;
3057fa66
A
135static uint64_t b7E= 0x7E7E7E7E7E7E7E7ELL;
136static uint64_t b7C= 0x7C7C7C7C7C7C7C7CLL;
137static uint64_t b3F= 0x3F3F3F3F3F3F3F3FLL;
// File-local, zero-initialised 64-bit scratch variables.
// NOTE(review): presumably spill slots for the inline-asm filters; being
// static and mutable, they would make those routines non-reentrant. Confirm
// against the code that uses them.
138static uint64_t temp0=0;
139static uint64_t temp1=0;
140static uint64_t temp2=0;
141static uint64_t temp3=0;
142static uint64_t temp4=0;
143static uint64_t temp5=0;
// NOTE(review): name suggests the current QP replicated per byte; it is only
// initialised to zero here, so confirm where it is written.
144static uint64_t pQPb=0;
// File-local byte buffer of 16*16 = 256 elements.
145static uint8_t tempBlock[16*16];
146
// Tunable globals. None of them is static, so they have external linkage and
// can be changed from other translation units.
// Both flatness thresholds start at 56 - 16 = 40.
// NOTE(review): presumably h = horizontal and v = vertical filter; confirm
// against the code that reads them.
147int hFlatnessThreshold= 56 - 16;
148int vFlatnessThreshold= 56 - 16;
149
150//amount of "black" you are willing to lose to get a brightness-corrected picture
151double maxClippedThreshold= 0.01;
152
// Starts at 255.
// NOTE(review): presumably the upper bound for luma after brightness
// correction; confirm against the code that reads it.
153int maxAllowedY=255;
154