BGR24 bugfix
[libav.git] / postproc / postprocess.c
CommitLineData
3057fa66
A
1/*
2 Copyright (C) 2001 Michael Niedermayer (michaelni@gmx.at)
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17*/
18
19/*
3b58b885 20 C MMX MMX2 3DNow
3057fa66
A
21isVertDC Ec Ec
22isVertMinMaxOk Ec Ec
3b58b885 23doVertLowPass E e e
3057fa66
A
24doVertDefFilter Ec Ec Ec
25isHorizDC Ec Ec
26isHorizMinMaxOk a
3b58b885 27doHorizLowPass E a a
13e00528 28doHorizDefFilter E ac ac
3057fa66 29deRing
3b58b885
MN
30Vertical RKAlgo1 E a a
31Vertical X1 a E E
32Horizontal X1 a E E
acced553
MN
33LinIpolDeinterlace e E E*
34CubicIpolDeinterlace a e e*
35LinBlendDeinterlace e E E*
a6be8111 36MedianDeinterlace Ec Ec
d5a1a995 37
3057fa66 38
13e00528 39* I don't have a 3DNow CPU -> it's untested
3057fa66 40E = Exact implementation
acced553 41e = almost exact implementation (slightly different rounding,...)
3057fa66
A
42a = alternative / approximate impl
43c = checked against the other implementations (-vo md5)
44*/
45
46/*
47TODO:
9f45d04d 48verify that everything works as it should (how?)
3057fa66
A
49reduce the time wasted on the mem transfer
50implement dering
13e00528 51implement everything in C at least (done at the moment but ...)
3057fa66
A
52unroll stuff if instructions depend too much on the prior one
53we use 8x8 blocks for the horizontal filters, opendivx seems to use 8x4?
54move YScale thing to the end instead of fixing QP
13e00528 55write a faster and higher quality deblocking filter :)
d5a1a995
MN
56do something about the speed of the horizontal filters
57make the mainloop more flexible (variable number of blocks at once)
58 (the if/else stuff per block is slowing things down)
9f45d04d 59compare the quality & speed of all filters
9f45d04d 60split this huge file
3b58b885 61fix warnings (unused vars, ...)
a6be8111 62noise reduction filters
3057fa66
A
63...
64
65Notes:
66
13e00528
A
67*/
68
a6be8111 69//Changelog: use the CVS log
3057fa66
A
70
71#include <inttypes.h>
72#include <stdio.h>
d5a1a995 73#include <stdlib.h>
3057fa66 74#include "../config.h"
3057fa66 75//#undef HAVE_MMX2
13e00528 76//#define HAVE_3DNOW
3057fa66 77//#undef HAVE_MMX
13e00528 78#include "postprocess.h"
3057fa66 79
e939e1c3
A
// Small arithmetic helper macros.
// Each argument appears more than once in the expansion, so it may be
// evaluated more than once: do not pass expressions with side effects.
80#define MIN(a,b) ((a) > (b) ? (b) : (a))
81#define MAX(a,b) ((a) < (b) ? (b) : (a))
82#define ABS(a) ((a) > 0 ? (a) : (-(a)))
// Note: SIGN(0) evaluates to -1 (only strictly positive values give 1).
83#define SIGN(a) ((a) > 0 ? 1 : -1)
84
// PAVGB(a,b) expands to the assembler text of a packed-byte-average
// instruction followed by " \n\t", for pasting into inline asm strings:
//   HAVE_MMX2  -> "pavgb a, b"
//   HAVE_3DNOW -> "pavgusb a, b" (used only when HAVE_MMX2 is not defined)
// When neither macro is defined, PAVGB is left undefined.
85#ifdef HAVE_MMX2
86#define PAVGB(a,b) "pavgb " #a ", " #b " \n\t"
87#elif defined (HAVE_3DNOW)
88#define PAVGB(a,b) "pavgusb " #a ", " #b " \n\t"
89#endif
3057fa66
A
90
// File-scope 64-bit packed values (8 bytes / four 16-bit words each).
// None of them is declared const.
// NOTE(review): presumably referenced by name from inline asm elsewhere in
// this file -- confirm before renaming or changing qualifiers.

// Initial values: offset is all zero, scale holds 0x0100 in each 16-bit word.
91static uint64_t packedYOffset= 0x0000000000000000LL;
92static uint64_t packedYScale= 0x0100010001000100LL;
// wXX: the 16-bit value 0xXX (0x1400 for w1400) repeated in all four words.
93static uint64_t w05= 0x0005000500050005LL;
94static uint64_t w20= 0x0020002000200020LL;
95static uint64_t w1400= 0x1400140014001400LL;
// bmDDDDDDDD: byte masks. Each digit of the name stands for one byte,
// most significant byte first: 1 -> 0xFF, 0 -> 0x00.
96static uint64_t bm00000001= 0x00000000000000FFLL;
97static uint64_t bm00010000= 0x000000FF00000000LL;
98static uint64_t bm00001000= 0x00000000FF000000LL;
99static uint64_t bm10000000= 0xFF00000000000000LL;
100static uint64_t bm10000001= 0xFF000000000000FFLL;
101static uint64_t bm11000011= 0xFFFF00000000FFFFLL;
13e00528 102static uint64_t bm00000011= 0x000000000000FFFFLL;
9f45d04d 103static uint64_t bm11111110= 0xFFFFFFFFFFFFFF00LL;
13e00528 104static uint64_t bm11000000= 0xFFFF000000000000LL;
3057fa66
A
105static uint64_t bm00011000= 0x000000FFFF000000LL;
106static uint64_t bm00110011= 0x0000FFFF0000FFFFLL;
107static uint64_t bm11001100= 0xFFFF0000FFFF0000LL;
// bXX: the byte value 0xXX repeated in all eight bytes.
108static uint64_t b00= 0x0000000000000000LL;
9f45d04d 109static uint64_t b01= 0x0101010101010101LL;
3057fa66
A
110static uint64_t b02= 0x0202020202020202LL;
111static uint64_t b0F= 0x0F0F0F0F0F0F0F0FLL;
112static uint64_t bFF= 0xFFFFFFFFFFFFFFFFLL;
13e00528
A
113static uint64_t b20= 0x2020202020202020LL;
114static uint64_t b80= 0x8080808080808080LL;
3057fa66
A
115static uint64_t b7E= 0x7E7E7E7E7E7E7E7ELL;
116static uint64_t b7C= 0x7C7C7C7C7C7C7C7CLL;
117static uint64_t b3F= 0x3F3F3F3F3F3F3F3FLL;
// Zero-initialized 64-bit variables.
// NOTE(review): these look like scratch storage (and a packed QP value in
// pQPb) for the filter code; their users are not visible here -- confirm.
118static uint64_t temp0=0;
119static uint64_t temp1=0;
120static uint64_t temp2=0;
121static uint64_t temp3=0;
122static uint64_t temp4=0;
123static uint64_t temp5=0;
124static uint64_t pQPb=0;
// 256-byte (16*16) buffer.
658a85f2 125static uint8_t tempBlock[16*16]; //used so the horizontal code gets aligned data
3057fa66
A
126
// Non-static globals (external linkage), so other translation units can
// read or change them. Both flatness thresholds start at 56 - 16 = 40.
// NOTE(review): presumably the h/v prefixes select the horizontal and
// vertical filters -- confirm against the code that reads them.
127int hFlatnessThreshold= 56 - 16;
128int vFlatnessThreshold= 56 - 16;
129
130//amount of "black" you are willing to lose to get a brightness corrected picture
131double maxClippedThreshold= 0.01;
132
// Initial value 255.
// NOTE(review): the name suggests an upper bound on luma (Y) values -- confirm.
133int maxAllowedY=255;
133int maxAllowedY=255;
134