Commit | Line | Data |
---|---|---|
3057fa66 A |
1 | /* |
2 | Copyright (C) 2001 Michael Niedermayer (michaelni@gmx.at) | |
3 | ||
4 | This program is free software; you can redistribute it and/or modify | |
5 | it under the terms of the GNU General Public License as published by | |
6 | the Free Software Foundation; either version 2 of the License, or | |
7 | (at your option) any later version. | |
8 | ||
9 | This program is distributed in the hope that it will be useful, | |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | GNU General Public License for more details. | |
13 | ||
14 | You should have received a copy of the GNU General Public License | |
15 | along with this program; if not, write to the Free Software | |
16 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
17 | */ | |
18 | ||
19 | /* | |
3b58b885 | 20 | C MMX MMX2 3DNow |
3057fa66 A |
21 | isVertDC Ec Ec |
22 | isVertMinMaxOk Ec Ec | |
3b58b885 | 23 | doVertLowPass E e e |
3057fa66 A |
24 | doVertDefFilter Ec Ec Ec |
25 | isHorizDC Ec Ec | |
26 | isHorizMinMaxOk a | |
3b58b885 | 27 | doHorizLowPass E a a |
13e00528 | 28 | doHorizDefFilter E ac ac |
3057fa66 | 29 | deRing |
3b58b885 MN |
30 | Vertical RKAlgo1 E a a |
31 | Vertical X1 a E E | |
32 | Horizontal X1 a E E | |
33 | LinIpolDeinterlace a E E* | |
34 | LinBlendDeinterlace a E E* | |
35 | MedianDeinterlace a E | |
d5a1a995 | 36 | |
3057fa66 | 37 | |
13e00528 | 38 | * I don't have a 3DNow CPU -> it's untested
3057fa66 A |
39 | E = Exact implementation |
40 | e = almost exact implementation | |
41 | a = alternative / approximate impl | |
42 | c = checked against the other implementations (-vo md5) | |
43 | */ | |
44 | ||
45 | /* | |
46 | TODO: | |
9f45d04d | 47 | verify that everything works as it should (how?)
3057fa66 A |
48 | reduce the time wasted on the mem transfer |
49 | implement dering | |
13e00528 | 50 | implement everything in C at least (done at the moment but ...) |
3057fa66 A |
51 | unroll stuff if instructions depend too much on the prior one |
52 | we use 8x8 blocks for the horizontal filters, opendivx seems to use 8x4? | |
53 | move YScale thing to the end instead of fixing QP | |
13e00528 | 54 | write a faster and higher quality deblocking filter :) |
d5a1a995 MN |
55 | do something about the speed of the horizontal filters |
56 | make the main loop more flexible (variable number of blocks at once) | |
57 | (the if/else stuff per block is slowing things down) | |
9f45d04d MN |
58 | compare the quality & speed of all filters |
59 | implement a few simple deinterlacing filters | |
60 | split this huge file | |
3b58b885 | 61 | fix warnings (unused vars, ...) |
3057fa66 A |
62 | ... |
63 | ||
64 | Notes: | |
65 | ||
66 | */ | |
67 | ||
13e00528 | 68 | /* |
9f45d04d | 69 | Changelog: use the CVS log |
3b58b885 MN |
70 | rewrote the horizontal lowpass filter to fix a bug which caused a blocky look |
71 | added deinterlace filters (linear interpolate, linear blend, median) | |
72 | minor cleanups (removed some commented-out stuff) | |
d5a1a995 MN |
73 | 0.1.3 |
74 | bugfixes: last 3 lines not brightness/contrast corrected | |
75 | brightness statistics messed up with initial black pic | |
76 | changed initial values of the brightness statistics | |
77 | C++ -> C conversion | |
78 | QP range question solved (very likely 1<=QP<=32 according to arpi) | |
79 | new experimental vertical deblocking filter | |
80 | RK filter has 3dNow support now (untested) | |
13e00528 A |
81 | 0.1.2 |
82 | fixed a bug in the horizontal default filter | |
83 | 3dnow version of the Horizontal & Vertical Lowpass filters | |
84 | mmx version of the Horizontal Default filter | |
85 | mmx2 & C versions of a simple filter described in a paper from ramkishor & karandikar | |
86 | added mode flags & quality2mode function | |
87 | 0.1.1 | |
88 | */ | |
89 | ||
3057fa66 A |
90 | |
91 | #include <inttypes.h> | |
92 | #include <stdio.h> | |
d5a1a995 | 93 | #include <stdlib.h> |
3057fa66 | 94 | #include "../config.h" |
3057fa66 | 95 | //#undef HAVE_MMX2 |
13e00528 | 96 | //#define HAVE_3DNOW |
3057fa66 | 97 | //#undef HAVE_MMX |
13e00528 | 98 | #include "postprocess.h" |
3057fa66 | 99 | |
e939e1c3 A |
100 | #define MIN(a,b) ((a) > (b) ? (b) : (a)) /* smaller of a and b; an argument may be evaluated twice, so avoid side effects */ |
101 | #define MAX(a,b) ((a) < (b) ? (b) : (a)) /* larger of a and b; an argument may be evaluated twice, so avoid side effects */ | |
102 | #define ABS(a) ((a) > 0 ? (a) : (-(a))) /* absolute value of a; a is evaluated twice */ | |
103 | #define SIGN(a) ((a) > 0 ? 1 : -1) /* 1 when a > 0, otherwise -1 (note: SIGN(0) yields -1) */ | |
104 | ||
105 | #ifdef HAVE_MMX2 /* PAVGB(a,b) stringifies both operands into a single instruction line */ | |
106 | #define PAVGB(a,b) "pavgb " #a ", " #b " \n\t" /* variant using the pavgb mnemonic */ | |
107 | #elif defined (HAVE_3DNOW) | |
108 | #define PAVGB(a,b) "pavgusb " #a ", " #b " \n\t" /* variant using the pavgusb mnemonic */ | |
109 | #endif /* PAVGB stays undefined when neither HAVE_MMX2 nor HAVE_3DNOW is defined */ | |
3057fa66 A |
110 | |
111 | static uint64_t packedYOffset= 0x0000000000000000LL; | |
112 | static uint64_t packedYScale= 0x0100010001000100LL; | |
113 | static uint64_t w05= 0x0005000500050005LL; | |
114 | static uint64_t w20= 0x0020002000200020LL; | |
115 | static uint64_t w1400= 0x1400140014001400LL; | |
116 | static uint64_t bm00000001= 0x00000000000000FFLL; | |
117 | static uint64_t bm00010000= 0x000000FF00000000LL; | |
118 | static uint64_t bm00001000= 0x00000000FF000000LL; | |
119 | static uint64_t bm10000000= 0xFF00000000000000LL; | |
120 | static uint64_t bm10000001= 0xFF000000000000FFLL; | |
121 | static uint64_t bm11000011= 0xFFFF00000000FFFFLL; | |
13e00528 | 122 | static uint64_t bm00000011= 0x000000000000FFFFLL; |
9f45d04d | 123 | static uint64_t bm11111110= 0xFFFFFFFFFFFFFF00LL; |
13e00528 | 124 | static uint64_t bm11000000= 0xFFFF000000000000LL; |
3057fa66 A |
125 | static uint64_t bm00011000= 0x000000FFFF000000LL; |
126 | static uint64_t bm00110011= 0x0000FFFF0000FFFFLL; | |
127 | static uint64_t bm11001100= 0xFFFF0000FFFF0000LL; | |
128 | static uint64_t b00= 0x0000000000000000LL; | |
9f45d04d | 129 | static uint64_t b01= 0x0101010101010101LL; |
3057fa66 A |
130 | static uint64_t b02= 0x0202020202020202LL; |
131 | static uint64_t b0F= 0x0F0F0F0F0F0F0F0FLL; | |
132 | static uint64_t bFF= 0xFFFFFFFFFFFFFFFFLL; | |
13e00528 A |
133 | static uint64_t b20= 0x2020202020202020LL; |
134 | static uint64_t b80= 0x8080808080808080LL; | |
3057fa66 A |
135 | static uint64_t b7E= 0x7E7E7E7E7E7E7E7ELL; |
136 | static uint64_t b7C= 0x7C7C7C7C7C7C7C7CLL; | |
137 | static uint64_t b3F= 0x3F3F3F3F3F3F3F3FLL; | |
138 | static uint64_t temp0=0; | |
139 | static uint64_t temp1=0; | |
140 | static uint64_t temp2=0; | |
141 | static uint64_t temp3=0; | |
142 | static uint64_t temp4=0; | |
143 | static uint64_t temp5=0; | |
144 | static uint64_t pQPb=0; | |
145 | static uint8_t tempBlock[16*16]; | |
146 | ||
147 | int hFlatnessThreshold= 56 - 16; | |
148 | int vFlatnessThreshold= 56 - 16; | |
149 | ||
150 | //amount of "black" you are willing to lose to get a brightness-corrected picture | |
151 | double maxClippedThreshold= 0.01; | |
152 | ||
153 | int maxAllowedY=255; | |
154 |