Commit | Line | Data |
---|---|---|
3057fa66 A |
1 | /* |
2 | Copyright (C) 2001 Michael Niedermayer (michaelni@gmx.at) | |
3 | ||
4 | This program is free software; you can redistribute it and/or modify | |
5 | it under the terms of the GNU General Public License as published by | |
6 | the Free Software Foundation; either version 2 of the License, or | |
7 | (at your option) any later version. | |
8 | ||
9 | This program is distributed in the hope that it will be useful, | |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | GNU General Public License for more details. | |
13 | ||
14 | You should have received a copy of the GNU General Public License | |
15 | along with this program; if not, write to the Free Software | |
16 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
17 | */ | |
18 | ||
19 | /* | |
13e00528 | 20 | C MMX MMX2 3DNow* |
3057fa66 A |
21 | isVertDC Ec Ec |
22 | isVertMinMaxOk Ec Ec | |
13e00528 | 23 | doVertLowPass E e e* |
3057fa66 A |
24 | doVertDefFilter Ec Ec Ec |
25 | isHorizDC Ec Ec | |
26 | isHorizMinMaxOk a | |
13e00528 A |
27 | doHorizLowPass E a a* |
28 | doHorizDefFilter E ac ac | |
3057fa66 | 29 | deRing |
d5a1a995 MN |
30 | RKAlgo1 E a a* |
31 | X1 a E E* | |
32 | ||
3057fa66 | 33 | |
13e00528 | 34 | * I don't have a 3DNow CPU -> it's untested |
3057fa66 A |
35 | E = Exact implementation |
36 | e = almost exact implementation | |
37 | a = alternative / approximate impl | |
38 | c = checked against the other implementations (-vo md5) | |
39 | */ | |
40 | ||
41 | /* | |
42 | TODO: | |
43 | verify that everything works as it should | |
44 | reduce the time wasted on the mem transfer | |
45 | implement dering | |
13e00528 | 46 | implement everything in C at least (done at the moment but ...) |
3057fa66 A |
47 | unroll stuff if instructions depend too much on the prior one |
48 | we use 8x8 blocks for the horizontal filters, opendivx seems to use 8x4? | |
49 | move YScale thing to the end instead of fixing QP | |
13e00528 | 50 | write a faster and higher quality deblocking filter :) |
d5a1a995 MN |
51 | do something about the speed of the horizontal filters |
52 | make the mainloop more flexible (variable number of blocks at once) | |
53 | (the if/else stuff per block is slowing things down) | |
3057fa66 A |
54 | ... |
55 | ||
56 | Notes: | |
57 | ||
58 | */ | |
59 | ||
13e00528 A |
60 | /* |
61 | Changelog: | |
d5a1a995 MN |
62 | 0.1.3 |
63 | bugfixes: last 3 lines not brightness/contrast corrected | |
64 | brightness statistics messed up with initial black pic | |
65 | changed initial values of the brightness statistics | |
66 | C++ -> C conversion | |
67 | QP range question solved (very likely 1<=QP<=32 according to arpi) | |
68 | new experimental vertical deblocking filter | |
69 | RK filter has 3dNow support now (untested) | |
13e00528 A |
70 | 0.1.2 |
71 | fixed a bug in the horizontal default filter | |
72 | 3dnow version of the Horizontal & Vertical Lowpass filters | |
73 | mmx version of the Horizontal Default filter | |
74 | mmx2 & C versions of a simple filter described in a paper from ramkishor & karandikar | |
75 | added mode flags & quality2mode function | |
76 | 0.1.1 | |
77 | */ | |
78 | ||
3057fa66 A |
79 | |
80 | #include <inttypes.h> | |
81 | #include <stdio.h> | |
d5a1a995 | 82 | #include <stdlib.h> |
3057fa66 | 83 | #include "../config.h" |
3057fa66 | 84 | //#undef HAVE_MMX2 |
13e00528 | 85 | //#define HAVE_3DNOW |
3057fa66 | 86 | //#undef HAVE_MMX |
13e00528 | 87 | #include "postprocess.h" |
3057fa66 A |
88 | |
89 | ||
90 | static uint64_t packedYOffset= 0x0000000000000000LL; | |
91 | static uint64_t packedYScale= 0x0100010001000100LL; | |
92 | static uint64_t w05= 0x0005000500050005LL; | |
93 | static uint64_t w20= 0x0020002000200020LL; | |
94 | static uint64_t w1400= 0x1400140014001400LL; | |
95 | static uint64_t bm00000001= 0x00000000000000FFLL; | |
96 | static uint64_t bm00010000= 0x000000FF00000000LL; | |
97 | static uint64_t bm00001000= 0x00000000FF000000LL; | |
98 | static uint64_t bm10000000= 0xFF00000000000000LL; | |
99 | static uint64_t bm10000001= 0xFF000000000000FFLL; | |
100 | static uint64_t bm11000011= 0xFFFF00000000FFFFLL; | |
13e00528 A |
101 | static uint64_t bm00000011= 0x000000000000FFFFLL; |
102 | static uint64_t bm11000000= 0xFFFF000000000000LL; | |
3057fa66 A |
103 | static uint64_t bm00011000= 0x000000FFFF000000LL; |
104 | static uint64_t bm00110011= 0x0000FFFF0000FFFFLL; | |
105 | static uint64_t bm11001100= 0xFFFF0000FFFF0000LL; | |
106 | static uint64_t b00= 0x0000000000000000LL; | |
107 | static uint64_t b02= 0x0202020202020202LL; | |
108 | static uint64_t b0F= 0x0F0F0F0F0F0F0F0FLL; | |
109 | static uint64_t bFF= 0xFFFFFFFFFFFFFFFFLL; | |
13e00528 A |
110 | static uint64_t b20= 0x2020202020202020LL; |
111 | static uint64_t b80= 0x8080808080808080LL; | |
3057fa66 A |
112 | static uint64_t b7E= 0x7E7E7E7E7E7E7E7ELL; |
113 | static uint64_t b7C= 0x7C7C7C7C7C7C7C7CLL; | |
114 | static uint64_t b3F= 0x3F3F3F3F3F3F3F3FLL; | |
115 | static uint64_t temp0=0; | |
116 | static uint64_t temp1=0; | |
117 | static uint64_t temp2=0; | |
118 | static uint64_t temp3=0; | |
119 | static uint64_t temp4=0; | |
120 | static uint64_t temp5=0; | |
121 | static uint64_t pQPb=0; | |
122 | static uint8_t tempBlock[16*16]; | |
123 | ||
124 | int hFlatnessThreshold= 56 - 16; | |
125 | int vFlatnessThreshold= 56 - 16; | |
126 | ||
127 | //amount of "black" you are willing to lose to get a brightness-corrected picture | |
128 | double maxClippedThreshold= 0.01; | |
129 | ||
130 | int maxAllowedY=255; | |
131 |