Commit | Line | Data |
---|---|---|
3057fa66 A |
1 | /* |
2 | Copyright (C) 2001 Michael Niedermayer (michaelni@gmx.at) | |
3 | ||
4 | This program is free software; you can redistribute it and/or modify | |
5 | it under the terms of the GNU General Public License as published by | |
6 | the Free Software Foundation; either version 2 of the License, or | |
7 | (at your option) any later version. | |
8 | ||
9 | This program is distributed in the hope that it will be useful, | |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | GNU General Public License for more details. | |
13 | ||
14 | You should have received a copy of the GNU General Public License | |
15 | along with this program; if not, write to the Free Software | |
16 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
17 | */ | |
18 | ||
19 | /* | |
13e00528 | 20 | C MMX MMX2 3DNow* |
3057fa66 A |
21 | isVertDC Ec Ec |
22 | isVertMinMaxOk Ec Ec | |
13e00528 | 23 | doVertLowPass E e e* |
3057fa66 A |
24 | doVertDefFilter Ec Ec Ec |
25 | isHorizDC Ec Ec | |
26 | isHorizMinMaxOk a | |
13e00528 A |
27 | doHorizLowPass E a a* |
28 | doHorizDefFilter E ac ac | |
3057fa66 | 29 | deRing |
9f45d04d MN |
30 | Vertical RKAlgo1 E a a* |
31 | Vertical X1 a E E* | |
32 | Horizontal X1 a E E* | |
d5a1a995 | 33 | |
3057fa66 | 34 | |
13e00528 | 35 | * I don't have a 3DNow CPU -> it's untested
3057fa66 A |
36 | E = Exact implementation |
37 | e = almost exact implementation | |
38 | a = alternative / approximate impl | |
39 | c = checked against the other implementations (-vo md5) | |
40 | */ | |
41 | ||
42 | /* | |
43 | TODO: | |
9f45d04d | 44 | verify that everything works as it should (how?)
3057fa66 A |
45 | reduce the time wasted on the mem transfer |
46 | implement dering | |
13e00528 | 47 | implement everything in C at least (done at the moment but ...) |
3057fa66 A |
48 | unroll stuff if instructions depend too much on the prior one |
49 | we use 8x8 blocks for the horizontal filters, opendivx seems to use 8x4? | |
50 | move YScale thing to the end instead of fixing QP | |
13e00528 | 51 | write a faster and higher quality deblocking filter :) |
d5a1a995 MN |
52 | do something about the speed of the horizontal filters |
53 | make the mainloop more flexible (variable number of blocks at once) | |
54 | (the if/else stuff per block is slowing things down) | |
9f45d04d MN |
55 | compare the quality & speed of all filters |
56 | implement a few simple deinterlacing filters | |
57 | split this huge file | |
3057fa66 A |
58 | ... |
59 | ||
60 | Notes: | |
61 | ||
62 | */ | |
63 | ||
13e00528 | 64 | /* |
9f45d04d | 65 | Changelog: use the CVS log |
d5a1a995 MN |
66 | 0.1.3 |
67 | bugfixes: last 3 lines not brightness/contrast corrected | |
68 | brightness statistics messed up with initial black pic | |
69 | changed initial values of the brightness statistics | |
70 | C++ -> C conversion | |
71 | QP range question solved (very likely 1<=QP<=32 according to arpi) | |
72 | new experimental vertical deblocking filter | |
73 | RK filter has 3dNow support now (untested) | |
13e00528 A |
74 | 0.1.2 |
75 | fixed a bug in the horizontal default filter | |
76 | 3dnow version of the Horizontal & Vertical Lowpass filters | |
77 | mmx version of the Horizontal Default filter | |
78 | mmx2 & C versions of a simple filter described in a paper by Ramkishor & Karandikar | |
79 | added mode flags & quality2mode function | |
80 | 0.1.1 | |
81 | */ | |
82 | ||
3057fa66 A |
83 | |
84 | #include <inttypes.h> | |
85 | #include <stdio.h> | |
d5a1a995 | 86 | #include <stdlib.h> |
3057fa66 | 87 | #include "../config.h" |
3057fa66 | 88 | //#undef HAVE_MMX2 |
13e00528 | 89 | //#define HAVE_3DNOW |
3057fa66 | 90 | //#undef HAVE_MMX |
13e00528 | 91 | #include "postprocess.h" |
3057fa66 A |
92 | |
93 | ||
94 | static uint64_t packedYOffset= 0x0000000000000000LL; | |
95 | static uint64_t packedYScale= 0x0100010001000100LL; | |
96 | static uint64_t w05= 0x0005000500050005LL; | |
97 | static uint64_t w20= 0x0020002000200020LL; | |
98 | static uint64_t w1400= 0x1400140014001400LL; | |
99 | static uint64_t bm00000001= 0x00000000000000FFLL; | |
100 | static uint64_t bm00010000= 0x000000FF00000000LL; | |
101 | static uint64_t bm00001000= 0x00000000FF000000LL; | |
102 | static uint64_t bm10000000= 0xFF00000000000000LL; | |
103 | static uint64_t bm10000001= 0xFF000000000000FFLL; | |
104 | static uint64_t bm11000011= 0xFFFF00000000FFFFLL; | |
13e00528 | 105 | static uint64_t bm00000011= 0x000000000000FFFFLL; |
9f45d04d | 106 | static uint64_t bm11111110= 0xFFFFFFFFFFFFFF00LL; |
13e00528 | 107 | static uint64_t bm11000000= 0xFFFF000000000000LL; |
3057fa66 A |
108 | static uint64_t bm00011000= 0x000000FFFF000000LL; |
109 | static uint64_t bm00110011= 0x0000FFFF0000FFFFLL; | |
110 | static uint64_t bm11001100= 0xFFFF0000FFFF0000LL; | |
111 | static uint64_t b00= 0x0000000000000000LL; | |
9f45d04d | 112 | static uint64_t b01= 0x0101010101010101LL; |
3057fa66 A |
113 | static uint64_t b02= 0x0202020202020202LL; |
114 | static uint64_t b0F= 0x0F0F0F0F0F0F0F0FLL; | |
115 | static uint64_t bFF= 0xFFFFFFFFFFFFFFFFLL; | |
13e00528 A |
116 | static uint64_t b20= 0x2020202020202020LL; |
117 | static uint64_t b80= 0x8080808080808080LL; | |
3057fa66 A |
118 | static uint64_t b7E= 0x7E7E7E7E7E7E7E7ELL; |
119 | static uint64_t b7C= 0x7C7C7C7C7C7C7C7CLL; | |
120 | static uint64_t b3F= 0x3F3F3F3F3F3F3F3FLL; | |
121 | static uint64_t temp0=0; | |
122 | static uint64_t temp1=0; | |
123 | static uint64_t temp2=0; | |
124 | static uint64_t temp3=0; | |
125 | static uint64_t temp4=0; | |
126 | static uint64_t temp5=0; | |
127 | static uint64_t pQPb=0; | |
128 | static uint8_t tempBlock[16*16]; | |
129 | ||
130 | int hFlatnessThreshold= 56 - 16; | |
131 | int vFlatnessThreshold= 56 - 16; | |
132 | ||
133 | //amount of "black" you are willing to lose to get a brightness-corrected picture | |
134 | double maxClippedThreshold= 0.01; | |
135 | ||
136 | int maxAllowedY=255; | |
137 |