Commit | Line | Data |
---|---|---|
3057fa66 A |
1 | /* |
2 | Copyright (C) 2001 Michael Niedermayer (michaelni@gmx.at) | |
3 | ||
4 | This program is free software; you can redistribute it and/or modify | |
5 | it under the terms of the GNU General Public License as published by | |
6 | the Free Software Foundation; either version 2 of the License, or | |
7 | (at your option) any later version. | |
8 | ||
9 | This program is distributed in the hope that it will be useful, | |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | GNU General Public License for more details. | |
13 | ||
14 | You should have received a copy of the GNU General Public License | |
15 | along with this program; if not, write to the Free Software | |
16 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
17 | */ | |
18 | ||
19 | /* | |
13e00528 | 20 | C MMX MMX2 3DNow* |
3057fa66 A |
21 | isVertDC Ec Ec |
22 | isVertMinMaxOk Ec Ec | |
13e00528 | 23 | doVertLowPass E e e* |
3057fa66 A |
24 | doVertDefFilter Ec Ec Ec |
25 | isHorizDC Ec Ec | |
26 | isHorizMinMaxOk a | |
13e00528 A |
27 | doHorizLowPass E a a* |
28 | doHorizDefFilter E ac ac | |
3057fa66 A |
29 | deRing |
30 | ||
13e00528 | 31 | * I don't have a 3DNow CPU -> it's untested
3057fa66 A |
32 | E = Exact implementation |
33 | e = almost exact implementation | |
34 | a = alternative / approximate impl | |
35 | c = checked against the other implementations (-vo md5) | |
36 | */ | |
37 | ||
38 | /* | |
39 | TODO: | |
40 | verify that everything works as it should | |
41 | reduce the time wasted on the mem transfer | |
42 | implement dering | |
13e00528 | 43 | implement everything in C at least (done at the moment but ...) |
3057fa66 A |
44 | figure range of QP out (assuming <256 for now) |
45 | unroll stuff if instructions depend too much on the prior one | |
46 | we use 8x8 blocks for the horizontal filters, opendivx seems to use 8x4? | |
47 | move YScale thing to the end instead of fixing QP | |
13e00528 | 48 | write a faster and higher quality deblocking filter :) |
3057fa66 A |
49 | ... |
50 | ||
51 | Notes: | |
52 | ||
53 | */ | |
54 | ||
13e00528 A |
55 | /* |
56 | Changelog: | |
57 | 0.1.2 | |
58 | fixed a bug in the horizontal default filter | |
59 | 3dnow version of the Horizontal & Vertical Lowpass filters | |
60 | mmx version of the Horizontal Default filter | |
61 | mmx2 & C versions of a simple filter described in a paper from ramkishor & karandikar | |
62 | added mode flags & quality2mode function | |
63 | 0.1.1 | |
64 | */ | |
65 | ||
3057fa66 A |
66 | |
67 | #include <inttypes.h> | |
68 | #include <stdio.h> | |
69 | #include "../config.h" | |
3057fa66 | 70 | //#undef HAVE_MMX2 |
13e00528 | 71 | //#define HAVE_3DNOW |
3057fa66 | 72 | //#undef HAVE_MMX |
13e00528 | 73 | #include "postprocess.h" |
3057fa66 A |
74 | |
75 | ||
76 | static uint64_t packedYOffset= 0x0000000000000000LL; /* all-zero 64-bit value; presumably a per-lane luma (Y) offset paired with packedYScale -- TODO confirm against its users */ | |
77 | static uint64_t packedYScale= 0x0100010001000100LL; /* 0x0100 in each of the four 16-bit lanes; presumably a fixed-point scale factor -- TODO confirm */ | |
78 | static uint64_t w05= 0x0005000500050005LL; /* w<hex>: the 16-bit word 0x<hex> replicated into all four 16-bit lanes of the 64-bit value */ | |
79 | static uint64_t w20= 0x0020002000200020LL; /* note: the lane value is hexadecimal 0x20 (32), not decimal 20 */ | |
80 | static uint64_t w1400= 0x1400140014001400LL; /* 0x1400 in each 16-bit lane */ | |
81 | static uint64_t bm00000001= 0x00000000000000FFLL; /* bm<8 digits>: byte mask; each digit of the name stands for one byte of the value, most significant byte first: 1 = 0xFF, 0 = 0x00 */ | |
82 | static uint64_t bm00010000= 0x000000FF00000000LL; | |
83 | static uint64_t bm00001000= 0x00000000FF000000LL; | |
84 | static uint64_t bm10000000= 0xFF00000000000000LL; | |
85 | static uint64_t bm10000001= 0xFF000000000000FFLL; /* highest and lowest byte selected */ | |
86 | static uint64_t bm11000011= 0xFFFF00000000FFFFLL; /* two highest and two lowest bytes selected */ | |
13e00528 A |
87 | static uint64_t bm00000011= 0x000000000000FFFFLL; |
88 | static uint64_t bm11000000= 0xFFFF000000000000LL; | |
3057fa66 A |
89 | static uint64_t bm00011000= 0x000000FFFF000000LL; /* the two middle bytes selected */ |
90 | static uint64_t bm00110011= 0x0000FFFF0000FFFFLL; /* low 16 bits of each 32-bit half selected */ | |
91 | static uint64_t bm11001100= 0xFFFF0000FFFF0000LL; /* high 16 bits of each 32-bit half selected; bitwise complement of bm00110011 */ | |
92 | static uint64_t b00= 0x0000000000000000LL; /* b<hex>: the byte 0x<hex> replicated into all eight bytes of the 64-bit value */ | |
93 | static uint64_t b02= 0x0202020202020202LL; | |
94 | static uint64_t b0F= 0x0F0F0F0F0F0F0F0FLL; | |
95 | static uint64_t bFF= 0xFFFFFFFFFFFFFFFFLL; /* all bits set */ | |
13e00528 A |
96 | static uint64_t b20= 0x2020202020202020LL; |
97 | static uint64_t b80= 0x8080808080808080LL; /* top bit of every byte set */ | |
3057fa66 A |
98 | static uint64_t b7E= 0x7E7E7E7E7E7E7E7ELL; |
99 | static uint64_t b7C= 0x7C7C7C7C7C7C7C7CLL; | |
100 | static uint64_t b3F= 0x3F3F3F3F3F3F3F3FLL; | |
101 | static uint64_t temp0=0; /* temp0..temp5: zero-initialised file-scope 64-bit scratch variables; the code that uses them is outside this excerpt */ | |
102 | static uint64_t temp1=0; | |
103 | static uint64_t temp2=0; | |
104 | static uint64_t temp3=0; | |
105 | static uint64_t temp4=0; | |
106 | static uint64_t temp5=0; | |
107 | static uint64_t pQPb=0; /* NOTE(review): presumably the quantiser (QP) packed into every byte -- confirm where it is written */ | |
108 | static uint8_t tempBlock[16*16]; /* 256-byte file-scope scratch buffer (16*16 uint8_t); NOTE(review): presumably a temporary pixel block -- confirm against its users */ | |
109 | ||
110 | int hFlatnessThreshold= 56 - 16; /* initial value 40; not static, so it has external linkage; presumably the threshold for the horizontal flatness (DC) test -- TODO confirm */ | |
111 | int vFlatnessThreshold= 56 - 16; /* initial value 40; presumably the vertical counterpart of hFlatnessThreshold -- TODO confirm */ | |
112 | ||
113 | //amount of "black" you are willing to lose to get a brightness-corrected picture | |
114 | double maxClippedThreshold= 0.01; | |
115 | ||
116 | int maxAllowedY=255; /* initial value 255; presumably the upper bound for luma (Y) values -- TODO confirm */ | |
117 |