Commit | Line | Data |
---|---|---|
3057fa66 A |
1 | /* |
2 | Copyright (C) 2001 Michael Niedermayer (michaelni@gmx.at) | |
3 | ||
4 | This program is free software; you can redistribute it and/or modify | |
5 | it under the terms of the GNU General Public License as published by | |
6 | the Free Software Foundation; either version 2 of the License, or | |
7 | (at your option) any later version. | |
8 | ||
9 | This program is distributed in the hope that it will be useful, | |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | GNU General Public License for more details. | |
13 | ||
14 | You should have received a copy of the GNU General Public License | |
15 | along with this program; if not, write to the Free Software | |
16 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
17 | */ | |
18 | ||
19 | /* | |
3b58b885 | 20 | C MMX MMX2 3DNow |
3057fa66 A |
21 | isVertDC Ec Ec |
22 | isVertMinMaxOk Ec Ec | |
3b58b885 | 23 | doVertLowPass E e e |
3057fa66 A |
24 | doVertDefFilter Ec Ec Ec |
25 | isHorizDC Ec Ec | |
26 | isHorizMinMaxOk a | |
3b58b885 | 27 | doHorizLowPass E a a |
13e00528 | 28 | doHorizDefFilter E ac ac |
3057fa66 | 29 | deRing |
3b58b885 MN |
30 | Vertical RKAlgo1 E a a |
31 | Vertical X1 a E E | |
32 | Horizontal X1 a E E | |
acced553 MN |
33 | LinIpolDeinterlace e E E* |
34 | CubicIpolDeinterlace a e e* | |
35 | LinBlendDeinterlace e E E* | |
a6be8111 | 36 | MedianDeinterlace Ec Ec |
d5a1a995 | 37 | |
3057fa66 | 38 | |
13e00528 | 39 | * I don't have a 3DNow CPU -> it's untested
3057fa66 | 40 | E = Exact implementation |
acced553 | 41 | e = almost exact implementation (slightly different rounding,...)
3057fa66 A |
42 | a = alternative / approximate impl |
43 | c = checked against the other implementations (-vo md5) | |
44 | */ | |
45 | ||
46 | /* | |
47 | TODO: | |
9f45d04d | 48 | verify that everything works as it should (how?)
3057fa66 A |
49 | reduce the time wasted on the mem transfer |
50 | implement dering | |
13e00528 | 51 | implement everything in C at least (done at the moment but ...) |
3057fa66 A |
52 | unroll stuff if instructions depend too much on the prior one |
53 | we use 8x8 blocks for the horizontal filters, opendivx seems to use 8x4? | |
54 | move YScale thing to the end instead of fixing QP | |
13e00528 | 55 | write a faster and higher quality deblocking filter :) |
d5a1a995 MN |
56 | do something about the speed of the horizontal filters |
57 | make the mainloop more flexible (variable number of blocks at once) | |
58 | (the if/else stuff per block is slowing things down) | |
9f45d04d | 59 | compare the quality & speed of all filters |
9f45d04d | 60 | split this huge file |
3b58b885 | 61 | fix warnings (unused vars, ...) |
a6be8111 | 62 | noise reduction filters |
3057fa66 A |
63 | ... |
64 | ||
65 | Notes: | |
66 | ||
13e00528 A |
67 | */ |
68 | ||
a6be8111 | 69 | //Changelog: use the CVS log |
3057fa66 A |
70 | |
71 | #include <inttypes.h> | |
72 | #include <stdio.h> | |
d5a1a995 | 73 | #include <stdlib.h> |
3057fa66 | 74 | #include "../config.h" |
3057fa66 | 75 | //#undef HAVE_MMX2 |
13e00528 | 76 | //#define HAVE_3DNOW |
3057fa66 | 77 | //#undef HAVE_MMX |
13e00528 | 78 | #include "postprocess.h" |
3057fa66 | 79 | |
e939e1c3 A |
80 | #define MIN(a,b) ((a) > (b) ? (b) : (a)) |
81 | #define MAX(a,b) ((a) < (b) ? (b) : (a)) | |
82 | #define ABS(a) ((a) > 0 ? (a) : (-(a))) | |
83 | #define SIGN(a) ((a) > 0 ? 1 : -1) | |
84 | ||
85 | #ifdef HAVE_MMX2 | |
86 | #define PAVGB(a,b) "pavgb " #a ", " #b " \n\t" | |
87 | #elif defined (HAVE_3DNOW) | |
88 | #define PAVGB(a,b) "pavgusb " #a ", " #b " \n\t" | |
89 | #endif | |
3057fa66 A |
90 | |
91 | static uint64_t packedYOffset= 0x0000000000000000LL; | |
92 | static uint64_t packedYScale= 0x0100010001000100LL; | |
93 | static uint64_t w05= 0x0005000500050005LL; | |
94 | static uint64_t w20= 0x0020002000200020LL; | |
95 | static uint64_t w1400= 0x1400140014001400LL; | |
96 | static uint64_t bm00000001= 0x00000000000000FFLL; | |
97 | static uint64_t bm00010000= 0x000000FF00000000LL; | |
98 | static uint64_t bm00001000= 0x00000000FF000000LL; | |
99 | static uint64_t bm10000000= 0xFF00000000000000LL; | |
100 | static uint64_t bm10000001= 0xFF000000000000FFLL; | |
101 | static uint64_t bm11000011= 0xFFFF00000000FFFFLL; | |
13e00528 | 102 | static uint64_t bm00000011= 0x000000000000FFFFLL; |
9f45d04d | 103 | static uint64_t bm11111110= 0xFFFFFFFFFFFFFF00LL; |
13e00528 | 104 | static uint64_t bm11000000= 0xFFFF000000000000LL; |
3057fa66 A |
105 | static uint64_t bm00011000= 0x000000FFFF000000LL; |
106 | static uint64_t bm00110011= 0x0000FFFF0000FFFFLL; | |
107 | static uint64_t bm11001100= 0xFFFF0000FFFF0000LL; | |
108 | static uint64_t b00= 0x0000000000000000LL; | |
9f45d04d | 109 | static uint64_t b01= 0x0101010101010101LL; |
3057fa66 A |
110 | static uint64_t b02= 0x0202020202020202LL; |
111 | static uint64_t b0F= 0x0F0F0F0F0F0F0F0FLL; | |
112 | static uint64_t bFF= 0xFFFFFFFFFFFFFFFFLL; | |
13e00528 A |
113 | static uint64_t b20= 0x2020202020202020LL; |
114 | static uint64_t b80= 0x8080808080808080LL; | |
3057fa66 A |
115 | static uint64_t b7E= 0x7E7E7E7E7E7E7E7ELL; |
116 | static uint64_t b7C= 0x7C7C7C7C7C7C7C7CLL; | |
117 | static uint64_t b3F= 0x3F3F3F3F3F3F3F3FLL; | |
118 | static uint64_t temp0=0; | |
119 | static uint64_t temp1=0; | |
120 | static uint64_t temp2=0; | |
121 | static uint64_t temp3=0; | |
122 | static uint64_t temp4=0; | |
123 | static uint64_t temp5=0; | |
124 | static uint64_t pQPb=0; | |
658a85f2 | 125 | static uint8_t tempBlock[16*16]; //used so the horizontal code gets aligned data |
3057fa66 A |
126 | |
127 | int hFlatnessThreshold= 56 - 16; | |
128 | int vFlatnessThreshold= 56 - 16; | |
129 | ||
130 | //amount of "black" you are willing to lose to get a brightness-corrected picture | |
131 | double maxClippedThreshold= 0.01; | |
132 | ||
133 | int maxAllowedY=255; | |
134 |