Commit | Line | Data |
---|---|---|
3057fa66 | 1 | /* |
8aaac435 | 2 | Copyright (C) 2001-2002 Michael Niedermayer (michaelni@gmx.at) |
3057fa66 A |
3 | |
4 | This program is free software; you can redistribute it and/or modify | |
5 | it under the terms of the GNU General Public License as published by | |
6 | the Free Software Foundation; either version 2 of the License, or | |
7 | (at your option) any later version. | |
8 | ||
9 | This program is distributed in the hope that it will be useful, | |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | GNU General Public License for more details. | |
13 | ||
14 | You should have received a copy of the GNU General Public License | |
15 | along with this program; if not, write to the Free Software | |
16 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
17 | */ | |
18 | ||
19 | /* | |
3b58b885 | 20 | C MMX MMX2 3DNow |
3057fa66 A |
21 | isVertDC Ec Ec |
22 | isVertMinMaxOk Ec Ec | |
3b58b885 | 23 | doVertLowPass E e e |
7f16f6e6 | 24 | doVertDefFilter Ec Ec e e |
3057fa66 | 25 | isHorizDC Ec Ec |
4e4dcbc5 MN |
26 | isHorizMinMaxOk a E |
27 | doHorizLowPass E e e | |
7f16f6e6 | 28 | doHorizDefFilter Ec Ec e e |
2e212618 | 29 | deRing E e e* |
3b58b885 | 30 | Vertical RKAlgo1 E a a |
e5c30e06 | 31 | Horizontal RKAlgo1 a a |
117e45b0 MN |
32 | Vertical X1# a E E |
33 | Horizontal X1# a E E | |
acced553 MN |
34 | LinIpolDeinterlace e E E* |
35 | CubicIpolDeinterlace a e e* | |
36 | LinBlendDeinterlace e E E* | |
117e45b0 | 37 | MedianDeinterlace# Ec Ec |
be44a4d7 | 38 | TempDeNoiser# E e e |
d5a1a995 | 39 | |
117e45b0 MN |
40 | * I don't have a 3DNow CPU -> it's untested, but no one said it doesn't work, so it seems to work |
41 | # more or less self-invented filters, so the exactness isn't too meaningful | |
3057fa66 | 42 | E = Exact implementation |
acced553 | 43 | e = almost exact implementation (slightly different rounding,...) |
3057fa66 A |
44 | a = alternative / approximate impl |
45 | c = checked against the other implementations (-vo md5) | |
46 | */ | |
47 | ||
48 | /* | |
49 | TODO: | |
3057fa66 | 50 | reduce the time wasted on the mem transfer |
3057fa66 | 51 | unroll stuff if instructions depend too much on the prior one |
3057fa66 | 52 | move YScale thing to the end instead of fixing QP |
13e00528 | 53 | write a faster and higher quality deblocking filter :) |
d5a1a995 MN |
54 | make the main loop more flexible (variable number of blocks at once) |
55 | (the if/else stuff per block is slowing things down) | |
9f45d04d | 56 | compare the quality & speed of all filters |
9f45d04d | 57 | split this huge file |
8405b3fd | 58 | optimize c versions |
117e45b0 | 59 | try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks |
3057fa66 | 60 | ... |
13e00528 A |
61 | */ |
62 | ||
a6be8111 | 63 | //Changelog: use the CVS log |
3057fa66 | 64 | |
6c426cff | 65 | #include "../config.h" |
3057fa66 A |
66 | #include <inttypes.h> |
67 | #include <stdio.h> | |
d5a1a995 | 68 | #include <stdlib.h> |
911879d1 | 69 | #include <string.h> |
dda87e9f PL |
70 | #ifdef HAVE_MALLOC_H |
71 | #include <malloc.h> | |
72 | #endif | |
3057fa66 | 73 | //#undef HAVE_MMX2 |
13e00528 | 74 | //#define HAVE_3DNOW |
3057fa66 | 75 | //#undef HAVE_MMX |
cc9b0679 | 76 | //#undef ARCH_X86 |
7f16f6e6 | 77 | //#define DEBUG_BRIGHTNESS |
9c9e467d | 78 | #include "../libvo/fastmemcpy.h" |
13e00528 | 79 | #include "postprocess.h" |
9b464428 | 80 | #include "../mangle.h" |
3057fa66 | 81 | |
e939e1c3 A |
82 | #define MIN(a,b) ((a) > (b) ? (b) : (a)) /* smaller of a and b; an argument may be evaluated twice, so do not pass expressions with side effects */
83 | #define MAX(a,b) ((a) < (b) ? (b) : (a)) /* larger of a and b; an argument may be evaluated twice */ | |
84 | #define ABS(a) ((a) > 0 ? (a) : (-(a))) /* absolute value of a; a is evaluated twice */ | |
85 | #define SIGN(a) ((a) > 0 ? 1 : -1) /* 1 if a is positive, -1 otherwise (so SIGN(0) is -1, not 0) */ | |
86 | ||
911879d1 MN |
87 | #define GET_MODE_BUFFER_SIZE 500 /* NOTE(review): presumably the size of a scratch buffer used while parsing a mode string; confirm at the use site */
88 | #define OPTIONS_ARRAY_SIZE 10 /* NOTE(review): presumably the capacity of a per-filter options array; confirm at the use site */ | |
9c9e467d MN |
89 | #define BLOCK_SIZE 8 /* NOTE(review): presumably the edge length of one processed block; confirm at the use site */
90 | #define TEMP_STRIDE 8 /* NOTE(review): presumably the line stride of a temporary block buffer; confirm at the use site */ | |
91 | //#define NUM_BLOCKS_AT_ONCE 16 //not used yet | |
911879d1 | 92 | |
cc9b0679 | 93 | #ifdef ARCH_X86 /* 8-byte-aligned 64-bit constants, compiled only when ARCH_X86 is defined. Naming: wNN holds 0x00NN in each of its four 16-bit words, bNN holds 0xNN in each of its eight bytes. NOTE(review): presumably memory operands for the MMX code paths; confirm at the use sites. */ |
b28daef8 MN |
94 | static uint64_t __attribute__((aligned(8))) w05= 0x0005000500050005LL;
95 | static uint64_t __attribute__((aligned(8))) w20= 0x0020002000200020LL; | |
b28daef8 MN |
96 | static uint64_t __attribute__((aligned(8))) b00= 0x0000000000000000LL;
97 | static uint64_t __attribute__((aligned(8))) b01= 0x0101010101010101LL; | |
98 | static uint64_t __attribute__((aligned(8))) b02= 0x0202020202020202LL; | |
b28daef8 | 99 | static uint64_t __attribute__((aligned(8))) b08= 0x0808080808080808LL; |
b28daef8 | 100 | static uint64_t __attribute__((aligned(8))) b80= 0x8080808080808080LL; |
b28daef8 | 101 | #endif /* ARCH_X86 */ |
3057fa66 | 102 | |
df8d4d0e | 103 | static int verbose= 0; /* file-local flag, 0 by default; NOTE(review): presumably enables diagnostic output, confirm where it is read */ |
45b4f285 | 104 | |
df8d4d0e | 105 | static const int deringThreshold= 20; /* file-local constant; NOTE(review): presumably the threshold applied by the deRing filter, confirm at the use site */ |
3057fa66 | 106 | |
9c9e467d MN |
107 | struct PPFilter{ /* describes one filter: its two names, its chrominance default, its quality thresholds and the bitmask that enables it */
108 | char *shortName; /* short form of the filter name */ | |
109 | char *longName; /* long form of the filter name */ | |
110 | int chromDefault; // whether chrominance filtering is on by default when this filter is activated manually | |
111 | int minLumQuality; // minimum quality at which luminance filtering is turned on | |
112 | int minChromQuality; // minimum quality at which chrominance filtering is turned on | |
113 | int mask; // bitmask that turns this filter on | |
114 | }; | |
115 | ||
116 | typedef struct PPContext{ | |
117 | uint8_t *tempBlocks; //used for the horizontal code | |
118 | ||
119 |