Commit | Line | Data |
---|---|---|
791e7b83 MN |
1 | /* |
2 | * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at> | |
3 | * | |
4 | * This library is free software; you can redistribute it and/or | |
5 | * modify it under the terms of the GNU Lesser General Public | |
6 | * License as published by the Free Software Foundation; either | |
7 | * version 2 of the License, or (at your option) any later version. | |
8 | * | |
9 | * This library is distributed in the hope that it will be useful, | |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
12 | * Lesser General Public License for more details. | |
13 | * | |
14 | * You should have received a copy of the GNU Lesser General Public | |
15 | * License along with this library; if not, write to the Free Software | |
5509bffa | 16 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
791e7b83 MN |
17 | */ |
18 | ||
19 | #include "avcodec.h" | |
20 | #include "common.h" | |
21 | #include "dsputil.h" | |
28869757 MN |
22 | |
23 | #include "rangecoder.h" | |
24 | #define MID_STATE 128 | |
791e7b83 MN |
25 | |
26 | #include "mpegvideo.h" | |
27 | ||
28 | #undef NDEBUG | |
29 | #include <assert.h> | |
30 | ||
31 | #define MAX_DECOMPOSITIONS 8 | |
32 | #define MAX_PLANES 4 | |
33 | #define DWTELEM int | |
a0a74ad9 | 34 | #define QSHIFT 5 |
c97de57c | 35 | #define QROOT (1<<QSHIFT) |
93fbdb5a | 36 | #define LOSSLESS_QLOG -128 |
034aff03 | 37 | #define FRAC_BITS 8 |
791e7b83 MN |
38 | |
39 | static const int8_t quant3[256]={ | |
40 | 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
41 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
42 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
43 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
44 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
45 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
46 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
47 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
48 | -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
49 | -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
50 | -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
51 | -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
52 | -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
53 | -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
54 | -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
55 | -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0, | |
56 | }; | |
57 | static const int8_t quant3b[256]={ | |
58 | 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
59 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
60 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
61 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
62 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
63 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
64 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
65 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
66 | -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
67 | -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
68 | -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
69 | -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
70 | -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
71 | -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
72 | -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
73 | -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
74 | }; | |
538a3841 MN |
75 | static const int8_t quant3bA[256]={ |
76 | 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
77 | 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
78 | 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
79 | 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
80 | 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
81 | 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
82 | 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
83 | 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
84 | 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
85 | 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
86 | 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
87 | 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
88 | 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
89 | 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
90 | 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
91 | 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
92 | }; | |
791e7b83 MN |
93 | static const int8_t quant5[256]={ |
94 | 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
95 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
96 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
97 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
98 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
99 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
100 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
101 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
102 | -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
103 | -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
104 | -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
105 | -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
106 | -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
107 | -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
108 | -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
109 | -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1, | |
110 | }; | |
111 | static const int8_t quant7[256]={ | |
112 | 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
113 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
114 | 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, | |
115 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | |
116 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | |
117 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | |
118 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | |
119 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | |
120 | -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3, | |
121 | -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3, | |
122 | -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3, | |
123 | -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3, | |
124 | -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3, | |
125 | -3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2, | |
126 | -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
127 | -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1, | |
128 | }; | |
129 | static const int8_t quant9[256]={ | |
130 | 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, | |
131 | 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
132 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
133 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
134 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
135 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
136 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
137 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
138 | -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, | |
139 | -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, | |
140 | -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, | |
141 | -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, | |
142 | -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, | |
143 | -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, | |
144 | -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3, | |
145 | -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1, | |
146 | }; | |
147 | static const int8_t quant11[256]={ | |
148 | 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, | |
149 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
150 | 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
151 | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
152 | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
153 | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
154 | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
155 | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
156 | -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, | |
157 | -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, | |
158 | -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, | |
159 | -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, | |
160 | -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, | |
161 | -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4, | |
162 | -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, | |
163 | -4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1, | |
164 | }; | |
165 | static const int8_t quant13[256]={ | |
166 | 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, | |
167 | 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
168 | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
169 | 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, | |
170 | 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, | |
171 | 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, | |
172 | 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, | |
173 | 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, | |
174 | -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6, | |
175 | -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6, | |
176 | -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6, | |
177 | -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6, | |
178 | -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5, | |
179 | -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, | |
180 | -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, | |
181 | -4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1, | |
182 | }; | |
183 | ||
715a97f0 MN |
184 | #define LOG2_OBMC_MAX 6 |
185 | #define OBMC_MAX (1<<(LOG2_OBMC_MAX)) | |
791e7b83 MN |
186 | #if 0 //64*cubic |
187 | static const uint8_t obmc32[1024]={ | |
188 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
189 | 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, | |
190 | 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, | |
191 | 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0, | |
192 | 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0, | |
193 | 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0, | |
194 | 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0, | |
195 | 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0, | |
196 | 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0, | |
197 | 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0, | |
198 | 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0, | |
199 | 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0, | |
200 | 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0, | |
201 | 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0, | |
202 | 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0, | |
203 | 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0, | |
204 | 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0, | |
205 | 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0, | |
206 | 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0, | |
207 | 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0, | |
208 | 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0, | |
209 | 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0, | |
210 | 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0, | |
211 | 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0, | |
212 | 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0, | |
213 | 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0, | |
214 | 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0, | |
215 | 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0, | |
216 | 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0, | |
217 | 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, | |
218 | 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, | |
219 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
220 | //error:0.000022 | |
221 | }; | |
222 | static const uint8_t obmc16[256]={ | |
223 | 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, | |
224 | 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0, | |
225 | 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0, | |
226 | 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0, | |
227 | 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0, | |
228 | 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0, | |
229 | 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1, | |
230 | 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1, | |
231 | 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1, | |
232 | 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1, | |
233 | 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0, | |
234 | 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0, | |
235 | 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0, | |
236 | 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0, | |
237 | 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0, | |
238 | 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, | |
239 | //error:0.000033 | |
240 | }; | |
241 | #elif 1 // 64*linear | |
242 | static const uint8_t obmc32[1024]={ | |
243 | 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, | |
244 | 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0, | |
245 | 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0, | |
246 | 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0, | |
247 | 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1, | |
248 | 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1, | |
249 | 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1, | |
250 | 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1, | |
251 | 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1, | |
252 | 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1, | |
253 | 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1, | |
254 | 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1, | |
255 | 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2, | |
256 | 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2, | |
257 | 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2, | |
258 | 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2, | |
259 | 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2, | |
260 | 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2, | |
261 | 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2, | |
262 | 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2, | |
263 | 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1, | |
264 | 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1, | |
265 | 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1, | |
266 | 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1, | |
267 | 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1, | |
268 | 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1, | |
269 | 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1, | |
270 | 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1, | |
271 | 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0, | |
272 | 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0, | |
273 | 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0, | |
274 | 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, | |
275 | //error:0.000020 | |
276 | }; | |
277 | static const uint8_t obmc16[256]={ | |
278 | 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0, | |
279 | 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1, | |
280 | 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1, | |
281 | 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2, | |
282 | 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2, | |
283 | 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3, | |
284 | 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3, | |
285 | 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4, | |
286 | 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4, | |
287 | 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3, | |
288 | 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3, | |
289 | 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2, | |
290 | 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2, | |
291 | 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1, | |
292 | 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1, | |
293 | 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0, | |
294 | //error:0.000015 | |
295 | }; | |
296 | #else //64*cos | |
297 | static const uint8_t obmc32[1024]={ | |
298 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
299 | 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, | |
300 | 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, | |
301 | 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0, | |
302 | 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0, | |
303 | 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0, | |
304 | 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0, | |
305 | 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0, | |
306 | 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0, | |
307 | 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0, | |
308 | 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0, | |
309 | 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0, | |
310 | 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0, | |
311 | 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0, | |
312 | 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0, | |
313 | 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0, | |
314 | 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0, | |
315 | 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0, | |
316 | 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0, | |
317 | 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0, | |
318 | 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0, | |
319 | 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0, | |
320 | 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0, | |
321 | 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0, | |
322 | 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0, | |
323 | 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0, | |
324 | 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0, | |
325 | 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0, | |
326 | 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0, | |
327 | 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, | |
328 | 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, | |
329 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
330 | //error:0.000022 | |
331 | }; | |
332 | static const uint8_t obmc16[256]={ | |
333 | 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, | |
334 | 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0, | |
335 | 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0, | |
336 | 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0, | |
337 | 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0, | |
338 | 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1, | |
339 | 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1, | |
340 | 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0, | |
341 | 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0, | |
342 | 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1, | |
343 | 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1, | |
344 | 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0, | |
345 | 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0, | |
346 | 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0, | |
347 | 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0, | |
348 | 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, | |
349 | //error:0.000022 | |
350 | }; | |
351 | #endif | |
352 | ||
155ec6ed MN |
353 | //linear *64 |
354 | static const uint8_t obmc8[64]={ | |
355 | 1, 3, 5, 7, 7, 5, 3, 1, | |
356 | 3, 9,15,21,21,15, 9, 3, | |
357 | 5,15,25,35,35,25,15, 5, | |
358 | 7,21,35,49,49,35,21, 7, | |
359 | 7,21,35,49,49,35,21, 7, | |
360 | 5,15,25,35,35,25,15, 5, | |
361 | 3, 9,15,21,21,15, 9, 3, | |
362 | 1, 3, 5, 7, 7, 5, 3, 1, | |
363 | //error:0.000000 | |
364 | }; | |
365 | ||
//linear *64
/**
 * 4x4 overlapped-block weight window, stored row by row.
 * Each entry is separable: obmc4[4*y + x] == w[y]*w[x] with w = {2,6,6,2}.
 * Because w[i] + w[i+2] == 8 for i in 0..1, the four entries at
 * (x,y), (x+2,y), (x,y+2), (x+2,y+2) sum to exactly 64 (OBMC_MAX).
 */
static const uint8_t obmc4[16]={
     4,12,12, 4,
    12,36,36,12,
    12,36,36,12,
     4,12,12, 4,
//error:0.000000
};
374 | ||
/**
 * Weight windows ordered from largest to smallest:
 * entry i points at the window whose side is 32>>i (32, 16, 8, 4).
 */
static const uint8_t *obmc_tab[4]={
    obmc32, obmc16, obmc8, obmc4
};
378 | ||
/**
 * Per-block description record (6 one-byte/two-byte fields).
 * NOTE(review): mx/my look like a motion vector and color[] like one
 * value per plane (null_block sets it to {128,128,128}) -- confirm
 * against the code that fills these in, which is not visible here.
 */
typedef struct BlockNode{
    int16_t mx;
    int16_t my;
    uint8_t color[3];
    uint8_t type;       ///< bit flags, see BLOCK_INTRA / BLOCK_OPT below
//#define TYPE_SPLIT    1
#define BLOCK_INTRA   1
#define BLOCK_OPT     2
//#define TYPE_NOCOLOR  4
    uint8_t level; //FIXME merge into type?
}BlockNode;
390 | ||
51d6a3cf MN |
/**
 * Default BlockNode: zero mx/my, type 0, level 0 and all three
 * color components set to 128.
 */
static const BlockNode null_block= { //FIXME add border maybe
    .color= {128,128,128},
    .mx= 0,
    .my= 0,
    .type= 0,
    .level= 0,
};
398 | ||
155ec6ed MN |
399 | #define LOG2_MB_SIZE 4 |
400 | #define MB_SIZE (1<<LOG2_MB_SIZE) | |
401 | ||
a0d1931c Y |
/**
 * Pair of a signed 16-bit position and an unsigned 16-bit value.
 * NOTE(review): presumably one non-zero coefficient and its x position
 * within a subband line (see SubBand.x_coeff) -- confirm with the users.
 */
typedef struct x_and_coeff{
    int16_t x;
    uint16_t coeff;
} x_and_coeff;
406 | ||
791e7b83 MN |
/**
 * One wavelet subband of a plane.
 * The geometry fields are filled in elsewhere; only their types are
 * visible from this part of the file.
 */
typedef struct SubBand{
    int level;
    int stride;
    int width;
    int height;
    int qlog;                                   ///< log(qscale)/log[2^(1/6)]
    DWTELEM *buf;
    int buf_x_offset;
    int buf_y_offset;
    int stride_line; ///< Stride measured in lines, not pixels.
    x_and_coeff * x_coeff;
    struct SubBand *parent;
    /* 7+512 rows of 32 state bytes; 32 is also the number of state bytes
     * put_symbol() addresses (state+0 .. state+31). */
    uint8_t state[/*7*2*/ 7 + 512][32];
}SubBand;
421 | ||
/**
 * One image plane: its dimensions plus up to MAX_DECOMPOSITIONS
 * decomposition levels of 4 SubBand slots each.
 */
typedef struct Plane{
    int width;
    int height;
    SubBand band[MAX_DECOMPOSITIONS][4];
}Plane;
427 | ||
a0d1931c Y |
/**
 * Used to minimize the amount of memory used in order to optimize cache performance.
 *
 * Lines get storage lazily: line[i] is NULL until slice_buffer_load_line()
 * pops a buffer from data_stack for it, and slice_buffer_release() pushes
 * that buffer back and resets line[i] to NULL.
 **/
typedef struct {
    DWTELEM * * line; ///< For use by idwt and predict_slices. line_count entries, NULL = no storage assigned.
    DWTELEM * * data_stack; ///< Used for internal purposes. Stack of currently unassigned line buffers.
    int data_stack_top; ///< Index of the topmost unassigned buffer, -1 when none is left.
    int line_count; ///< Number of entries in line[].
    int line_width; ///< Number of DWTELEMs in each line buffer.
    int data_count; ///< Number of line buffers allocated in total (capacity of data_stack).
    DWTELEM * base_buffer; ///< Buffer that this structure is caching.
} slice_buffer;
438 | ||
791e7b83 MN |
/**
 * Codec-wide state: coder, pictures, adaptive-state arrays, stream
 * parameters, per-plane subband data and the block array.
 */
typedef struct SnowContext{
//    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)

    AVCodecContext *avctx;
    RangeCoder c;
    DSPContext dsp;
    AVFrame new_picture;
    AVFrame input_picture;              ///< new_picture with the internal linesizes
    AVFrame current_picture;
    AVFrame last_picture;
    AVFrame mconly_picture;
//     uint8_t q_context[16];
    uint8_t header_state[32];           ///< 32 bytes, the span put_symbol() addresses (state+0 .. state+31)
    uint8_t block_state[128 + 32*128];
    int keyframe;
    int always_reset;
    int version;
    int spatial_decomposition_type;
    int temporal_decomposition_type;
    int spatial_decomposition_count;
    int temporal_decomposition_count;
    DWTELEM *spatial_dwt_buffer;
    int colorspace_type;
    int chroma_h_shift;
    int chroma_v_shift;
    int spatial_scalability;
    int qlog;
    int lambda;
    int lambda2;
    int mv_scale;
    int qbias;
#define QBIAS_SHIFT 3
    int b_width;
    int b_height;
    int block_max_depth;
    Plane plane[MAX_PLANES];
    BlockNode *block;
#define ME_CACHE_SIZE 1024
    int me_cache[ME_CACHE_SIZE];
    int me_cache_generation;
    slice_buffer sb;

    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
}SnowContext;
483 | ||
f9e6ebf7 LM |
/**
 * Four DWTELEM pointers plus a row counter.
 * NOTE(review): presumably the sliding window of lines and the current
 * row of an incremental inverse DWT -- the users are not visible here.
 */
typedef struct {
    DWTELEM *b0;
    DWTELEM *b1;
    DWTELEM *b2;
    DWTELEM *b3;
    int y;
} dwt_compose_t;
491 | ||
a0d1931c Y |
/**
 * Yield the storage of line line_num of slice_buf: the pointer already
 * recorded in line[] when there is one, otherwise whatever
 * slice_buffer_load_line() returns.
 * Both arguments are expanded more than once, so they must not have
 * side effects.
 */
#define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
//#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
494 | ||
51d6a3cf MN |
495 | static void iterative_me(SnowContext *s); |
496 | ||
a0d1931c Y |
497 | static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, DWTELEM * base_buffer) |
498 | { | |
499 | int i; | |
115329f1 | 500 | |
a0d1931c Y |
501 | buf->base_buffer = base_buffer; |
502 | buf->line_count = line_count; | |
503 | buf->line_width = line_width; | |
504 | buf->data_count = max_allocated_lines; | |
505 | buf->line = (DWTELEM * *) av_mallocz (sizeof(DWTELEM *) * line_count); | |
506 | buf->data_stack = (DWTELEM * *) av_malloc (sizeof(DWTELEM *) * max_allocated_lines); | |
115329f1 | 507 | |
a0d1931c Y |
508 | for (i = 0; i < max_allocated_lines; i++) |
509 | { | |
510 | buf->data_stack[i] = (DWTELEM *) av_malloc (sizeof(DWTELEM) * line_width); | |
511 | } | |
115329f1 | 512 | |
a0d1931c Y |
513 | buf->data_stack_top = max_allocated_lines - 1; |
514 | } | |
515 | ||
516 | static DWTELEM * slice_buffer_load_line(slice_buffer * buf, int line) | |
517 | { | |
a0d1931c Y |
518 | int offset; |
519 | DWTELEM * buffer; | |
115329f1 DB |
520 | |
521 | // av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line); | |
522 | ||
a0d1931c Y |
523 | assert(buf->data_stack_top >= 0); |
524 | // assert(!buf->line[line]); | |
525 | if (buf->line[line]) | |
526 | return buf->line[line]; | |
115329f1 | 527 | |
a0d1931c Y |
528 | offset = buf->line_width * line; |
529 | buffer = buf->data_stack[buf->data_stack_top]; | |
530 | buf->data_stack_top--; | |
531 | buf->line[line] = buffer; | |
115329f1 | 532 | |
a0d1931c | 533 | // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1); |
115329f1 | 534 | |
a0d1931c Y |
535 | return buffer; |
536 | } | |
537 | ||
538 | static void slice_buffer_release(slice_buffer * buf, int line) | |
539 | { | |
a0d1931c Y |
540 | int offset; |
541 | DWTELEM * buffer; | |
542 | ||
543 | assert(line >= 0 && line < buf->line_count); | |
544 | assert(buf->line[line]); | |
545 | ||
546 | offset = buf->line_width * line; | |
547 | buffer = buf->line[line]; | |
548 | buf->data_stack_top++; | |
549 | buf->data_stack[buf->data_stack_top] = buffer; | |
550 | buf->line[line] = NULL; | |
115329f1 | 551 | |
a0d1931c Y |
552 | // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_release: line: %d remaining: %d\n", line, buf->data_stack_top + 1); |
553 | } | |
554 | ||
555 | static void slice_buffer_flush(slice_buffer * buf) | |
556 | { | |
557 | int i; | |
558 | for (i = 0; i < buf->line_count; i++) | |
559 | { | |
560 | if (buf->line[i]) | |
561 | { | |
562 | // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_flush: line: %d \n", i); | |
563 | slice_buffer_release(buf, i); | |
564 | } | |
565 | } | |
566 | } | |
567 | ||
568 | static void slice_buffer_destroy(slice_buffer * buf) | |
569 | { | |
570 | int i; | |
571 | slice_buffer_flush(buf); | |
115329f1 | 572 | |
a0d1931c Y |
573 | for (i = buf->data_count - 1; i >= 0; i--) |
574 | { | |
575 | assert(buf->data_stack[i]); | |
576 | av_free(buf->data_stack[i]); | |
577 | } | |
578 | assert(buf->data_stack); | |
579 | av_free(buf->data_stack); | |
580 | assert(buf->line); | |
581 | av_free(buf->line); | |
582 | } | |
583 | ||
#ifdef __sgi
// Avoid a name clash on SGI IRIX
#undef qexp
#endif
/* With FRAC_BITS defined as 8 above, this evaluates to 7. */
#define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
/* QROOT (1<<QSHIFT, i.e. 32) entries; the table is not filled in here. */
static uint8_t qexp[QROOT];
791e7b83 MN |
590 | |
/**
 * Reflect an index into the range 0..m.
 *
 * Values outside the range are folded back by mirroring at 0 and at m
 * (no edge repetition), e.g. with m == 5: -1 -> 1, 6 -> 4, 11 -> 1.
 *
 * @param v index to fold
 * @param m largest valid index
 * @return  v if it already lies in 0..m, its reflection otherwise
 */
static inline int mirror(int v, int m){
    /* A range holding only index 0 has a single valid answer. Without
     * this guard the loop below would flip the sign of v forever. */
    if(m == 0)
        return 0;

    while((unsigned)v > (unsigned)m){
        v= -v;              /* reflect at 0 */
        if(v<0) v+= 2*m;    /* was above m: reflect at m instead */
    }
    return v;
}
598 | ||
28869757 | 599 | static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){ |
791e7b83 MN |
600 | int i; |
601 | ||
602 | if(v){ | |
603 | const int a= ABS(v); | |
604 | const int e= av_log2(a); | |
605 | #if 1 | |
115329f1 | 606 | const int el= FFMIN(e, 10); |
28869757 | 607 | put_rac(c, state+0, 0); |
791e7b83 MN |
608 | |
609 | for(i=0; i<el; i++){ | |
28869757 | 610 | put_rac(c, state+1+i, 1); //1..10 |
791e7b83 MN |
611 | } |
612 | for(; i<e; i++){ | |
28869757 | 613 | put_rac(c, state+1+9, 1); //1..10 |
791e7b83 | 614 | } |
28869757 | 615 | put_rac(c, state+1+FFMIN(i,9), 0); |
791e7b83 MN |
616 | |
617 | for(i=e-1; i>=el; i--){ | |
28869757 | 618 | put_rac(c, state+22+9, (a>>i)&1); //22..31 |
791e7b83 MN |
619 | } |
620 | for(; i>=0; i--){ | |
28869757 | 621 | put_rac(c, state+22+i, (a>>i)&1); //22..31 |
791e7b83 MN |
622 | } |
623 | ||
624 | if(is_signed) | |
28869757 | 625 | put_rac(c, state+11 + el, v < 0); //11..21 |
791e7b83 | 626 | #else |
115329f1 | 627 | |
28869757 | 628 | put_rac(c, state+0, 0); |
791e7b83 MN |
629 | if(e<=9){ |
630 | for(i=0; i<e; i++){ | |
28869757 | 631 | put_rac(c, state+1+i, 1); //1..10 |
791e7b83 | 632 | } |
28869757 | 633 | put_rac(c, state+1+i, 0); |
791e7b83 MN |
634 | |
635 | for(i=e-1; i>=0; i--){ | |
28869757 | 636 | put_rac(c, state+22+i, (a>>i)&1); //22..31 |
791e7b83 MN |
637 | } |
638 | ||
639 | if(is_signed) | |
28869757 | 640 | put_rac(c, state+11 + e, v < 0); //11..21 |
791e7b83 MN |
641 | }else{ |
642 | for(i=0; i<e; i++){ | |
28869757 | 643 | put_rac(c, state+1+FFMIN(i,9), 1); //1..10 |
791e7b83 | 644 | } |
28869757 | 645 | put_rac(c, state+1+FFMIN(i,9), 0); |
791e7b83 MN |
646 | |
647 | for(i=e-1; i>=0; i--){ | |
28869757 | 648 | put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31 |
791e7b83 MN |
649 | } |
650 | ||
651 | if(is_signed) | |
28869757 | 652 | put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21 |
791e7b83 MN |
653 | } |
654 | #endif | |
655 | }else{ | |
28869757 | 656 | put_rac(c, state+0, 1); |
791e7b83 MN |
657 | } |
658 | } | |
659 | ||
28869757 MN |
660 | static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){ |
661 | if(get_rac(c, state+0)) | |
791e7b83 MN |
662 | return 0; |
663 | else{ | |
7c2425d2 LM |
664 | int i, e, a; |
665 | e= 0; | |
28869757 | 666 | while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10 |
7c2425d2 | 667 | e++; |
791e7b83 | 668 | } |
7c2425d2 | 669 | |
791e7b83 | 670 | a= 1; |
7c2425d2 | 671 | for(i=e-1; i>=0; i--){ |
28869757 | 672 | a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31 |
791e7b83 MN |
673 | } |
674 | ||
28869757 | 675 | if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21 |
791e7b83 MN |
676 | return -a; |
677 | else | |
678 | return a; | |
679 | } | |
680 | } | |
681 | ||
28869757 | 682 | static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){ |
4f4e9633 | 683 | int i; |
0635cbfc | 684 | int r= log2>=0 ? 1<<log2 : 1; |
4f4e9633 MN |
685 | |
686 | assert(v>=0); | |
0635cbfc MN |
687 | assert(log2>=-4); |
688 | ||
689 | while(v >= r){ | |
28869757 | 690 | put_rac(c, state+4+log2, 1); |
0635cbfc | 691 | v -= r; |
4f4e9633 | 692 | log2++; |
0635cbfc | 693 | if(log2>0) r+=r; |
4f4e9633 | 694 | } |
28869757 | 695 | put_rac(c, state+4+log2, 0); |
115329f1 | 696 | |
4f4e9633 | 697 | for(i=log2-1; i>=0; i--){ |
28869757 | 698 | put_rac(c, state+31-i, (v>>i)&1); |
4f4e9633 | 699 | } |
4f4e9633 MN |
700 | } |
701 | ||
28869757 | 702 | static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){ |
4f4e9633 | 703 | int i; |
0635cbfc | 704 | int r= log2>=0 ? 1<<log2 : 1; |
4f4e9633 MN |
705 | int v=0; |
706 | ||
0635cbfc MN |
707 | assert(log2>=-4); |
708 | ||
28869757 | 709 | while(get_rac(c, state+4+log2)){ |
0635cbfc | 710 | v+= r; |
4f4e9633 | 711 | log2++; |
0635cbfc | 712 | if(log2>0) r+=r; |
4f4e9633 | 713 | } |
115329f1 | 714 | |
4f4e9633 | 715 | for(i=log2-1; i>=0; i--){ |
28869757 | 716 | v+= get_rac(c, state+31-i)<<i; |
4f4e9633 MN |
717 | } |
718 | ||
719 | return v; | |
720 | } | |
721 | ||
791e7b83 MN |
722 | static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){ |
723 | const int mirror_left= !highpass; | |
724 | const int mirror_right= (width&1) ^ highpass; | |
725 | const int w= (width>>1) - 1 + (highpass & width); | |
726 | int i; | |
727 | ||
728 | #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref))) | |
729 | if(mirror_left){ | |
730 | dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse); | |
731 | dst += dst_step; | |
732 | src += src_step; | |
733 | } | |
115329f1 | 734 | |
791e7b83 MN |
735 | for(i=0; i<w; i++){ |
736 | dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse); | |
737 | } | |
115329f1 | 738 | |
791e7b83 MN |
739 | if(mirror_right){ |
740 | dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse); | |
741 | } | |
742 | } | |
743 | ||
744 | static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){ | |
745 | const int mirror_left= !highpass; | |
746 | const int mirror_right= (width&1) ^ highpass; | |
747 | const int w= (width>>1) - 1 + (highpass & width); | |
748 | int i; | |
749 | ||
750 | if(mirror_left){ | |
751 | int r= 3*2*ref[0]; | |
752 | r += r>>4; | |
753 | r += r>>8; | |
754 | dst[0] = LIFT(src[0], ((r+add)>>shift), inverse); | |
755 | dst += dst_step; | |
756 | src += src_step; | |
757 | } | |
115329f1 | 758 | |
791e7b83 MN |
759 | for(i=0; i<w; i++){ |
760 | int r= 3*(ref[i*ref_step] + ref[(i+1)*ref_step]); | |
761 | r += r>>4; | |
762 | r += r>>8; | |
763 | dst[i*dst_step] = LIFT(src[i*src_step], ((r+add)>>shift), inverse); | |
764 | } | |
115329f1 | 765 | |
791e7b83 MN |
766 | if(mirror_right){ |
767 | int r= 3*2*ref[w*ref_step]; | |
768 | r += r>>4; | |
769 | r += r>>8; | |
770 | dst[w*dst_step] = LIFT(src[w*src_step], ((r+add)>>shift), inverse); | |
771 | } | |
772 | } | |
773 | ||
f5a71928 MN |
774 | static always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){ |
775 | const int mirror_left= !highpass; | |
776 | const int mirror_right= (width&1) ^ highpass; | |
777 | const int w= (width>>1) - 1 + (highpass & width); | |
778 | int i; | |
779 | ||
780 | assert(shift == 4); | |
781 | #define LIFTS(src, ref, inv) ((inv) ? (src) - (((ref) - 4*(src))>>shift): (16*4*(src) + 4*(ref) + 8 + (5<<27))/(5*16) - (1<<23)) | |
782 | if(mirror_left){ | |
783 | dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse); | |
784 | dst += dst_step; | |
785 | src += src_step; | |
786 | } | |
115329f1 | 787 | |
f5a71928 MN |
788 | for(i=0; i<w; i++){ |
789 | dst[i*dst_step] = LIFTS(src[i*src_step], mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add, inverse); | |
790 | } | |
115329f1 | 791 | |
f5a71928 MN |
792 | if(mirror_right){ |
793 | dst[w*dst_step] = LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse); | |
794 | } | |
795 | } | |
796 | ||
791e7b83 | 797 | |
aa25a462 | 798 | static void inplace_lift(DWTELEM *dst, int width, int *coeffs, int n, int shift, int start, int inverse){ |
791e7b83 | 799 | int x, i; |
115329f1 | 800 | |
791e7b83 MN |
801 | for(x=start; x<width; x+=2){ |
802 | int64_t sum=0; | |
803 | ||
804 | for(i=0; i<n; i++){ | |
805 | int x2= x + 2*i - n + 1; | |
806 | if (x2< 0) x2= -x2; | |
807 | else if(x2>=width) x2= 2*width-x2-2; | |
808 | sum += coeffs[i]*(int64_t)dst[x2]; | |
809 | } | |
810 | if(inverse) dst[x] -= (sum + (1<<shift)/2)>>shift; | |
811 | else dst[x] += (sum + (1<<shift)/2)>>shift; | |
812 | } | |
813 | } | |
814 | ||
aa25a462 | 815 | static void inplace_liftV(DWTELEM *dst, int width, int height, int stride, int *coeffs, int n, int shift, int start, int inverse){ |
791e7b83 MN |
816 | int x, y, i; |
817 | for(y=start; y<height; y+=2){ | |
818 | for(x=0; x<width; x++){ | |
819 | int64_t sum=0; | |
115329f1 | 820 | |
791e7b83 MN |
821 | for(i=0; i<n; i++){ |
822 | int y2= y + 2*i - n + 1; | |
823 | if (y2< 0) y2= -y2; | |
824 | else if(y2>=height) y2= 2*height-y2-2; | |
825 | sum += coeffs[i]*(int64_t)dst[x + y2*stride]; | |
826 | } | |
827 | if(inverse) dst[x + y*stride] -= (sum + (1<<shift)/2)>>shift; | |
828 | else dst[x + y*stride] += (sum + (1<<shift)/2)>>shift; | |
829 | } | |
830 | } | |
831 | } | |
832 | ||
833 | #define SCALEX 1 | |
834 | #define LX0 0 | |
835 | #define LX1 1 | |
836 | ||
de890c9b | 837 | #if 0 // more accurate 9/7 |
791e7b83 MN |
838 | #define N1 2 |
839 | #define SHIFT1 14 | |
840 | #define COEFFS1 (int[]){-25987,-25987} | |
841 | #define N2 2 | |
842 | #define SHIFT2 19 | |
843 | #define COEFFS2 (int[]){-27777,-27777} | |
844 | #define N3 2 | |
845 | #define SHIFT3 15 | |
846 | #define COEFFS3 (int[]){28931,28931} | |
847 | #define N4 2 | |
848 | #define SHIFT4 15 | |
849 | #define COEFFS4 (int[]){14533,14533} | |
850 | #elif 1 // 13/7 CRF | |
851 | #define N1 4 | |
852 | #define SHIFT1 4 | |
853 | #define COEFFS1 (int[]){1,-9,-9,1} | |
de890c9b | 854 | #define N2 4 |
791e7b83 MN |
855 | #define SHIFT2 4 |
856 | #define COEFFS2 (int[]){-1,5,5,-1} | |
857 | #define N3 0 | |
858 | #define SHIFT3 1 | |
859 | #define COEFFS3 NULL | |
860 | #define N4 0 | |
861 | #define SHIFT4 1 | |
862 | #define COEFFS4 NULL | |
863 | #elif 1 // 3/5 | |
864 | #define LX0 1 | |
865 | #define LX1 0 | |
866 | #define SCALEX 0.5 | |
867 | #define N1 2 | |
868 | #define SHIFT1 1 | |
869 | #define COEFFS1 (int[]){1,1} | |
870 | #define N2 2 | |
871 | #define SHIFT2 2 | |
872 | #define COEFFS2 (int[]){-1,-1} | |
873 | #define N3 0 | |
874 | #define SHIFT3 0 | |
875 | #define COEFFS3 NULL | |
876 | #define N4 0 | |
877 | #define SHIFT4 0 | |
878 | #define COEFFS4 NULL | |
115329f1 | 879 | #elif 1 // 11/5 |
791e7b83 MN |
880 | #define N1 0 |
881 | #define SHIFT1 1 | |
882 | #define COEFFS1 NULL | |
883 | #define N2 2 | |
884 | #define SHIFT2 2 | |
885 | #define COEFFS2 (int[]){-1,-1} | |
886 | #define N3 2 | |
887 | #define SHIFT3 0 | |
888 | #define COEFFS3 (int[]){-1,-1} | |
889 | #define N4 4 | |
890 | #define SHIFT4 7 | |
891 | #define COEFFS4 (int[]){-5,29,29,-5} | |
892 | #define SCALEX 4 | |
893 | #elif 1 // 9/7 CDF | |
894 | #define N1 2 | |
895 | #define SHIFT1 7 | |
896 | #define COEFFS1 (int[]){-203,-203} | |
897 | #define N2 2 | |
898 | #define SHIFT2 12 | |
899 | #define COEFFS2 (int[]){-217,-217} | |
900 | #define N3 2 | |
901 | #define SHIFT3 7 | |
902 | #define COEFFS3 (int[]){113,113} | |
903 | #define N4 2 | |
904 | #define SHIFT4 9 | |
905 | #define COEFFS4 (int[]){227,227} | |
906 | #define SCALEX 1 | |
907 | #elif 1 // 7/5 CDF | |
908 | #define N1 0 | |
909 | #define SHIFT1 1 | |
910 | #define COEFFS1 NULL | |
911 | #define N2 2 | |
912 | #define SHIFT2 2 | |
913 | #define COEFFS2 (int[]){-1,-1} | |
914 | #define N3 2 | |
915 | #define SHIFT3 0 | |
916 | #define COEFFS3 (int[]){-1,-1} | |
917 | #define N4 2 | |
918 | #define SHIFT4 4 | |
919 | #define COEFFS4 (int[]){3,3} | |
920 | #elif 1 // 9/7 MN | |
921 | #define N1 4 | |
922 | #define SHIFT1 4 | |
923 | #define COEFFS1 (int[]){1,-9,-9,1} | |
924 | #define N2 2 | |
925 | #define SHIFT2 2 | |
926 | #define COEFFS2 (int[]){1,1} | |
927 | #define N3 0 | |
928 | #define SHIFT3 1 | |
929 | #define COEFFS3 NULL | |
930 | #define N4 0 | |
931 | #define SHIFT4 1 | |
932 | #define COEFFS4 NULL | |
933 | #else // 13/7 CRF | |
934 | #define N1 4 | |
935 | #define SHIFT1 4 | |
936 | #define COEFFS1 (int[]){1,-9,-9,1} | |
937 | #define N2 4 | |
938 | #define SHIFT2 4 | |
939 | #define COEFFS2 (int[]){-1,5,5,-1} | |
940 | #define N3 0 | |
941 | #define SHIFT3 1 | |
942 | #define COEFFS3 NULL | |
943 | #define N4 0 | |
944 | #define SHIFT4 1 | |
945 | #define COEFFS4 NULL | |
946 | #endif | |
aa25a462 RFI |
947 | static void horizontal_decomposeX(DWTELEM *b, int width){ |
948 | DWTELEM temp[width]; | |
791e7b83 MN |
949 | const int width2= width>>1; |
950 | const int w2= (width+1)>>1; | |
62ab0b78 | 951 | int x; |
791e7b83 MN |
952 | |
953 | inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 0); | |
954 | inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 0); | |
955 | inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 0); | |
956 | inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 0); | |
115329f1 | 957 | |
791e7b83 MN |
958 | for(x=0; x<width2; x++){ |
959 | temp[x ]= b[2*x ]; | |
960 | temp[x+w2]= b[2*x + 1]; | |
961 | } | |
962 | if(width&1) | |
963 | temp[x ]= b[2*x ]; | |
964 | memcpy(b, temp, width*sizeof(int)); | |
965 | } | |
966 | ||
aa25a462 RFI |
967 | static void horizontal_composeX(DWTELEM *b, int width){ |
968 | DWTELEM temp[width]; | |
791e7b83 | 969 | const int width2= width>>1; |
62ab0b78 | 970 | int x; |
791e7b83 MN |
971 | const int w2= (width+1)>>1; |
972 | ||
973 | memcpy(temp, b, width*sizeof(int)); | |
974 | for(x=0; x<width2; x++){ | |
975 | b[2*x ]= temp[x ]; | |
976 | b[2*x + 1]= temp[x+w2]; | |
977 | } | |
978 | if(width&1) | |
979 | b[2*x ]= temp[x ]; | |
980 | ||
981 | inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 1); | |
982 | inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 1); | |
983 | inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 1); | |
984 | inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 1); | |
985 | } | |
986 | ||
aa25a462 | 987 | static void spatial_decomposeX(DWTELEM *buffer, int width, int height, int stride){ |
791e7b83 | 988 | int x, y; |
115329f1 | 989 | |
791e7b83 MN |
990 | for(y=0; y<height; y++){ |
991 | for(x=0; x<width; x++){ | |
992 | buffer[y*stride + x] *= SCALEX; | |
993 | } | |
994 | } | |
995 | ||
996 | for(y=0; y<height; y++){ | |
997 | horizontal_decomposeX(buffer + y*stride, width); | |
998 | } | |
115329f1 | 999 | |
791e7b83 MN |
1000 | inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 0); |
1001 | inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0); | |
1002 | inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0); | |
115329f1 | 1003 | inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0); |
791e7b83 MN |
1004 | } |
1005 | ||
aa25a462 | 1006 | static void spatial_composeX(DWTELEM *buffer, int width, int height, int stride){ |
791e7b83 | 1007 | int x, y; |
115329f1 | 1008 | |
791e7b83 MN |
1009 | inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1); |
1010 | inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1); | |
1011 | inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1); | |
1012 | inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 1); | |
1013 | ||
1014 | for(y=0; y<height; y++){ | |
1015 | horizontal_composeX(buffer + y*stride, width); | |
1016 | } | |
1017 | ||
1018 | for(y=0; y<height; y++){ | |
1019 | for(x=0; x<width; x++){ | |
1020 | buffer[y*stride + x] /= SCALEX; | |
1021 | } | |
1022 | } | |
1023 | } | |
1024 | ||
aa25a462 RFI |
1025 | static void horizontal_decompose53i(DWTELEM *b, int width){ |
1026 | DWTELEM temp[width]; | |
791e7b83 | 1027 | const int width2= width>>1; |
62ab0b78 | 1028 | int x; |
791e7b83 MN |
1029 | const int w2= (width+1)>>1; |
1030 | ||
1031 | for(x=0; x<width2; x++){ | |
1032 | temp[x ]= b[2*x ]; | |
1033 | temp[x+w2]= b[2*x + 1]; | |
1034 | } | |
1035 | if(width&1) | |
1036 | temp[x ]= b[2*x ]; | |
1037 | #if 0 | |
62ab0b78 AJ |
1038 | { |
1039 | int A1,A2,A3,A4; | |
791e7b83 MN |
1040 | A2= temp[1 ]; |
1041 | A4= temp[0 ]; | |
1042 | A1= temp[0+width2]; | |
1043 | A1 -= (A2 + A4)>>1; | |
1044 | A4 += (A1 + 1)>>1; | |
1045 | b[0+width2] = A1; | |
1046 | b[0 ] = A4; | |
1047 | for(x=1; x+1<width2; x+=2){ | |
1048 | A3= temp[x+width2]; | |
1049 | A4= temp[x+1 ]; | |
1050 | A3 -= (A2 + A4)>>1; | |
1051 | A2 += (A1 + A3 + 2)>>2; | |
1052 | b[x+width2] = A3; | |
1053 | b[x ] = A2; | |
1054 | ||
1055 | A1= temp[x+1+width2]; | |
1056 | A2= temp[x+2 ]; | |
1057 | A1 -= (A2 + A4)>>1; | |
1058 | A4 += (A1 + A3 + 2)>>2; | |
1059 | b[x+1+width2] = A1; | |
1060 | b[x+1 ] = A4; | |
1061 | } | |
1062 | A3= temp[width-1]; | |
1063 | A3 -= A2; | |
1064 | A2 += (A1 + A3 + 2)>>2; | |
1065 | b[width -1] = A3; | |
1066 | b[width2-1] = A2; | |
62ab0b78 | 1067 | } |
115329f1 | 1068 | #else |
791e7b83 MN |
1069 | lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0); |
1070 | lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0); | |
1071 | #endif | |
1072 | } | |
1073 | ||
aa25a462 | 1074 | static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ |
791e7b83 | 1075 | int i; |
115329f1 | 1076 | |
791e7b83 MN |
1077 | for(i=0; i<width; i++){ |
1078 | b1[i] -= (b0[i] + b2[i])>>1; | |
1079 | } | |
1080 | } | |
1081 | ||
aa25a462 | 1082 | static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ |
791e7b83 | 1083 | int i; |
115329f1 | 1084 | |
791e7b83 MN |
1085 | for(i=0; i<width; i++){ |
1086 | b1[i] += (b0[i] + b2[i] + 2)>>2; | |
1087 | } | |
1088 | } | |
1089 | ||
aa25a462 | 1090 | static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){ |
39c61bbb | 1091 | int y; |
791e7b83 MN |
1092 | DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride; |
1093 | DWTELEM *b1= buffer + mirror(-2 , height-1)*stride; | |
115329f1 | 1094 | |
791e7b83 MN |
1095 | for(y=-2; y<height; y+=2){ |
1096 | DWTELEM *b2= buffer + mirror(y+1, height-1)*stride; | |
1097 | DWTELEM *b3= buffer + mirror(y+2, height-1)*stride; | |
1098 | ||
1099 | {START_TIMER | |
13705b69 MN |
1100 | if(y+1<(unsigned)height) horizontal_decompose53i(b2, width); |
1101 | if(y+2<(unsigned)height) horizontal_decompose53i(b3, width); | |
791e7b83 | 1102 | STOP_TIMER("horizontal_decompose53i")} |
115329f1 | 1103 | |
791e7b83 | 1104 | {START_TIMER |
13705b69 MN |
1105 | if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width); |
1106 | if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width); | |
791e7b83 | 1107 | STOP_TIMER("vertical_decompose53i*")} |
115329f1 | 1108 | |
791e7b83 MN |
1109 | b0=b2; |
1110 | b1=b3; | |
1111 | } | |
1112 | } | |
1113 | ||
f5a71928 | 1114 | #define liftS lift |
791e7b83 MN |
1115 | #define lift5 lift |
1116 | #if 1 | |
1117 | #define W_AM 3 | |
1118 | #define W_AO 0 | |
1119 | #define W_AS 1 | |
1120 | ||
f5a71928 | 1121 | #undef liftS |
791e7b83 MN |
1122 | #define W_BM 1 |
1123 | #define W_BO 8 | |
1124 | #define W_BS 4 | |
1125 | ||
f5a71928 MN |
1126 | #define W_CM 1 |
1127 | #define W_CO 0 | |
1128 | #define W_CS 0 | |
791e7b83 | 1129 | |
f5a71928 MN |
1130 | #define W_DM 3 |
1131 | #define W_DO 4 | |
1132 | #define W_DS 3 | |
791e7b83 MN |
1133 | #elif 0 |
1134 | #define W_AM 55 | |
1135 | #define W_AO 16 | |
1136 | #define W_AS 5 | |
1137 | ||
1138 | #define W_BM 3 | |
1139 | #define W_BO 32 | |
1140 | #define W_BS 6 | |
1141 | ||
1142 | #define W_CM 127 | |
1143 | #define W_CO 64 | |
1144 | #define W_CS 7 | |
1145 | ||
1146 | #define W_DM 7 | |
1147 | #define W_DO 8 | |
1148 | #define W_DS 4 | |
1149 | #elif 0 | |
1150 | #define W_AM 97 | |
1151 | #define W_AO 32 | |
1152 | #define W_AS 6 | |
1153 | ||
1154 | #define W_BM 63 | |
1155 | #define W_BO 512 | |
1156 | #define W_BS 10 | |
1157 | ||
1158 | #define W_CM 13 | |
1159 | #define W_CO 8 | |
1160 | #define W_CS 4 | |
1161 | ||
1162 | #define W_DM 15 | |
1163 | #define W_DO 16 | |
1164 | #define W_DS 5 | |
1165 | ||
1166 | #else | |
1167 | ||
1168 | #define W_AM 203 | |
1169 | #define W_AO 64 | |
1170 | #define W_AS 7 | |
1171 | ||
1172 | #define W_BM 217 | |
1173 | #define W_BO 2048 | |
1174 | #define W_BS 12 | |
1175 | ||
1176 | #define W_CM 113 | |
1177 | #define W_CO 64 | |
1178 | #define W_CS 7 | |
1179 | ||
1180 | #define W_DM 227 | |
1181 | #define W_DO 128 | |
1182 | #define W_DS 9 | |
1183 | #endif | |
aa25a462 RFI |
1184 | static void horizontal_decompose97i(DWTELEM *b, int width){ |
1185 | DWTELEM temp[width]; | |
791e7b83 MN |
1186 | const int w2= (width+1)>>1; |
1187 | ||
1188 | lift (temp+w2, b +1, b , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0); | |
f5a71928 | 1189 | liftS(temp , b , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0); |
791e7b83 MN |
1190 | lift5(b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0); |
1191 | lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0); | |
1192 | } | |
1193 | ||
1194 | ||
aa25a462 | 1195 | static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ |
791e7b83 | 1196 | int i; |
115329f1 | 1197 | |
791e7b83 MN |
1198 | for(i=0; i<width; i++){ |
1199 | b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS; | |
1200 | } | |
1201 | } | |
1202 | ||
aa25a462 | 1203 | static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ |
791e7b83 | 1204 | int i; |
115329f1 | 1205 | |
791e7b83 MN |
1206 | for(i=0; i<width; i++){ |
1207 | #ifdef lift5 | |
1208 | b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS; | |
1209 | #else | |
1210 | int r= 3*(b0[i] + b2[i]); | |
1211 | r+= r>>4; | |
1212 | r+= r>>8; | |
1213 | b1[i] += (r+W_CO)>>W_CS; | |
1214 | #endif | |
1215 | } | |
1216 | } | |
1217 | ||
aa25a462 | 1218 | static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ |
791e7b83 | 1219 | int i; |
115329f1 | 1220 | |
791e7b83 | 1221 | for(i=0; i<width; i++){ |
f5a71928 | 1222 | #ifdef liftS |
791e7b83 | 1223 | b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS; |
f5a71928 MN |
1224 | #else |
1225 | b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + 8*5 + (5<<27)) / (5*16) - (1<<23); | |
1226 | #endif | |
791e7b83 MN |
1227 | } |
1228 | } | |
1229 | ||
aa25a462 | 1230 | static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ |
791e7b83 | 1231 | int i; |
115329f1 | 1232 | |
791e7b83 MN |
1233 | for(i=0; i<width; i++){ |
1234 | b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS; | |
1235 | } | |
1236 | } | |
1237 | ||
aa25a462 | 1238 | static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){ |
39c61bbb | 1239 | int y; |
791e7b83 MN |
1240 | DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride; |
1241 | DWTELEM *b1= buffer + mirror(-4 , height-1)*stride; | |
1242 | DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride; | |
1243 | DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride; | |
115329f1 | 1244 | |
791e7b83 MN |
1245 | for(y=-4; y<height; y+=2){ |
1246 | DWTELEM *b4= buffer + mirror(y+3, height-1)*stride; | |
1247 | DWTELEM *b5= buffer + mirror(y+4, height-1)*stride; | |
1248 | ||
1249 | {START_TIMER | |
13705b69 MN |
1250 | if(y+3<(unsigned)height) horizontal_decompose97i(b4, width); |
1251 | if(y+4<(unsigned)height) horizontal_decompose97i(b5, width); | |
791e7b83 MN |
1252 | if(width>400){ |
1253 | STOP_TIMER("horizontal_decompose97i") | |
1254 | }} | |
115329f1 | 1255 | |
791e7b83 | 1256 | {START_TIMER |
13705b69 MN |
1257 | if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width); |
1258 | if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width); | |
1259 | if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width); | |
1260 | if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width); | |
791e7b83 MN |
1261 | |
1262 | if(width>400){ | |
1263 | STOP_TIMER("vertical_decompose97i") | |
1264 | }} | |
115329f1 | 1265 | |
791e7b83 MN |
1266 | b0=b2; |
1267 | b1=b3; | |
1268 | b2=b4; | |
1269 | b3=b5; | |
1270 | } | |
1271 | } | |
1272 | ||
aa25a462 | 1273 | void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){ |
791e7b83 | 1274 | int level; |
115329f1 | 1275 | |
46c281e8 MN |
1276 | for(level=0; level<decomposition_count; level++){ |
1277 | switch(type){ | |
791e7b83 MN |
1278 | case 0: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break; |
1279 | case 1: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break; | |
1280 | case 2: spatial_decomposeX (buffer, width>>level, height>>level, stride<<level); break; | |
1281 | } | |
1282 | } | |
1283 | } | |
1284 | ||
aa25a462 RFI |
1285 | static void horizontal_compose53i(DWTELEM *b, int width){ |
1286 | DWTELEM temp[width]; | |
791e7b83 MN |
1287 | const int width2= width>>1; |
1288 | const int w2= (width+1)>>1; | |
62ab0b78 | 1289 | int x; |
791e7b83 MN |
1290 | |
1291 | #if 0 | |
62ab0b78 | 1292 | int A1,A2,A3,A4; |
791e7b83 MN |
1293 | A2= temp[1 ]; |
1294 | A4= temp[0 ]; | |
1295 | A1= temp[0+width2]; | |
1296 | A1 -= (A2 + A4)>>1; | |
1297 | A4 += (A1 + 1)>>1; | |
1298 | b[0+width2] = A1; | |
1299 | b[0 ] = A4; | |
1300 | for(x=1; x+1<width2; x+=2){ | |
1301 | A3= temp[x+width2]; | |
1302 | A4= temp[x+1 ]; | |
1303 | A3 -= (A2 + A4)>>1; | |
1304 | A2 += (A1 + A3 + 2)>>2; | |
1305 | b[x+width2] = A3; | |
1306 | b[x ] = A2; | |
1307 | ||
1308 | A1= temp[x+1+width2]; | |
1309 | A2= temp[x+2 ]; | |
1310 | A1 -= (A2 + A4)>>1; | |
1311 | A4 += (A1 + A3 + 2)>>2; | |
1312 | b[x+1+width2] = A1; | |
1313 | b[x+1 ] = A4; | |
1314 | } | |
1315 | A3= temp[width-1]; | |
1316 | A3 -= A2; | |
1317 | A2 += (A1 + A3 + 2)>>2; | |
1318 | b[width -1] = A3; | |
1319 | b[width2-1] = A2; | |
115329f1 | 1320 | #else |
791e7b83 MN |
1321 | lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1); |
1322 | lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1); | |
1323 | #endif | |
1324 | for(x=0; x<width2; x++){ | |
1325 | b[2*x ]= temp[x ]; | |
1326 | b[2*x + 1]= temp[x+w2]; | |
1327 | } | |
1328 | if(width&1) | |
1329 | b[2*x ]= temp[x ]; | |
1330 | } | |
1331 | ||
aa25a462 | 1332 | static void vertical_compose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ |
791e7b83 | 1333 | int i; |
115329f1 | 1334 | |
791e7b83 MN |
1335 | for(i=0; i<width; i++){ |
1336 | b1[i] += (b0[i] + b2[i])>>1; | |
1337 | } | |
1338 | } | |
1339 | ||
aa25a462 | 1340 | static void vertical_compose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ |
791e7b83 | 1341 | int i; |
115329f1 | 1342 | |
791e7b83 MN |
1343 | for(i=0; i<width; i++){ |
1344 | b1[i] -= (b0[i] + b2[i] + 2)>>2; | |
1345 | } | |
1346 | } | |
1347 | ||
a0d1931c Y |
1348 | static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){ |
1349 | cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line); | |
1350 | cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line); | |
1351 | cs->y = -1; | |
1352 | } | |
1353 | ||
f9e6ebf7 LM |
1354 | static void spatial_compose53i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){ |
1355 | cs->b0 = buffer + mirror(-1-1, height-1)*stride; | |
1356 | cs->b1 = buffer + mirror(-1 , height-1)*stride; | |
1357 | cs->y = -1; | |
1358 | } | |
1359 | ||
a0d1931c Y |
1360 | static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){ |
1361 | int y= cs->y; | |
115329f1 | 1362 | |
a0d1931c Y |
1363 | DWTELEM *b0= cs->b0; |
1364 | DWTELEM *b1= cs->b1; | |
3b6ab26c MN |
1365 | DWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line); |
1366 | DWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line); | |
a0d1931c Y |
1367 | |
1368 | {START_TIMER | |
13705b69 MN |
1369 | if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width); |
1370 | if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width); | |
a0d1931c Y |
1371 | STOP_TIMER("vertical_compose53i*")} |
1372 | ||
1373 | {START_TIMER | |
13705b69 MN |
1374 | if(y-1<(unsigned)height) horizontal_compose53i(b0, width); |
1375 | if(y+0<(unsigned)height) horizontal_compose53i(b1, width); | |
a0d1931c Y |
1376 | STOP_TIMER("horizontal_compose53i")} |
1377 | ||
1378 | cs->b0 = b2; | |
1379 | cs->b1 = b3; | |
1380 | cs->y += 2; | |
1381 | } | |
1382 | ||
f9e6ebf7 LM |
1383 | static void spatial_compose53i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){ |
1384 | int y= cs->y; | |
1385 | DWTELEM *b0= cs->b0; | |
1386 | DWTELEM *b1= cs->b1; | |
1387 | DWTELEM *b2= buffer + mirror(y+1, height-1)*stride; | |
1388 | DWTELEM *b3= buffer + mirror(y+2, height-1)*stride; | |
791e7b83 MN |
1389 | |
1390 | {START_TIMER | |
13705b69 MN |
1391 | if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width); |
1392 | if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width); | |
791e7b83 MN |
1393 | STOP_TIMER("vertical_compose53i*")} |
1394 | ||
1395 | {START_TIMER | |
13705b69 MN |
1396 | if(y-1<(unsigned)height) horizontal_compose53i(b0, width); |
1397 | if(y+0<(unsigned)height) horizontal_compose53i(b1, width); | |
791e7b83 MN |
1398 | STOP_TIMER("horizontal_compose53i")} |
1399 | ||
f9e6ebf7 LM |
1400 | cs->b0 = b2; |
1401 | cs->b1 = b3; | |
1402 | cs->y += 2; | |
1403 | } | |
1404 | ||
1405 | static void spatial_compose53i(DWTELEM *buffer, int width, int height, int stride){ | |
1406 | dwt_compose_t cs; | |
1407 | spatial_compose53i_init(&cs, buffer, height, stride); | |
1408 | while(cs.y <= height) | |
1409 | spatial_compose53i_dy(&cs, buffer, width, height, stride); | |
115329f1 DB |
1410 | } |
1411 | ||
791e7b83 | 1412 | |
aa25a462 RFI |
1413 | static void horizontal_compose97i(DWTELEM *b, int width){ |
1414 | DWTELEM temp[width]; | |
791e7b83 MN |
1415 | const int w2= (width+1)>>1; |
1416 | ||
1417 | lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1); | |
1418 | lift5(temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1); | |
f5a71928 | 1419 | liftS(b , temp , temp+w2, 2, 1, 1, width, -W_BM, W_BO, W_BS, 0, 1); |
791e7b83 MN |
1420 | lift (b+1 , temp+w2, b , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1); |
1421 | } | |
1422 | ||
aa25a462 | 1423 | static void vertical_compose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ |
791e7b83 | 1424 | int i; |
115329f1 | 1425 | |
791e7b83 MN |
1426 | for(i=0; i<width; i++){ |
1427 | b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS; | |
1428 | } | |
1429 | } | |
1430 | ||
aa25a462 | 1431 | static void vertical_compose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ |
791e7b83 | 1432 | int i; |
115329f1 | 1433 | |
791e7b83 MN |
1434 | for(i=0; i<width; i++){ |
1435 | #ifdef lift5 | |
1436 | b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS; | |
1437 | #else | |
1438 | int r= 3*(b0[i] + b2[i]); | |
1439 | r+= r>>4; | |
1440 | r+= r>>8; | |
1441 | b1[i] -= (r+W_CO)>>W_CS; | |
1442 | #endif | |
1443 | } | |
1444 | } | |
1445 | ||
aa25a462 | 1446 | static void vertical_compose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ |
791e7b83 | 1447 | int i; |
115329f1 | 1448 | |
791e7b83 | 1449 | for(i=0; i<width; i++){ |
f5a71928 | 1450 | #ifdef liftS |
791e7b83 | 1451 | b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS; |
f5a71928 MN |
1452 | #else |
1453 | b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS; | |
1454 | #endif | |
791e7b83 MN |
1455 | } |
1456 | } | |
1457 | ||
aa25a462 | 1458 | static void vertical_compose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ |
791e7b83 | 1459 | int i; |
115329f1 | 1460 | |
791e7b83 MN |
1461 | for(i=0; i<width; i++){ |
1462 | b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS; | |
1463 | } | |
1464 | } | |
1465 | ||
565a45ac MN |
1466 | static void vertical_compose97i(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width){ |
1467 | int i; | |
115329f1 | 1468 | |
565a45ac | 1469 | for(i=0; i<width; i++){ |
62ab0b78 | 1470 | #ifndef lift5 |
565a45ac | 1471 | int r; |
62ab0b78 | 1472 | #endif |
565a45ac MN |
1473 | b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS; |
1474 | #ifdef lift5 | |
1475 | b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS; | |
1476 | #else | |
1477 | r= 3*(b2[i] + b4[i]); | |
1478 | r+= r>>4; | |
1479 | r+= r>>8; | |
1480 | b3[i] -= (r+W_CO)>>W_CS; | |
1481 | #endif | |
f5a71928 | 1482 | #ifdef liftS |
565a45ac | 1483 | b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS; |
f5a71928 MN |
1484 | #else |
1485 | b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS; | |
1486 | #endif | |
565a45ac MN |
1487 | b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS; |
1488 | } | |
1489 | } | |
1490 | ||
a0d1931c Y |
1491 | static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){ |
1492 | cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line); | |
1493 | cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line); | |
1494 | cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line); | |
1495 | cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line); | |
1496 | cs->y = -3; | |
1497 | } | |
1498 | ||
f9e6ebf7 LM |
1499 | static void spatial_compose97i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){ |
1500 | cs->b0 = buffer + mirror(-3-1, height-1)*stride; | |
1501 | cs->b1 = buffer + mirror(-3 , height-1)*stride; | |
1502 | cs->b2 = buffer + mirror(-3+1, height-1)*stride; | |
1503 | cs->b3 = buffer + mirror(-3+2, height-1)*stride; | |
1504 | cs->y = -3; | |
1505 | } | |
791e7b83 | 1506 | |
a0d1931c Y |
1507 | static void spatial_compose97i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){ |
1508 | int y = cs->y; | |
115329f1 | 1509 | |
a0d1931c Y |
1510 | DWTELEM *b0= cs->b0; |
1511 | DWTELEM *b1= cs->b1; | |
1512 | DWTELEM *b2= cs->b2; | |
1513 | DWTELEM *b3= cs->b3; | |
3b6ab26c MN |
1514 | DWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line); |
1515 | DWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line); | |
115329f1 | 1516 | |
a0d1931c | 1517 | {START_TIMER |
565a45ac MN |
1518 | if(y>0 && y+4<height){ |
1519 | vertical_compose97i(b0, b1, b2, b3, b4, b5, width); | |
1520 | }else{ | |
13705b69 MN |
1521 | if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width); |
1522 | if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width); | |
1523 | if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width); | |
1524 | if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width); | |
565a45ac | 1525 | } |
a0d1931c Y |
1526 | if(width>400){ |
1527 | STOP_TIMER("vertical_compose97i")}} | |
a0d1931c Y |
1528 | |
1529 | {START_TIMER | |
13705b69 MN |
1530 | if(y-1<(unsigned)height) horizontal_compose97i(b0, width); |
1531 | if(y+0<(unsigned)height) horizontal_compose97i(b1, width); | |
3b6ab26c | 1532 | if(width>400 && y+0<(unsigned)height){ |
a0d1931c Y |
1533 | STOP_TIMER("horizontal_compose97i")}} |
1534 | ||
1535 | cs->b0=b2; | |
1536 | cs->b1=b3; | |
1537 | cs->b2=b4; | |
1538 | cs->b3=b5; | |
1539 | cs->y += 2; | |
1540 | } | |
1541 | ||
f9e6ebf7 LM |
1542 | static void spatial_compose97i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){ |
1543 | int y = cs->y; | |
1544 | DWTELEM *b0= cs->b0; | |
1545 | DWTELEM *b1= cs->b1; | |
1546 | DWTELEM *b2= cs->b2; | |
1547 | DWTELEM *b3= cs->b3; | |
1548 | DWTELEM *b4= buffer + mirror(y+3, height-1)*stride; | |
1549 | DWTELEM *b5= buffer + mirror(y+4, height-1)*stride; | |
791e7b83 | 1550 | |
791e7b83 | 1551 | {START_TIMER |
13705b69 MN |
1552 | if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width); |
1553 | if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width); | |
1554 | if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width); | |
1555 | if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width); | |
791e7b83 MN |
1556 | if(width>400){ |
1557 | STOP_TIMER("vertical_compose97i")}} | |
1558 | ||
1559 | {START_TIMER | |
13705b69 MN |
1560 | if(y-1<(unsigned)height) horizontal_compose97i(b0, width); |
1561 | if(y+0<(unsigned)height) horizontal_compose97i(b1, width); | |
791e7b83 MN |
1562 | if(width>400 && b0 <= b2){ |
1563 | STOP_TIMER("horizontal_compose97i")}} | |
f9e6ebf7 LM |
1564 | |
1565 | cs->b0=b2; | |
1566 | cs->b1=b3; | |
1567 | cs->b2=b4; | |
1568 | cs->b3=b5; | |
1569 | cs->y += 2; | |
1570 | } | |
1571 | ||
1572 | static void spatial_compose97i(DWTELEM *buffer, int width, int height, int stride){ | |
1573 | dwt_compose_t cs; | |
1574 | spatial_compose97i_init(&cs, buffer, height, stride); | |
1575 | while(cs.y <= height) | |
1576 | spatial_compose97i_dy(&cs, buffer, width, height, stride); | |
1577 | } | |
1578 | ||
a0d1931c Y |
1579 | void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){ |
1580 | int level; | |
1581 | for(level=decomposition_count-1; level>=0; level--){ | |
1582 | switch(type){ | |
1583 | case 0: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break; | |
1584 | case 1: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break; | |
1585 | /* not slicified yet */ | |
1586 | case 2: /*spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;*/ | |
1587 | av_log(NULL, AV_LOG_ERROR, "spatial_composeX neither buffered nor slicified yet.\n"); break; | |
1588 | } | |
1589 | } | |
1590 | } | |
1591 | ||
f9e6ebf7 LM |
1592 | void ff_spatial_idwt_init(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){ |
1593 | int level; | |
1594 | for(level=decomposition_count-1; level>=0; level--){ | |
1595 | switch(type){ | |
1596 | case 0: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break; | |
1597 | case 1: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break; | |
1598 | /* not slicified yet */ | |
1599 | case 2: spatial_composeX(buffer, width>>level, height>>level, stride<<level); break; | |
1600 | } | |
791e7b83 MN |
1601 | } |
1602 | } | |
1603 | ||
f9e6ebf7 LM |
1604 | void ff_spatial_idwt_slice(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){ |
1605 | const int support = type==1 ? 3 : 5; | |
791e7b83 | 1606 | int level; |
f9e6ebf7 | 1607 | if(type==2) return; |
791e7b83 | 1608 | |
46c281e8 | 1609 | for(level=decomposition_count-1; level>=0; level--){ |
f9e6ebf7 LM |
1610 | while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){ |
1611 | switch(type){ | |
1612 | case 0: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level); | |
1613 | break; | |
1614 | case 1: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level); | |
1615 | break; | |
1616 | case 2: break; | |
1617 | } | |
791e7b83 MN |
1618 | } |
1619 | } | |
1620 | } | |
1621 | ||
a0d1931c Y |
1622 | void ff_spatial_idwt_buffered_slice(dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){ |
1623 | const int support = type==1 ? 3 : 5; | |
1624 | int level; | |
1625 | if(type==2) return; | |
1626 | ||
1627 | for(level=decomposition_count-1; level>=0; level--){ | |
1628 | while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){ | |
1629 | switch(type){ | |
1630 | case 0: spatial_compose97i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level); | |
1631 | break; | |
1632 | case 1: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level); | |
1633 | break; | |
1634 | case 2: break; | |
1635 | } | |
1636 | } | |
1637 | } | |
1638 | } | |
1639 | ||
f9e6ebf7 LM |
1640 | void ff_spatial_idwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){ |
1641 | if(type==2){ | |
1642 | int level; | |
1643 | for(level=decomposition_count-1; level>=0; level--) | |
1644 | spatial_composeX (buffer, width>>level, height>>level, stride<<level); | |
1645 | }else{ | |
1646 | dwt_compose_t cs[MAX_DECOMPOSITIONS]; | |
1647 | int y; | |
1648 | ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count); | |
1649 | for(y=0; y<height; y+=4) | |
1650 | ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y); | |
1651 | } | |
1652 | } | |
1653 | ||
0ecca7a4 | 1654 | static int encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){ |
4f4e9633 MN |
1655 | const int w= b->width; |
1656 | const int h= b->height; | |
1657 | int x, y; | |
1658 | ||
791e7b83 | 1659 | if(1){ |
791e7b83 | 1660 | int run=0; |
a8d73e56 | 1661 | int runs[w*h]; |
791e7b83 | 1662 | int run_index=0; |
b44985ba | 1663 | int max_index; |
115329f1 | 1664 | |
791e7b83 MN |
1665 | for(y=0; y<h; y++){ |
1666 | for(x=0; x<w; x++){ | |
78486403 | 1667 | int v, p=0; |
6b2f6646 | 1668 | int /*ll=0, */l=0, lt=0, t=0, rt=0; |
a8d73e56 | 1669 | v= src[x + y*stride]; |
791e7b83 MN |
1670 | |
1671 | if(y){ | |
a8d73e56 | 1672 | t= src[x + (y-1)*stride]; |
791e7b83 | 1673 | if(x){ |
a8d73e56 | 1674 | lt= src[x - 1 + (y-1)*stride]; |
791e7b83 MN |
1675 | } |
1676 | if(x + 1 < w){ | |
a8d73e56 | 1677 | rt= src[x + 1 + (y-1)*stride]; |
791e7b83 MN |
1678 | } |
1679 | } | |
1680 | if(x){ | |
a8d73e56 | 1681 | l= src[x - 1 + y*stride]; |
6b2f6646 MN |
1682 | /*if(x > 1){ |
1683 | if(orientation==1) ll= src[y + (x-2)*stride]; | |
1684 | else ll= src[x - 2 + y*stride]; | |
791e7b83 MN |
1685 | }*/ |
1686 | } | |
78486403 | 1687 | if(parent){ |
a8d73e56 MN |
1688 | int px= x>>1; |
1689 | int py= y>>1; | |
115329f1 | 1690 | if(px<b->parent->width && py<b->parent->height) |
78486403 MN |
1691 | p= parent[px + py*2*stride]; |
1692 | } | |
1693 | if(!(/*ll|*/l|lt|t|rt|p)){ | |
791e7b83 MN |
1694 | if(v){ |
1695 | runs[run_index++]= run; | |
1696 | run=0; | |
1697 | }else{ | |
1698 | run++; | |
1699 | } | |
1700 | } | |
1701 | } | |
1702 | } | |
b44985ba | 1703 | max_index= run_index; |
791e7b83 MN |
1704 | runs[run_index++]= run; |
1705 | run_index=0; | |
1706 | run= runs[run_index++]; | |
1707 | ||
b44985ba MN |
1708 | put_symbol2(&s->c, b->state[30], max_index, 0); |
1709 | if(run_index <= max_index) | |
1710 | put_symbol2(&s->c, b->state[1], run, 3); | |
115329f1 | 1711 | |
791e7b83 | 1712 | for(y=0; y<h; y++){ |
d06c75a8 | 1713 | if(s->c.bytestream_end - s->c.bytestream < w*40){ |
0ecca7a4 MN |
1714 | av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n"); |
1715 | return -1; | |
1716 | } | |
791e7b83 | 1717 | for(x=0; x<w; x++){ |
78486403 | 1718 | int v, p=0; |
6b2f6646 | 1719 | int /*ll=0, */l=0, lt=0, t=0, rt=0; |
a8d73e56 | 1720 | v= src[x + y*stride]; |
791e7b83 MN |
1721 | |
1722 | if(y){ | |
a8d73e56 | 1723 | t= src[x + (y-1)*stride]; |
791e7b83 | 1724 | if(x){ |
a8d73e56 | 1725 | lt= src[x - 1 + (y-1)*stride]; |
791e7b83 MN |
1726 | } |
1727 | if(x + 1 < w){ | |
a8d73e56 | 1728 | rt= src[x + 1 + (y-1)*stride]; |
791e7b83 MN |
1729 | } |
1730 | } | |
1731 | if(x){ | |
a8d73e56 | 1732 | l= src[x - 1 + y*stride]; |
6b2f6646 MN |
1733 | /*if(x > 1){ |
1734 | if(orientation==1) ll= src[y + (x-2)*stride]; | |
1735 | else ll= src[x - 2 + y*stride]; | |
791e7b83 MN |
1736 | }*/ |
1737 | } | |
78486403 | 1738 | if(parent){ |
a8d73e56 MN |
1739 | int px= x>>1; |
1740 | int py= y>>1; | |
115329f1 | 1741 | if(px<b->parent->width && py<b->parent->height) |
78486403 MN |
1742 | p= parent[px + py*2*stride]; |
1743 | } | |
1744 | if(/*ll|*/l|lt|t|rt|p){ | |
1745 | int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p)); | |
6b2f6646 | 1746 | |
28869757 | 1747 | put_rac(&s->c, &b->state[0][context], !!v); |
791e7b83 MN |
1748 | }else{ |
1749 | if(!run){ | |
1750 | run= runs[run_index++]; | |
4f4e9633 | 1751 | |
b44985ba MN |
1752 | if(run_index <= max_index) |
1753 | put_symbol2(&s->c, b->state[1], run, 3); | |
791e7b83 MN |
1754 | assert(v); |
1755 | }else{ | |
1756 | run--; | |
1757 | assert(!v); | |
1758 | } | |
1759 | } | |
1760 | if(v){ | |
78486403 | 1761 | int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p)); |
538a3841 MN |
1762 | int l2= 2*ABS(l) + (l<0); |
1763 | int t2= 2*ABS(t) + (t<0); | |
6b2f6646 | 1764 | |
0635cbfc | 1765 | put_symbol2(&s->c, b->state[context + 2], ABS(v)-1, context-4); |
538a3841 | 1766 | put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0); |
791e7b83 MN |
1767 | } |
1768 | } | |
1769 | } | |
791e7b83 | 1770 | } |
0ecca7a4 | 1771 | return 0; |
791e7b83 MN |
1772 | } |
1773 | ||
115329f1 | 1774 | static int encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){ |
4f4e9633 MN |
1775 | // encode_subband_qtree(s, b, src, parent, stride, orientation); |
1776 | // encode_subband_z0run(s, b, src, parent, stride, orientation); | |
0ecca7a4 | 1777 | return encode_subband_c0run(s, b, src, parent, stride, orientation); |
4f4e9633 MN |
1778 | // encode_subband_dzr(s, b, src, parent, stride, orientation); |
1779 | } | |
1780 | ||
a0d1931c | 1781 | static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){ |
791e7b83 MN |
1782 | const int w= b->width; |
1783 | const int h= b->height; | |
1784 | int x,y; | |
115329f1 | 1785 | |
791e7b83 | 1786 | if(1){ |
b44985ba | 1787 | int run, runs; |
cbb1d2b1 MN |
1788 | x_and_coeff *xc= b->x_coeff; |
1789 | x_and_coeff *prev_xc= NULL; | |
1790 | x_and_coeff *prev2_xc= xc; | |
1791 | x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL; | |
1792 | x_and_coeff *prev_parent_xc= parent_xc; | |
791e7b83 | 1793 | |
b44985ba MN |
1794 | runs= get_symbol2(&s->c, b->state[30], 0); |
1795 | if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3); | |
1796 | else run= INT_MAX; | |
1797 | ||
791e7b83 | 1798 | for(y=0; y<h; y++){ |
0cea8a03 MN |
1799 | int v=0; |
1800 | int lt=0, t=0, rt=0; | |
1801 | ||
cbb1d2b1 MN |
1802 | if(y && prev_xc->x == 0){ |
1803 | rt= prev_xc->coeff; | |
0cea8a03 | 1804 | } |
791e7b83 | 1805 | for(x=0; x<w; x++){ |
0cea8a03 MN |
1806 | int p=0; |
1807 | const int l= v; | |
115329f1 | 1808 | |
0cea8a03 | 1809 | lt= t; t= rt; |
791e7b83 | 1810 | |
ff765159 | 1811 | if(y){ |
cbb1d2b1 MN |
1812 | if(prev_xc->x <= x) |
1813 | prev_xc++; | |
1814 | if(prev_xc->x == x + 1) | |
1815 | rt= prev_xc->coeff; | |
ff765159 MN |
1816 | else |
1817 | rt=0; | |
1818 | } | |
cbb1d2b1 MN |
1819 | if(parent_xc){ |
1820 | if(x>>1 > parent_xc->x){ | |
1821 | parent_xc++; | |
7b49c309 | 1822 | } |
cbb1d2b1 MN |
1823 | if(x>>1 == parent_xc->x){ |
1824 | p= parent_xc->coeff; | |
ff765159 | 1825 | } |
78486403 MN |
1826 | } |
1827 | if(/*ll|*/l|lt|t|rt|p){ | |
538a3841 | 1828 | int context= av_log2(/*ABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1)); |
6b2f6646 | 1829 | |
28869757 | 1830 | v=get_rac(&s->c, &b->state[0][context]); |
3c096ac7 MN |
1831 | if(v){ |
1832 | v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1); | |
1833 | v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]); | |
115329f1 | 1834 | |
cbb1d2b1 MN |
1835 | xc->x=x; |
1836 | (xc++)->coeff= v; | |
3c096ac7 | 1837 | } |
791e7b83 MN |
1838 | }else{ |
1839 | if(!run){ | |
b44985ba MN |
1840 | if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3); |
1841 | else run= INT_MAX; | |
3c096ac7 MN |
1842 | v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1); |
1843 | v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]); | |
115329f1 | 1844 | |
cbb1d2b1 MN |
1845 | xc->x=x; |
1846 | (xc++)->coeff= v; | |
791e7b83 | 1847 | }else{ |
99cd59e5 | 1848 | int max_run; |
791e7b83 MN |
1849 | run--; |
1850 | v=0; | |
3c1adccd | 1851 | |
cbb1d2b1 | 1852 | if(y) max_run= FFMIN(run, prev_xc->x - x - 2); |
99cd59e5 | 1853 | else max_run= FFMIN(run, w-x-1); |
cbb1d2b1 MN |
1854 | if(parent_xc) |
1855 | max_run= FFMIN(max_run, 2*parent_xc->x - x - 1); | |
99cd59e5 MN |
1856 | x+= max_run; |
1857 | run-= max_run; | |
791e7b83 MN |
1858 | } |
1859 | } | |
7b49c309 | 1860 | } |
cbb1d2b1 MN |
1861 | (xc++)->x= w+1; //end marker |
1862 | prev_xc= prev2_xc; | |
1863 | prev2_xc= xc; | |
115329f1 | 1864 | |
cbb1d2b1 | 1865 | if(parent_xc){ |
7b49c309 | 1866 | if(y&1){ |
cbb1d2b1 MN |
1867 | while(parent_xc->x != parent->width+1) |
1868 | parent_xc++; | |
1869 | parent_xc++; | |
1870 | prev_parent_xc= parent_xc; | |
7b49c309 | 1871 | }else{ |
cbb1d2b1 | 1872 | parent_xc= prev_parent_xc; |
791e7b83 MN |
1873 | } |
1874 | } | |
1875 | } | |
a0d1931c | 1876 | |
cbb1d2b1 | 1877 | (xc++)->x= w+1; //end marker |
a0d1931c Y |
1878 | } |
1879 | } | |
1880 | ||
1881 | static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){ | |
1882 | const int w= b->width; | |
62ab0b78 | 1883 | int y; |
c97de57c MN |
1884 | const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16); |
1885 | int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); | |
a0d1931c Y |
1886 | int qadd= (s->qbias*qmul)>>QBIAS_SHIFT; |
1887 | int new_index = 0; | |
115329f1 | 1888 | |
a0d1931c Y |
1889 | START_TIMER |
1890 | ||
1891 | if(b->buf == s->spatial_dwt_buffer || s->qlog == LOSSLESS_QLOG){ | |
1892 | qadd= 0; | |
1893 | qmul= 1<<QEXPSHIFT; | |
1894 | } | |
1895 | ||
1896 | /* If we are on the second or later slice, restore our index. */ | |
1897 | if (start_y != 0) | |
1898 | new_index = save_state[0]; | |
1899 | ||
115329f1 | 1900 | |
a0d1931c Y |
1901 | for(y=start_y; y<h; y++){ |
1902 | int x = 0; | |
1903 | int v; | |
1904 | DWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset; | |
1905 | memset(line, 0, b->width*sizeof(DWTELEM)); | |
1906 | v = b->x_coeff[new_index].coeff; | |
1907 | x = b->x_coeff[new_index++].x; | |
1908 | while(x < w) | |
1909 | { | |
538a3841 MN |
1910 | register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT; |
1911 | register int u= -(v&1); | |
1912 | line[x] = (t^u) - u; | |
1913 | ||
a0d1931c Y |
1914 | v = b->x_coeff[new_index].coeff; |
1915 | x = b->x_coeff[new_index++].x; | |
1916 | } | |
791e7b83 | 1917 | } |
a0d1931c Y |
1918 | if(w > 200 && start_y != 0/*level+1 == s->spatial_decomposition_count*/){ |
1919 | STOP_TIMER("decode_subband") | |
1920 | } | |
115329f1 | 1921 | |
a0d1931c Y |
1922 | /* Save our variables for the next slice. */ |
1923 | save_state[0] = new_index; | |
115329f1 | 1924 | |
a0d1931c | 1925 | return; |
791e7b83 MN |
1926 | } |
1927 | ||
1928 | static void reset_contexts(SnowContext *s){ | |
1929 | int plane_index, level, orientation; | |
1930 | ||
19aa028d | 1931 | for(plane_index=0; plane_index<3; plane_index++){ |
791e7b83 MN |
1932 | for(level=0; level<s->spatial_decomposition_count; level++){ |
1933 | for(orientation=level ? 1:0; orientation<4; orientation++){ | |
28869757 | 1934 | memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state)); |
791e7b83 MN |
1935 | } |
1936 | } | |
1937 | } | |
28869757 MN |
1938 | memset(s->header_state, MID_STATE, sizeof(s->header_state)); |
1939 | memset(s->block_state, MID_STATE, sizeof(s->block_state)); | |
155ec6ed MN |
1940 | } |
1941 | ||
1942 | static int alloc_blocks(SnowContext *s){ | |
1943 | int w= -((-s->avctx->width )>>LOG2_MB_SIZE); | |
1944 | int h= -((-s->avctx->height)>>LOG2_MB_SIZE); | |
115329f1 | 1945 | |
155ec6ed MN |
1946 | s->b_width = w; |
1947 | s->b_height= h; | |
115329f1 | 1948 | |
155ec6ed MN |
1949 | s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2)); |
1950 | return 0; | |
1951 | } | |
1952 | ||
28869757 MN |
1953 | static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){ |
1954 | uint8_t *bytestream= d->bytestream; | |
1955 | uint8_t *bytestream_start= d->bytestream_start; | |
155ec6ed | 1956 | *d= *s; |
28869757 MN |
1957 | d->bytestream= bytestream; |
1958 | d->bytestream_start= bytestream_start; | |
155ec6ed MN |
1959 | } |
1960 | ||
//near copy & paste from dsputil, FIXME
/**
 * Sum of all samples of the w x w square whose top-left sample is pix[0];
 * consecutive rows are line_size bytes apart.
 */
static int pix_sum(uint8_t * pix, int line_size, int w)
{
    int total = 0;
    int row, col;

    for (row = 0; row < w; row++) {
        const uint8_t *line = pix + row * line_size;

        for (col = 0; col < w; col++)
            total += line[col];
    }
    return total;
}
1976 | ||
1977 | //near copy & paste from dsputil, FIXME | |
1978 | static int pix_norm1(uint8_t * pix, int line_size, int w) | |
1979 | { | |
1980 | int s, i, j; | |
1981 | uint32_t *sq = squareTbl + 256; | |
1982 | ||
1983 | s = 0; | |
1984 | for (i = 0; i < w; i++) { | |
1985 | for (j = 0; j < w; j ++) { | |
1986 | s += sq[pix[0]]; | |
1987 | pix ++; | |
1988 | } | |
1989 | pix += line_size - w; | |
1990 | } | |
1991 | return s; | |
1992 | } | |
1993 | ||
1994 | static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int type){ | |
1995 | const int w= s->b_width << s->block_max_depth; | |
1996 | const int rem_depth= s->block_max_depth - level; | |
1997 | const int index= (x + y*w) << rem_depth; | |
1998 | const int block_w= 1<<rem_depth; | |
1999 | BlockNode block; | |
2000 | int i,j; | |
115329f1 | 2001 | |
155ec6ed MN |
2002 | block.color[0]= l; |
2003 | block.color[1]= cb; | |
2004 | block.color[2]= cr; | |
2005 | block.mx= mx; | |
2006 | block.my= my; | |
2007 | block.type= type; | |
2008 | block.level= level; | |
2009 | ||
2010 | for(j=0; j<block_w; j++){ | |
2011 | for(i=0; i<block_w; i++){ | |
2012 | s->block[index + i + j*w]= block; | |
2013 | } | |
2014 | } | |
2015 | } | |
2016 | ||
2017 | static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){ | |
2018 | const int offset[3]= { | |
2019 | y*c-> stride + x, | |
2020 | ((y*c->uvstride + x)>>1), | |
2021 | ((y*c->uvstride + x)>>1), | |
2022 | }; | |
2023 | int i; | |
2024 | for(i=0; i<3; i++){ | |
2025 | c->src[0][i]= src [i]; | |
2026 | c->ref[0][i]= ref [i] + offset[i]; | |
2027 | } | |
2028 | assert(!ref_index); | |
2029 | } | |
2030 | ||
//FIXME copy&paste
/* Named rows of the motion-vector predictor table; encode_q_branch() declares
 * it as a local `int P[10][2]`, so each macro expands to one [x, y] pair. */
#define P_LEFT P[1]
#define P_TOP P[2]
#define P_TOPRIGHT P[3]
#define P_MEDIAN P[4]
#define P_MV1 P[9]
#define FLAG_QPEL 1 //must be 1
2038 | ||
2039 | static int encode_q_branch(SnowContext *s, int level, int x, int y){ | |
2040 | uint8_t p_buffer[1024]; | |
2041 | uint8_t i_buffer[1024]; | |
2042 | uint8_t p_state[sizeof(s->block_state)]; | |
2043 | uint8_t i_state[sizeof(s->block_state)]; | |
28869757 MN |
2044 | RangeCoder pc, ic; |
2045 | uint8_t *pbbak= s->c.bytestream; | |
2046 | uint8_t *pbbak_start= s->c.bytestream_start; | |
155ec6ed MN |
2047 | int score, score2, iscore, i_len, p_len, block_s, sum; |
2048 | const int w= s->b_width << s->block_max_depth; | |
2049 | const int h= s->b_height << s->block_max_depth; | |
2050 | const int rem_depth= s->block_max_depth - level; | |
2051 | const int index= (x + y*w) << rem_depth; | |
2052 | const int block_w= 1<<(LOG2_MB_SIZE - level); | |
155ec6ed MN |
2053 | int trx= (x+1)<<rem_depth; |
2054 | int try= (y+1)<<rem_depth; | |
2055 | BlockNode *left = x ? &s->block[index-1] : &null_block; | |
2056 | BlockNode *top = y ? &s->block[index-w] : &null_block; | |
2057 | BlockNode *right = trx<w ? &s->block[index+1] : &null_block; | |
2058 | BlockNode *bottom= try<h ? &s->block[index+w] : &null_block; | |
2059 | BlockNode *tl = y && x ? &s->block[index-w-1] : left; | |
2060 | BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt | |
2061 | int pl = left->color[0]; | |
2062 | int pcb= left->color[1]; | |
2063 | int pcr= left->color[2]; | |
2064 | int pmx= mid_pred(left->mx, top->mx, tr->mx); | |
2065 | int pmy= mid_pred(left->my, top->my, tr->my); | |
2066 | int mx=0, my=0; | |
51d6a3cf | 2067 | int l,cr,cb; |
155ec6ed MN |
2068 | const int stride= s->current_picture.linesize[0]; |
2069 | const int uvstride= s->current_picture.linesize[1]; | |
51d6a3cf MN |
2070 | uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w, |
2071 | s->input_picture.data[1] + (x + y*uvstride)*block_w/2, | |
2072 | s->input_picture.data[2] + (x + y*uvstride)*block_w/2}; | |
155ec6ed MN |
2073 | int P[10][2]; |
2074 | int16_t last_mv[3][2]; | |
2075 | int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused | |
2076 | const int shift= 1+qpel; | |
2077 | MotionEstContext *c= &s->m.me; | |
2078 | int mx_context= av_log2(2*ABS(left->mx - top->mx)); | |
2079 | int my_context= av_log2(2*ABS(left->my - top->my)); | |
2080 | int s_context= 2*left->level + 2*top->level + tl->level + tr->level; | |
2081 | ||
2082 | assert(sizeof(s->block_state) >= 256); | |
2083 | if(s->keyframe){ | |
2084 | set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, BLOCK_INTRA); | |
2085 | return 0; | |
2086 | } | |
2087 | ||
155ec6ed MN |
2088 | // clip predictors / edge ? |
2089 | ||
2090 | P_LEFT[0]= left->mx; | |
2091 | P_LEFT[1]= left->my; | |
2092 | P_TOP [0]= top->mx; | |
2093 | P_TOP [1]= top->my; | |
2094 | P_TOPRIGHT[0]= tr->mx; | |
2095 | P_TOPRIGHT[1]= tr->my; | |
115329f1 | 2096 | |
155ec6ed MN |
2097 | last_mv[0][0]= s->block[index].mx; |
2098 | last_mv[0][1]= s->block[index].my; | |
2099 | last_mv[1][0]= right->mx; | |
2100 | last_mv[1][1]= right->my; | |
2101 | last_mv[2][0]= bottom->mx; | |
2102 | last_mv[2][1]= bottom->my; | |
115329f1 | 2103 | |
155ec6ed | 2104 | s->m.mb_stride=2; |
115329f1 | 2105 | s->m.mb_x= |
155ec6ed MN |
2106 | s->m.mb_y= 0; |
2107 | s->m.me.skip= 0; | |
2108 | ||
2109 | init_ref(c, current_data, s->last_picture.data, NULL, block_w*x, block_w*y, 0); | |
115329f1 | 2110 | |
155ec6ed MN |
2111 | assert(s->m.me. stride == stride); |
2112 | assert(s->m.me.uvstride == uvstride); | |
115329f1 | 2113 | |
155ec6ed MN |
2114 | c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp); |
2115 | c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp); | |
2116 | c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp); | |
2117 | c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV; | |
115329f1 | 2118 | |
ff158dc9 MN |
2119 | c->xmin = - x*block_w - 16+2; |
2120 | c->ymin = - y*block_w - 16+2; | |
2121 | c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2; | |
2122 | c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2; | |
155ec6ed MN |
2123 | |
2124 | if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift); | |
115329f1 | 2125 | if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift); |
155ec6ed MN |
2126 | if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift); |
2127 | if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift); | |
2128 | if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift); | |
2129 | if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip | |
2130 | if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift); | |
2131 | ||
2132 | P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]); | |
2133 | P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]); | |
2134 | ||
2135 | if (!y) { | |
2136 | c->pred_x= P_LEFT[0]; | |
2137 | c->pred_y= P_LEFT[1]; | |
2138 | } else { | |
2139 | c->pred_x = P_MEDIAN[0]; | |
2140 | c->pred_y = P_MEDIAN[1]; | |
2141 | } | |
2142 | ||
115329f1 | 2143 | score= ff_epzs_motion_search(&s->m, &mx, &my, P, 0, /*ref_index*/ 0, last_mv, |
155ec6ed MN |
2144 | (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w); |
2145 | ||
2146 | assert(mx >= c->xmin); | |
2147 | assert(mx <= c->xmax); | |
2148 | assert(my >= c->ymin); | |
2149 | assert(my <= c->ymax); | |
115329f1 | 2150 | |
155ec6ed MN |
2151 | score= s->m.me.sub_motion_search(&s->m, &mx, &my, score, 0, 0, level-LOG2_MB_SIZE+4, block_w); |
2152 | score= ff_get_mb_score(&s->m, mx, my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0); | |
2153 | //FIXME if mb_cmp != SSE then intra cant be compared currently and mb_penalty vs. lambda2 | |
115329f1 | 2154 | |
155ec6ed MN |
2155 | // subpel search |
2156 | pc= s->c; | |
28869757 MN |
2157 | pc.bytestream_start= |
2158 | pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo | |
155ec6ed MN |
2159 | memcpy(p_state, s->block_state, sizeof(s->block_state)); |
2160 | ||
2161 | if(level!=s->block_max_depth) | |
28869757 MN |
2162 | put_rac(&pc, &p_state[4 + s_context], 1); |
2163 | put_rac(&pc, &p_state[1 + left->type + top->type], 0); | |
155ec6ed MN |
2164 | put_symbol(&pc, &p_state[128 + 32*mx_context], mx - pmx, 1); |
2165 | put_symbol(&pc, &p_state[128 + 32*my_context], my - pmy, 1); | |
28869757 MN |
2166 | p_len= pc.bytestream - pc.bytestream_start; |
2167 | score += (s->lambda2*(p_len*8 | |
2168 | + (pc.outstanding_count - s->c.outstanding_count)*8 | |
2169 | + (-av_log2(pc.range) + av_log2(s->c.range)) | |
2170 | ))>>FF_LAMBDA_SHIFT; | |
155ec6ed MN |
2171 | |
2172 | block_s= block_w*block_w; | |
51d6a3cf | 2173 | sum = pix_sum(current_data[0], stride, block_w); |
155ec6ed | 2174 | l= (sum + block_s/2)/block_s; |
51d6a3cf | 2175 | iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s; |
115329f1 | 2176 | |
155ec6ed | 2177 | block_s= block_w*block_w>>2; |
51d6a3cf | 2178 | sum = pix_sum(current_data[1], uvstride, block_w>>1); |
155ec6ed MN |
2179 | cb= (sum + block_s/2)/block_s; |
2180 | // iscore += pix_norm1(¤t_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s; | |
51d6a3cf | 2181 | sum = pix_sum(current_data[2], uvstride, block_w>>1); |
155ec6ed MN |
2182 | cr= (sum + block_s/2)/block_s; |
2183 | // iscore += pix_norm1(¤t_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s; | |
2184 | ||
2185 | ic= s->c; | |
28869757 MN |
2186 | ic.bytestream_start= |
2187 | ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo | |
155ec6ed MN |
2188 | memcpy(i_state, s->block_state, sizeof(s->block_state)); |
2189 | if(level!=s->block_max_depth) | |
28869757 MN |
2190 | put_rac(&ic, &i_state[4 + s_context], 1); |
2191 | put_rac(&ic, &i_state[1 + left->type + top->type], 1); | |
155ec6ed MN |
2192 | put_symbol(&ic, &i_state[32], l-pl , 1); |
2193 | put_symbol(&ic, &i_state[64], cb-pcb, 1); | |
2194 | put_symbol(&ic, &i_state[96], cr-pcr, 1); | |
28869757 MN |
2195 | i_len= ic.bytestream - ic.bytestream_start; |
2196 | iscore += (s->lambda2*(i_len*8 | |
2197 | + (ic.outstanding_count - s->c.outstanding_count)*8 | |
2198 | + (-av_log2(ic.range) + av_log2(s->c.range)) | |
2199 | ))>>FF_LAMBDA_SHIFT; | |
155ec6ed MN |
2200 | |
2201 | // assert(score==256*256*256*64-1); | |
2202 | assert(iscore < 255*255*256 + s->lambda2*10); | |
2203 | assert(iscore >= 0); | |
2204 | assert(l>=0 && l<=255); | |
2205 | assert(pl>=0 && pl<=255); | |
2206 | ||
2207 | if(level==0){ | |
2208 | int varc= iscore >> 8; | |
2209 | int vard= score >> 8; | |
2210 | if (vard <= 64 || vard < varc) | |
2211 | c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc); | |
2212 | else | |
2213 | c->scene_change_score+= s->m.qscale; | |
2214 | } | |
115329f1 | 2215 | |
155ec6ed | 2216 | if(level!=s->block_max_depth){ |
28869757 | 2217 | put_rac(&s->c, &s->block_state[4 + s_context], 0); |
155ec6ed MN |
2218 | score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0); |
2219 | score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0); | |
2220 | score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1); | |
2221 | score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1); | |
2222 | score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead | |
115329f1 | 2223 | |
155ec6ed MN |
2224 | if(score2 < score && score2 < iscore) |
2225 | return score2; | |
2226 | } | |
115329f1 | 2227 | |
155ec6ed | 2228 | if(iscore < score){ |
28869757 | 2229 | memcpy(pbbak, i_buffer, i_len); |
155ec6ed | 2230 | s->c= ic; |
28869757 MN |
2231 | s->c.bytestream_start= pbbak_start; |
2232 | s->c.bytestream= pbbak + i_len; | |
155ec6ed MN |
2233 | set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, BLOCK_INTRA); |
2234 | memcpy(s->block_state, i_state, sizeof(s->block_state)); | |
2235 | return iscore; | |
2236 | }else{ | |
28869757 | 2237 | memcpy(pbbak, p_buffer, p_len); |
155ec6ed | 2238 | s->c= pc; |
28869757 MN |
2239 | s->c.bytestream_start= pbbak_start; |
2240 | s->c.bytestream= pbbak + p_len; | |
155ec6ed MN |
2241 | set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, 0); |
2242 | memcpy(s->block_state, p_state, sizeof(s->block_state)); | |
2243 | return score; | |
2244 | } | |
2245 | } | |
2246 | ||
51d6a3cf MN |
2247 | static always_inline int same_block(BlockNode *a, BlockNode *b){ |
2248 | if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){ | |
2249 | return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2])); | |
2250 | }else{ | |
2251 | return !((a->mx - b->mx) | (a->my - b->my) | ((a->type ^ b->type)&BLOCK_INTRA)); | |
2252 | } | |
2253 | } | |
2254 | ||
2255 | static void encode_q_branch2(SnowContext *s, int level, int x, int y){ | |
2256 | const int w= s->b_width << s->block_max_depth; | |
2257 | const int rem_depth= s->block_max_depth - level; | |
2258 | const int index= (x + y*w) << rem_depth; | |
2259 | int trx= (x+1)<<rem_depth; | |
2260 | BlockNode *b= &s->block[index]; | |
2261 | BlockNode *left = x ? &s->block[index-1] : &null_block; | |
2262 | BlockNode *top = y ? &s->block[index-w] : &null_block; | |
2263 | BlockNode *tl = y && x ? &s->block[index-w-1] : left; | |
2264 | BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt | |
2265 | int pl = left->color[0]; | |
2266 | int pcb= left->color[1]; | |
2267 | int pcr= left->color[2]; | |
2268 | int pmx= mid_pred(left->mx, top->mx, tr->mx); | |
2269 | int pmy= mid_pred(left->my, top->my, tr->my); | |
2270 | int mx_context= av_log2(2*ABS(left->mx - top->mx)); | |
2271 | int my_context= av_log2(2*ABS(left->my - top->my)); | |
2272 | int s_context= 2*left->level + 2*top->level + tl->level + tr->level; | |
2273 | ||
2274 | if(s->keyframe){ | |
2275 | set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, BLOCK_INTRA); | |
2276 | return; | |
2277 | } | |
2278 | ||
2279 | if(level!=s->block_max_depth){ | |
2280 | if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){ | |
d6f41eed MN |
2281 | put_rac(&s->c, &s->block_state[4 + s_context], 1); |
2282 | }else{ | |
51d6a3cf MN |
2283 | put_rac(&s->c, &s->block_state[4 + s_context], 0); |
2284 | encode_q_branch2(s, level+1, 2*x+0, 2*y+0); | |
2285 | encode_q_branch2(s, level+1, 2*x+1, 2*y+0); | |
2286 | encode_q_branch2(s, level+1, 2*x+0, 2*y+1); | |
2287 | encode_q_branch2(s, level+1, 2*x+1, 2*y+1); | |
2288 | return; | |
51d6a3cf MN |
2289 | } |
2290 | } | |
2291 | if(b->type & BLOCK_INTRA){ | |
2292 | put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1); | |
2293 | put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1); | |
2294 | put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1); | |
2295 | put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1); | |
2296 | set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, BLOCK_INTRA); | |
2297 | }else{ | |
2298 | put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0); | |
2299 | put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1); | |
2300 | put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1); | |
2301 | set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, 0); | |
2302 | } | |
2303 | } | |
2304 | ||
155ec6ed MN |
2305 | static void decode_q_branch(SnowContext *s, int level, int x, int y){ |
2306 | const int w= s->b_width << s->block_max_depth; | |
155ec6ed MN |
2307 | const int rem_depth= s->block_max_depth - level; |
2308 | const int index= (x + y*w) << rem_depth; | |
155ec6ed | 2309 | int trx= (x+1)<<rem_depth; |
155ec6ed MN |
2310 | BlockNode *left = x ? &s->block[index-1] : &null_block; |
2311 | BlockNode *top = y ? &s->block[index-w] : &null_block; | |
2312 | BlockNode *tl = y && x ? &s->block[index-w-1] : left; | |
2313 | BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt | |
2314 | int s_context= 2*left->level + 2*top->level + tl->level + tr->level; | |
115329f1 | 2315 | |
155ec6ed MN |
2316 | if(s->keyframe){ |
2317 | set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, BLOCK_INTRA); | |
2318 | return; | |
2319 | } | |
2320 | ||
28869757 | 2321 | if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){ |
155ec6ed MN |
2322 | int type; |
2323 | int l = left->color[0]; | |
2324 | int cb= left->color[1]; | |
2325 | int cr= left->color[2]; | |
2326 | int mx= mid_pred(left->mx, top->mx, tr->mx); | |
2327 | int my= mid_pred(left->my, top->my, tr->my); | |
2328 | int mx_context= av_log2(2*ABS(left->mx - top->mx)) + 0*av_log2(2*ABS(tr->mx - top->mx)); | |
2329 | int my_context= av_log2(2*ABS(left->my - top->my)) + 0*av_log2(2*ABS(tr->my - top->my)); | |
115329f1 | 2330 | |
28869757 | 2331 | type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0; |
155ec6ed MN |
2332 | |
2333 | if(type){ | |
2334 | l += get_symbol(&s->c, &s->block_state[32], 1); | |
2335 | cb+= get_symbol(&s->c, &s->block_state[64], 1); | |
2336 | cr+= get_symbol(&s->c, &s->block_state[96], 1); | |
2337 | }else{ | |
2338 | mx+= get_symbol(&s->c, &s->block_state[128 + 32*mx_context], 1); | |
2339 | my+= get_symbol(&s->c, &s->block_state[128 + 32*my_context], 1); | |
2340 | } | |
2341 | set_blocks(s, level, x, y, l, cb, cr, mx, my, type); | |
2342 | }else{ | |
2343 | decode_q_branch(s, level+1, 2*x+0, 2*y+0); | |
2344 | decode_q_branch(s, level+1, 2*x+1, 2*y+0); | |
2345 | decode_q_branch(s, level+1, 2*x+0, 2*y+1); | |
2346 | decode_q_branch(s, level+1, 2*x+1, 2*y+1); | |
2347 | } | |
2348 | } | |
2349 | ||
2350 | static void encode_blocks(SnowContext *s){ | |
2351 | int x, y; | |
2352 | int w= s->b_width; | |
2353 | int h= s->b_height; | |
2354 | ||
51d6a3cf MN |
2355 | if(s->avctx->me_method == ME_ITER && !s->keyframe) |
2356 | iterative_me(s); | |
2357 | ||
155ec6ed | 2358 | for(y=0; y<h; y++){ |
d06c75a8 | 2359 | if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit |
0ecca7a4 MN |
2360 | av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n"); |
2361 | return; | |
2362 | } | |
155ec6ed | 2363 | for(x=0; x<w; x++){ |
51d6a3cf MN |
2364 | if(s->avctx->me_method == ME_ITER) |
2365 | encode_q_branch2(s, 0, x, y); | |
2366 | else | |
2367 | encode_q_branch (s, 0, x, y); | |
155ec6ed MN |
2368 | } |
2369 | } | |
2370 | } | |
2371 | ||
2372 | static void decode_blocks(SnowContext *s){ | |
2373 | int x, y; | |
2374 | int w= s->b_width; | |
2375 | int h= s->b_height; | |
2376 | ||
2377 | for(y=0; y<h; y++){ | |
2378 | for(x=0; x<w; x++){ | |
2379 | decode_q_branch(s, 0, x, y); | |
2380 | } | |
2381 | } | |
791e7b83 MN |
2382 | } |
2383 | ||
2384 | static void mc_block(uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){ | |
2385 | int x, y; | |
3924dac4 | 2386 | START_TIMER |
791e7b83 MN |
2387 | for(y=0; y < b_h+5; y++){ |
2388 | for(x=0; x < b_w; x++){ | |
3924dac4 MN |
2389 | int a0= src[x ]; |
2390 | int a1= src[x + 1]; | |
2391 | int a2= src[x + 2]; | |
2392 | int a3= src[x + 3]; | |
2393 | int a4= src[x + 4]; | |
2394 | int a5= src[x + 5]; | |
791e7b83 MN |
2395 | // int am= 9*(a1+a2) - (a0+a3); |
2396 | int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5); | |
2397 | // int am= 18*(a2+a3) - 2*(a1+a4); | |
2398 | // int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3; | |
2399 | // int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3; | |
2400 | ||
2401 | // if(b_w==16) am= 8*(a1+a2); | |
2402 | ||
8c2515bb Y |
2403 | if(dx<8) am = (32*a2*( 8-dx) + am* dx + 128)>>8; |
2404 | else am = ( am*(16-dx) + 32*a3*(dx-8) + 128)>>8; | |
115329f1 | 2405 | |
8c2515bb Y |
2406 | /* FIXME Try increasing tmp buffer to 16 bits and not clipping here. Should give marginally better results. - Robert*/ |
2407 | if(am&(~255)) am= ~(am>>31); | |
115329f1 | 2408 | |
8c2515bb | 2409 | tmp[x] = am; |
791e7b83 MN |
2410 | |
2411 | /* if (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) + aL* dx + 32)>>6; | |
2412 | else if(dx< 8) tmp[x + y*stride]= ( aL*( 8-dx) + am*(dx- 4) + 32)>>6; | |
2413 | else if(dx<12) tmp[x + y*stride]= ( am*(12-dx) + aR*(dx- 8) + 32)>>6; | |
2414 | else tmp[x + y*stride]= ( aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/ | |
2415 | } | |
3924dac4 MN |
2416 | tmp += stride; |
2417 | src += stride; | |
791e7b83 | 2418 | } |
3924dac4 | 2419 | tmp -= (b_h+5)*stride; |
115329f1 | 2420 | |
791e7b83 MN |
2421 | for(y=0; y < b_h; y++){ |
2422 | for(x=0; x < b_w; x++){ | |
3924dac4 MN |
2423 | int a0= tmp[x + 0*stride]; |
2424 | int a1= tmp[x + 1*stride]; | |
2425 | int a2= tmp[x + 2*stride]; | |
2426 | int a3= tmp[x + 3*stride]; | |
2427 | int a4= tmp[x + 4*stride]; | |
2428 | int a5= tmp[x + 5*stride]; | |
791e7b83 MN |
2429 | int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5); |
2430 | // int am= 18*(a2+a3) - 2*(a1+a4); | |
2431 | /* int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3; | |
2432 | int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/ | |
115329f1 | 2433 | |
791e7b83 MN |
2434 | // if(b_w==16) am= 8*(a1+a2); |
2435 | ||
8c2515bb Y |
2436 | if(dy<8) am = (32*a2*( 8-dy) + am* dy + 128)>>8; |
2437 | else am = ( am*(16-dy) + 32*a3*(dy-8) + 128)>>8; | |
791e7b83 | 2438 | |
8c2515bb | 2439 | if(am&(~255)) am= ~(am>>31); |
115329f1 | 2440 | |
8c2515bb | 2441 | dst[x] = am; |
791e7b83 MN |
2442 | /* if (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) + aL* dy + 32)>>6; |
2443 | else if(dy< 8) tmp[x + y*stride]= ( aL*( 8-dy) + am*(dy- 4) + 32)>>6; | |
2444 | else if(dy<12) tmp[x + y*stride]= ( am*(12-dy) + aR*(dy- 8) + 32)>>6; | |
2445 | else tmp[x + y*stride]= ( aR*(16-dy) + 16*a2*(dy-12) + 32)>>6;*/ | |
2446 | } | |
3924dac4 MN |
2447 | dst += stride; |
2448 | tmp += stride; | |
791e7b83 | 2449 | } |
3924dac4 | 2450 | STOP_TIMER("mc_block") |
791e7b83 MN |
2451 | } |
2452 | ||
/*
 * mca(dx,dy,b_w) defines mc_block_hpel<dx><dy><b_w>(), a wrapper that runs
 * mc_block() on a square b_w x b_w block with fixed fractions dx and dy.
 * src is moved back by 2 columns and 2 rows before the call; h must equal b_w.
 */
#define mca(dx,dy,b_w)\
static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, uint8_t *src, int stride, int h){\
    uint8_t scratch[stride*(b_w+5)];\
    assert(h==b_w);\
    mc_block(dst, src-2-2*stride, scratch, stride, b_w, b_w, dx, dy);\
}

/* 16x16 variants */
mca( 0, 0,16)
mca( 8, 0,16)
mca( 0, 8,16)
mca( 8, 8,16)
/* 8x8 variants */
mca( 0, 0,8)
mca( 8, 0,8)
mca( 0, 8,8)
mca( 8, 8,8)
791e7b83 | 2468 | |
ff158dc9 | 2469 | static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){ |
51d6a3cf | 2470 | if(block->type & BLOCK_INTRA){ |
ff158dc9 | 2471 | int x, y; |
2692ceab MN |
2472 | const int color = block->color[plane_index]; |
2473 | const int color4= color*0x01010101; | |
1015631b LM |
2474 | if(b_w==32){ |
2475 | for(y=0; y < b_h; y++){ | |
2476 | *(uint32_t*)&dst[0 + y*stride]= color4; | |
2477 | *(uint32_t*)&dst[4 + y*stride]= color4; | |
2478 | *(uint32_t*)&dst[8 + y*stride]= color4; | |
2479 | *(uint32_t*)&dst[12+ y*stride]= color4; | |
2480 | *(uint32_t*)&dst[16+ y*stride]= color4; | |
2481 | *(uint32_t*)&dst[20+ y*stride]= color4; | |
2482 | *(uint32_t*)&dst[24+ y*stride]= color4; | |
2483 | *(uint32_t*)&dst[28+ y*stride]= color4; | |
2484 | } | |
2485 | }else if(b_w==16){ | |
2692ceab MN |
2486 | for(y=0; y < b_h; y++){ |
2487 | *(uint32_t*)&dst[0 + y*stride]= color4; | |
2488 | *(uint32_t*)&dst[4 + y*stride]= color4; | |
2489 | *(uint32_t*)&dst[8 + y*stride]= color4; | |
2490 | *(uint32_t*)&dst[12+ y*stride]= color4; | |
2491 | } | |
2492 | }else if(b_w==8){ | |
2493 | for(y=0; y < b_h; y++){ | |
2494 | *(uint32_t*)&dst[0 + y*stride]= color4; | |
2495 | *(uint32_t*)&dst[4 + y*stride]= color4; | |
2496 | } | |
2497 | }else if(b_w==4){ | |
2498 | for(y=0; y < b_h; y++){ | |
2499 | *(uint32_t*)&dst[0 + y*stride]= color4; | |
2500 | } | |
2501 | }else{ | |
2502 | for(y=0; y < b_h; y++){ | |
2503 | for(x=0; x < b_w; x++){ | |
2504 | dst[x + y*stride]= color; | |
2505 | } | |
ff158dc9 MN |
2506 | } |
2507 | } | |
2508 | }else{ | |
2509 | const int scale= plane_index ? s->mv_scale : 2*s->mv_scale; | |
2510 | int mx= block->mx*scale; | |
2511 | int my= block->my*scale; | |
ec697587 MN |
2512 | const int dx= mx&15; |
2513 | const int dy= my&15; | |
80e44bc3 | 2514 | const int tab_index= 3 - (b_w>>2) + (b_w>>4); |
ff158dc9 MN |
2515 | sx += (mx>>4) - 2; |
2516 | sy += (my>>4) - 2; | |
2517 | src += sx + sy*stride; | |
2518 | if( (unsigned)sx >= w - b_w - 4 | |
2519 | || (unsigned)sy >= h - b_h - 4){ | |
2520 | ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+5, b_h+5, sx, sy, w, h); | |
2521 | src= tmp + MB_SIZE; | |
2522 | } | |
2692ceab MN |
2523 | assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h); |
2524 | assert(!(b_w&(b_w-1))); | |
2525 | assert(b_w>1 && b_h>1); | |
1015631b | 2526 | assert(tab_index>=0 && tab_index<4 || b_w==32); |
80e44bc3 | 2527 | if((dx&3) || (dy&3)) |
ec697587 | 2528 | mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy); |
1015631b LM |
2529 | else if(b_w==32){ |
2530 | int y; | |
2531 | for(y=0; y<b_h; y+=16){ | |
2532 | s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 2 + (y+2)*stride,stride); | |
2533 | s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 18 + (y+2)*stride,stride); | |
2534 | } | |
2535 | }else if(b_w==b_h) | |
80e44bc3 | 2536 | s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 2 + 2*stride,stride); |
2692ceab | 2537 | else if(b_w==2*b_h){ |
80e44bc3 MN |
2538 | s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 2 + 2*stride,stride); |
2539 | s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 2 + b_h + 2*stride,stride); | |
2692ceab MN |
2540 | }else{ |
2541 | assert(2*b_w==b_h); | |
80e44bc3 MN |
2542 | s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 2 + 2*stride ,stride); |
2543 | s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 2 + 2*stride+b_w*stride,stride); | |
2692ceab | 2544 | } |
ff158dc9 MN |
2545 | } |
2546 | } | |
2547 | ||
ff158dc9 | 2548 | //FIXME name clenup (b_w, block_w, b_width stuff) |
a0d1931c Y |
2549 | static always_inline void add_yblock_buffered(SnowContext *s, slice_buffer * sb, DWTELEM *old_dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){ |
2550 | DWTELEM * dst = NULL; | |
2551 | const int b_width = s->b_width << s->block_max_depth; | |
2552 | const int b_height= s->b_height << s->block_max_depth; | |
2553 | const int b_stride= b_width; | |
2554 | BlockNode *lt= &s->block[b_x + b_y*b_stride]; | |
2555 | BlockNode *rt= lt+1; | |
2556 | BlockNode *lb= lt+b_stride; | |
2557 | BlockNode *rb= lb+1; | |
115329f1 | 2558 | uint8_t *block[4]; |
cc884a35 MN |
2559 | int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride; |
2560 | uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align | |
2561 | uint8_t *ptmp; | |
a0d1931c Y |
2562 | int x,y; |
2563 | ||
2564 | if(b_x<0){ | |
2565 | lt= rt; | |
2566 | lb= rb; | |
2567 | }else if(b_x + 1 >= b_width){ | |
2568 | rt= lt; | |
2569 | rb= lb; | |
2570 | } | |
2571 | if(b_y<0){ | |
2572 | lt= lb; | |
2573 | rt= rb; | |
2574 | }else if(b_y + 1 >= b_height){ | |
2575 | lb= lt; | |
2576 | rb= rt; | |
2577 | } | |
115329f1 | 2578 | |
a0d1931c Y |
2579 | if(src_x<0){ //FIXME merge with prev & always round internal width upto *16 |
2580 | obmc -= src_x; | |
2581 | b_w += src_x; | |
2582 | src_x=0; | |
2583 | }else if(src_x + b_w > w){ | |
2584 | b_w = w - src_x; | |
2585 | } | |
2586 | if(src_y<0){ | |
2587 | obmc -= src_y*obmc_stride; | |
2588 | b_h += src_y; | |
2589 | src_y=0; | |
2590 | }else if(src_y + b_h> h){ | |
2591 | b_h = h - src_y; | |
2592 | } | |
115329f1 | 2593 | |
a0d1931c Y |
2594 | if(b_w<=0 || b_h<=0) return; |
2595 | ||
cc884a35 | 2596 | assert(src_stride > 2*MB_SIZE + 5); |
a0d1931c Y |
2597 | // old_dst += src_x + src_y*dst_stride; |
2598 | dst8+= src_x + src_y*src_stride; | |
2599 | // src += src_x + src_y*src_stride; | |
2600 | ||
cc884a35 MN |
2601 | ptmp= tmp + 3*tmp_step; |
2602 | block[0]= ptmp; | |
2603 | ptmp+=tmp_step; | |
115329f1 | 2604 | pred_block(s, block[0], src, tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h); |
a0d1931c Y |
2605 | |
2606 | if(same_block(lt, rt)){ | |
2607 | block[1]= block[0]; | |
2608 | }else{ | |
cc884a35 MN |
2609 | block[1]= ptmp; |
2610 | ptmp+=tmp_step; | |
a0d1931c Y |
2611 | pred_block(s, block[1], src, tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h); |
2612 | } | |
115329f1 | 2613 | |
a0d1931c Y |
2614 | if(same_block(lt, lb)){ |
2615 | block[2]= block[0]; | |
2616 | }else if(same_block(rt, lb)){ | |
2617 | block[2]= block[1]; | |
2618 | }else{ | |
cc884a35 MN |
2619 | block[2]= ptmp; |
2620 | ptmp+=tmp_step; | |
a0d1931c Y |
2621 | pred_block(s, block[2], src, tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h); |
2622 | } | |
2623 | ||
2624 | if(same_block(lt, rb) ){ | |
2625 | block[3]= block[0]; | |
2626 | }else if(same_block(rt, rb)){ | |
2627 | block[3]= block[1]; | |
2628 | }else if(same_block(lb, rb)){ | |
2629 | block[3]= block[2]; | |
2630 | }else{ | |
cc884a35 | 2631 | block[3]= ptmp; |
a0d1931c Y |
2632 | pred_block(s, block[3], src, tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h); |
2633 | } | |
2634 | #if 0 | |
2635 | for(y=0; y<b_h; y++){ | |
2636 | for(x=0; x<b_w; x++){ | |
2637 | int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX); | |
2638 | if(add) dst[x + y*dst_stride] += v; | |
2639 | else dst[x + y*dst_stride] -= v; | |
2640 | } | |
2641 | } | |
2642 | for(y=0; y<b_h; y++){ | |
2643 | uint8_t *obmc2= obmc + (obmc_stride>>1); | |
2644 | for(x=0; x<b_w; x++){ | |
2645 | int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX); | |
2646 | if(add) dst[x + y*dst_stride] += v; | |
2647 | else dst[x + y*dst_stride] -= v; | |
2648 | } | |
2649 | } | |
2650 | for(y=0; y<b_h; y++){ | |
2651 | uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1); | |
2652 | for(x=0; x<b_w; x++){ | |
2653 | int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX); | |
2654 | if(add) dst[x + y*dst_stride] += v; | |
2655 | else dst[x + y*dst_stride] -= v; | |
2656 | } | |
2657 | } | |
2658 | for(y=0; y<b_h; y++){ | |
2659 | uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1); | |
2660 | uint8_t *obmc4= obmc3+ (obmc_stride>>1); | |
2661 | for(x=0; x<b_w; x++){ | |
2662 | int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX); | |
2663 | if(add) dst[x + y*dst_stride] += v; | |
2664 | else dst[x + y*dst_stride] -= v; | |
2665 | } | |
2666 | } | |
2667 | #else | |
2668 | { | |
2669 | ||
2670 | START_TIMER | |
115329f1 | 2671 | |
a0d1931c Y |
2672 | for(y=0; y<b_h; y++){ |
2673 | //FIXME ugly missue of obmc_stride | |
2674 | uint8_t *obmc1= obmc + y*obmc_stride; | |
2675 | uint8_t *obmc2= obmc1+ (obmc_stride>>1); | |
2676 | uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1); | |
2677 | uint8_t *obmc4= obmc3+ (obmc_stride>>1); | |
2678 | dst = slice_buffer_get_line(sb, src_y + y); | |
2679 | for(x=0; x<b_w; x++){ | |
2680 | int v= obmc1[x] * block[3][x + y*src_stride] | |
2681 | +obmc2[x] * block[2][x + y*src_stride] | |
2682 | +obmc3[x] * block[1][x + y*src_stride] | |
2683 | +obmc4[x] * block[0][x + y*src_stride]; | |
2684 | ||
2685 | v <<= 8 - LOG2_OBMC_MAX; | |
2686 | if(FRAC_BITS != 8){ | |
2687 | v += 1<<(7 - FRAC_BITS); | |
2688 | v >>= 8 - FRAC_BITS; | |
2689 | } | |
2690 | if(add){ | |
2691 | // v += old_dst[x + y*dst_stride]; | |
2692 | v += dst[x + src_x]; | |
2693 | v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS; | |
2694 | if(v&(~255)) v= ~(v>>31); | |
2695 | dst8[x + y*src_stride] = v; | |
2696 | }else{ | |
2697 | // old_dst[x + y*dst_stride] -= v; | |
2698 | dst[x + src_x] -= v; | |
2699 | } | |
2700 | } | |
2701 | } | |
2702 | STOP_TIMER("Inner add y block") | |
2703 | } | |
2704 | #endif | |
2705 | } | |
2706 | ||
2707 | //FIXME name clenup (b_w, block_w, b_width stuff) | |
1015631b | 2708 | static always_inline void add_yblock(SnowContext *s, DWTELEM *dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){ |
ff158dc9 MN |
2709 | const int b_width = s->b_width << s->block_max_depth; |
2710 | const int b_height= s->b_height << s->block_max_depth; | |
2711 | const int b_stride= b_width; | |
2712 | BlockNode *lt= &s->block[b_x + b_y*b_stride]; | |
2713 | BlockNode *rt= lt+1; | |
2714 | BlockNode *lb= lt+b_stride; | |
2715 | BlockNode *rb= lb+1; | |
115329f1 | 2716 | uint8_t *block[4]; |
cc884a35 MN |
2717 | int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride; |
2718 | uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align | |
2719 | uint8_t *ptmp; | |
791e7b83 MN |
2720 | int x,y; |
2721 | ||
ff158dc9 MN |
2722 | if(b_x<0){ |
2723 | lt= rt; | |
2724 | lb= rb; | |
2725 | }else if(b_x + 1 >= b_width){ | |
2726 | rt= lt; | |
2727 | rb= lb; | |
791e7b83 | 2728 | } |
ff158dc9 MN |
2729 | if(b_y<0){ |
2730 | lt= lb; | |
2731 | rt= rb; | |
2732 | }else if(b_y + 1 >= b_height){ | |
2733 | lb= lt; | |
2734 | rb= rt; | |
2735 | } | |
115329f1 | 2736 | |
ff158dc9 MN |
2737 | if(src_x<0){ //FIXME merge with prev & always round internal width upto *16 |
2738 | obmc -= src_x; | |
2739 | b_w += src_x; | |
1015631b LM |
2740 | if(!offset_dst) |
2741 | dst -= src_x; | |
ff158dc9 MN |
2742 | src_x=0; |
2743 | }else if(src_x + b_w > w){ | |
2744 | b_w = w - src_x; | |
2745 | } | |
2746 | if(src_y<0){ | |
2747 | obmc -= src_y*obmc_stride; | |
2748 | b_h += src_y; | |
1015631b LM |
2749 | if(!offset_dst) |
2750 | dst -= src_y*dst_stride; | |
ff158dc9 MN |
2751 | src_y=0; |
2752 | }else if(src_y + b_h> h){ | |
2753 | b_h = h - src_y; | |
791e7b83 | 2754 | } |
115329f1 | 2755 | |
ff158dc9 | 2756 | if(b_w<=0 || b_h<=0) return; |
155ec6ed | 2757 | |
cc884a35 | 2758 | assert(src_stride > 2*MB_SIZE + 5); |
1015631b LM |
2759 | if(offset_dst) |
2760 | dst += src_x + src_y*dst_stride; | |
715a97f0 | 2761 | dst8+= src_x + src_y*src_stride; |
ff158dc9 MN |
2762 | // src += src_x + src_y*src_stride; |
2763 | ||
cc884a35 MN |
2764 | ptmp= tmp + 3*tmp_step; |
2765 | block[0]= ptmp; | |
2766 | ptmp+=tmp_step; | |
115329f1 | 2767 | pred_block(s, block[0], src, tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h); |
ff158dc9 MN |
2768 | |
2769 | if(same_block(lt, rt)){ | |
2770 | block[1]= block[0]; | |
791e7b83 | 2771 | }else{ |
cc884a35 MN |
2772 | block[1]= ptmp; |
2773 | ptmp+=tmp_step; | |
ff158dc9 MN |
2774 | pred_block(s, block[1], src, tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h); |
2775 | } | |
115329f1 | 2776 | |
ff158dc9 MN |
2777 | if(same_block(lt, lb)){ |
2778 | block[2]= block[0]; | |
2779 | }else if(same_block(rt, lb)){ | |
2780 | block[2]= block[1]; | |
2781 | }else{ | |
cc884a35 MN |
2782 | block[2]= ptmp; |
2783 | ptmp+=tmp_step; | |
ff158dc9 MN |
2784 | pred_block(s, block[2], src, tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h); |
2785 | } | |
791e7b83 | 2786 | |
ff158dc9 MN |
2787 | if(same_block(lt, rb) ){ |
2788 | block[3]= block[0]; | |
2789 | }else if(same_block(rt, rb)){ | |
2790 | block[3]= block[1]; | |
2791 | }else if(same_block(lb, rb)){ | |
2792 | block[3]= block[2]; | |
2793 | }else{ | |
cc884a35 | 2794 | block[3]= ptmp; |
ff158dc9 MN |
2795 | pred_block(s, block[3], src, tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h); |
2796 | } | |
2797 | #if 0 | |
2798 | for(y=0; y<b_h; y++){ | |
2799 | for(x=0; x<b_w; x++){ | |
2800 | int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX); | |
2801 | if(add) dst[x + y*dst_stride] += v; | |
2802 | else dst[x + y*dst_stride] -= v; | |
2803 | } | |
2804 | } | |
2805 | for(y=0; y<b_h; y++){ | |
2806 | uint8_t *obmc2= obmc + (obmc_stride>>1); | |
2807 | for(x=0; x<b_w; x++){ | |
2808 | int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX); | |
2809 | if(add) dst[x + y*dst_stride] += v; | |
2810 | else dst[x + y*dst_stride] -= v; | |
2811 | } | |
2812 | } | |
2813 | for(y=0; y<b_h; y++){ | |
2814 | uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1); | |
2815 | for(x=0; x<b_w; x++){ | |
2816 | int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX); | |
2817 | if(add) dst[x + y*dst_stride] += v; | |
2818 | else dst[x + y*dst_stride] -= v; | |
2819 | } | |
2820 | } | |
2821 | for(y=0; y<b_h; y++){ | |
2822 | uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1); | |
2823 | uint8_t *obmc4= obmc3+ (obmc_stride>>1); | |
2824 | for(x=0; x<b_w; x++){ | |
2825 | int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX); | |
2826 | if(add) dst[x + y*dst_stride] += v; | |
2827 | else dst[x + y*dst_stride] -= v; | |
2828 | } | |
2829 | } | |
2830 | #else | |
2831 | for(y=0; y<b_h; y++){ | |
2832 | //FIXME ugly missue of obmc_stride | |
2833 | uint8_t *obmc1= obmc + y*obmc_stride; | |
2834 | uint8_t *obmc2= obmc1+ (obmc_stride>>1); | |
2835 | uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1); | |
2836 | uint8_t *obmc4= obmc3+ (obmc_stride>>1); | |
2837 | for(x=0; x<b_w; x++){ | |
2838 | int v= obmc1[x] * block[3][x + y*src_stride] | |
2839 | +obmc2[x] * block[2][x + y*src_stride] | |
2840 | +obmc3[x] * block[1][x + y*src_stride] | |
2841 | +obmc4[x] * block[0][x + y*src_stride]; | |
115329f1 | 2842 | |
715a97f0 | 2843 | v <<= 8 - LOG2_OBMC_MAX; |
034aff03 MN |
2844 | if(FRAC_BITS != 8){ |
2845 | v += 1<<(7 - FRAC_BITS); | |
2846 | v >>= 8 - FRAC_BITS; | |
2847 | } | |
715a97f0 MN |
2848 | if(add){ |
2849 | v += dst[x + y*dst_stride]; | |
2850 | v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS; | |
2851 | if(v&(~255)) v= ~(v>>31); | |
2852 | dst8[x + y*src_stride] = v; | |
2853 | }else{ | |
2854 | dst[x + y*dst_stride] -= v; | |
2855 | } | |
791e7b83 MN |
2856 | } |
2857 | } | |
ff158dc9 | 2858 | #endif |
791e7b83 MN |
2859 | } |
2860 | ||
a0d1931c Y |
2861 | static always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, DWTELEM * old_buffer, int plane_index, int add, int mb_y){ |
2862 | Plane *p= &s->plane[plane_index]; | |
2863 | const int mb_w= s->b_width << s->block_max_depth; | |
2864 | const int mb_h= s->b_height << s->block_max_depth; | |
2865 | int x, y, mb_x; | |
2866 | int block_size = MB_SIZE >> s->block_max_depth; | |
2867 | int block_w = plane_index ? block_size/2 : block_size; | |
2868 | const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; | |
2869 | int obmc_stride= plane_index ? block_size : 2*block_size; | |
2870 | int ref_stride= s->current_picture.linesize[plane_index]; | |
2871 | uint8_t *ref = s->last_picture.data[plane_index]; | |
2872 | uint8_t *dst8= s->current_picture.data[plane_index]; | |
2873 | int w= p->width; | |
2874 | int h= p->height; | |
2875 | START_TIMER | |
115329f1 | 2876 | |
a0d1931c Y |
2877 | if(s->keyframe || (s->avctx->debug&512)){ |
2878 | if(mb_y==mb_h) | |
2879 | return; | |
2880 | ||
2881 | if(add){ | |
86e59cc0 | 2882 | for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++) |
a0d1931c Y |
2883 | { |
2884 | // DWTELEM * line = slice_buffer_get_line(sb, y); | |
2885 | DWTELEM * line = sb->line[y]; | |
2886 | for(x=0; x<w; x++) | |
2887 | { | |
2888 | // int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1)); | |
2889 | int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1)); | |
2890 | v >>= FRAC_BITS; | |
2891 | if(v&(~255)) v= ~(v>>31); | |
2892 | dst8[x + y*ref_stride]= v; | |
2893 | } | |
2894 | } | |
2895 | }else{ | |
86e59cc0 | 2896 | for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++) |
a0d1931c Y |
2897 | { |
2898 | // DWTELEM * line = slice_buffer_get_line(sb, y); | |
2899 | DWTELEM * line = sb->line[y]; | |
2900 | for(x=0; x<w; x++) | |
2901 | { | |
2902 | line[x] -= 128 << FRAC_BITS; | |
2903 | // buf[x + y*w]-= 128<<FRAC_BITS; | |
2904 | } | |
2905 | } | |
2906 | } | |
2907 | ||
2908 | return; | |
2909 | } | |
115329f1 | 2910 | |
a0d1931c Y |
2911 | for(mb_x=0; mb_x<=mb_w; mb_x++){ |
2912 | START_TIMER | |
2913 | ||
115329f1 | 2914 | add_yblock_buffered(s, sb, old_buffer, dst8, ref, obmc, |
a0d1931c Y |
2915 | block_w*mb_x - block_w/2, |
2916 | block_w*mb_y - block_w/2, | |
2917 | block_w, block_w, | |
2918 | w, h, | |
2919 | w, ref_stride, obmc_stride, | |
2920 | mb_x - 1, mb_y - 1, | |
2921 | add, plane_index); | |
115329f1 | 2922 | |
a0d1931c Y |
2923 | STOP_TIMER("add_yblock") |
2924 | } | |
115329f1 | 2925 | |
a0d1931c Y |
2926 | STOP_TIMER("predict_slice") |
2927 | } | |
2928 | ||
f9e6ebf7 | 2929 | static always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_index, int add, int mb_y){ |
791e7b83 | 2930 | Plane *p= &s->plane[plane_index]; |
155ec6ed MN |
2931 | const int mb_w= s->b_width << s->block_max_depth; |
2932 | const int mb_h= s->b_height << s->block_max_depth; | |
f9e6ebf7 | 2933 | int x, y, mb_x; |
155ec6ed MN |
2934 | int block_size = MB_SIZE >> s->block_max_depth; |
2935 | int block_w = plane_index ? block_size/2 : block_size; | |
ff158dc9 | 2936 | const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; |
51d6a3cf | 2937 | const int obmc_stride= plane_index ? block_size : 2*block_size; |
715a97f0 | 2938 | int ref_stride= s->current_picture.linesize[plane_index]; |
791e7b83 | 2939 | uint8_t *ref = s->last_picture.data[plane_index]; |
715a97f0 | 2940 | uint8_t *dst8= s->current_picture.data[plane_index]; |
791e7b83 MN |
2941 | int w= p->width; |
2942 | int h= p->height; | |
fff6d4ea | 2943 | START_TIMER |
115329f1 | 2944 | |
ff158dc9 | 2945 | if(s->keyframe || (s->avctx->debug&512)){ |
f9e6ebf7 LM |
2946 | if(mb_y==mb_h) |
2947 | return; | |
2948 | ||
715a97f0 | 2949 | if(add){ |
86e59cc0 | 2950 | for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){ |
715a97f0 MN |
2951 | for(x=0; x<w; x++){ |
2952 | int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1)); | |
2953 | v >>= FRAC_BITS; | |
2954 | if(v&(~255)) v= ~(v>>31); | |
2955 | dst8[x + y*ref_stride]= v; | |
2956 | } | |
2957 | } | |
2958 | }else{ | |
86e59cc0 | 2959 | for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){ |
715a97f0 MN |
2960 | for(x=0; x<w; x++){ |
2961 | buf[x + y*w]-= 128<<FRAC_BITS; | |
2962 | } | |
ff158dc9 | 2963 | } |
791e7b83 | 2964 | } |
ff158dc9 MN |
2965 | |
2966 | return; | |
791e7b83 | 2967 | } |
115329f1 | 2968 | |
ff158dc9 | 2969 | for(mb_x=0; mb_x<=mb_w; mb_x++){ |
fff6d4ea | 2970 | START_TIMER |
ff158dc9 | 2971 | |
115329f1 | 2972 | add_yblock(s, buf, dst8, ref, obmc, |
ff158dc9 | 2973 | block_w*mb_x - block_w/2, |
791e7b83 | 2974 | block_w*mb_y - block_w/2, |
ff158dc9 | 2975 | block_w, block_w, |
791e7b83 | 2976 | w, h, |
ff158dc9 MN |
2977 | w, ref_stride, obmc_stride, |
2978 | mb_x - 1, mb_y - 1, | |
1015631b | 2979 | add, 1, plane_index); |
115329f1 | 2980 | |
ff158dc9 | 2981 | STOP_TIMER("add_yblock") |
791e7b83 | 2982 | } |
115329f1 | 2983 | |
f9e6ebf7 LM |
2984 | STOP_TIMER("predict_slice") |
2985 | } | |
2986 | ||
2987 | static always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){ | |
2988 | const int mb_h= s->b_height << s->block_max_depth; | |
2989 | int mb_y; | |
2990 | for(mb_y=0; mb_y<=mb_h; mb_y++) | |
2991 | predict_slice(s, buf, plane_index, add, mb_y); | |
791e7b83 MN |
2992 | } |
2993 | ||
51d6a3cf MN |
2994 | static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){ |
2995 | int i, x2, y2; | |
2996 | Plane *p= &s->plane[plane_index]; | |
2997 | const int block_size = MB_SIZE >> s->block_max_depth; | |
2998 | const int block_w = plane_index ? block_size/2 : block_size; | |
2999 | const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; | |
3000 | const int obmc_stride= plane_index ? block_size : 2*block_size; | |
3001 | const int ref_stride= s->current_picture.linesize[plane_index]; | |
3002 | uint8_t *ref= s-> last_picture.data[plane_index]; | |
51d6a3cf | 3003 | uint8_t *src= s-> input_picture.data[plane_index]; |
1015631b | 3004 | DWTELEM *dst= (DWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; |
51d6a3cf MN |
3005 | const int b_stride = s->b_width << s->block_max_depth; |
3006 | const int w= p->width; | |
3007 | const int h= p->height; | |
3008 | int index= mb_x + mb_y*b_stride; | |
3009 | BlockNode *b= &s->block[index]; | |
3010 | BlockNode backup= *b; | |
3011 | int ab=0; | |
3012 | int aa=0; | |
3013 | ||
3014 | b->type|= BLOCK_INTRA; | |
3015 | b->color[plane_index]= 0; | |
1015631b | 3016 | memset(dst, 0, obmc_stride*obmc_stride*sizeof(DWTELEM)); |
51d6a3cf MN |
3017 | |
3018 | for(i=0; i<4; i++){ | |
3019 | int mb_x2= mb_x + (i &1) - 1; | |
3020 | int mb_y2= mb_y + (i>>1) - 1; | |
3021 | int x= block_w*mb_x2 + block_w/2; | |
3022 | int y= block_w*mb_y2 + block_w/2; | |
3023 | ||
1015631b LM |
3024 | add_yblock(s, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, ref, obmc, |
3025 | x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index); | |
51d6a3cf MN |
3026 | |
3027 | for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){ | |
3028 | for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){ | |
3029 | int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride; | |
3030 | int obmc_v= obmc[index]; | |
1015631b | 3031 | int d; |
51d6a3cf MN |
3032 | if(y<0) obmc_v += obmc[index + block_w*obmc_stride]; |
3033 | if(x<0) obmc_v += obmc[index + block_w]; | |
3034 | if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride]; | |
3035 | if(x+block_w>w) obmc_v += obmc[index - block_w]; | |
3036 | //FIXME precalc this or simplify it somehow else | |
3037 | ||
1015631b LM |
3038 | d = -dst[index] + (1<<(FRAC_BITS-1)); |
3039 | dst[index] = d; | |
3040 | ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v; | |
51d6a3cf MN |
3041 | aa += obmc_v * obmc_v; //FIXME precalclate this |
3042 | } | |
3043 | } | |
3044 | } | |
3045 | *b= backup; | |
3046 | ||
3047 | return clip(((ab<<6) + aa/2)/aa, 0, 255); //FIXME we shouldnt need cliping | |
3048 | } | |
3049 | ||
b104969f LM |
3050 | static inline int get_block_bits(SnowContext *s, int x, int y, int w){ |
3051 | const int b_stride = s->b_width << s->block_max_depth; | |
3052 | const int b_height = s->b_height<< s->block_max_depth; | |
3053 | int index= x + y*b_stride; | |
3054 | BlockNode *b = &s->block[index]; | |
3055 | BlockNode *left = x ? &s->block[index-1] : &null_block; | |
3056 | BlockNode *top = y ? &s->block[index-b_stride] : &null_block; | |
3057 | BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left; | |
3058 | BlockNode *tr = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl; | |
3059 | int dmx, dmy; | |
3060 | // int mx_context= av_log2(2*ABS(left->mx - top->mx)); | |
3061 | // int my_context= av_log2(2*ABS(left->my - top->my)); | |
3062 | ||
3063 | if(x<0 || x>=b_stride || y>=b_height) | |
3064 | return 0; | |
3065 | dmx= b->mx - mid_pred(left->mx, top->mx, tr->mx); | |
3066 | dmy= b->my - mid_pred(left->my, top->my, tr->my); | |
3067 | /* | |
3068 | 1 0 0 | |
3069 | 01X 1-2 1 | |
3070 | 001XX 3-6 2-3 | |
3071 | 0001XXX 7-14 4-7 | |
3072 | 00001XXXX 15-30 8-15 | |
3073 | */ | |
3074 | //FIXME try accurate rate | |
3075 | //FIXME intra and inter predictors if surrounding blocks arent the same type | |
3076 | if(b->type & BLOCK_INTRA){ | |
3077 | return 3+2*( av_log2(2*ABS(left->color[0] - b->color[0])) | |
3078 | + av_log2(2*ABS(left->color[1] - b->color[1])) | |
3079 | + av_log2(2*ABS(left->color[2] - b->color[2]))); | |
3080 | }else | |
3081 | return 2*(1 + av_log2(2*ABS(dmx)) | |
3082 | + av_log2(2*ABS(dmy))); //FIXME kill the 2* can be merged in lambda | |
3083 | } | |
3084 | ||
1015631b | 3085 | static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){ |
51d6a3cf MN |
3086 | Plane *p= &s->plane[plane_index]; |
3087 | const int block_size = MB_SIZE >> s->block_max_depth; | |
3088 | const int block_w = plane_index ? block_size/2 : block_size; | |
3089 | const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; | |
3090 | const int obmc_stride= plane_index ? block_size : 2*block_size; | |
3091 | const int ref_stride= s->current_picture.linesize[plane_index]; | |
3092 | uint8_t *ref= s-> last_picture.data[plane_index]; | |
3093 | uint8_t *dst= s->current_picture.data[plane_index]; | |
1015631b LM |
3094 | uint8_t *src= s-> input_picture.data[plane_index]; |
3095 | DWTELEM *pred= (DWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; | |
3096 | uint8_t cur[ref_stride*2*MB_SIZE]; //FIXME alignment | |
3097 | uint8_t tmp[ref_stride*(2*MB_SIZE+5)]; | |
51d6a3cf MN |
3098 | const int b_stride = s->b_width << s->block_max_depth; |
3099 | const int b_height = s->b_height<< s->block_max_depth; | |
3100 | const int w= p->width; | |
3101 | const int h= p->height; | |
1015631b | 3102 | int distortion; |
51d6a3cf MN |
3103 | int rate= 0; |
3104 | const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp); | |
1015631b LM |
3105 | int sx= block_w*mb_x - block_w/2; |
3106 | int sy= block_w*mb_y - block_w/2; | |
3107 | const int x0= FFMAX(0,-sx); | |
3108 | const int y0= FFMAX(0,-sy); | |
3109 | const int x1= FFMIN(block_w*2, w-sx); | |
3110 | const int y1= FFMIN(block_w*2, h-sy); | |
3111 | int i,x,y; | |
3112 | ||
3113 | pred_block(s, cur, ref, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h); | |
3114 | ||
3115 | for(y=y0; y<y1; y++){ | |
3116 | const uint8_t *obmc1= obmc_edged + y*obmc_stride; | |
3117 | const DWTELEM *pred1 = pred + y*obmc_stride; | |
3118 | uint8_t *cur1 = cur + y*ref_stride; | |
3119 | uint8_t *dst1 = dst + sx + (sy+y)*ref_stride; | |
3120 | for(x=x0; x<x1; x++){ | |
3121 | int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX); | |
3122 | v = (v + pred1[x]) >> FRAC_BITS; | |
3123 | if(v&(~255)) v= ~(v>>31); | |
3124 | dst1[x] = v; | |
51d6a3cf | 3125 | } |
1015631b | 3126 | } |
51d6a3cf | 3127 | |
1015631b LM |
3128 | //FIXME sad/ssd can be broken up, but wavelet cmp should be one 32x32 block |
3129 | if(block_w==16){ | |
3130 | distortion = 0; | |
3131 | for(i=0; i<4; i++){ | |
3132 | int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride; | |
3133 | distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16); | |
3134 | } | |
3135 | }else{ | |
3136 | assert(block_w==8); | |
3137 | distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2); | |
51d6a3cf MN |
3138 | } |
3139 | ||
3140 | if(plane_index==0){ | |
3141 | for(i=0; i<4; i++){ | |
3142 | /* ..RRr | |
3143 | * .RXx. | |
3144 | * rxx.. | |
3145 | */ | |
b104969f LM |
3146 | rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1); |
3147 | } | |
3148 | } | |
3149 | return distortion + rate*penalty_factor; | |
3150 | } | |
3151 | ||
3152 | static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){ | |
3153 | int i, y2; | |
3154 | Plane *p= &s->plane[plane_index]; | |
3155 | const int block_size = MB_SIZE >> s->block_max_depth; | |
3156 | const int block_w = plane_index ? block_size/2 : block_size; | |
3157 | const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; | |
3158 | const int obmc_stride= plane_index ? block_size : 2*block_size; | |
3159 | const int ref_stride= s->current_picture.linesize[plane_index]; | |
3160 | uint8_t *ref= s-> last_picture.data[plane_index]; | |
3161 | uint8_t *dst= s->current_picture.data[plane_index]; | |
3162 | uint8_t *src= s-> input_picture.data[plane_index]; | |
3163 | const static DWTELEM zero_dst[4096]; //FIXME | |
3164 | const int b_stride = s->b_width << s->block_max_depth; | |
3165 | const int b_height = s->b_height<< s->block_max_depth; | |
3166 | const int w= p->width; | |
3167 | const int h= p->height; | |
3168 | int distortion= 0; | |
3169 | int rate= 0; | |
3170 | const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp); | |
3171 | ||
3172 | for(i=0; i<9; i++){ | |
3173 | int mb_x2= mb_x + (i%3) - 1; | |
3174 | int mb_y2= mb_y + (i/3) - 1; | |
3175 | int x= block_w*mb_x2 + block_w/2; | |
3176 | int y= block_w*mb_y2 + block_w/2; | |
3177 | ||
3178 | add_yblock(s, zero_dst, dst, ref, obmc, | |
3179 | x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index); | |
3180 | ||
3181 | //FIXME find a cleaner/simpler way to skip the outside stuff | |
3182 | for(y2= y; y2<0; y2++) | |
3183 | memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w); | |
3184 | for(y2= h; y2<y+block_w; y2++) | |
3185 | memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w); | |
3186 | if(x<0){ | |
3187 | for(y2= y; y2<y+block_w; y2++) | |
3188 | memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x); | |
51d6a3cf | 3189 | } |
b104969f LM |
3190 | if(x+block_w > w){ |
3191 | for(y2= y; y2<y+block_w; y2++) | |
3192 | memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w); | |
3193 | } | |
3194 | ||
3195 | assert(block_w== 8 || block_w==16); | |
3196 | distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w); | |
51d6a3cf MN |
3197 | } |
3198 | ||
b104969f LM |
3199 | if(plane_index==0){ |
3200 | BlockNode *b= &s->block[mb_x+mb_y*b_stride]; | |
3201 | int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1); | |
3202 | ||
3203 | /* ..RRRr | |
3204 | * .RXXx. | |
3205 | * .RXXx. | |
3206 | * rxxx. | |
3207 | */ | |
3208 | if(merged) | |
3209 | rate = get_block_bits(s, mb_x, mb_y, 2); | |
3210 | for(i=merged?4:0; i<9; i++){ | |
3211 | static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}}; | |
3212 | rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1); | |
3213 | } | |
3214 | } | |
51d6a3cf MN |
3215 | return distortion + rate*penalty_factor; |
3216 | } | |
3217 | ||
1015631b | 3218 | static always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){ |
51d6a3cf MN |
3219 | const int b_stride= s->b_width << s->block_max_depth; |
3220 | BlockNode *block= &s->block[mb_x + mb_y * b_stride]; | |
3221 | BlockNode backup= *block; | |
3222 | int rd, index, value; | |
3223 | ||
3224 | assert(mb_x>=0 && mb_y>=0); | |
735f9f34 | 3225 | assert(mb_x<b_stride); |
51d6a3cf MN |
3226 | |
3227 | if(intra){ | |
3228 | block->color[0] = p[0]; | |
3229 | block->color[1] = p[1]; | |
3230 | block->color[2] = p[2]; | |
3231 | block->type |= BLOCK_INTRA; | |
3232 | }else{ | |
3233 | index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1); | |
3234 | value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6); | |
3235 | if(s->me_cache[index] == value) | |
3236 | return 0; | |
3237 | s->me_cache[index]= value; | |
3238 | ||
3239 | block->mx= p[0]; | |
3240 | block->my= p[1]; | |
3241 | block->type &= ~BLOCK_INTRA; | |
3242 | } | |
3243 | ||
1015631b | 3244 | rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged); |
51d6a3cf MN |
3245 | |
3246 | //FIXME chroma | |
3247 | if(rd < *best_rd){ | |
3248 | *best_rd= rd; | |
3249 | return 1; | |
3250 | }else{ | |
3251 | *block= backup; | |
3252 | return 0; | |
3253 | } | |
3254 | } | |
3255 | ||
52137f2f | 3256 | /* special case for int[2] args we discard afterward, fixes compilation prob with gcc 2.95 */ |
1015631b | 3257 | static always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int intra, const uint8_t *obmc_edged, int *best_rd){ |
52137f2f | 3258 | int p[2] = {p0, p1}; |
1015631b | 3259 | return check_block(s, mb_x, mb_y, p, intra, obmc_edged, best_rd); |
52137f2f FR |
3260 | } |
3261 | ||
b104969f LM |
3262 | static always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int *best_rd){ |
3263 | const int b_stride= s->b_width << s->block_max_depth; | |
3264 | BlockNode *block= &s->block[mb_x + mb_y * b_stride]; | |
3265 | BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]}; | |
3266 | int rd, index, value; | |
3267 | ||
3268 | assert(mb_x>=0 && mb_y>=0); | |
3269 | assert(mb_x<b_stride); | |
3270 | assert(((mb_x|mb_y)&1) == 0); | |
3271 | ||
3272 | index= (p0 + 31*p1) & (ME_CACHE_SIZE-1); | |
3273 | value= s->me_cache_generation + (p0>>10) + (p1<<6); | |
3274 | if(s->me_cache[index] == value) | |
3275 | return 0; | |
3276 | s->me_cache[index]= value; | |
3277 | ||
3278 | block->mx= p0; | |
3279 | block->my= p1; | |
3280 | block->type &= ~BLOCK_INTRA; | |
3281 | block[1]= block[b_stride]= block[b_stride+1]= *block; | |
3282 | ||
3283 | rd= get_4block_rd(s, mb_x, mb_y, 0); | |
3284 | ||
3285 | //FIXME chroma | |
3286 | if(rd < *best_rd){ | |
3287 | *best_rd= rd; | |
3288 | return 1; | |
3289 | }else{ | |
3290 | block[0]= backup[0]; | |
3291 | block[1]= backup[1]; | |
3292 | block[b_stride]= backup[2]; | |
3293 | block[b_stride+1]= backup[3]; | |
3294 | return 0; | |
3295 | } | |
3296 | } | |
3297 | ||
51d6a3cf MN |
3298 | static void iterative_me(SnowContext *s){ |
3299 | int pass, mb_x, mb_y; | |
3300 | const int b_width = s->b_width << s->block_max_depth; | |
3301 | const int b_height= s->b_height << s->block_max_depth; | |
3302 | const int b_stride= b_width; | |
3303 | int color[3]; | |
3304 | ||
3305 | for(pass=0; pass<50; pass++){ | |
3306 | int change= 0; | |
3307 | ||
3308 | for(mb_y= 0; mb_y<b_height; mb_y++){ | |
3309 | for(mb_x= 0; mb_x<b_width; mb_x++){ | |
3310 | int dia_change, i, j; | |
3311 | int best_rd= INT_MAX; | |
3312 | BlockNode backup; | |
3313 | const int index= mb_x + mb_y * b_stride; | |
3314 | BlockNode *block= &s->block[index]; | |
c8a596d2 MN |
3315 | BlockNode *tb = mb_y ? &s->block[index-b_stride ] : &null_block; |
3316 | BlockNode *lb = mb_x ? &s->block[index -1] : &null_block; | |
3317 | BlockNode *rb = mb_x+1<b_width ? &s->block[index +1] : &null_block; | |
3318 | BlockNode *bb = mb_y+1<b_height ? &s->block[index+b_stride ] : &null_block; | |
3319 | BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : &null_block; | |
3320 | BlockNode *trb= mb_x+1<b_width && mb_y ? &s->block[index-b_stride+1] : &null_block; | |
3321 | BlockNode *blb= mb_x && mb_y+1<b_height ? &s->block[index+b_stride-1] : &null_block; | |
3322 | BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : &null_block; | |
1015631b LM |
3323 | const int b_w= (MB_SIZE >> s->block_max_depth); |
3324 | uint8_t obmc_edged[b_w*2][b_w*2]; | |
51d6a3cf MN |
3325 | |
3326 | if(pass && (block->type & BLOCK_OPT)) | |
3327 | continue; | |
3328 | block->type |= BLOCK_OPT; | |
3329 | ||
3330 | backup= *block; | |
3331 | ||
3332 | if(!s->me_cache_generation) | |
3333 | memset(s->me_cache, 0, sizeof(s->me_cache)); | |
3334 | s->me_cache_generation += 1<<22; | |
3335 | ||
1015631b LM |
3336 | //FIXME precalc |
3337 | { | |
3338 | int x, y; | |
3339 | memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4); | |
3340 | if(mb_x==0) | |
3341 | for(y=0; y<b_w*2; y++) | |
3342 | memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w); | |
3343 | if(mb_x==b_stride-1) | |
3344 | for(y=0; y<b_w*2; y++) | |
3345 | memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w); | |
3346 | if(mb_y==0){ | |
3347 | for(x=0; x<b_w*2; x++) | |
3348 | obmc_edged[0][x] += obmc_edged[b_w-1][x]; | |
3349 | for(y=1; y<b_w; y++) | |
3350 | memcpy(obmc_edged[y], obmc_edged[0], b_w*2); | |
3351 | } | |
3352 | if(mb_y==b_height-1){ | |
3353 | for(x=0; x<b_w*2; x++) | |
3354 | obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x]; | |
3355 | for(y=b_w; y<b_w*2-1; y++) | |
3356 | memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2); | |
3357 | } | |
3358 | } | |
3359 | ||
3360 | //skip stuff outside the picture | |
3361 | if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1) | |
3362 | { | |
3363 | uint8_t *src= s-> input_picture.data[0]; | |
3364 | uint8_t *dst= s->current_picture.data[0]; | |
3365 | const int stride= s->current_picture.linesize[0]; | |
3366 | const int block_w= MB_SIZE >> s->block_max_depth; | |
3367 | const int sx= block_w*mb_x - block_w/2; | |
3368 | const int sy= block_w*mb_y - block_w/2; | |
3369 | const int w= s->plane[0].width; | |
3370 | const int h= s->plane[0].height; | |
3371 | int y; | |
3372 | ||
3373 | for(y=sy; y<0; y++) | |
3374 | memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2); | |
3375 | for(y=h; y<sy+block_w*2; y++) | |
3376 | memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2); | |
3377 | if(sx<0){ | |
3378 | for(y=sy; y<sy+block_w*2; y++) | |
3379 | memcpy(dst + sx + y*stride, src + sx + y*stride, -sx); | |
3380 | } | |
3381 | if(sx+block_w*2 > w){ | |
3382 | for(y=sy; y<sy+block_w*2; y++) | |
3383 | memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w); | |
3384 | } | |
3385 | } | |
3386 | ||
3387 | // intra(black) = neighbors' contribution to the current block | |
3388 | for(i=0; i<3; i++) | |
3389 | color[i]= get_dc(s, mb_x, mb_y, i); | |
3390 | ||
51d6a3cf | 3391 | // get previous score (cant be cached due to OBMC) |
1015631b LM |
3392 | check_block_inter(s, mb_x, mb_y, block->mx, block->my, 0, *obmc_edged, &best_rd); |
3393 | check_block_inter(s, mb_x, mb_y, 0, 0, 0, *obmc_edged, &best_rd); | |
3394 | check_block_inter(s, mb_x, mb_y, tb->mx, tb->my, 0, *obmc_edged, &best_rd); | |
3395 | check_block_inter(s, mb_x, mb_y, lb->mx, lb->my, 0, *obmc_edged, &best_rd); | |
3396 | check_block_inter(s, mb_x, mb_y, rb->mx, rb->my, 0, *obmc_edged, &best_rd); | |
3397 | check_block_inter(s, mb_x, mb_y, bb->mx, bb->my, 0, *obmc_edged, &best_rd); | |
51d6a3cf MN |
3398 | |
3399 | /* fullpel ME */ | |
3400 | //FIXME avoid subpel interpol / round to nearest integer | |
3401 | do{ | |
3402 | dia_change=0; | |
3403 | for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){ | |
3404 | for(j=0; j<i; j++){ | |
1015631b LM |
3405 | dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), 0, *obmc_edged, &best_rd); |
3406 | dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), 0, *obmc_edged, &best_rd); | |
3407 | dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), 0, *obmc_edged, &best_rd); | |
3408 | dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), 0, *obmc_edged, &best_rd); | |
51d6a3cf MN |
3409 | } |
3410 | } | |
3411 | }while(dia_change); | |
3412 | /* subpel ME */ | |
3413 | do{ | |
3414 | static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},}; | |
3415 | dia_change=0; | |
3416 | for(i=0; i<8; i++) | |
1015631b | 3417 | dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], 0, *obmc_edged, &best_rd); |
51d6a3cf MN |
3418 | }while(dia_change); |
3419 | //FIXME or try the standard 2 pass qpel or similar | |
13705b69 | 3420 | #if 1 |
1015631b | 3421 | check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd); |
51d6a3cf | 3422 | //FIXME RD style color selection |
13705b69 | 3423 | #endif |
51d6a3cf MN |
3424 | if(!same_block(block, &backup)){ |
3425 | if(tb != &null_block) tb ->type &= ~BLOCK_OPT; | |
3426 | if(lb != &null_block) lb ->type &= ~BLOCK_OPT; | |
3427 | if(rb != &null_block) rb ->type &= ~BLOCK_OPT; | |
3428 | if(bb != &null_block) bb ->type &= ~BLOCK_OPT; | |
3429 | if(tlb!= &null_block) tlb->type &= ~BLOCK_OPT; | |
3430 | if(trb!= &null_block) trb->type &= ~BLOCK_OPT; | |
3431 | if(blb!= &null_block) blb->type &= ~BLOCK_OPT; | |
3432 | if(brb!= &null_block) brb->type &= ~BLOCK_OPT; | |
3433 | change ++; | |
3434 | } | |
3435 | } | |
3436 | } | |
3437 | av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change); | |
3438 | if(!change) | |
3439 | break; | |
3440 | } | |
b104969f LM |
3441 | |
3442 | if(s->block_max_depth == 1){ | |
3443 | int change= 0; | |
3444 | for(mb_y= 0; mb_y<b_height; mb_y+=2){ | |
3445 | for(mb_x= 0; mb_x<b_width; mb_x+=2){ | |
3446 | int dia_change, i, j; | |
3447 | int best_rd, init_rd; | |
3448 | const int index= mb_x + mb_y * b_stride; | |
3449 | BlockNode *b[4]; | |
3450 | ||
3451 | b[0]= &s->block[index]; | |
3452 | b[1]= b[0]+1; | |
3453 | b[2]= b[0]+b_stride; | |
3454 | b[3]= b[2]+1; | |
3455 | if(same_block(b[0], b[1]) && | |
3456 | same_block(b[0], b[2]) && | |
3457 | same_block(b[0], b[3])) | |
3458 | continue; | |
3459 | ||
3460 | if(!s->me_cache_generation) | |
3461 | memset(s->me_cache, 0, sizeof(s->me_cache)); | |
3462 | s->me_cache_generation += 1<<22; | |
3463 | ||
3464 | init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0); | |
3465 | ||
3466 | check_4block_inter(s, mb_x, mb_y, | |
3467 | (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2, | |
3468 | (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, &best_rd); | |
3469 | ||
3470 | for(i=0; i<4; i++) | |
3471 | if(!(b[i]->type&BLOCK_INTRA)) | |
3472 | check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, &best_rd); | |
3473 | ||
3474 | if(init_rd != best_rd) | |
3475 | change++; | |
3476 | } | |
3477 | } | |
3478 | av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4); | |
3479 | } | |
51d6a3cf MN |
3480 | } |
3481 | ||
791e7b83 MN |
3482 | static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int bias){ |
3483 | const int level= b->level; | |
3484 | const int w= b->width; | |
3485 | const int h= b->height; | |
c97de57c MN |
3486 | const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16); |
3487 | const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); | |
da66b631 | 3488 | int x,y, thres1, thres2; |
62ab0b78 | 3489 | // START_TIMER |
791e7b83 | 3490 | |
93fbdb5a | 3491 | if(s->qlog == LOSSLESS_QLOG) return; |
115329f1 | 3492 | |
791e7b83 | 3493 | bias= bias ? 0 : (3*qmul)>>3; |
da66b631 MN |
3494 | thres1= ((qmul - bias)>>QEXPSHIFT) - 1; |
3495 | thres2= 2*thres1; | |
115329f1 | 3496 | |
791e7b83 MN |
3497 | if(!bias){ |
3498 | for(y=0; y<h; y++){ | |
3499 | for(x=0; x<w; x++){ | |
da66b631 | 3500 | int i= src[x + y*stride]; |
115329f1 | 3501 | |
da66b631 MN |
3502 | if((unsigned)(i+thres1) > thres2){ |
3503 | if(i>=0){ | |
3504 | i<<= QEXPSHIFT; | |
3505 | i/= qmul; //FIXME optimize | |
3506 | src[x + y*stride]= i; | |
3507 | }else{ | |
3508 | i= -i; | |
3509 | i<<= QEXPSHIFT; | |
3510 | i/= qmul; //FIXME optimize | |
3511 | src[x + y*stride]= -i; | |
3512 | } | |
3513 | }else | |
3514 | src[x + y*stride]= 0; | |
791e7b83 MN |
3515 | } |
3516 | } | |
3517 | }else{ | |
3518 | for(y=0; y<h; y++){ | |
3519 | for(x=0; x<w; x++){ | |
115329f1 DB |
3520 | int i= src[x + y*stride]; |
3521 | ||
da66b631 MN |
3522 | if((unsigned)(i+thres1) > thres2){ |
3523 | if(i>=0){ | |
3524 | i<<= QEXPSHIFT; | |
3525 | i= (i + bias) / qmul; //FIXME optimize | |
3526 | src[x + y*stride]= i; | |
3527 | }else{ | |
3528 | i= -i; | |
3529 | i<<= QEXPSHIFT; | |
3530 | i= (i + bias) / qmul; / |