Commit | Line | Data |
---|---|---|
791e7b83 MN |
1 | /* |
2 | * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at> | |
3 | * | |
4 | * This library is free software; you can redistribute it and/or | |
5 | * modify it under the terms of the GNU Lesser General Public | |
6 | * License as published by the Free Software Foundation; either | |
7 | * version 2 of the License, or (at your option) any later version. | |
8 | * | |
9 | * This library is distributed in the hope that it will be useful, | |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
12 | * Lesser General Public License for more details. | |
13 | * | |
14 | * You should have received a copy of the GNU Lesser General Public | |
15 | * License along with this library; if not, write to the Free Software | |
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
17 | */ | |
18 | ||
19 | #include "avcodec.h" | |
20 | #include "common.h" | |
21 | #include "dsputil.h" | |
28869757 MN |
22 | |
23 | #include "rangecoder.h" | |
24 | #define MID_STATE 128 | |
791e7b83 MN |
25 | |
26 | #include "mpegvideo.h" | |
27 | ||
28 | #undef NDEBUG | |
29 | #include <assert.h> | |
30 | ||
31 | #define MAX_DECOMPOSITIONS 8 | |
32 | #define MAX_PLANES 4 | |
33 | #define DWTELEM int | |
a0a74ad9 | 34 | #define QSHIFT 5 |
c97de57c | 35 | #define QROOT (1<<QSHIFT) |
93fbdb5a | 36 | #define LOSSLESS_QLOG -128 |
034aff03 | 37 | #define FRAC_BITS 8 |
791e7b83 MN |
38 | |
39 | static const int8_t quant3[256]={ | |
40 | 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
41 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
42 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
43 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
44 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
45 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
46 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
47 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
48 | -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
49 | -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
50 | -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
51 | -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
52 | -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
53 | -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
54 | -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
55 | -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0, | |
56 | }; | |
57 | static const int8_t quant3b[256]={ | |
58 | 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
59 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
60 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
61 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
62 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
63 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
64 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
65 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
66 | -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
67 | -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
68 | -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
69 | -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
70 | -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
71 | -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
72 | -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
73 | -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
74 | }; | |
538a3841 MN |
75 | static const int8_t quant3bA[256]={ |
76 | 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
77 | 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
78 | 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
79 | 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
80 | 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
81 | 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
82 | 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
83 | 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
84 | 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
85 | 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
86 | 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
87 | 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
88 | 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
89 | 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
90 | 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
91 | 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
92 | }; | |
791e7b83 MN |
93 | static const int8_t quant5[256]={ |
94 | 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
95 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
96 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
97 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
98 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
99 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
100 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
101 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
102 | -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
103 | -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
104 | -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
105 | -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
106 | -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
107 | -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
108 | -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
109 | -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1, | |
110 | }; | |
111 | static const int8_t quant7[256]={ | |
112 | 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
113 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
114 | 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, | |
115 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | |
116 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | |
117 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | |
118 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | |
119 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | |
120 | -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3, | |
121 | -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3, | |
122 | -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3, | |
123 | -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3, | |
124 | -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3, | |
125 | -3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2, | |
126 | -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
127 | -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1, | |
128 | }; | |
129 | static const int8_t quant9[256]={ | |
130 | 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, | |
131 | 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
132 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
133 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
134 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
135 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
136 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
137 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
138 | -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, | |
139 | -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, | |
140 | -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, | |
141 | -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, | |
142 | -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, | |
143 | -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, | |
144 | -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3, | |
145 | -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1, | |
146 | }; | |
147 | static const int8_t quant11[256]={ | |
148 | 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, | |
149 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
150 | 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
151 | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
152 | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
153 | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
154 | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
155 | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
156 | -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, | |
157 | -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, | |
158 | -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, | |
159 | -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, | |
160 | -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, | |
161 | -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4, | |
162 | -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, | |
163 | -4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1, | |
164 | }; | |
165 | static const int8_t quant13[256]={ | |
166 | 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, | |
167 | 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
168 | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
169 | 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, | |
170 | 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, | |
171 | 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, | |
172 | 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, | |
173 | 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, | |
174 | -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6, | |
175 | -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6, | |
176 | -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6, | |
177 | -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6, | |
178 | -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5, | |
179 | -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, | |
180 | -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, | |
181 | -4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1, | |
182 | }; | |
183 | ||
715a97f0 MN |
184 | #define LOG2_OBMC_MAX 6 |
185 | #define OBMC_MAX (1<<(LOG2_OBMC_MAX)) | |
791e7b83 MN |
186 | #if 0 //64*cubic |
187 | static const uint8_t obmc32[1024]={ | |
188 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
189 | 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, | |
190 | 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, | |
191 | 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0, | |
192 | 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0, | |
193 | 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0, | |
194 | 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0, | |
195 | 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0, | |
196 | 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0, | |
197 | 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0, | |
198 | 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0, | |
199 | 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0, | |
200 | 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0, | |
201 | 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0, | |
202 | 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0, | |
203 | 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0, | |
204 | 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0, | |
205 | 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0, | |
206 | 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0, | |
207 | 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0, | |
208 | 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0, | |
209 | 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0, | |
210 | 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0, | |
211 | 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0, | |
212 | 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0, | |
213 | 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0, | |
214 | 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0, | |
215 | 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0, | |
216 | 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0, | |
217 | 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, | |
218 | 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, | |
219 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
220 | //error:0.000022 | |
221 | }; | |
222 | static const uint8_t obmc16[256]={ | |
223 | 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, | |
224 | 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0, | |
225 | 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0, | |
226 | 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0, | |
227 | 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0, | |
228 | 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0, | |
229 | 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1, | |
230 | 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1, | |
231 | 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1, | |
232 | 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1, | |
233 | 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0, | |
234 | 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0, | |
235 | 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0, | |
236 | 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0, | |
237 | 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0, | |
238 | 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, | |
239 | //error:0.000033 | |
240 | }; | |
241 | #elif 1 // 64*linear | |
242 | static const uint8_t obmc32[1024]={ | |
243 | 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, | |
244 | 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0, | |
245 | 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0, | |
246 | 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0, | |
247 | 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1, | |
248 | 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1, | |
249 | 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1, | |
250 | 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1, | |
251 | 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1, | |
252 | 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1, | |
253 | 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1, | |
254 | 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1, | |
255 | 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2, | |
256 | 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2, | |
257 | 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2, | |
258 | 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2, | |
259 | 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2, | |
260 | 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2, | |
261 | 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2, | |
262 | 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2, | |
263 | 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1, | |
264 | 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1, | |
265 | 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1, | |
266 | 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1, | |
267 | 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1, | |
268 | 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1, | |
269 | 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1, | |
270 | 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1, | |
271 | 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0, | |
272 | 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0, | |
273 | 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0, | |
274 | 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, | |
275 | //error:0.000020 | |
276 | }; | |
277 | static const uint8_t obmc16[256]={ | |
278 | 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0, | |
279 | 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1, | |
280 | 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1, | |
281 | 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2, | |
282 | 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2, | |
283 | 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3, | |
284 | 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3, | |
285 | 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4, | |
286 | 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4, | |
287 | 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3, | |
288 | 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3, | |
289 | 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2, | |
290 | 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2, | |
291 | 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1, | |
292 | 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1, | |
293 | 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0, | |
294 | //error:0.000015 | |
295 | }; | |
296 | #else //64*cos | |
297 | static const uint8_t obmc32[1024]={ | |
298 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
299 | 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, | |
300 | 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, | |
301 | 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0, | |
302 | 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0, | |
303 | 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0, | |
304 | 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0, | |
305 | 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0, | |
306 | 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0, | |
307 | 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0, | |
308 | 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0, | |
309 | 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0, | |
310 | 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0, | |
311 | 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0, | |
312 | 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0, | |
313 | 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0, | |
314 | 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0, | |
315 | 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0, | |
316 | 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0, | |
317 | 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0, | |
318 | 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0, | |
319 | 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0, | |
320 | 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0, | |
321 | 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0, | |
322 | 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0, | |
323 | 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0, | |
324 | 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0, | |
325 | 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0, | |
326 | 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0, | |
327 | 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, | |
328 | 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, | |
329 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
330 | //error:0.000022 | |
331 | }; | |
332 | static const uint8_t obmc16[256]={ | |
333 | 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, | |
334 | 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0, | |
335 | 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0, | |
336 | 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0, | |
337 | 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0, | |
338 | 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1, | |
339 | 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1, | |
340 | 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0, | |
341 | 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0, | |
342 | 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1, | |
343 | 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1, | |
344 | 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0, | |
345 | 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0, | |
346 | 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0, | |
347 | 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0, | |
348 | 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, | |
349 | //error:0.000022 | |
350 | }; | |
351 | #endif | |
352 | ||
155ec6ed MN |
/*
 * 8x8 overlapped-block window, row-major.
 * The table is separable: entry (y,x) equals w[y]*w[x] with the 1-D ramp
 * w = {1,3,5,7,7,5,3,1}.  Two copies of w shifted by 4 samples add up to 8
 * at every position, so the 2-D weights of the overlapping windows add up
 * to 64 (== OBMC_MAX) -- presumably the overlap layout used by the
 * prediction code; confirm against the caller.
 */
353 | //linear *64 |
354 | static const uint8_t obmc8[64]={ | |
355 | 1, 3, 5, 7, 7, 5, 3, 1, | |
356 | 3, 9,15,21,21,15, 9, 3, | |
357 | 5,15,25,35,35,25,15, 5, | |
358 | 7,21,35,49,49,35,21, 7, | |
359 | 7,21,35,49,49,35,21, 7, | |
360 | 5,15,25,35,35,25,15, 5, | |
361 | 3, 9,15,21,21,15, 9, 3, | |
362 | 1, 3, 5, 7, 7, 5, 3, 1, | |
363 | //error:0.000000 | |
364 | }; | |
365 | ||
/*
 * 4x4 overlapped-block window, row-major.
 * Separable like the 8x8 table: entry (y,x) equals w[y]*w[x] with
 * w = {2,6,6,2}.  Two copies of w shifted by 2 samples add up to 8, so
 * overlapping windows add up to 64 (== OBMC_MAX) -- presumably the overlap
 * layout used by the prediction code; confirm against the caller.
 */
366 | //linear *64 | |
367 | static const uint8_t obmc4[16]={ | |
368 | 4,12,12, 4, | |
369 | 12,36,36,12, | |
370 | 12,36,36,12, | |
371 | 4,12,12, 4, | |
372 | //error:0.000000 | |
373 | }; | |
374 | ||
/*
 * Window tables ordered from the largest to the smallest block:
 * index 0 -> 32x32, 1 -> 16x16, 2 -> 8x8, 3 -> 4x4.
 * NOTE(review): the index is presumably a log2 size step chosen by the
 * caller; the code using it is not in this part of the file.
 */
375 | static const uint8_t *obmc_tab[4]={ | |
376 | obmc32, obmc16, obmc8, obmc4 | |
377 | }; | |
378 | ||
/*
 * Per-block side information.
 *   mx, my   - signed 16-bit pair; presumably the motion vector components
 *              (inferred from the names -- confirm against the users).
 *   color[3] - three bytes; presumably one value per plane -- confirm.
 *   type     - flag byte; BLOCK_INTRA (1) is the only flag defined here,
 *              TYPE_SPLIT and TYPE_NOCOLOR are commented out.
 *   level    - separate byte for now, see the FIXME about merging it into
 *              type.
 */
379 | typedef struct BlockNode{ | |
380 | int16_t mx; | |
381 | int16_t my; | |
382 | uint8_t color[3]; | |
383 | uint8_t type; | |
384 | //#define TYPE_SPLIT 1 | |
385 | #define BLOCK_INTRA 1 | |
386 | //#define TYPE_NOCOLOR 4 | |
387 | uint8_t level; //FIXME merge into type? | |
388 | }BlockNode; | |
389 | ||
390 | #define LOG2_MB_SIZE 4 | |
391 | #define MB_SIZE (1<<LOG2_MB_SIZE) | |
392 | ||
a0d1931c Y |
/*
 * One (x, coeff) pair: a signed 16-bit x and an unsigned 16-bit coeff.
 * SubBand keeps a pointer to an array of these (x_coeff).
 * NOTE(review): presumably x is a column index and coeff the coefficient
 * stored at that column; the code filling it is not visible here -- confirm.
 */
393 | typedef struct x_and_coeff{ |
394 | int16_t x; | |
538a3841 | 395 | uint16_t coeff; |
a0d1931c Y |
396 | } x_and_coeff; |
397 | ||
791e7b83 MN |
/*
 * Description of one wavelet sub-band.
 *   level, stride, width, height - integer geometry of the band.
 *   qlog        - quantiser in log units, see the field comment.
 *   buf         - pointer to the band's DWTELEM coefficients.
 *   buf_x_offset, buf_y_offset, stride_line - integer offsets / line stride;
 *                 presumably the band's position when addressed line by
 *                 line through a slice_buffer -- confirm against the users.
 *   x_coeff     - pointer to x_and_coeff entries.
 *   parent      - pointer to another SubBand (presumably the same
 *                 orientation one level coarser -- confirm).
 *   state       - 7 + 512 rows of 32 bytes.  32 bytes is exactly the range
 *                 put_symbol()/get_symbol() address from a state pointer
 *                 (offsets 0..31), so each row presumably is one adaptive
 *                 context set.
 */
398 | typedef struct SubBand{ |
399 | int level; | |
400 | int stride; | |
401 | int width; | |
402 | int height; | |
403 | int qlog; ///< log(qscale)/log[2^(1/6)] | |
404 | DWTELEM *buf; | |
a0d1931c Y |
405 | int buf_x_offset; |
406 | int buf_y_offset; | |
407 | int stride_line; ///< Stride measured in lines, not pixels. | |
408 | x_and_coeff * x_coeff; | |
791e7b83 MN |
409 | struct SubBand *parent; |
410 | uint8_t state[/*7*2*/ 7 + 512][32]; | |
411 | }SubBand; | |
412 | ||
/*
 * One image plane: its width and height plus its sub-bands, stored as
 * band[decomposition level][index] with up to MAX_DECOMPOSITIONS levels and
 * 4 SubBand slots per level.
 * NOTE(review): the meaning of the 4 slots (presumably the band
 * orientations) is not shown in this part of the file.
 */
413 | typedef struct Plane{ | |
414 | int width; | |
415 | int height; | |
416 | SubBand band[MAX_DECOMPOSITIONS][4]; | |
417 | }Plane; | |
418 | ||
a0d1931c Y |
419 | /** Used to minimize the amount of memory used in order to optimize cache performance. **/ |
/*
 * Pool of line buffers managed by the slice_buffer_*() helpers:
 *   line           - line_count pointers, zero-initialised by
 *                    slice_buffer_init(); line[n] is non-NULL only while
 *                    line n has a buffer assigned (set by
 *                    slice_buffer_load_line(), cleared by
 *                    slice_buffer_release()).
 *   data_stack     - data_count pointers to buffers of line_width DWTELEMs
 *                    each; entries 0..data_stack_top are the buffers that
 *                    are currently unassigned.
 *   data_stack_top - index of the next buffer to hand out; starts at
 *                    data_count - 1 and is -1 when every buffer is in use.
 *   line_count     - number of entries in line.
 *   line_width     - number of DWTELEMs per buffer.
 *   data_count     - number of buffers allocated at init time.
 *   base_buffer    - stored by slice_buffer_init(); the slice_buffer_*()
 *                    helpers themselves never read it.
 */
420 | typedef struct { | |
421 | DWTELEM * * line; ///< For use by idwt and predict_slices. | |
422 | DWTELEM * * data_stack; ///< Used for internal purposes. | |
423 | int data_stack_top; | |
424 | int line_count; | |
425 | int line_width; | |
426 | int data_count; | |
427 | DWTELEM * base_buffer; ///< Buffer that this structure is caching. | |
428 | } slice_buffer; | |
429 | ||
791e7b83 MN |
/*
 * Codec instance state.  What can be read off the declarations:
 *   avctx, c, dsp      - codec context pointer, range coder and DSP
 *                        function context.
 *   *_picture          - four AVFrame members (input, current, last,
 *                        mconly).
 *   header_state       - 32 bytes, the size of one state set as addressed
 *                        by put_symbol()/get_symbol() (offsets 0..31).
 *   block_state        - 128 + 32*128 bytes of further state.
 *   keyframe .. qbias  - plain integer parameters; QBIAS_SHIFT (3) is
 *                        defined next to qbias.  NOTE(review): their exact
 *                        semantics are set by code outside this part of
 *                        the file.
 *   b_width, b_height, block_max_depth, block - block grid description and
 *                        pointer to the BlockNode array (sizes presumably
 *                        b_width x b_height -- confirm).
 *   plane              - MAX_PLANES Plane entries.
 *   sb                 - slice_buffer line pool.
 *   m                  - MpegEncContext kept only for motion estimation,
 *                        see the comment on the member.
 */
430 | typedef struct SnowContext{ |
431 | // MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX) | |
432 | ||
433 | AVCodecContext *avctx; | |
28869757 | 434 | RangeCoder c; |
791e7b83 MN |
435 | DSPContext dsp; |
436 | AVFrame input_picture; | |
437 | AVFrame current_picture; | |
438 | AVFrame last_picture; | |
439 | AVFrame mconly_picture; | |
440 | // uint8_t q_context[16]; | |
441 | uint8_t header_state[32]; | |
155ec6ed | 442 | uint8_t block_state[128 + 32*128]; |
791e7b83 | 443 | int keyframe; |
19aa028d | 444 | int always_reset; |
791e7b83 MN |
445 | int version; |
446 | int spatial_decomposition_type; | |
447 | int temporal_decomposition_type; | |
448 | int spatial_decomposition_count; | |
449 | int temporal_decomposition_count; | |
450 | DWTELEM *spatial_dwt_buffer; | |
791e7b83 MN |
451 | int colorspace_type; |
452 | int chroma_h_shift; | |
453 | int chroma_v_shift; | |
454 | int spatial_scalability; | |
455 | int qlog; | |
155ec6ed MN |
456 | int lambda; |
457 | int lambda2; | |
791e7b83 MN |
458 | int mv_scale; |
459 | int qbias; | |
460 | #define QBIAS_SHIFT 3 | |
155ec6ed MN |
461 | int b_width; |
462 | int b_height; | |
463 | int block_max_depth; | |
791e7b83 | 464 | Plane plane[MAX_PLANES]; |
155ec6ed | 465 | BlockNode *block; |
a0d1931c | 466 | slice_buffer sb; |
155ec6ed | 467 | |
791e7b83 MN |
468 | MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX) |
469 | }SnowContext; | |
470 | ||
f9e6ebf7 LM |
/*
 * Four DWTELEM pointers (b0..b3) plus an integer y.
 * NOTE(review): presumably the rolling set of line pointers and the current
 * line number kept between steps of an incremental wavelet composition; the
 * functions using it are not in this part of the file -- confirm.
 */
471 | typedef struct { |
472 | DWTELEM *b0; | |
473 | DWTELEM *b1; | |
474 | DWTELEM *b2; | |
475 | DWTELEM *b3; | |
476 | int y; | |
477 | } dwt_compose_t; | |
478 | ||
a0d1931c Y |
/*
 * Return the buffer of line line_num: the cached pointer when
 * line[line_num] is already set, otherwise the result of
 * slice_buffer_load_line().
 * Both macro arguments are expanded more than once, so they must be free of
 * side effects.
 */
479 | #define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num))) |
480 | //#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num))) | |
481 | ||
482 | static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, DWTELEM * base_buffer) | |
483 | { | |
484 | int i; | |
485 | ||
486 | buf->base_buffer = base_buffer; | |
487 | buf->line_count = line_count; | |
488 | buf->line_width = line_width; | |
489 | buf->data_count = max_allocated_lines; | |
490 | buf->line = (DWTELEM * *) av_mallocz (sizeof(DWTELEM *) * line_count); | |
491 | buf->data_stack = (DWTELEM * *) av_malloc (sizeof(DWTELEM *) * max_allocated_lines); | |
492 | ||
493 | for (i = 0; i < max_allocated_lines; i++) | |
494 | { | |
495 | buf->data_stack[i] = (DWTELEM *) av_malloc (sizeof(DWTELEM) * line_width); | |
496 | } | |
497 | ||
498 | buf->data_stack_top = max_allocated_lines - 1; | |
499 | } | |
500 | ||
501 | static DWTELEM * slice_buffer_load_line(slice_buffer * buf, int line) | |
502 | { | |
503 | int i; | |
504 | int offset; | |
505 | DWTELEM * buffer; | |
506 | ||
507 | // av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line); | |
508 | ||
509 | assert(buf->data_stack_top >= 0); | |
510 | // assert(!buf->line[line]); | |
511 | if (buf->line[line]) | |
512 | return buf->line[line]; | |
513 | ||
514 | offset = buf->line_width * line; | |
515 | buffer = buf->data_stack[buf->data_stack_top]; | |
516 | buf->data_stack_top--; | |
517 | buf->line[line] = buffer; | |
518 | ||
519 | // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1); | |
520 | ||
521 | return buffer; | |
522 | } | |
523 | ||
524 | static void slice_buffer_release(slice_buffer * buf, int line) | |
525 | { | |
526 | int i; | |
527 | int offset; | |
528 | DWTELEM * buffer; | |
529 | ||
530 | assert(line >= 0 && line < buf->line_count); | |
531 | assert(buf->line[line]); | |
532 | ||
533 | offset = buf->line_width * line; | |
534 | buffer = buf->line[line]; | |
535 | buf->data_stack_top++; | |
536 | buf->data_stack[buf->data_stack_top] = buffer; | |
537 | buf->line[line] = NULL; | |
538 | ||
539 | // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_release: line: %d remaining: %d\n", line, buf->data_stack_top + 1); | |
540 | } | |
541 | ||
542 | static void slice_buffer_flush(slice_buffer * buf) | |
543 | { | |
544 | int i; | |
545 | for (i = 0; i < buf->line_count; i++) | |
546 | { | |
547 | if (buf->line[i]) | |
548 | { | |
549 | // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_flush: line: %d \n", i); | |
550 | slice_buffer_release(buf, i); | |
551 | } | |
552 | } | |
553 | } | |
554 | ||
555 | static void slice_buffer_destroy(slice_buffer * buf) | |
556 | { | |
557 | int i; | |
558 | slice_buffer_flush(buf); | |
559 | ||
560 | for (i = buf->data_count - 1; i >= 0; i--) | |
561 | { | |
562 | assert(buf->data_stack[i]); | |
563 | av_free(buf->data_stack[i]); | |
564 | } | |
565 | assert(buf->data_stack); | |
566 | av_free(buf->data_stack); | |
567 | assert(buf->line); | |
568 | av_free(buf->line); | |
569 | } | |
570 | ||
2554db9b MN |
571 | #ifdef __sgi |
572 | // Avoid a name clash on SGI IRIX | |
573 | #undef qexp | |
574 | #endif | |
034aff03 | 575 | #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0 |
c97de57c | 576 | static uint8_t qexp[QROOT]; |
791e7b83 MN |
577 | |
/**
 * Reflect v once into the range [0, m].
 *
 * Values inside the range are returned as they are, negative values are
 * reflected at 0 and values above m are reflected at m.  Only a single
 * reflection is applied, so inputs further than m outside the range still
 * come back out of range.
 */
static inline int mirror(int v, int m){
    if(v >= 0 && v <= m)
        return v;

    return v < 0 ? -v : 2*m - v;
}
583 | ||
28869757 | 584 | static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){ |
791e7b83 MN |
585 | int i; |
586 | ||
587 | if(v){ | |
588 | const int a= ABS(v); | |
589 | const int e= av_log2(a); | |
590 | #if 1 | |
591 | const int el= FFMIN(e, 10); | |
28869757 | 592 | put_rac(c, state+0, 0); |
791e7b83 MN |
593 | |
594 | for(i=0; i<el; i++){ | |
28869757 | 595 | put_rac(c, state+1+i, 1); //1..10 |
791e7b83 MN |
596 | } |
597 | for(; i<e; i++){ | |
28869757 | 598 | put_rac(c, state+1+9, 1); //1..10 |
791e7b83 | 599 | } |
28869757 | 600 | put_rac(c, state+1+FFMIN(i,9), 0); |
791e7b83 MN |
601 | |
602 | for(i=e-1; i>=el; i--){ | |
28869757 | 603 | put_rac(c, state+22+9, (a>>i)&1); //22..31 |
791e7b83 MN |
604 | } |
605 | for(; i>=0; i--){ | |
28869757 | 606 | put_rac(c, state+22+i, (a>>i)&1); //22..31 |
791e7b83 MN |
607 | } |
608 | ||
609 | if(is_signed) | |
28869757 | 610 | put_rac(c, state+11 + el, v < 0); //11..21 |
791e7b83 MN |
611 | #else |
612 | ||
28869757 | 613 | put_rac(c, state+0, 0); |
791e7b83 MN |
614 | if(e<=9){ |
615 | for(i=0; i<e; i++){ | |
28869757 | 616 | put_rac(c, state+1+i, 1); //1..10 |
791e7b83 | 617 | } |
28869757 | 618 | put_rac(c, state+1+i, 0); |
791e7b83 MN |
619 | |
620 | for(i=e-1; i>=0; i--){ | |
28869757 | 621 | put_rac(c, state+22+i, (a>>i)&1); //22..31 |
791e7b83 MN |
622 | } |
623 | ||
624 | if(is_signed) | |
28869757 | 625 | put_rac(c, state+11 + e, v < 0); //11..21 |
791e7b83 MN |
626 | }else{ |
627 | for(i=0; i<e; i++){ | |
28869757 | 628 | put_rac(c, state+1+FFMIN(i,9), 1); //1..10 |
791e7b83 | 629 | } |
28869757 | 630 | put_rac(c, state+1+FFMIN(i,9), 0); |
791e7b83 MN |
631 | |
632 | for(i=e-1; i>=0; i--){ | |
28869757 | 633 | put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31 |
791e7b83 MN |
634 | } |
635 | ||
636 | if(is_signed) | |
28869757 | 637 | put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21 |
791e7b83 MN |
638 | } |
639 | #endif | |
640 | }else{ | |
28869757 | 641 | put_rac(c, state+0, 1); |
791e7b83 MN |
642 | } |
643 | } | |
644 | ||
28869757 MN |
645 | static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){ |
646 | if(get_rac(c, state+0)) | |
791e7b83 MN |
647 | return 0; |
648 | else{ | |
7c2425d2 LM |
649 | int i, e, a; |
650 | e= 0; | |
28869757 | 651 | while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10 |
7c2425d2 | 652 | e++; |
791e7b83 | 653 | } |
7c2425d2 | 654 | |
791e7b83 | 655 | a= 1; |
7c2425d2 | 656 | for(i=e-1; i>=0; i--){ |
28869757 | 657 | a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31 |
791e7b83 MN |
658 | } |
659 | ||
28869757 | 660 | if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21 |
791e7b83 MN |
661 | return -a; |
662 | else | |
663 | return a; | |
664 | } | |
665 | } | |
666 | ||
28869757 | 667 | static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){ |
4f4e9633 | 668 | int i; |
0635cbfc | 669 | int r= log2>=0 ? 1<<log2 : 1; |
4f4e9633 MN |
670 | |
671 | assert(v>=0); | |
0635cbfc MN |
672 | assert(log2>=-4); |
673 | ||
674 | while(v >= r){ | |
28869757 | 675 | put_rac(c, state+4+log2, 1); |
0635cbfc | 676 | v -= r; |
4f4e9633 | 677 | log2++; |
0635cbfc | 678 | if(log2>0) r+=r; |
4f4e9633 | 679 | } |
28869757 | 680 | put_rac(c, state+4+log2, 0); |
4f4e9633 MN |
681 | |
682 | for(i=log2-1; i>=0; i--){ | |
28869757 | 683 | put_rac(c, state+31-i, (v>>i)&1); |
4f4e9633 | 684 | } |
4f4e9633 MN |
685 | } |
686 | ||
28869757 | 687 | static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){ |
4f4e9633 | 688 | int i; |
0635cbfc | 689 | int r= log2>=0 ? 1<<log2 : 1; |
4f4e9633 MN |
690 | int v=0; |
691 | ||
0635cbfc MN |
692 | assert(log2>=-4); |
693 | ||
28869757 | 694 | while(get_rac(c, state+4+log2)){ |
0635cbfc | 695 | v+= r; |
4f4e9633 | 696 | log2++; |
0635cbfc | 697 | if(log2>0) r+=r; |
4f4e9633 MN |
698 | } |
699 | ||
700 | for(i=log2-1; i>=0; i--){ | |
28869757 | 701 | v+= get_rac(c, state+31-i)<<i; |
4f4e9633 MN |
702 | } |
703 | ||
704 | return v; | |
705 | } | |
706 | ||
791e7b83 MN |
707 | static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){ |
708 | const int mirror_left= !highpass; | |
709 | const int mirror_right= (width&1) ^ highpass; | |
710 | const int w= (width>>1) - 1 + (highpass & width); | |
711 | int i; | |
712 | ||
713 | #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref))) | |
714 | if(mirror_left){ | |
715 | dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse); | |
716 | dst += dst_step; | |
717 | src += src_step; | |
718 | } | |
719 | ||
720 | for(i=0; i<w; i++){ | |
721 | dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse); | |
722 | } | |
723 | ||
724 | if(mirror_right){ | |
725 | dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse); | |
726 | } | |
727 | } | |
728 | ||
729 | static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){ | |
730 | const int mirror_left= !highpass; | |
731 | const int mirror_right= (width&1) ^ highpass; | |
732 | const int w= (width>>1) - 1 + (highpass & width); | |
733 | int i; | |
734 | ||
735 | if(mirror_left){ | |
736 | int r= 3*2*ref[0]; | |
737 | r += r>>4; | |
738 | r += r>>8; | |
739 | dst[0] = LIFT(src[0], ((r+add)>>shift), inverse); | |
740 | dst += dst_step; | |
741 | src += src_step; | |
742 | } | |
743 | ||
744 | for(i=0; i<w; i++){ | |
745 | int r= 3*(ref[i*ref_step] + ref[(i+1)*ref_step]); | |
746 | r += r>>4; | |
747 | r += r>>8; | |
748 | dst[i*dst_step] = LIFT(src[i*src_step], ((r+add)>>shift), inverse); | |
749 | } | |
750 | ||
751 | if(mirror_right){ | |
752 | int r= 3*2*ref[w*ref_step]; | |
753 | r += r>>4; | |
754 | r += r>>8; | |
755 | dst[w*dst_step] = LIFT(src[w*src_step], ((r+add)>>shift), inverse); | |
756 | } | |
757 | } | |
758 | ||
759 | ||
aa25a462 | 760 | static void inplace_lift(DWTELEM *dst, int width, int *coeffs, int n, int shift, int start, int inverse){ |
791e7b83 MN |
761 | int x, i; |
762 | ||
763 | for(x=start; x<width; x+=2){ | |
764 | int64_t sum=0; | |
765 | ||
766 | for(i=0; i<n; i++){ | |
767 | int x2= x + 2*i - n + 1; | |
768 | if (x2< 0) x2= -x2; | |
769 | else if(x2>=width) x2= 2*width-x2-2; | |
770 | sum += coeffs[i]*(int64_t)dst[x2]; | |
771 | } | |
772 | if(inverse) dst[x] -= (sum + (1<<shift)/2)>>shift; | |
773 | else dst[x] += (sum + (1<<shift)/2)>>shift; | |
774 | } | |
775 | } | |
776 | ||
aa25a462 | 777 | static void inplace_liftV(DWTELEM *dst, int width, int height, int stride, int *coeffs, int n, int shift, int start, int inverse){ |
791e7b83 MN |
778 | int x, y, i; |
779 | for(y=start; y<height; y+=2){ | |
780 | for(x=0; x<width; x++){ | |
781 | int64_t sum=0; | |
782 | ||
783 | for(i=0; i<n; i++){ | |
784 | int y2= y + 2*i - n + 1; | |
785 | if (y2< 0) y2= -y2; | |
786 | else if(y2>=height) y2= 2*height-y2-2; | |
787 | sum += coeffs[i]*(int64_t)dst[x + y2*stride]; | |
788 | } | |
789 | if(inverse) dst[x + y*stride] -= (sum + (1<<shift)/2)>>shift; | |
790 | else dst[x + y*stride] += (sum + (1<<shift)/2)>>shift; | |
791 | } | |
792 | } | |
793 | } | |
794 | ||
/*
 * Lifting-step configuration for the generic "X" wavelet.
 *
 * Exactly one branch of the #if chain below is compiled; change the 0/1
 * constants to select another filter. For each step k = 1..4:
 *   Nk      number of taps (0 turns the step into a no-op)
 *   SHIFTk  right shift applied to the weighted sum
 *   COEFFSk tap weights
 * LX0/LX1 are the start offsets handed to the lifting helpers and SCALEX is
 * the factor applied before decomposition and divided out after composition.
 *
 * The three defaults below are #undef'ed before a branch overrides them, so
 * enabling such a branch does not redefine a macro with a different value.
 */
#define SCALEX 1
#define LX0 0
#define LX1 1

#if 0 // more accurate 9/7
#define N1 2
#define SHIFT1 14
#define COEFFS1 (int[]){-25987,-25987}
#define N2 2
#define SHIFT2 19
#define COEFFS2 (int[]){-27777,-27777}
#define N3 2
#define SHIFT3 15
#define COEFFS3 (int[]){28931,28931}
#define N4 2
#define SHIFT4 15
#define COEFFS4 (int[]){14533,14533}
#elif 1 // 13/7 CRF
#define N1 4
#define SHIFT1 4
#define COEFFS1 (int[]){1,-9,-9,1}
#define N2 4
#define SHIFT2 4
#define COEFFS2 (int[]){-1,5,5,-1}
#define N3 0
#define SHIFT3 1
#define COEFFS3 NULL
#define N4 0
#define SHIFT4 1
#define COEFFS4 NULL
#elif 1 // 3/5
#undef LX0
#undef LX1
#undef SCALEX
#define LX0 1
#define LX1 0
#define SCALEX 0.5
#define N1 2
#define SHIFT1 1
#define COEFFS1 (int[]){1,1}
#define N2 2
#define SHIFT2 2
#define COEFFS2 (int[]){-1,-1}
#define N3 0
#define SHIFT3 0
#define COEFFS3 NULL
#define N4 0
#define SHIFT4 0
#define COEFFS4 NULL
#elif 1 // 11/5
#define N1 0
#define SHIFT1 1
#define COEFFS1 NULL
#define N2 2
#define SHIFT2 2
#define COEFFS2 (int[]){-1,-1}
#define N3 2
#define SHIFT3 0
#define COEFFS3 (int[]){-1,-1}
#define N4 4
#define SHIFT4 7
#define COEFFS4 (int[]){-5,29,29,-5}
#undef SCALEX
#define SCALEX 4
#elif 1 // 9/7 CDF
#define N1 2
#define SHIFT1 7
#define COEFFS1 (int[]){-203,-203}
#define N2 2
#define SHIFT2 12
#define COEFFS2 (int[]){-217,-217}
#define N3 2
#define SHIFT3 7
#define COEFFS3 (int[]){113,113}
#define N4 2
#define SHIFT4 9
#define COEFFS4 (int[]){227,227}
#undef SCALEX
#define SCALEX 1
#elif 1 // 7/5 CDF
#define N1 0
#define SHIFT1 1
#define COEFFS1 NULL
#define N2 2
#define SHIFT2 2
#define COEFFS2 (int[]){-1,-1}
#define N3 2
#define SHIFT3 0
#define COEFFS3 (int[]){-1,-1}
#define N4 2
#define SHIFT4 4
#define COEFFS4 (int[]){3,3}
#elif 1 // 9/7 MN
#define N1 4
#define SHIFT1 4
#define COEFFS1 (int[]){1,-9,-9,1}
#define N2 2
#define SHIFT2 2
#define COEFFS2 (int[]){1,1}
#define N3 0
#define SHIFT3 1
#define COEFFS3 NULL
#define N4 0
#define SHIFT4 1
#define COEFFS4 NULL
#else // 13/7 CRF
#define N1 4
#define SHIFT1 4
#define COEFFS1 (int[]){1,-9,-9,1}
#define N2 4
#define SHIFT2 4
#define COEFFS2 (int[]){-1,5,5,-1}
#define N3 0
#define SHIFT3 1
#define COEFFS3 NULL
#define N4 0
#define SHIFT4 1
#define COEFFS4 NULL
#endif
aa25a462 RFI |
909 | static void horizontal_decomposeX(DWTELEM *b, int width){ |
910 | DWTELEM temp[width]; | |
791e7b83 MN |
911 | const int width2= width>>1; |
912 | const int w2= (width+1)>>1; | |
913 | int A1,A2,A3,A4, x; | |
914 | ||
915 | inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 0); | |
916 | inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 0); | |
917 | inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 0); | |
918 | inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 0); | |
919 | ||
920 | for(x=0; x<width2; x++){ | |
921 | temp[x ]= b[2*x ]; | |
922 | temp[x+w2]= b[2*x + 1]; | |
923 | } | |
924 | if(width&1) | |
925 | temp[x ]= b[2*x ]; | |
926 | memcpy(b, temp, width*sizeof(int)); | |
927 | } | |
928 | ||
aa25a462 RFI |
929 | static void horizontal_composeX(DWTELEM *b, int width){ |
930 | DWTELEM temp[width]; | |
791e7b83 MN |
931 | const int width2= width>>1; |
932 | int A1,A2,A3,A4, x; | |
933 | const int w2= (width+1)>>1; | |
934 | ||
935 | memcpy(temp, b, width*sizeof(int)); | |
936 | for(x=0; x<width2; x++){ | |
937 | b[2*x ]= temp[x ]; | |
938 | b[2*x + 1]= temp[x+w2]; | |
939 | } | |
940 | if(width&1) | |
941 | b[2*x ]= temp[x ]; | |
942 | ||
943 | inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 1); | |
944 | inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 1); | |
945 | inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 1); | |
946 | inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 1); | |
947 | } | |
948 | ||
aa25a462 | 949 | static void spatial_decomposeX(DWTELEM *buffer, int width, int height, int stride){ |
791e7b83 MN |
950 | int x, y; |
951 | ||
952 | for(y=0; y<height; y++){ | |
953 | for(x=0; x<width; x++){ | |
954 | buffer[y*stride + x] *= SCALEX; | |
955 | } | |
956 | } | |
957 | ||
958 | for(y=0; y<height; y++){ | |
959 | horizontal_decomposeX(buffer + y*stride, width); | |
960 | } | |
961 | ||
962 | inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 0); | |
963 | inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0); | |
964 | inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0); | |
965 | inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0); | |
966 | } | |
967 | ||
aa25a462 | 968 | static void spatial_composeX(DWTELEM *buffer, int width, int height, int stride){ |
791e7b83 MN |
969 | int x, y; |
970 | ||
971 | inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1); | |
972 | inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1); | |
973 | inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1); | |
974 | inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 1); | |
975 | ||
976 | for(y=0; y<height; y++){ | |
977 | horizontal_composeX(buffer + y*stride, width); | |
978 | } | |
979 | ||
980 | for(y=0; y<height; y++){ | |
981 | for(x=0; x<width; x++){ | |
982 | buffer[y*stride + x] /= SCALEX; | |
983 | } | |
984 | } | |
985 | } | |
986 | ||
aa25a462 RFI |
987 | static void horizontal_decompose53i(DWTELEM *b, int width){ |
988 | DWTELEM temp[width]; | |
791e7b83 MN |
989 | const int width2= width>>1; |
990 | int A1,A2,A3,A4, x; | |
991 | const int w2= (width+1)>>1; | |
992 | ||
993 | for(x=0; x<width2; x++){ | |
994 | temp[x ]= b[2*x ]; | |
995 | temp[x+w2]= b[2*x + 1]; | |
996 | } | |
997 | if(width&1) | |
998 | temp[x ]= b[2*x ]; | |
999 | #if 0 | |
1000 | A2= temp[1 ]; | |
1001 | A4= temp[0 ]; | |
1002 | A1= temp[0+width2]; | |
1003 | A1 -= (A2 + A4)>>1; | |
1004 | A4 += (A1 + 1)>>1; | |
1005 | b[0+width2] = A1; | |
1006 | b[0 ] = A4; | |
1007 | for(x=1; x+1<width2; x+=2){ | |
1008 | A3= temp[x+width2]; | |
1009 | A4= temp[x+1 ]; | |
1010 | A3 -= (A2 + A4)>>1; | |
1011 | A2 += (A1 + A3 + 2)>>2; | |
1012 | b[x+width2] = A3; | |
1013 | b[x ] = A2; | |
1014 | ||
1015 | A1= temp[x+1+width2]; | |
1016 | A2= temp[x+2 ]; | |
1017 | A1 -= (A2 + A4)>>1; | |
1018 | A4 += (A1 + A3 + 2)>>2; | |
1019 | b[x+1+width2] = A1; | |
1020 | b[x+1 ] = A4; | |
1021 | } | |
1022 | A3= temp[width-1]; | |
1023 | A3 -= A2; | |
1024 | A2 += (A1 + A3 + 2)>>2; | |
1025 | b[width -1] = A3; | |
1026 | b[width2-1] = A2; | |
1027 | #else | |
1028 | lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0); | |
1029 | lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0); | |
1030 | #endif | |
1031 | } | |
1032 | ||
aa25a462 | 1033 | static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ |
791e7b83 MN |
1034 | int i; |
1035 | ||
1036 | for(i=0; i<width; i++){ | |
1037 | b1[i] -= (b0[i] + b2[i])>>1; | |
1038 | } | |
1039 | } | |
1040 | ||
aa25a462 | 1041 | static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ |
791e7b83 MN |
1042 | int i; |
1043 | ||
1044 | for(i=0; i<width; i++){ | |
1045 | b1[i] += (b0[i] + b2[i] + 2)>>2; | |
1046 | } | |
1047 | } | |
1048 | ||
aa25a462 | 1049 | static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){ |
39c61bbb | 1050 | int y; |
791e7b83 MN |
1051 | DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride; |
1052 | DWTELEM *b1= buffer + mirror(-2 , height-1)*stride; | |
1053 | ||
1054 | for(y=-2; y<height; y+=2){ | |
1055 | DWTELEM *b2= buffer + mirror(y+1, height-1)*stride; | |
1056 | DWTELEM *b3= buffer + mirror(y+2, height-1)*stride; | |
1057 | ||
1058 | {START_TIMER | |
1059 | if(b1 <= b3) horizontal_decompose53i(b2, width); | |
1060 | if(y+2 < height) horizontal_decompose53i(b3, width); | |
1061 | STOP_TIMER("horizontal_decompose53i")} | |
1062 | ||
1063 | {START_TIMER | |
1064 | if(b1 <= b3) vertical_decompose53iH0(b1, b2, b3, width); | |
1065 | if(b0 <= b2) vertical_decompose53iL0(b0, b1, b2, width); | |
1066 | STOP_TIMER("vertical_decompose53i*")} | |
1067 | ||
1068 | b0=b2; | |
1069 | b1=b3; | |
1070 | } | |
1071 | } | |
1072 | ||
/*
 * Constants of the four integer 9/7 lifting steps A..D. For a step X the
 * update term is (W_XM * (sum of two neighbours) + W_XO) >> W_XS, i.e.
 * W_XM is the weight, W_XO the rounding offset and W_XS the shift.
 *
 * lift5 defaults to an alias of lift, so step C uses W_CM like the other
 * steps. The first (active) set removes that alias again: the lift5()
 * function and the "#ifdef lift5" fallbacks then use their built-in weight
 * and do not read W_CM.
 */
#define lift5 lift
#if 1
#define W_AM 3
#define W_AO 0
#define W_AS 1

#define W_BM 1
#define W_BO 8
#define W_BS 4

#undef lift5
#define W_CM 9999
#define W_CO 2
#define W_CS 2

#define W_DM 15
#define W_DO 16
#define W_DS 5
#elif 0
#define W_AM 55
#define W_AO 16
#define W_AS 5

#define W_BM 3
#define W_BO 32
#define W_BS 6

#define W_CM 127
#define W_CO 64
#define W_CS 7

#define W_DM 7
#define W_DO 8
#define W_DS 4
#elif 0
#define W_AM 97
#define W_AO 32
#define W_AS 6

#define W_BM 63
#define W_BO 512
#define W_BS 10

#define W_CM 13
#define W_CO 8
#define W_CS 4

#define W_DM 15
#define W_DO 16
#define W_DS 5

#else

#define W_AM 203
#define W_AO 64
#define W_AS 7

#define W_BM 217
#define W_BO 2048
#define W_BS 12

#define W_CM 113
#define W_CO 64
#define W_CS 7

#define W_DM 227
#define W_DO 128
#define W_DS 9
#endif
aa25a462 RFI |
1142 | static void horizontal_decompose97i(DWTELEM *b, int width){ |
1143 | DWTELEM temp[width]; | |
791e7b83 MN |
1144 | const int w2= (width+1)>>1; |
1145 | ||
1146 | lift (temp+w2, b +1, b , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0); | |
1147 | lift (temp , b , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0); | |
1148 | lift5(b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0); | |
1149 | lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0); | |
1150 | } | |
1151 | ||
1152 | ||
aa25a462 | 1153 | static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ |
791e7b83 MN |
1154 | int i; |
1155 | ||
1156 | for(i=0; i<width; i++){ | |
1157 | b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS; | |
1158 | } | |
1159 | } | |
1160 | ||
aa25a462 | 1161 | static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ |
791e7b83 MN |
1162 | int i; |
1163 | ||
1164 | for(i=0; i<width; i++){ | |
1165 | #ifdef lift5 | |
1166 | b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS; | |
1167 | #else | |
1168 | int r= 3*(b0[i] + b2[i]); | |
1169 | r+= r>>4; | |
1170 | r+= r>>8; | |
1171 | b1[i] += (r+W_CO)>>W_CS; | |
1172 | #endif | |
1173 | } | |
1174 | } | |
1175 | ||
aa25a462 | 1176 | static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ |
791e7b83 MN |
1177 | int i; |
1178 | ||
1179 | for(i=0; i<width; i++){ | |
1180 | b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS; | |
1181 | } | |
1182 | } | |
1183 | ||
aa25a462 | 1184 | static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ |
791e7b83 MN |
1185 | int i; |
1186 | ||
1187 | for(i=0; i<width; i++){ | |
1188 | b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS; | |
1189 | } | |
1190 | } | |
1191 | ||
aa25a462 | 1192 | static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){ |
39c61bbb | 1193 | int y; |
791e7b83 MN |
1194 | DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride; |
1195 | DWTELEM *b1= buffer + mirror(-4 , height-1)*stride; | |
1196 | DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride; | |
1197 | DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride; | |
1198 | ||
1199 | for(y=-4; y<height; y+=2){ | |
1200 | DWTELEM *b4= buffer + mirror(y+3, height-1)*stride; | |
1201 | DWTELEM *b5= buffer + mirror(y+4, height-1)*stride; | |
1202 | ||
1203 | {START_TIMER | |
1204 | if(b3 <= b5) horizontal_decompose97i(b4, width); | |
1205 | if(y+4 < height) horizontal_decompose97i(b5, width); | |
1206 | if(width>400){ | |
1207 | STOP_TIMER("horizontal_decompose97i") | |
1208 | }} | |
1209 | ||
1210 | {START_TIMER | |
1211 | if(b3 <= b5) vertical_decompose97iH0(b3, b4, b5, width); | |
1212 | if(b2 <= b4) vertical_decompose97iL0(b2, b3, b4, width); | |
1213 | if(b1 <= b3) vertical_decompose97iH1(b1, b2, b3, width); | |
1214 | if(b0 <= b2) vertical_decompose97iL1(b0, b1, b2, width); | |
1215 | ||
1216 | if(width>400){ | |
1217 | STOP_TIMER("vertical_decompose97i") | |
1218 | }} | |
1219 | ||
1220 | b0=b2; | |
1221 | b1=b3; | |
1222 | b2=b4; | |
1223 | b3=b5; | |
1224 | } | |
1225 | } | |
1226 | ||
aa25a462 | 1227 | void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){ |
791e7b83 MN |
1228 | int level; |
1229 | ||
46c281e8 MN |
1230 | for(level=0; level<decomposition_count; level++){ |
1231 | switch(type){ | |
791e7b83 MN |
1232 | case 0: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break; |
1233 | case 1: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break; | |
1234 | case 2: spatial_decomposeX (buffer, width>>level, height>>level, stride<<level); break; | |
1235 | } | |
1236 | } | |
1237 | } | |
1238 | ||
aa25a462 RFI |
1239 | static void horizontal_compose53i(DWTELEM *b, int width){ |
1240 | DWTELEM temp[width]; | |
791e7b83 MN |
1241 | const int width2= width>>1; |
1242 | const int w2= (width+1)>>1; | |
1243 | int A1,A2,A3,A4, x; | |
1244 | ||
1245 | #if 0 | |
1246 | A2= temp[1 ]; | |
1247 | A4= temp[0 ]; | |
1248 | A1= temp[0+width2]; | |
1249 | A1 -= (A2 + A4)>>1; | |
1250 | A4 += (A1 + 1)>>1; | |
1251 | b[0+width2] = A1; | |
1252 | b[0 ] = A4; | |
1253 | for(x=1; x+1<width2; x+=2){ | |
1254 | A3= temp[x+width2]; | |
1255 | A4= temp[x+1 ]; | |
1256 | A3 -= (A2 + A4)>>1; | |
1257 | A2 += (A1 + A3 + 2)>>2; | |
1258 | b[x+width2] = A3; | |
1259 | b[x ] = A2; | |
1260 | ||
1261 | A1= temp[x+1+width2]; | |
1262 | A2= temp[x+2 ]; | |
1263 | A1 -= (A2 + A4)>>1; | |
1264 | A4 += (A1 + A3 + 2)>>2; | |
1265 | b[x+1+width2] = A1; | |
1266 | b[x+1 ] = A4; | |
1267 | } | |
1268 | A3= temp[width-1]; | |
1269 | A3 -= A2; | |
1270 | A2 += (A1 + A3 + 2)>>2; | |
1271 | b[width -1] = A3; | |
1272 | b[width2-1] = A2; | |
1273 | #else | |
1274 | lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1); | |
1275 | lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1); | |
1276 | #endif | |
1277 | for(x=0; x<width2; x++){ | |
1278 | b[2*x ]= temp[x ]; | |
1279 | b[2*x + 1]= temp[x+w2]; | |
1280 | } | |
1281 | if(width&1) | |
1282 | b[2*x ]= temp[x ]; | |
1283 | } | |
1284 | ||
aa25a462 | 1285 | static void vertical_compose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ |
791e7b83 MN |
1286 | int i; |
1287 | ||
1288 | for(i=0; i<width; i++){ | |
1289 | b1[i] += (b0[i] + b2[i])>>1; | |
1290 | } | |
1291 | } | |
1292 | ||
aa25a462 | 1293 | static void vertical_compose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ |
791e7b83 MN |
1294 | int i; |
1295 | ||
1296 | for(i=0; i<width; i++){ | |
1297 | b1[i] -= (b0[i] + b2[i] + 2)>>2; | |
1298 | } | |
1299 | } | |
1300 | ||
a0d1931c Y |
1301 | static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){ |
1302 | cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line); | |
1303 | cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line); | |
1304 | cs->y = -1; | |
1305 | } | |
1306 | ||
f9e6ebf7 LM |
1307 | static void spatial_compose53i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){ |
1308 | cs->b0 = buffer + mirror(-1-1, height-1)*stride; | |
1309 | cs->b1 = buffer + mirror(-1 , height-1)*stride; | |
1310 | cs->y = -1; | |
1311 | } | |
1312 | ||
a0d1931c Y |
1313 | static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){ |
1314 | int y= cs->y; | |
1315 | int mirror0 = mirror(y-1, height-1); | |
1316 | int mirror1 = mirror(y , height-1); | |
1317 | int mirror2 = mirror(y+1, height-1); | |
1318 | int mirror3 = mirror(y+2, height-1); | |
1319 | ||
1320 | DWTELEM *b0= cs->b0; | |
1321 | DWTELEM *b1= cs->b1; | |
1322 | DWTELEM *b2= slice_buffer_get_line(sb, mirror2 * stride_line); | |
1323 | DWTELEM *b3= slice_buffer_get_line(sb, mirror3 * stride_line); | |
1324 | ||
1325 | {START_TIMER | |
1326 | if(mirror1 <= mirror3) vertical_compose53iL0(b1, b2, b3, width); | |
1327 | if(mirror0 <= mirror2) vertical_compose53iH0(b0, b1, b2, width); | |
1328 | STOP_TIMER("vertical_compose53i*")} | |
1329 | ||
1330 | {START_TIMER | |
1331 | if(y-1 >= 0) horizontal_compose53i(b0, width); | |
1332 | if(mirror0 <= mirror2) horizontal_compose53i(b1, width); | |
1333 | STOP_TIMER("horizontal_compose53i")} | |
1334 | ||
1335 | cs->b0 = b2; | |
1336 | cs->b1 = b3; | |
1337 | cs->y += 2; | |
1338 | } | |
1339 | ||
f9e6ebf7 LM |
1340 | static void spatial_compose53i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){ |
1341 | int y= cs->y; | |
1342 | DWTELEM *b0= cs->b0; | |
1343 | DWTELEM *b1= cs->b1; | |
1344 | DWTELEM *b2= buffer + mirror(y+1, height-1)*stride; | |
1345 | DWTELEM *b3= buffer + mirror(y+2, height-1)*stride; | |
791e7b83 MN |
1346 | |
1347 | {START_TIMER | |
1348 | if(b1 <= b3) vertical_compose53iL0(b1, b2, b3, width); | |
1349 | if(b0 <= b2) vertical_compose53iH0(b0, b1, b2, width); | |
1350 | STOP_TIMER("vertical_compose53i*")} | |
1351 | ||
1352 | {START_TIMER | |
1353 | if(y-1 >= 0) horizontal_compose53i(b0, width); | |
1354 | if(b0 <= b2) horizontal_compose53i(b1, width); | |
1355 | STOP_TIMER("horizontal_compose53i")} | |
1356 | ||
f9e6ebf7 LM |
1357 | cs->b0 = b2; |
1358 | cs->b1 = b3; | |
1359 | cs->y += 2; | |
1360 | } | |
1361 | ||
1362 | static void spatial_compose53i(DWTELEM *buffer, int width, int height, int stride){ | |
1363 | dwt_compose_t cs; | |
1364 | spatial_compose53i_init(&cs, buffer, height, stride); | |
1365 | while(cs.y <= height) | |
1366 | spatial_compose53i_dy(&cs, buffer, width, height, stride); | |
791e7b83 MN |
1367 | } |
1368 | ||
1369 | ||
aa25a462 RFI |
1370 | static void horizontal_compose97i(DWTELEM *b, int width){ |
1371 | DWTELEM temp[width]; | |
791e7b83 MN |
1372 | const int w2= (width+1)>>1; |
1373 | ||
1374 | lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1); | |
1375 | lift5(temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1); | |
1376 | lift (b , temp , temp+w2, 2, 1, 1, width, -W_BM, W_BO, W_BS, 0, 1); | |
1377 | lift (b+1 , temp+w2, b , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1); | |
1378 | } | |
1379 | ||
aa25a462 | 1380 | static void vertical_compose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ |
791e7b83 MN |
1381 | int i; |
1382 | ||
1383 | for(i=0; i<width; i++){ | |
1384 | b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS; | |
1385 | } | |
1386 | } | |
1387 | ||
aa25a462 | 1388 | static void vertical_compose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ |
791e7b83 MN |
1389 | int i; |
1390 | ||
1391 | for(i=0; i<width; i++){ | |
1392 | #ifdef lift5 | |
1393 | b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS; | |
1394 | #else | |
1395 | int r= 3*(b0[i] + b2[i]); | |
1396 | r+= r>>4; | |
1397 | r+= r>>8; | |
1398 | b1[i] -= (r+W_CO)>>W_CS; | |
1399 | #endif | |
1400 | } | |
1401 | } | |
1402 | ||
aa25a462 | 1403 | static void vertical_compose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ |
791e7b83 MN |
1404 | int i; |
1405 | ||
1406 | for(i=0; i<width; i++){ | |
1407 | b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS; | |
1408 | } | |
1409 | } | |
1410 | ||
aa25a462 | 1411 | static void vertical_compose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ |
791e7b83 MN |
1412 | int i; |
1413 | ||
1414 | for(i=0; i<width; i++){ | |
1415 | b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS; | |
1416 | } | |
1417 | } | |
1418 | ||
565a45ac MN |
1419 | static void vertical_compose97i(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width){ |
1420 | int i; | |
1421 | ||
1422 | for(i=0; i<width; i++){ | |
1423 | int r; | |
1424 | b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS; | |
1425 | #ifdef lift5 | |
1426 | b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS; | |
1427 | #else | |
1428 | r= 3*(b2[i] + b4[i]); | |
1429 | r+= r>>4; | |
1430 | r+= r>>8; | |
1431 | b3[i] -= (r+W_CO)>>W_CS; | |
1432 | #endif | |
1433 | b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS; | |
1434 | b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS; | |
1435 | } | |
1436 | } | |
1437 | ||
a0d1931c Y |
1438 | static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){ |
1439 | cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line); | |
1440 | cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line); | |
1441 | cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line); | |
1442 | cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line); | |
1443 | cs->y = -3; | |
1444 | } | |
1445 | ||
f9e6ebf7 LM |
1446 | static void spatial_compose97i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){ |
1447 | cs->b0 = buffer + mirror(-3-1, height-1)*stride; | |
1448 | cs->b1 = buffer + mirror(-3 , height-1)*stride; | |
1449 | cs->b2 = buffer + mirror(-3+1, height-1)*stride; | |
1450 | cs->b3 = buffer + mirror(-3+2, height-1)*stride; | |
1451 | cs->y = -3; | |
1452 | } | |
791e7b83 | 1453 | |
a0d1931c Y |
1454 | static void spatial_compose97i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){ |
1455 | int y = cs->y; | |
1456 | ||
1457 | int mirror0 = mirror(y - 1, height - 1); | |
1458 | int mirror1 = mirror(y + 0, height - 1); | |
1459 | int mirror2 = mirror(y + 1, height - 1); | |
1460 | int mirror3 = mirror(y + 2, height - 1); | |
1461 | int mirror4 = mirror(y + 3, height - 1); | |
1462 | int mirror5 = mirror(y + 4, height - 1); | |
1463 | DWTELEM *b0= cs->b0; | |
1464 | DWTELEM *b1= cs->b1; | |
1465 | DWTELEM *b2= cs->b2; | |
1466 | DWTELEM *b3= cs->b3; | |
1467 | DWTELEM *b4= slice_buffer_get_line(sb, mirror4 * stride_line); | |
1468 | DWTELEM *b5= slice_buffer_get_line(sb, mirror5 * stride_line); | |
1469 | ||
a0d1931c | 1470 | {START_TIMER |
565a45ac MN |
1471 | if(y>0 && y+4<height){ |
1472 | vertical_compose97i(b0, b1, b2, b3, b4, b5, width); | |
1473 | }else{ | |
1474 | if(mirror3 <= mirror5) vertical_compose97iL1(b3, b4, b5, width); | |
1475 | if(mirror2 <= mirror4) vertical_compose97iH1(b2, b3, b4, width); | |
1476 | if(mirror1 <= mirror3) vertical_compose97iL0(b1, b2, b3, width); | |
1477 | if(mirror0 <= mirror2) vertical_compose97iH0(b0, b1, b2, width); | |
1478 | } | |
a0d1931c Y |
1479 | if(width>400){ |
1480 | STOP_TIMER("vertical_compose97i")}} | |
a0d1931c Y |
1481 | |
1482 | {START_TIMER | |
1483 | if(y-1>= 0) horizontal_compose97i(b0, width); | |
1484 | if(mirror0 <= mirror2) horizontal_compose97i(b1, width); | |
1485 | if(width>400 && mirror0 <= mirror2){ | |
1486 | STOP_TIMER("horizontal_compose97i")}} | |
1487 | ||
1488 | cs->b0=b2; | |
1489 | cs->b1=b3; | |
1490 | cs->b2=b4; | |
1491 | cs->b3=b5; | |
1492 | cs->y += 2; | |
1493 | } | |
1494 | ||
f9e6ebf7 LM |
1495 | static void spatial_compose97i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){ |
1496 | int y = cs->y; | |
1497 | DWTELEM *b0= cs->b0; | |
1498 | DWTELEM *b1= cs->b1; | |
1499 | DWTELEM *b2= cs->b2; | |
1500 | DWTELEM *b3= cs->b3; | |
1501 | DWTELEM *b4= buffer + mirror(y+3, height-1)*stride; | |
1502 | DWTELEM *b5= buffer + mirror(y+4, height-1)*stride; | |
791e7b83 MN |
1503 | |
1504 | if(stride == width && y+4 < height && 0){ | |
1505 | int x; | |
1506 | for(x=0; x<width/2; x++) | |
1507 | b5[x] += 64*2; | |
1508 | for(; x<width; x++) | |
1509 | b5[x] += 169*2; | |
1510 | } | |
1511 | ||
1512 | {START_TIMER | |
1513 | if(b3 <= b5) vertical_compose97iL1(b3, b4, b5, width); | |
1514 | if(b2 <= b4) vertical_compose97iH1(b2, b3, b4, width); | |
1515 | if(b1 <= b3) vertical_compose97iL0(b1, b2, b3, width); | |
1516 | if(b0 <= b2) vertical_compose97iH0(b0, b1, b2, width); | |
1517 | if(width>400){ | |
1518 | STOP_TIMER("vertical_compose97i")}} | |
1519 | ||
1520 | {START_TIMER | |
1521 | if(y-1>= 0) horizontal_compose97i(b0, width); | |
1522 | if(b0 <= b2) horizontal_compose97i(b1, width); | |
1523 | if(width>400 && b0 <= b2){ | |
1524 | STOP_TIMER("horizontal_compose97i")}} | |
f9e6ebf7 LM |
1525 | |
1526 | cs->b0=b2; | |
1527 | cs->b1=b3; | |
1528 | cs->b2=b4; | |
1529 | cs->b3=b5; | |
1530 | cs->y += 2; | |
1531 | } | |
1532 | ||
1533 | static void spatial_compose97i(DWTELEM *buffer, int width, int height, int stride){ | |
1534 | dwt_compose_t cs; | |
1535 | spatial_compose97i_init(&cs, buffer, height, stride); | |
1536 | while(cs.y <= height) | |
1537 | spatial_compose97i_dy(&cs, buffer, width, height, stride); | |
1538 | } | |
1539 | ||
a0d1931c Y |
1540 | void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){ |
1541 | int level; | |
1542 | for(level=decomposition_count-1; level>=0; level--){ | |
1543 | switch(type){ | |
1544 | case 0: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break; | |
1545 | case 1: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break; | |
1546 | /* not slicified yet */ | |
1547 | case 2: /*spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;*/ | |
1548 | av_log(NULL, AV_LOG_ERROR, "spatial_composeX neither buffered nor slicified yet.\n"); break; | |
1549 | } | |
1550 | } | |
1551 | } | |
1552 | ||
f9e6ebf7 LM |
1553 | void ff_spatial_idwt_init(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){ |
1554 | int level; | |
1555 | for(level=decomposition_count-1; level>=0; level--){ | |
1556 | switch(type){ | |
1557 | case 0: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break; | |
1558 | case 1: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break; | |
1559 | /* not slicified yet */ | |
1560 | case 2: spatial_composeX(buffer, width>>level, height>>level, stride<<level); break; | |
1561 | } | |
791e7b83 MN |
1562 | } |
1563 | } | |
1564 | ||
f9e6ebf7 LM |
1565 | void ff_spatial_idwt_slice(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){ |
1566 | const int support = type==1 ? 3 : 5; | |
791e7b83 | 1567 | int level; |
f9e6ebf7 | 1568 | if(type==2) return; |
791e7b83 | 1569 | |
46c281e8 | 1570 | for(level=decomposition_count-1; level>=0; level--){ |
f9e6ebf7 LM |
1571 | while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){ |
1572 | switch(type){ | |
1573 | case 0: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level); | |
1574 | break; | |
1575 | case 1: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level); | |
1576 | break; | |
1577 | case 2: break; | |
1578 | } | |
791e7b83 MN |
1579 | } |
1580 | } | |
1581 | } | |
1582 | ||
a0d1931c Y |
1583 | void ff_spatial_idwt_buffered_slice(dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){ |
1584 | const int support = type==1 ? 3 : 5; | |
1585 | int level; | |
1586 | if(type==2) return; | |
1587 | ||
1588 | for(level=decomposition_count-1; level>=0; level--){ | |
1589 | while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){ | |
1590 | switch(type){ | |
1591 | case 0: spatial_compose97i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level); | |
1592 | break; | |
1593 | case 1: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level); | |
1594 | break; | |
1595 | case 2: break; | |
1596 | } | |
1597 | } | |
1598 | } | |
1599 | } | |
1600 | ||
f9e6ebf7 LM |
1601 | void ff_spatial_idwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){ |
1602 | if(type==2){ | |
1603 | int level; | |
1604 | for(level=decomposition_count-1; level>=0; level--) | |
1605 | spatial_composeX (buffer, width>>level, height>>level, stride<<level); | |
1606 | }else{ | |
1607 | dwt_compose_t cs[MAX_DECOMPOSITIONS]; | |
1608 | int y; | |
1609 | ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count); | |
1610 | for(y=0; y<height; y+=4) | |
1611 | ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y); | |
1612 | } | |
1613 | } | |
1614 | ||
0ecca7a4 | 1615 | static int encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){ |
4f4e9633 MN |
1616 | const int w= b->width; |
1617 | const int h= b->height; | |
1618 | int x, y; | |
1619 | ||
791e7b83 | 1620 | if(1){ |
791e7b83 | 1621 | int run=0; |
a8d73e56 | 1622 | int runs[w*h]; |
791e7b83 MN |
1623 | int run_index=0; |
1624 | ||
791e7b83 MN |
1625 | for(y=0; y<h; y++){ |
1626 | for(x=0; x<w; x++){ | |
78486403 | 1627 | int v, p=0; |
6b2f6646 | 1628 | int /*ll=0, */l=0, lt=0, t=0, rt=0; |
a8d73e56 | 1629 | v= src[x + y*stride]; |
791e7b83 MN |
1630 | |
1631 | if(y){ | |
a8d73e56 | 1632 | t= src[x + (y-1)*stride]; |
791e7b83 | 1633 | if(x){ |
a8d73e56 | 1634 | lt= src[x - 1 + (y-1)*stride]; |
791e7b83 MN |
1635 | } |
1636 | if(x + 1 < w){ | |
a8d73e56 | 1637 | rt= src[x + 1 + (y-1)*stride]; |
791e7b83 MN |
1638 | } |
1639 | } | |
1640 | if(x){ | |
a8d73e56 | 1641 | l= src[x - 1 + y*stride]; |
6b2f6646 MN |
1642 | /*if(x > 1){ |
1643 | if(orientation==1) ll= src[y + (x-2)*stride]; | |
1644 | else ll= src[x - 2 + y*stride]; | |
791e7b83 MN |
1645 | }*/ |
1646 | } | |
78486403 | 1647 | if(parent){ |
a8d73e56 MN |
1648 | int px= x>>1; |
1649 | int py= y>>1; | |
78486403 MN |
1650 | if(px<b->parent->width && py<b->parent->height) |
1651 | p= parent[px + py*2*stride]; | |
1652 | } | |
1653 | if(!(/*ll|*/l|lt|t|rt|p)){ | |
791e7b83 MN |
1654 | if(v){ |
1655 | runs[run_index++]= run; | |
1656 | run=0; | |
1657 | }else{ | |
1658 | run++; | |
1659 | } | |
1660 | } | |
1661 | } | |
1662 | } | |
1663 | runs[run_index++]= run; | |
1664 | run_index=0; | |
1665 | run= runs[run_index++]; | |
1666 | ||
4f4e9633 | 1667 | put_symbol2(&s->c, b->state[1], run, 3); |
791e7b83 MN |
1668 | |
1669 | for(y=0; y<h; y++){ | |
d06c75a8 | 1670 | if(s->c.bytestream_end - s->c.bytestream < w*40){ |
0ecca7a4 MN |
1671 | av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n"); |
1672 | return -1; | |
1673 | } | |
791e7b83 | 1674 | for(x=0; x<w; x++){ |
78486403 | 1675 | int v, p=0; |
6b2f6646 | 1676 | int /*ll=0, */l=0, lt=0, t=0, rt=0; |
a8d73e56 | 1677 | v= src[x + y*stride]; |
791e7b83 MN |
1678 | |
1679 | if(y){ | |
a8d73e56 | 1680 | t= src[x + (y-1)*stride]; |
791e7b83 | 1681 | if(x){ |
a8d73e56 | 1682 | lt= src[x - 1 + (y-1)*stride]; |
791e7b83 MN |
1683 | } |
1684 | if(x + 1 < w){ | |
a8d73e56 | 1685 | rt= src[x + 1 + (y-1)*stride]; |
791e7b83 MN |
1686 | } |
1687 | } | |
1688 | if(x){ | |
a8d73e56 | 1689 | l= src[x - 1 + y*stride]; |
6b2f6646 MN |
1690 | /*if(x > 1){ |
1691 | if(orientation==1) ll= src[y + (x-2)*stride]; | |
1692 | else ll= src[x - 2 + y*stride]; | |
791e7b83 MN |
1693 | }*/ |
1694 | } | |
78486403 | 1695 | if(parent){ |
a8d73e56 MN |
1696 | int px= x>>1; |
1697 | int py= y>>1; | |
78486403 MN |
1698 | if(px<b->parent->width && py<b->parent->height) |
1699 | p= parent[px + py*2*stride]; | |
1700 | } | |
1701 | if(/*ll|*/l|lt|t|rt|p){ | |
1702 | int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p)); | |
6b2f6646 | 1703 | |
28869757 | 1704 | put_rac(&s->c, &b->state[0][context], !!v); |
791e7b83 MN |
1705 | }else{ |
1706 | if(!run){ | |
1707 | run= runs[run_index++]; | |
4f4e9633 MN |
1708 | |
1709 | put_symbol2(&s->c, b->state[1], run, 3); | |
791e7b83 MN |
1710 | assert(v); |
1711 | }else{ | |
1712 | run--; | |
1713 | assert(!v); | |
1714 | } | |
1715 | } | |
1716 | if(v){ | |
78486403 | 1717 | int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p)); |
538a3841 MN |
1718 | int l2= 2*ABS(l) + (l<0); |
1719 | int t2= 2*ABS(t) + (t<0); | |
6b2f6646 | 1720 | |
0635cbfc | 1721 | put_symbol2(&s->c, b->state[context + 2], ABS(v)-1, context-4); |
538a3841 | 1722 | put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0); |
791e7b83 MN |
1723 | } |
1724 | } | |
1725 | } | |
791e7b83 | 1726 | } |
0ecca7a4 | 1727 | return 0; |
791e7b83 MN |
1728 | } |
1729 | ||
0ecca7a4 | 1730 | static int encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){ |
4f4e9633 MN |
1731 | // encode_subband_qtree(s, b, src, parent, stride, orientation); |
1732 | // encode_subband_z0run(s, b, src, parent, stride, orientation); | |
0ecca7a4 | 1733 | return encode_subband_c0run(s, b, src, parent, stride, orientation); |
4f4e9633 MN |
1734 | // encode_subband_dzr(s, b, src, parent, stride, orientation); |
1735 | } | |
1736 | ||
a0d1931c | 1737 | static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){ |
791e7b83 MN |
1738 | const int w= b->width; |
1739 | const int h= b->height; | |
1740 | int x,y; | |
ea7d9cd4 | 1741 | |
791e7b83 | 1742 | if(1){ |
791e7b83 | 1743 | int run; |
7b49c309 MN |
1744 | int index=0; |
1745 | int prev_index=-1; | |
1746 | int prev2_index=0; | |
1747 | int parent_index= 0; | |
1748 | int prev_parent_index= 0; | |
791e7b83 | 1749 | |
4f4e9633 | 1750 | run= get_symbol2(&s->c, b->state[1], 3); |
791e7b83 | 1751 | for(y=0; y<h; y++){ |
0cea8a03 MN |
1752 | int v=0; |
1753 | int lt=0, t=0, rt=0; | |
1754 | ||
a0d1931c Y |
1755 | if(y && b->x_coeff[prev_index].x == 0){ |
1756 | rt= b->x_coeff[prev_index].coeff; | |
0cea8a03 | 1757 | } |
791e7b83 | 1758 | for(x=0; x<w; x++){ |
0cea8a03 MN |
1759 | int p=0; |
1760 | const int l= v; | |
1761 | ||
1762 | lt= t; t= rt; | |
791e7b83 | 1763 | |
ff765159 | 1764 | if(y){ |
a0d1931c | 1765 | if(b->x_coeff[prev_index].x <= x) |
ff765159 | 1766 | prev_index++; |
a0d1931c Y |
1767 | if(b->x_coeff[prev_index].x == x + 1) |
1768 | rt= b->x_coeff[prev_index].coeff; | |
ff765159 MN |
1769 | else |
1770 | rt=0; | |
1771 | } | |
78486403 | 1772 | if(parent){ |
a0d1931c | 1773 | if(x>>1 > parent->x_coeff[parent_index].x){ |
7b49c309 MN |
1774 | parent_index++; |
1775 | } | |
a0d1931c Y |
1776 | if(x>>1 == parent->x_coeff[parent_index].x){ |
1777 | p= parent->x_coeff[parent_index].coeff; | |
ff765159 | 1778 | } |
78486403 MN |
1779 | } |
1780 | if(/*ll|*/l|lt|t|rt|p){ | |
538a3841 | 1781 | int context= av_log2(/*ABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1)); |
6b2f6646 | 1782 | |
28869757 | 1783 | v=get_rac(&s->c, &b->state[0][context]); |
791e7b83 MN |
1784 | }else{ |
1785 | if(!run){ | |
4f4e9633 | 1786 | run= get_symbol2(&s->c, b->state[1], 3); |
791e7b83 MN |
1787 | v=1; |
1788 | }else{ | |
1789 | run--; | |
1790 | v=0; | |
3c1adccd | 1791 | |
7b49c309 MN |
1792 | if(y && parent){ |
1793 | int max_run; | |
7b49c309 | 1794 | |
a0d1931c Y |
1795 | max_run= FFMIN(run, b->x_coeff[prev_index].x - x - 2); |
1796 | max_run= FFMIN(max_run, 2*parent->x_coeff[parent_index].x - x - 1); | |
7b49c309 MN |
1797 | x+= max_run; |
1798 | run-= max_run; | |
3c1adccd | 1799 | } |
791e7b83 MN |
1800 | } |
1801 | } | |
1802 | if(v){ | |
538a3841 MN |
1803 | int context= av_log2(/*ABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1)); |
1804 | v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1); | |
1805 | v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]); | |
1806 | ||
a0d1931c Y |
1807 | b->x_coeff[index].x=x; |
1808 | b->x_coeff[index++].coeff= v; | |
7b49c309 MN |
1809 | } |
1810 | } | |
a0d1931c | 1811 | b->x_coeff[index++].x= w+1; //end marker |
7b49c309 MN |
1812 | prev_index= prev2_index; |
1813 | prev2_index= index; | |
1814 | ||
1815 | if(parent){ | |
7b49c309 | 1816 | if(y&1){ |
a0d1931c Y |
1817 | while(parent->x_coeff[parent_index].x != parent->width+1) |
1818 | parent_index++; | |
1819 | parent_index++; | |
7b49c309 MN |
1820 | prev_parent_index= parent_index; |
1821 | }else{ | |
1822 | parent_index= prev_parent_index; | |
791e7b83 MN |
1823 | } |
1824 | } | |
1825 | } | |
a0d1931c Y |
1826 | |
1827 | b->x_coeff[index++].x= w+1; //end marker | |
1828 | } | |
1829 | } | |
1830 | ||
1831 | static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){ | |
1832 | const int w= b->width; | |
1833 | int x,y; | |
c97de57c MN |
1834 | const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16); |
1835 | int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); | |
a0d1931c Y |
1836 | int qadd= (s->qbias*qmul)>>QBIAS_SHIFT; |
1837 | int new_index = 0; | |
1838 | ||
1839 | START_TIMER | |
1840 | ||
1841 | if(b->buf == s->spatial_dwt_buffer || s->qlog == LOSSLESS_QLOG){ | |
1842 | qadd= 0; | |
1843 | qmul= 1<<QEXPSHIFT; | |
1844 | } | |
1845 | ||
1846 | /* If we are on the second or later slice, restore our index. */ | |
1847 | if (start_y != 0) | |
1848 | new_index = save_state[0]; | |
1849 | ||
791e7b83 | 1850 | |
a0d1931c Y |
1851 | for(y=start_y; y<h; y++){ |
1852 | int x = 0; | |
1853 | int v; | |
1854 | DWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset; | |
1855 | memset(line, 0, b->width*sizeof(DWTELEM)); | |
1856 | v = b->x_coeff[new_index].coeff; | |
1857 | x = b->x_coeff[new_index++].x; | |
1858 | while(x < w) | |
1859 | { | |
538a3841 MN |
1860 | register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT; |
1861 | register int u= -(v&1); | |
1862 | line[x] = (t^u) - u; | |
1863 | ||
a0d1931c Y |
1864 | v = b->x_coeff[new_index].coeff; |
1865 | x = b->x_coeff[new_index++].x; | |
1866 | } | |
791e7b83 | 1867 | } |
a0d1931c Y |
1868 | if(w > 200 && start_y != 0/*level+1 == s->spatial_decomposition_count*/){ |
1869 | STOP_TIMER("decode_subband") | |
1870 | } | |
1871 | ||
1872 | /* Save our variables for the next slice. */ | |
1873 | save_state[0] = new_index; | |
1874 | ||
1875 | return; | |
791e7b83 MN |
1876 | } |
1877 | ||
1878 | static void reset_contexts(SnowContext *s){ | |
1879 | int plane_index, level, orientation; | |
1880 | ||
19aa028d | 1881 | for(plane_index=0; plane_index<3; plane_index++){ |
791e7b83 MN |
1882 | for(level=0; level<s->spatial_decomposition_count; level++){ |
1883 | for(orientation=level ? 1:0; orientation<4; orientation++){ | |
28869757 | 1884 | memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state)); |
791e7b83 MN |
1885 | } |
1886 | } | |
1887 | } | |
28869757 MN |
1888 | memset(s->header_state, MID_STATE, sizeof(s->header_state)); |
1889 | memset(s->block_state, MID_STATE, sizeof(s->block_state)); | |
155ec6ed MN |
1890 | } |
1891 | ||
1892 | static int alloc_blocks(SnowContext *s){ | |
1893 | int w= -((-s->avctx->width )>>LOG2_MB_SIZE); | |
1894 | int h= -((-s->avctx->height)>>LOG2_MB_SIZE); | |
1895 | ||
1896 | s->b_width = w; | |
1897 | s->b_height= h; | |
1898 | ||
1899 | s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2)); | |
1900 | return 0; | |
1901 | } | |
1902 | ||
28869757 MN |
1903 | static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){ |
1904 | uint8_t *bytestream= d->bytestream; | |
1905 | uint8_t *bytestream_start= d->bytestream_start; | |
155ec6ed | 1906 | *d= *s; |
28869757 MN |
1907 | d->bytestream= bytestream; |
1908 | d->bytestream_start= bytestream_start; | |
155ec6ed MN |
1909 | } |
1910 | ||
//near copy & paste from dsputil, FIXME
/**
 * Adds up all samples of a w x w block.
 *
 * @param pix       top-left sample of the block
 * @param line_size distance between two lines, in bytes
 * @param w         width and height of the block
 * @return the sum of the w*w samples
 */
static int pix_sum(uint8_t * pix, int line_size, int w)
{
    const uint8_t *line = pix;
    int total = 0;
    int row, col;

    for (row = 0; row < w; row++) {
        for (col = 0; col < w; col++)
            total += line[col];
        line += line_size;
    }
    return total;
}
1926 | ||
1927 | //near copy & paste from dsputil, FIXME | |
1928 | static int pix_norm1(uint8_t * pix, int line_size, int w) | |
1929 | { | |
1930 | int s, i, j; | |
1931 | uint32_t *sq = squareTbl + 256; | |
1932 | ||
1933 | s = 0; | |
1934 | for (i = 0; i < w; i++) { | |
1935 | for (j = 0; j < w; j ++) { | |
1936 | s += sq[pix[0]]; | |
1937 | pix ++; | |
1938 | } | |
1939 | pix += line_size - w; | |
1940 | } | |
1941 | return s; | |
1942 | } | |
1943 | ||
1944 | static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int type){ | |
1945 | const int w= s->b_width << s->block_max_depth; | |
1946 | const int rem_depth= s->block_max_depth - level; | |
1947 | const int index= (x + y*w) << rem_depth; | |
1948 | const int block_w= 1<<rem_depth; | |
1949 | BlockNode block; | |
1950 | int i,j; | |
1951 | ||
1952 | block.color[0]= l; | |
1953 | block.color[1]= cb; | |
1954 | block.color[2]= cr; | |
1955 | block.mx= mx; | |
1956 | block.my= my; | |
1957 | block.type= type; | |
1958 | block.level= level; | |
1959 | ||
1960 | for(j=0; j<block_w; j++){ | |
1961 | for(i=0; i<block_w; i++){ | |
1962 | s->block[index + i + j*w]= block; | |
1963 | } | |
1964 | } | |
1965 | } | |
1966 | ||
1967 | static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){ | |
1968 | const int offset[3]= { | |
1969 | y*c-> stride + x, | |
1970 | ((y*c->uvstride + x)>>1), | |
1971 | ((y*c->uvstride + x)>>1), | |
1972 | }; | |
1973 | int i; | |
1974 | for(i=0; i<3; i++){ | |
1975 | c->src[0][i]= src [i]; | |
1976 | c->ref[0][i]= ref [i] + offset[i]; | |
1977 | } | |
1978 | assert(!ref_index); | |
1979 | } | |
1980 | ||
//FIXME copy&paste
/* Names for the rows of a local "int P[10][2]" predictor table; each row is
 * an {x, y} motion vector candidate. */
#define P_LEFT      P[1]   /* vector of the left neighbour      */
#define P_TOP       P[2]   /* vector of the top neighbour       */
#define P_TOPRIGHT  P[3]   /* vector of the top-right neighbour */
#define P_MEDIAN    P[4]   /* median of the three above         */
#define P_MV1       P[9]
#define FLAG_QPEL   1 //must be 1
1988 | ||
1989 | static int encode_q_branch(SnowContext *s, int level, int x, int y){ | |
1990 | uint8_t p_buffer[1024]; | |
1991 | uint8_t i_buffer[1024]; | |
1992 | uint8_t p_state[sizeof(s->block_state)]; | |
1993 | uint8_t i_state[sizeof(s->block_state)]; | |
28869757 MN |
1994 | RangeCoder pc, ic; |
1995 | uint8_t *pbbak= s->c.bytestream; | |
1996 | uint8_t *pbbak_start= s->c.bytestream_start; | |
155ec6ed MN |
1997 | int score, score2, iscore, i_len, p_len, block_s, sum; |
1998 | const int w= s->b_width << s->block_max_depth; | |
1999 | const int h= s->b_height << s->block_max_depth; | |
2000 | const int rem_depth= s->block_max_depth - level; | |
2001 | const int index= (x + y*w) << rem_depth; | |
2002 | const int block_w= 1<<(LOG2_MB_SIZE - level); | |
2003 | static BlockNode null_block= { //FIXME add border maybe | |
2004 | .color= {128,128,128}, | |
2005 | .mx= 0, | |
2006 | .my= 0, | |
2007 | .type= 0, | |
2008 | .level= 0, | |
2009 | }; | |
2010 | int trx= (x+1)<<rem_depth; | |
2011 | int try= (y+1)<<rem_depth; | |
2012 | BlockNode *left = x ? &s->block[index-1] : &null_block; | |
2013 | BlockNode *top = y ? &s->block[index-w] : &null_block; | |
2014 | BlockNode *right = trx<w ? &s->block[index+1] : &null_block; | |
2015 | BlockNode *bottom= try<h ? &s->block[index+w] : &null_block; | |
2016 | BlockNode *tl = y && x ? &s->block[index-w-1] : left; | |
2017 | BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt | |
2018 | int pl = left->color[0]; | |
2019 | int pcb= left->color[1]; | |
2020 | int pcr= left->color[2]; | |
2021 | int pmx= mid_pred(left->mx, top->mx, tr->mx); | |
2022 | int pmy= mid_pred(left->my, top->my, tr->my); | |
2023 | int mx=0, my=0; | |
2024 | int l,cr,cb, i; | |
2025 | const int stride= s->current_picture.linesize[0]; | |
2026 | const int uvstride= s->current_picture.linesize[1]; | |
2027 | const int instride= s->input_picture.linesize[0]; | |
2028 | const int uvinstride= s->input_picture.linesize[1]; | |
2029 | uint8_t *new_l = s->input_picture.data[0] + (x + y* instride)*block_w; | |
2030 | uint8_t *new_cb= s->input_picture.data[1] + (x + y*uvinstride)*block_w/2; | |
2031 | uint8_t *new_cr= s->input_picture.data[2] + (x + y*uvinstride)*block_w/2; | |
2032 | uint8_t current_mb[3][stride*block_w]; | |
2033 | uint8_t *current_data[3]= {¤t_mb[0][0], ¤t_mb[1][0], ¤t_mb[2][0]}; | |
2034 | int P[10][2]; | |
2035 | int16_t last_mv[3][2]; | |
2036 | int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused | |
2037 | const int shift= 1+qpel; | |
2038 | MotionEstContext *c= &s->m.me; | |
2039 | int mx_context= av_log2(2*ABS(left->mx - top->mx)); | |
2040 | int my_context= av_log2(2*ABS(left->my - top->my)); | |
2041 | int s_context= 2*left->level + 2*top->level + tl->level + tr->level; | |
2042 | ||
2043 | assert(sizeof(s->block_state) >= 256); | |
2044 | if(s->keyframe){ | |
2045 | set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, BLOCK_INTRA); | |
2046 | return 0; | |
2047 | } | |
2048 | ||
2049 | //FIXME optimize | |
2050 | for(i=0; i<block_w; i++) | |
2051 | memcpy(¤t_mb[0][0] + stride*i, new_l + instride*i, block_w); | |
2052 | for(i=0; i<block_w>>1; i++) | |
2053 | memcpy(¤t_mb[1][0] + uvstride*i, new_cb + uvinstride*i, block_w>>1); | |
2054 | for(i=0; i<block_w>>1; i++) | |
2055 | memcpy(¤t_mb[2][0] + uvstride*i, new_cr + uvinstride*i, block_w>>1); | |
2056 | ||
2057 | // clip predictors / edge ? | |
2058 | ||
2059 | P_LEFT[0]= left->mx; | |
2060 | P_LEFT[1]= left->my; | |
2061 | P_TOP [0]= top->mx; | |
2062 | P_TOP [1]= top->my; | |
2063 | P_TOPRIGHT[0]= tr->mx; | |
2064 | P_TOPRIGHT[1]= tr->my; | |
2065 | ||
2066 | last_mv[0][0]= s->block[index].mx; | |
2067 | last_mv[0][1]= s->block[index].my; | |
2068 | last_mv[1][0]= right->mx; | |
2069 | last_mv[1][1]= right->my; | |
2070 | last_mv[2][0]= bottom->mx; | |
2071 | last_mv[2][1]= bottom->my; | |
2072 | ||
2073 | s->m.mb_stride=2; | |
2074 | s->m.mb_x= | |
2075 | s->m.mb_y= 0; | |
2076 | s->m.me.skip= 0; | |
2077 | ||
2078 | init_ref(c, current_data, s->last_picture.data, NULL, block_w*x, block_w*y, 0); | |
2079 | ||
2080 | assert(s->m.me. stride == stride); | |
2081 | assert(s->m.me.uvstride == uvstride); | |
2082 | ||
2083 | c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp); | |
2084 | c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp); | |
2085 | c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp); | |
2086 | c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV; | |
2087 | ||
ff158dc9 MN |
2088 | c->xmin = - x*block_w - 16+2; |
2089 | c->ymin = - y*block_w - 16+2; | |
2090 | c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2; | |
2091 | c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2; | |
155ec6ed MN |
2092 | |
2093 | if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift); | |
2094 | if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift); | |
2095 | if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift); | |
2096 | if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift); | |
2097 | if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift); | |
2098 | if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip | |
2099 | if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift); | |
2100 | ||
2101 | P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]); | |
2102 | P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]); | |
2103 | ||
2104 | if (!y) { | |
2105 | c->pred_x= P_LEFT[0]; | |
2106 | c->pred_y= P_LEFT[1]; | |
2107 | } else { | |
2108 | c->pred_x = P_MEDIAN[0]; | |
2109 | c->pred_y = P_MEDIAN[1]; | |
2110 | } | |
2111 | ||
2112 | score= ff_epzs_motion_search(&s->m, &mx, &my, P, 0, /*ref_index*/ 0, last_mv, | |
2113 | (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w); | |
2114 | ||
2115 | assert(mx >= c->xmin); | |
2116 | assert(mx <= c->xmax); | |
2117 | assert(my >= c->ymin); | |
2118 | assert(my <= c->ymax); | |
2119 | ||
2120 | score= s->m.me.sub_motion_search(&s->m, &mx, &my, score, 0, 0, level-LOG2_MB_SIZE+4, block_w); | |
2121 | score= ff_get_mb_score(&s->m, mx, my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0); | |
2122 | //FIXME if mb_cmp != SSE then intra cant be compared currently and mb_penalty vs. lambda2 | |
2123 | ||
2124 | // subpel search | |
2125 | pc= s->c; | |
28869757 MN |
2126 | pc.bytestream_start= |
2127 | pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo | |
155ec6ed MN |
2128 | memcpy(p_state, s->block_state, sizeof(s->block_state)); |
2129 | ||
2130 | if(level!=s->block_max_depth) | |
28869757 MN |
2131 | put_rac(&pc, &p_state[4 + s_context], 1); |
2132 | put_rac(&pc, &p_state[1 + left->type + top->type], 0); | |
155ec6ed MN |
2133 | put_symbol(&pc, &p_state[128 + 32*mx_context], mx - pmx, 1); |
2134 | put_symbol(&pc, &p_state[128 + 32*my_context], my - pmy, 1); | |
28869757 MN |
2135 | p_len= pc.bytestream - pc.bytestream_start; |
2136 | score += (s->lambda2*(p_len*8 | |
2137 | + (pc.outstanding_count - s->c.outstanding_count)*8 | |
2138 | + (-av_log2(pc.range) + av_log2(s->c.range)) | |
2139 | ))>>FF_LAMBDA_SHIFT; | |
155ec6ed MN |
2140 | |
2141 | block_s= block_w*block_w; | |
2142 | sum = pix_sum(¤t_mb[0][0], stride, block_w); | |
2143 | l= (sum + block_s/2)/block_s; | |
2144 | iscore = pix_norm1(¤t_mb[0][0], stride, block_w) - 2*l*sum + l*l*block_s; | |
2145 | ||
2146 | block_s= block_w*block_w>>2; | |
2147 | sum = pix_sum(¤t_mb[1][0], uvstride, block_w>>1); | |
2148 | cb= (sum + block_s/2)/block_s; | |
2149 | // iscore += pix_norm1(¤t_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s; | |
2150 | sum = pix_sum(¤t_mb[2][0], uvstride, block_w>>1); | |
2151 | cr= (sum + block_s/2)/block_s; | |
2152 | // iscore += pix_norm1(¤t_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s; | |
2153 | ||
2154 | ic= s->c; | |
28869757 MN |
2155 | ic.bytestream_start= |
2156 | ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo | |
155ec6ed MN |
2157 | memcpy(i_state, s->block_state, sizeof(s->block_state)); |
2158 | if(level!=s->block_max_depth) | |
28869757 MN |
2159 | put_rac(&ic, &i_state[4 + s_context], 1); |
2160 | put_rac(&ic, &i_state[1 + left->type + top->type], 1); | |
155ec6ed MN |
2161 | put_symbol(&ic, &i_state[32], l-pl , 1); |
2162 | put_symbol(&ic, &i_state[64], cb-pcb, 1); | |
2163 | put_symbol(&ic, &i_state[96], cr-pcr, 1); | |
28869757 MN |
2164 | i_len= ic.bytestream - ic.bytestream_start; |
2165 | iscore += (s->lambda2*(i_len*8 | |
2166 | + (ic.outstanding_count - s->c.outstanding_count)*8 | |
2167 | + (-av_log2(ic.range) + av_log2(s->c.range)) | |
2168 | ))>>FF_LAMBDA_SHIFT; | |
155ec6ed MN |
2169 | |
2170 | // assert(score==256*256*256*64-1); | |
2171 | assert(iscore < 255*255*256 + s->lambda2*10); | |
2172 | assert(iscore >= 0); | |
2173 | assert(l>=0 && l<=255); | |
2174 | assert(pl>=0 && pl<=255); | |
2175 | ||
2176 | if(level==0){ | |
2177 | int varc= iscore >> 8; | |
2178 | int vard= score >> 8; | |
2179 | if (vard <= 64 || vard < varc) | |
2180 | c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc); | |
2181 | else | |
2182 | c->scene_change_score+= s->m.qscale; | |
2183 | } | |
2184 | ||
2185 | if(level!=s->block_max_depth){ | |
28869757 | 2186 | put_rac(&s->c, &s->block_state[4 + s_context], 0); |
155ec6ed MN |
2187 | score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0); |
2188 | score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0); | |
2189 | score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1); | |
2190 | score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1); | |
2191 | score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead | |
2192 | ||
2193 | if(score2 < score && score2 < iscore) | |
2194 | return score2; | |
2195 | } | |
2196 | ||
2197 | if(iscore < score){ | |
28869757 | 2198 | memcpy(pbbak, i_buffer, i_len); |
155ec6ed | 2199 | s->c= ic; |
28869757 MN |
2200 | s->c.bytestream_start= pbbak_start; |
2201 | s->c.bytestream= pbbak + i_len; | |
155ec6ed MN |
2202 | set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, BLOCK_INTRA); |
2203 | memcpy(s->block_state, i_state, sizeof(s->block_state)); | |
2204 | return iscore; | |
2205 | }else{ | |
28869757 | 2206 | memcpy(pbbak, p_buffer, p_len); |
155ec6ed | 2207 | s->c= pc; |
28869757 MN |
2208 | s->c.bytestream_start= pbbak_start; |
2209 | s->c.bytestream= pbbak + p_len; | |
155ec6ed MN |
2210 | set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, 0); |
2211 | memcpy(s->block_state, p_state, sizeof(s->block_state)); | |
2212 | return score; | |
2213 | } | |
2214 | } | |
2215 | ||
2216 | static void decode_q_branch(SnowContext *s, int level, int x, int y){ | |
2217 | const int w= s->b_width << s->block_max_depth; | |
155ec6ed MN |
2218 | const int rem_depth= s->block_max_depth - level; |
2219 | const int index= (x + y*w) << rem_depth; | |
2220 | static BlockNode null_block= { //FIXME add border maybe | |
2221 | .color= {128,128,128}, | |
2222 | .mx= 0, | |
2223 | .my= 0, | |
2224 | .type= 0, | |
2225 | .level= 0, | |
2226 | }; | |
2227 | int trx= (x+1)<<rem_depth; | |
155ec6ed MN |
2228 | BlockNode *left = x ? &s->block[index-1] : &null_block; |
2229 | BlockNode *top = y ? &s->block[index-w] : &null_block; | |
2230 | BlockNode *tl = y && x ? &s->block[index-w-1] : left; | |
2231 | BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt | |
2232 | int s_context= 2*left->level + 2*top->level + tl->level + tr->level; | |
2233 | ||
2234 | if(s->keyframe){ | |
2235 | set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, BLOCK_INTRA); | |
2236 | return; | |
2237 | } | |
2238 | ||
28869757 | 2239 | if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){ |
155ec6ed MN |
2240 | int type; |
2241 | int l = left->color[0]; | |
2242 | int cb= left->color[1]; | |
2243 | int cr= left->color[2]; | |
2244 | int mx= mid_pred(left->mx, top->mx, tr->mx); | |
2245 | int my= mid_pred(left->my, top->my, tr->my); | |
2246 | int mx_context= av_log2(2*ABS(left->mx - top->mx)) + 0*av_log2(2*ABS(tr->mx - top->mx)); | |
2247 | int my_context= av_log2(2*ABS(left->my - top->my)) + 0*av_log2(2*ABS(tr->my - top->my)); | |
2248 | ||
28869757 | 2249 | type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0; |
155ec6ed MN |
2250 | |
2251 | if(type){ | |
2252 | l += get_symbol(&s->c, &s->block_state[32], 1); | |
2253 | cb+= get_symbol(&s->c, &s->block_state[64], 1); | |
2254 | cr+= get_symbol(&s->c, &s->block_state[96], 1); | |
2255 | }else{ | |
2256 | mx+= get_symbol(&s->c, &s->block_state[128 + 32*mx_context], 1); | |
2257 | my+= get_symbol(&s->c, &s->block_state[128 + 32*my_context], 1); | |
2258 | } | |
2259 | set_blocks(s, level, x, y, l, cb, cr, mx, my, type); | |
2260 | }else{ | |
2261 | decode_q_branch(s, level+1, 2*x+0, 2*y+0); | |
2262 | decode_q_branch(s, level+1, 2*x+1, 2*y+0); | |
2263 | decode_q_branch(s, level+1, 2*x+0, 2*y+1); | |
2264 | decode_q_branch(s, level+1, 2*x+1, 2*y+1); | |
2265 | } | |
2266 | } | |
2267 | ||
2268 | static void encode_blocks(SnowContext *s){ | |
2269 | int x, y; | |
2270 | int w= s->b_width; | |
2271 | int h= s->b_height; | |
2272 | ||
2273 | for(y=0; y<h; y++){ | |
d06c75a8 | 2274 | if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit |
0ecca7a4 MN |
2275 | av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n"); |
2276 | return; | |
2277 | } | |
155ec6ed MN |
2278 | for(x=0; x<w; x++){ |
2279 | encode_q_branch(s, 0, x, y); | |
2280 | } | |
2281 | } | |
2282 | } | |
2283 | ||
2284 | static void decode_blocks(SnowContext *s){ | |
2285 | int x, y; | |
2286 | int w= s->b_width; | |
2287 | int h= s->b_height; | |
2288 | ||
2289 | for(y=0; y<h; y++){ | |
2290 | for(x=0; x<w; x++){ | |
2291 | decode_q_branch(s, 0, x, y); | |
2292 | } | |
2293 | } | |
791e7b83 MN |
2294 | } |
2295 | ||
2296 | static void mc_block(uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){ | |
2297 | int x, y; | |
3924dac4 | 2298 | START_TIMER |
791e7b83 MN |
2299 | for(y=0; y < b_h+5; y++){ |
2300 | for(x=0; x < b_w; x++){ | |
3924dac4 MN |
2301 | int a0= src[x ]; |
2302 | int a1= src[x + 1]; | |
2303 | int a2= src[x + 2]; | |
2304 | int a3= src[x + 3]; | |
2305 | int a4= src[x + 4]; | |
2306 | int a5= src[x + 5]; | |
791e7b83 MN |
2307 | // int am= 9*(a1+a2) - (a0+a3); |
2308 | int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5); | |
2309 | // int am= 18*(a2+a3) - 2*(a1+a4); | |
2310 | // int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3; | |
2311 | // int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3; | |
2312 | ||
2313 | // if(b_w==16) am= 8*(a1+a2); | |
2314 | ||
3924dac4 MN |
2315 | if(dx<8) tmp[x]= (32*a2*( 8-dx) + am* dx + 128)>>8; |
2316 | else tmp[x]= ( am*(16-dx) + 32*a3*(dx-8) + 128)>>8; | |
791e7b83 MN |
2317 | |
2318 | /* if (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) + aL* dx + 32)>>6; | |
2319 | else if(dx< 8) tmp[x + y*stride]= ( aL*( 8-dx) + am*(dx- 4) + 32)>>6; | |
2320 | else if(dx<12) tmp[x + y*stride]= ( am*(12-dx) + aR*(dx- 8) + 32)>>6; | |
2321 | else tmp[x + y*stride]= ( aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/ | |
2322 | } | |
3924dac4 MN |
2323 | tmp += stride; |
2324 | src += stride; | |
791e7b83 | 2325 | } |
3924dac4 MN |
2326 | tmp -= (b_h+5)*stride; |
2327 | ||
791e7b83 MN |
2328 | for(y=0; y < b_h; y++){ |
2329 | for(x=0; x < b_w; x++){ | |
3924dac4 MN |
2330 | int a0= tmp[x + 0*stride]; |
2331 | int a1= tmp[x + 1*stride]; | |
2332 | int a2= tmp[x + 2*stride]; | |
2333 | int a3= tmp[x + 3*stride]; | |
2334 | int a4= tmp[x + 4*stride]; | |
2335 | int a5= tmp[x + 5*stride]; | |
791e7b83 MN |
2336 | int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5); |
2337 | // int am= 18*(a2+a3) - 2*(a1+a4); | |
2338 | /* int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3; | |
2339 | int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/ | |
2340 | ||
2341 | // if(b_w==16) am= 8*(a1+a2); | |
2342 | ||
3924dac4 MN |
2343 | if(dy<8) dst[x]= (32*a2*( 8-dy) + am* dy + 128)>>8; |
2344 | else dst[x]= ( am*(16-dy) + 32*a3*(dy-8) + 128)>>8; | |
791e7b83 MN |
2345 | |
2346 | /* if (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) + aL* dy + 32)>>6; | |
2347 | else if(dy< 8) tmp[x + y*stride]= ( aL*( 8-dy) + am*(dy- 4) + 32)>>6; | |
2348 | else if(dy<12) tmp[x + y*stride]= ( am*(12-dy) + aR*(dy- 8) + 32)>>6; | |
2349 | else tmp[x + y*stride]= ( aR*(16-dy) + 16*a2*(dy-12) + 32)>>6;*/ | |
2350 | } | |
3924dac4 MN |
2351 | dst += stride; |
2352 | tmp += stride; | |
791e7b83 | 2353 | } |
3924dac4 | 2354 | STOP_TIMER("mc_block") |
791e7b83 MN |
2355 | } |
2356 | ||
/*
 * mca(DX, DY, SIZE) defines mc_block_hpel<DX><DY><SIZE>(dst, src, stride, h),
 * a fixed-offset wrapper around mc_block() for a SIZE x SIZE block.
 * The source pointer is moved 2 samples left and 2 lines up before the call
 * and a stride*(SIZE+5) byte scratch buffer is taken from the stack.
 * h must equal SIZE (asserted).
 */
#define mca(DX,DY,SIZE)\
static void mc_block_hpel ## DX ## DY ## SIZE(uint8_t *dst, uint8_t *src, int stride, int h){\
    uint8_t tmp[stride*(SIZE+5)];\
    assert(h==SIZE);\
    mc_block(dst, src-2-2*stride, tmp, stride, SIZE, SIZE, DX, DY);\
}

/* 16x16 variants */
mca( 0, 0,16)
mca( 8, 0,16)
mca( 0, 8,16)
mca( 8, 8,16)
/* 8x8 variants */
mca( 0, 0,8)
mca( 8, 0,8)
mca( 0, 8,8)
mca( 8, 8,8)
791e7b83 | 2372 | |
ff158dc9 MN |
2373 | static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){ |
2374 | if(block->type){ | |
2375 | int x, y; | |
2376 | const int color= block->color[plane_index]; | |
2377 | for(y=0; y < b_h; y++){ | |
2378 | for(x=0; x < b_w; x++){ | |
2379 | dst[x + y*stride]= color; | |
2380 | } | |
2381 | } | |
2382 | }else{ | |
2383 | const int scale= plane_index ? s->mv_scale : 2*s->mv_scale; | |
2384 | int mx= block->mx*scale; | |
2385 | int my= block->my*scale; | |
ec697587 MN |
2386 | const int dx= mx&15; |
2387 | const int dy= my&15; | |
ff158dc9 MN |
2388 | sx += (mx>>4) - 2; |
2389 | sy += (my>>4) - 2; | |
2390 | src += sx + sy*stride; | |
2391 | if( (unsigned)sx >= w - b_w - 4 | |
2392 | || (unsigned)sy >= h - b_h - 4){ | |
2393 | ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+5, b_h+5, sx, sy, w, h); | |
2394 | src= tmp + MB_SIZE; | |
2395 | } | |
ec697587 MN |
2396 | if((dx&3) || (dy&3) || b_w!=b_h || (b_w!=4 && b_w!=8 && b_w!=16)) |
2397 | mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy); | |
2398 | else | |
2399 | s->dsp.put_h264_qpel_pixels_tab[2-(b_w>>3)][dy+(dx>>2)](dst,src + 2 + 2*stride,stride); | |
ff158dc9 MN |
2400 | } |
2401 | } | |
2402 | ||
2403 | static always_inline int same_block(BlockNode *a, BlockNode *b){ | |
2404 | return !((a->mx - b->mx) | (a->my - b->my) | a->type | b->type); | |
2405 | } | |
2406 | ||
//FIXME name cleanup (b_w, block_w, b_width stuff)
a0d1931c Y |
2408 | static always_inline void add_yblock_buffered(SnowContext *s, slice_buffer * sb, DWTELEM *old_dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){ |
2409 | DWTELEM * dst = NULL; | |
2410 | const int b_width = s->b_width << s->block_max_depth; | |
2411 | const int b_height= s->b_height << s->block_max_depth; | |
2412 | const int b_stride= b_width; | |
2413 | BlockNode *lt= &s->block[b_x + b_y*b_stride]; | |
2414 | BlockNode *rt= lt+1; | |
2415 | BlockNode *lb= lt+b_stride; | |
2416 | BlockNode *rb= lb+1; | |
2417 | uint8_t *block[4]; | |
2418 | uint8_t tmp[src_stride*(b_h+5)]; //FIXME align | |
2419 | int x,y; | |
2420 | ||
2421 | if(b_x<0){ | |
2422 | lt= rt; | |
2423 | lb= rb; | |
2424 | }else if(b_x + 1 >= b_width){ | |
2425 | rt= lt; | |
2426 | rb= lb; | |
2427 | } | |
2428 | if(b_y<0){ | |
2429 | lt= lb; | |
2430 | rt= rb; | |
2431 | }else if(b_y + 1 >= b_height){ | |
2432 | lb= lt; | |
2433 | rb= rt; | |
2434 | } | |
2435 | ||
2436 | if(src_x<0){ //FIXME merge with prev & always round internal width upto *16 | |
2437 | obmc -= src_x; | |
2438 | b_w += src_x; | |
2439 | src_x=0; | |
2440 | }else if(src_x + b_w > w){ | |
2441 | b_w = w - src_x; | |
2442 | } | |
2443 | if(src_y<0){ | |
2444 | obmc -= src_y*obmc_stride; | |
2445 | b_h += src_y; | |
2446 | src_y=0; | |
2447 | }else if(src_y + b_h> h){ | |
2448 | b_h = h - src_y; | |
2449 | } | |
2450 | ||
2451 | if(b_w<=0 || b_h<=0) return; | |
2452 | ||
2453 | assert(src_stride > 7*MB_SIZE); | |
2454 | // old_dst += src_x + src_y*dst_stride; | |
2455 | dst8+= src_x + src_y*src_stride; | |
2456 | // src += src_x + src_y*src_stride; | |
2457 | ||
2458 | block[0]= tmp+3*MB_SIZE; | |
2459 | pred_block(s, block[0], src, tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h); | |
2460 | ||
2461 | if(same_block(lt, rt)){ | |
2462 | block[1]= block[0]; | |
2463 | }else{ | |
2464 | block[1]= tmp + 4*MB_SIZE; | |
2465 | pred_block(s, block[1], src, tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h); | |
2466 | } | |
2467 | ||
2468 | if(same_block(lt, lb)){ | |
2469 | block[2]= block[0]; | |
2470 | }else if(same_block(rt, lb)){ | |
2471 | block[2]= block[1]; | |
2472 | }else{ | |
2473 | block[2]= tmp+5*MB_SIZE; | |
2474 | pred_block(s, block[2], src, tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h); | |
2475 | } | |
2476 | ||
2477 | if(same_block(lt, rb) ){ | |
2478 | block[3]= block[0]; | |
2479 | }else if(same_block(rt, rb)){ | |
2480 | block[3]= block[1]; | |
2481 | }else if(same_block(lb, rb)){ | |
2482 | block[3]= block[2]; | |
2483 | }else{ | |
2484 | block[3]= tmp+6*MB_SIZE; | |
2485 | pred_block(s, block[3], src, tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h); | |
2486 | } | |
2487 | #if 0 | |
2488 | for(y=0; y<b_h; y++){ | |
2489 | for(x=0; x<b_w; x++){ | |
2490 | int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX); | |
2491 | if(add) dst[x + y*dst_stride] += v; | |
2492 | else dst[x + y*dst_stride] -= v; | |
2493 | } | |
2494 | } | |
2495 | for(y=0; y<b_h; y++){ | |
2496 | uint8_t *obmc2= obmc + (obmc_stride>>1); | |
2497 | for(x=0; x<b_w; x++){ | |
2498 | int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX); | |
2499 | if(add) dst[x + y*dst_stride] += v; | |
2500 | else dst[x + y*dst_stride] -= v; | |
2501 | } | |
2502 | } | |
2503 | for(y=0; y<b_h; y++){ | |
2504 | uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1); | |
2505 | for(x=0; x<b_w; x++){ | |
2506 | int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX); | |
2507 | if(add) dst[x + y*dst_stride] += v; | |
2508 | else dst[x + y*dst_stride] -= v; | |
2509 | } | |
2510 | } | |
2511 | for(y=0; y<b_h; y++){ | |
2512 | uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1); | |
2513 | uint8_t *obmc4= obmc3+ (obmc_stride>>1); | |
2514 | for(x=0; x<b_w; x++){ | |
2515 | int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX); | |
2516 | if(add) dst[x + y*dst_stride] += v; | |
2517 | else dst[x + y*dst_stride] -= v; | |
2518 | } | |
2519 | } | |
2520 | #else | |
2521 | { | |
2522 | ||
2523 | START_TIMER | |
2524 | ||
2525 | int block_index = 0; | |
2526 | for(y=0; y<b_h; y++){ | |
2527 | //FIXME ugly missue of obmc_stride | |
2528 | uint8_t *obmc1= obmc + y*obmc_stride; | |
2529 | uint8_t *obmc2= obmc1+ (obmc_stride>>1); | |
2530 | uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1); | |
2531 | uint8_t *obmc4= obmc3+ (obmc_stride>>1); | |
2532 | dst = slice_buffer_get_line(sb, src_y + y); | |
2533 | for(x=0; x<b_w; x++){ | |
2534 | int v= obmc1[x] * block[3][x + y*src_stride] | |
2535 | +obmc2[x] * block[2][x + y*src_stride] | |
2536 | +obmc3[x] * block[1][x + y*src_stride] | |
2537 | +obmc4[x] * block[0][x + y*src_stride]; | |
2538 | ||
2539 | v <<= 8 - LOG2_OBMC_MAX; | |
2540 | if(FRAC_BITS != 8){ | |
2541 | v += 1<<(7 - FRAC_BITS); | |
2542 | v >>= 8 - FRAC_BITS; | |
2543 | } | |
2544 | if(add){ | |
2545 | // v += old_dst[x + y*dst_stride]; | |
2546 | v += dst[x + src_x]; | |
2547 | v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS; | |
2548 | if(v&(~255)) v= ~(v>>31); | |
2549 | dst8[x + y*src_stride] = v; | |
2550 | }else{ | |
2551 | // old_dst[x + y*dst_stride] -= v; | |
2552 | dst[x + src_x] -= v; | |
2553 | } | |
2554 | } | |
2555 | } | |
2556 | STOP_TIMER("Inner add y block") | |
2557 | } | |
2558 | #endif | |
2559 | } | |
2560 | ||
//FIXME name cleanup (b_w, block_w, b_width stuff)
715a97f0 | 2562 | static always_inline void add_yblock(SnowContext *s, DWTELEM *dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){ |
ff158dc9 MN |
2563 | const int b_width = s->b_width << s->block_max_depth; |
2564 | const int b_height= s->b_height << s->block_max_depth; | |
2565 | const int b_stride= b_width; | |
2566 | BlockNode *lt= &s->block[b_x + b_y*b_stride]; | |
2567 | BlockNode *rt= lt+1; | |
2568 | BlockNode *lb= lt+b_stride; | |
2569 | BlockNode *rb= lb+1; | |
2570 | uint8_t *block[4]; | |
2571 | uint8_t tmp[src_stride*(b_h+5)]; //FIXME align | |
791e7b83 MN |
2572 | int x,y; |
2573 | ||
ff158dc9 MN |
2574 | if(b_x<0){ |
2575 | lt= rt; | |
2576 | lb= rb; | |
2577 | }else if(b_x + 1 >= b_width){ | |
2578 | rt= lt; | |
2579 | rb= lb; | |
791e7b83 | 2580 | } |
ff158dc9 MN |
2581 | if(b_y<0){ |
2582 | lt= lb; | |
2583 | rt= rb; | |
2584 | }else if(b_y + 1 >= b_height){ | |
2585 | lb= lt; | |
2586 | rb= rt; | |
2587 | } | |
2588 | ||
2589 | if(src_x<0){ //FIXME merge with prev & always round internal width upto *16 | |
2590 | obmc -= src_x; | |
2591 | b_w += src_x; | |
2592 | src_x=0; | |
2593 | }else if(src_x + b_w > w){ | |
2594 | b_w = w - src_x; | |
2595 | } | |
2596 | if(src_y<0){ | |
2597 | obmc -= src_y*obmc_stride; | |
2598 | b_h += src_y; | |
2599 | src_y=0; | |
2600 | }else if(src_y + b_h> h){ | |
2601 | b_h = h - src_y; | |
791e7b83 | 2602 | } |
620ab797 | 2603 | |
ff158dc9 | 2604 | if(b_w<=0 || b_h<=0) return; |
155ec6ed | 2605 | |
ff158dc9 MN |
2606 | assert(src_stride > 7*MB_SIZE); |
2607 | dst += src_x + src_y*dst_stride; | |
715a97f0 | 2608 | dst8+= src_x + src_y*src_stride; |
ff158dc9 MN |
2609 | // src += src_x + src_y*src_stride; |
2610 | ||
2611 | block[0]= tmp+3*MB_SIZE; | |
2612 | pred_block(s, block[0], src, tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h); | |
2613 | ||
2614 | if(same_block(lt, rt)){ | |
2615 | block[1]= block[0]; | |
791e7b83 | 2616 | }else{ |
ff158dc9 MN |
2617 | block[1]= tmp + 4*MB_SIZE; |
2618 | pred_block(s, block[1], src, tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h); | |
2619 | } | |
2620 | ||
2621 | if(same_block(lt, lb)){ | |
2622 | block[2]= block[0]; | |
2623 | }else if(same_block(rt, lb)){ | |
2624 | block[2]= block[1]; | |
2625 | }else{ | |
2626 | block[2]= tmp+5*MB_SIZE; | |
2627 | pred_block(s, block[2], src, tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h); | |
2628 | } | |
791e7b83 | 2629 | |
ff158dc9 MN |
2630 | if(same_block(lt, rb) ){ |
2631 | block[3]= block[0]; | |
2632 | }else if(same_block(rt, rb)){ | |
2633 | block[3]= block[1]; | |
2634 | }else if(same_block(lb, rb)){ | |
2635 | block[3]= block[2]; | |
2636 | }else{ | |
2637 | block[3]= tmp+6*MB_SIZE; | |
2638 | pred_block(s, block[3], src, tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h); | |
2639 | } | |
2640 | #if 0 | |
2641 | for(y=0; y<b_h; y++){ | |
2642 | for(x=0; x<b_w; x++){ | |
2643 | int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX); | |
2644 | if(add) dst[x + y*dst_stride] += v; | |
2645 | else dst[x + y*dst_stride] -= v; | |
2646 | } | |
2647 | } | |
2648 | for(y=0; y<b_h; y++){ | |
2649 | uint8_t *obmc2= obmc + (obmc_stride>>1); | |
2650 | for(x=0; x<b_w; x++){ | |
2651 | int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX); | |
2652 | if(add) dst[x + y*dst_stride] += v; | |
2653 | else dst[x + y*dst_stride] -= v; | |
2654 | } | |
2655 | } | |
2656 | for(y=0; y<b_h; y++){ | |
2657 | uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1); | |
2658 | for(x=0; x<b_w; x++){ | |
2659 | int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX); | |
2660 | if(add) dst[x + y*dst_stride] += v; | |
2661 | else dst[x + y*dst_stride] -= v; | |
2662 | } | |
2663 | } | |
2664 | for(y=0; y<b_h; y++){ | |
2665 | uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1); | |
2666 | uint8_t *obmc4= obmc3+ (obmc_stride>>1); | |
2667 | for(x=0; x<b_w; x++){ | |
2668 | int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX); | |
2669 | if(add) dst[x + y*dst_stride] += v; | |
2670 | else dst[x + y*dst_stride] -= v; | |
2671 | } | |
2672 | } | |
2673 | #else | |
2674 | for(y=0; y<b_h; y++){ | |
2675 | //FIXME ugly missue of obmc_stride | |
2676 | uint8_t *obmc1= obmc + y*obmc_stride; | |
2677 | uint8_t *obmc2= obmc1+ (obmc_stride>>1); | |
2678 | uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1); | |
2679 | uint8_t *obmc4= obmc3+ (obmc_stride>>1); | |
2680 | for(x=0; x<b_w; x++){ | |
2681 | int v= obmc1[x] * block[3][x + y*src_stride] | |
2682 | +obmc2[x] * block[2][x + y*src_stride] | |
2683 | +obmc3[x] * block[1][x + y*src_stride] | |
2684 | +obmc4[x] * block[0][x + y*src_stride]; | |
715a97f0 MN |
2685 | |
2686 | v <<= 8 - LOG2_OBMC_MAX; | |
034aff03 MN |
2687 | if(FRAC_BITS != 8){ |
2688 | v += 1<<(7 - FRAC_BITS); | |
2689 | v >>= 8 - FRAC_BITS; | |
2690 | } | |
715a97f0 MN |
2691 | if(add){ |
2692 | v += dst[x + y*dst_stride]; | |
2693 | v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS; | |
2694 | if(v&(~255)) v= ~(v>>31); | |
2695 | dst8[x + y*src_stride] = v; | |
2696 | }else{ | |
2697 | dst[x + y*dst_stride] -= v; | |
2698 | } | |
791e7b83 MN |
2699 | } |
2700 | } | |
ff158dc9 | 2701 | #endif |
791e7b83 MN |
2702 | } |
2703 | ||
a0d1931c Y |
2704 | static always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, DWTELEM * old_buffer, int plane_index, int add, int mb_y){ |
2705 | Plane *p= &s->plane[plane_index]; | |
2706 | const int mb_w= s->b_width << s->block_max_depth; | |
2707 | const int mb_h= s->b_height << s->block_max_depth; | |
2708 | int x, y, mb_x; | |
2709 | int block_size = MB_SIZE >> s->block_max_depth; | |
2710 | int block_w = plane_index ? block_size/2 : block_size; | |
2711 | const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; | |
2712 | int obmc_stride= plane_index ? block_size : 2*block_size; | |
2713 | int ref_stride= s->current_picture.linesize[plane_index]; | |
2714 | uint8_t *ref = s->last_picture.data[plane_index]; | |
2715 | uint8_t *dst8= s->current_picture.data[plane_index]; | |
2716 | int w= p->width; | |
2717 | int h= p->height; | |
2718 | START_TIMER | |
2719 | ||
2720 | if(s->keyframe || (s->avctx->debug&512)){ | |
2721 | if(mb_y==mb_h) | |
2722 | return; | |
2723 | ||
2724 | if(add){ | |
2725 | for(y=block_w*mb_y; y<block_w*(mb_y+1); y++) | |
2726 | { | |
2727 | // DWTELEM * line = slice_buffer_get_line(sb, y); | |
2728 | DWTELEM * line = sb->line[y]; | |
2729 | for(x=0; x<w; x++) | |
2730 | { | |
2731 | // int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1)); | |
2732 | int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1)); | |
2733 | v >>= FRAC_BITS; | |
2734 | if(v&(~255)) v= ~(v>>31); | |
2735 | dst8[x + y*ref_stride]= v; | |
2736 | } | |
2737 | } | |
2738 | }else{ | |
2739 | for(y=block_w*mb_y; y<block_w*(mb_y+1); y++) | |
2740 | { | |
2741 | // DWTELEM * line = slice_buffer_get_line(sb, y); | |
2742 | DWTELEM * line = sb->line[y]; | |
2743 | for(x=0; x<w; x++) | |
2744 | { | |
2745 | line[x] -= 128 << FRAC_BITS; | |
2746 | // buf[x + y*w]-= 128<<FRAC_BITS; | |
2747 | } | |
2748 | } | |
2749 | } | |
2750 | ||
2751 | return; | |
2752 | } | |
2753 | ||
2754 | for(mb_x=0; mb_x<=mb_w; mb_x++){ | |
2755 | START_TIMER | |
2756 | ||
2757 | add_yblock_buffered(s, sb, old_buffer, dst8, ref, obmc, | |
2758 | block_w*mb_x - block_w/2, | |
2759 | block_w*mb_y - block_w/2, | |
2760 | block_w, block_w, | |
2761 | w, h, | |
2762 | w, ref_stride, obmc_stride, | |
2763 | mb_x - 1, mb_y - 1, | |
2764 | add, plane_index); | |
2765 | ||
2766 | STOP_TIMER("add_yblock") | |
2767 | } | |
2768 | ||
2769 | STOP_TIMER("predict_slice") | |
2770 | } | |
2771 | ||
f9e6ebf7 | 2772 | static always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_index, int add, int mb_y){ |
791e7b83 | 2773 | Plane *p= &s->plane[plane_index]; |
155ec6ed MN |
2774 | const int mb_w= s->b_width << s->block_max_depth; |
2775 | const int mb_h= s->b_height << s->block_max_depth; | |
f9e6ebf7 | 2776 | int x, y, mb_x; |
155ec6ed MN |
2777 | int block_size = MB_SIZE >> s->block_max_depth; |
2778 | int block_w = plane_index ? block_size/2 : block_size; | |
ff158dc9 | 2779 | const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; |
155ec6ed | 2780 | int obmc_stride= plane_index ? block_size : 2*block_size; |
715a97f0 | 2781 | int ref_stride= s->current_picture.linesize[plane_index]; |
791e7b83 | 2782 | uint8_t *ref = s->last_picture.data[plane_index]; |
715a97f0 | 2783 | uint8_t *dst8= s->current_picture.data[plane_index]; |
791e7b83 MN |
2784 | int w= p->width; |
2785 | int h= p->height; | |
fff6d4ea | 2786 | START_TIMER |
791e7b83 | 2787 | |
ff158dc9 | 2788 | if(s->keyframe || (s->avctx->debug&512)){ |
f9e6ebf7 LM |
2789 | if(mb_y==mb_h) |
2790 | return; | |
2791 | ||
715a97f0 | 2792 | if(add){ |
f9e6ebf7 | 2793 | for(y=block_w*mb_y; y<block_w*(mb_y+1); y++){ |
715a97f0 MN |
2794 | for(x=0; x<w; x++){ |
2795 | int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1)); | |
2796 | v >>= FRAC_BITS; | |
2797 | if(v&(~255)) v= ~(v>>31); | |
2798 | dst8[x + y*ref_stride]= v; | |
2799 | } | |
2800 | } | |
2801 | }else{ | |
f9e6ebf7 | 2802 | for(y=block_w*mb_y; y<block_w*(mb_y+1); y++){ |
715a97f0 MN |
2803 | for(x=0; x<w; x++){ |
2804 | buf[x + y*w]-= 128<<FRAC_BITS; | |
2805 | } | |
ff158dc9 | 2806 | } |
791e7b83 | 2807 | } |
ff158dc9 MN |
2808 | |
2809 | return; | |
791e7b83 MN |
2810 | } |
2811 | ||
ff158dc9 | 2812 | for(mb_x=0; mb_x<=mb_w; mb_x++){ |
fff6d4ea | 2813 | START_TIMER |
ff158dc9 | 2814 | |
715a97f0 | 2815 | add_yblock(s, buf, dst8, ref, obmc, |
ff158dc9 | 2816 | block_w*mb_x - block_w/2, |
791e7b83 | 2817 | block_w*mb_y - block_w/2, |
ff158dc9 | 2818 | block_w, block_w, |
791e7b83 | 2819 | w, h, |
ff158dc9 MN |
2820 | w, ref_stride, obmc_stride, |
2821 | mb_x - 1, mb_y - 1, | |
2822 | add, plane_index); | |
2823 | ||
2824 | STOP_TIMER("add_yblock") | |
791e7b83 | 2825 | } |
fff6d4ea | 2826 | |
f9e6ebf7 LM |
2827 | STOP_TIMER("predict_slice") |
2828 | } | |
2829 | ||
2830 | static always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){ | |
2831 | const int mb_h= s->b_height << s->block_max_depth; | |
2832 | int mb_y; | |
2833 | for(mb_y=0; mb_y<=mb_h; mb_y++) | |
2834 | predict_slice(s, buf, plane_index, add, mb_y); | |
791e7b83 MN |
2835 | } |
2836 | ||
2837 | static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int bias){ | |
2838 | const int level= b->level; | |
2839 | const int w= b->width; | |
2840 | const int h= b->height; | |
c97de57c MN |
2841 | const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16); |
2842 | const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); | |
da66b631 MN |
2843 | int x,y, thres1, thres2; |
2844 | START_TIMER | |
791e7b83 | 2845 | |
93fbdb5a MN |
2846 | if(s->qlog == LOSSLESS_QLOG) return; |
2847 | ||
791e7b83 | 2848 | bias= bias ? 0 : (3*qmul)>>3; |
da66b631 MN |
2849 | thres1= ((qmul - bias)>>QEXPSHIFT) - 1; |
2850 | thres2= 2*thres1; | |
791e7b83 MN |
2851 | |
2852 | if(!bias){ | |
2853 | for(y=0; y<h; y++){ | |
2854 | for(x=0; x<w; x++){ | |
da66b631 MN |
2855 | int i= src[x + y*stride]; |
2856 | ||
2857 | if((unsigned)(i+thres1) > thres2){ | |
2858 | if(i>=0){ | |
2859 | i<<= QEXPSHIFT; | |
2860 | i/= qmul; //FIXME optimize | |
2861 | src[x + y*stride]= i; | |
2862 | }else{ | |
2863 | i= -i; | |
2864 | i<<= QEXPSHIFT; | |
2865 | i/= qmul; //FIXME optimize | |
2866 | src[x + y*stride]= -i; | |
2867 | } | |
2868 | }else | |
2869 | src[x + y*stride]= 0; | |
791e7b83 MN |
2870 | } |
2871 | } | |
2872 | }else{ | |
2873 | for(y=0; y<h; y++){ | |
2874 | for(x=0; x<w; x++){ | |
2875 | int i= src[x + y*stride]; | |
2876 | ||
da66b631 MN |
2877 | if((unsigned)(i+thres1) > thres2){ |
2878 | if(i>=0){ | |
2879 | i<<= QEXPSHIFT; | |
2880 | i= (i + bias) / qmul; //FIXME optimize | |
2881 | src[x + y*stride]= i; | |
2882 | }else{ | |
2883 | i= -i; | |
2884 | i<<= QEXPSHIFT; | |
2885 | i= (i + bias) / qmul; //FIXME optimize | |
2886 | src[x + y*stride]= -i; | |
2887 | } | |
2888 | }else | |
2889 | src[x + y*stride]= 0; | |
791e7b83 MN |
2890 | } |
2891 | } | |
2892 | } | |
da66b631 MN |
2893 | if(level+1 == s->spatial_decomposition_count){ |
2894 | // STOP_TIMER("quantize") | |
2895 | } | |
791e7b83 MN |
2896 | } |
2897 | ||
a0d1931c Y |
2898 | static void dequantize_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride){ |
2899 | const int w= b->width; | |
2900 | const int h= b->height; | |
c97de57c MN |
2901 | const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16); |
2902 | const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); | |
a0d1931c Y |
2903 | const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT; |
2904 | int x,y; | |
2905 | START_TIMER | |
2906 | ||
2907 | if(s->qlog == LOSSLESS_QLOG) return; | |
2908 | ||
a0d1931c Y |
2909 | for(y=0; y<h; y++){ |
2910 | // DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride)); | |
2911 | DWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset; | |
2912 | for(x=0; x<w; x++){ | |
2913 | int i= line[x]; | |
2914 | if(i<0){ | |
2915 | line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias | |
2916 | }else if(i>0){ | |
2917 | line[x]= (( i*qmul + qadd)>>(QEXPSHIFT)); | |
2918 | } | |
2919 | } | |
2920 | } | |
2921 | if(w > 200 /*level+1 == s->spatial_decomposition_count*/){ | |
2922 | STOP_TIMER("dquant") | |
2923 | } | |
2924 | } | |
2925 | ||
791e7b83 | 2926 | static void dequantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride){ |
791e7b83 MN |
2927 | const int w= b->width; |
2928 | const int h= b->height; | |
c97de57c MN |
2929 | const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16); |
2930 | const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); | |
791e7b83 MN |
2931 | const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT; |
2932 | int x,y; | |
ea7d9cd4 | 2933 | START_TIMER |
791e7b83 | 2934 | |
93fbdb5a MN |
2935 | if(s->qlog == LOSSLESS_QLOG) return; |
2936 | ||
791e7b83 MN |
2937 | for(y=0; y<h; y++){ |
2938 | for(x=0; x<w; x++){ | |
2939 | int i= src[x + y*stride]; | |
2940 | if(i<0){ | |
2941 | src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias | |
2942 | }else if(i>0){ | |
2943 | src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT)); | |
2944 | } | |
2945 | } | |
2946 | } | |
ea7d9cd4 MN |
2947 | if(w > 200 /*level+1 == s->spatial_decomposition_count*/){ |
2948 | STOP_TIMER("dquant") | |
2949 | } | |
791e7b83 MN |
2950 | } |
2951 | ||
2952 | static void decorrelate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){ | |
2953 | const int w= b->width; | |
2954 | const int h= b->height; | |
2955 | int x,y; | |
2956 | ||
2957 | for(y=h-1; y>=0; y--){ | |
2958 | for(x=w-1; x>=0; x--){ | |
2959 | int i= x + y*stride; | |
2960 | ||
2961 | if(x){ | |
2962 | if(use_median){ | |
2963 | if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]); | |
2964 | else src[i] -= src[i - 1]; | |
2965 | }else{ | |
2966 | if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]); | |
2967 | else src[i] -= src[i - 1]; | |
2968 | } | |
2969 | }else{ | |
2970 | if(y) src[i] -= src[i - stride]; | |
2971 | } | |
2972 | } | |
2973 | } | |
2974 | } | |
2975 | ||
a0d1931c Y |
2976 | static void correlate_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){ |
2977 | const int w= b->width; | |
2978 | const int h= b->height; | |
2979 | int x,y; | |
2980 | ||
2981 | // START_TIMER | |
2982 | ||
2983 | DWTELEM * line; | |
2984 | DWTELEM * prev; | |
2985 | ||
2986 | for(y=0; y<h; y++){ | |
2987 | prev = line; | |
2988 | // line = slice_buffer_get_line_from_address(sb, src + (y * stride)); | |
2989 | line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset; | |
2990 | for(x=0; x<w; x++){ | |
2991 | if(x){ | |
2992 | if(use_median){ | |
2993 | if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]); | |
2994 | else line[x] += line[x - 1]; | |
2995 | }else{ | |
2996 | if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]); | |
2997 | else line[x] += line[x - 1]; | |
2998 | } | |
2999 | }else{ | |
3000 | if(y) line[x] += prev[x]; | |
3001 | } | |
3002 | } | |
3003 | } | |
3004 | ||
3005 | // STOP_TIMER("correlate") | |
3006 | } | |
3007 | ||
791e7b83 MN |
3008 | static void correlate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){ |
3009 | const int w= b->width; | |
3010 | const int h= b->height; | |
3011 | int x,y; | |
3012 | ||
3013 | for(y=0; y<h; y++){ | |
3014 | for(x=0; x<w; x++){ | |
3015 | int i= x + y*stride; | |
3016 | ||
3017 | if(x){ | |
3018 | if(use_median){ | |
3019 | if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]); | |
3020 | else src[i] += src[i - 1]; | |
3021 | }else{ | |
3022 | if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]); | |
3023 | else src[i] += src[i - 1]; | |
3024 | } | |
3025 | }else{ | |
3026 | if(y) src[i] += src[i - stride]; | |
3027 | } | |
3028 | } | |
3029 | } | |
3030 | } | |
3031 | ||
3032 | static void encode_header(SnowContext *s){ | |
3033 | int plane_index, level, orientation; | |
28869757 MN |
3034 | uint8_t kstate[32]; |
3035 | ||
3036 | memset(kstate, MID_STATE, sizeof(kstate)); | |
791e7b83 | 3037 | |
28869757 | 3038 | put_rac(&s->c, kstate, s->keyframe); |
19aa028d MN |
3039 | if(s->keyframe || s->always_reset) |
3040 | reset_contexts(s); | |
791e7b83 MN |
3041 | if(s->keyframe){ |
3042 | put_symbol(&s->c, s->header_state, s->version, 0); | |
28869757 | 3043 | put_rac(&s->c, s->header_state, s->always_reset); |
791e7b83 MN |
3044 | put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0); |
3045 | put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0); | |
3046 | put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0); | |
3047 | put_symbol(&s->c, s->header_state, s->colorspace_type, 0); | |
791e7b83 MN |
3048 | put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0); |
3049 | put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0); | |
28869757 MN |
3050 | put_rac(&s->c, s->header_state, s->spatial_scalability); |
3051 | // put_rac(&s->c, s->header_state, s->rate_scalability); | |
791e7b83 MN |
3052 | |
3053 | for(plane_index=0; plane_index<2; plane_index++){ | |
3054 | for(level=0; level<s->spatial_decomposition_count; level++){ | |
3055 | for(orientation=level ? 1:0; orientation<4; orientation++){ | |
3056 | if(orientation==2) continue; | |
3057 | put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1); | |
3058 | } | |
3059 | } | |
3060 | } | |
3061 | } | |
3062 | put_symbol(&s->c, s->header_state, s->spatial_decomposition_type, 0); | |
3063 | put_symbol(&s->c, s->header_state, s->qlog, 1); | |
3064 | put_symbol(&s->c, s->header_state, s->mv_scale, 0); | |
3065 | put_symbol(&s->c, s->header_state, s->qbias, 1); | |
155ec6ed | 3066 | put_symbol(&s->c, s->header_state, s->block_max_depth, 0); |
791e7b83 MN |
3067 | } |
3068 | ||
3069 | static int decode_header(SnowContext *s){ | |
3070 | int plane_index, level, orientation; | |
28869757 MN |
3071 | uint8_t kstate[32]; |
3072 | ||
3073 | memset(kstate, MID_STATE, sizeof(kstate)); | |
791e7b83 | 3074 | |
28869757 | 3075 | s->keyframe= get_rac(&s->c, kstate); |
19aa028d MN |
3076 | if(s->keyframe || s->always_reset) |
3077 | reset_contexts(s); | |
791e7b83 MN |
3078 | if(s->keyframe){ |
3079 | s->version= get_symbol(&s->c, s->header_state, 0); | |
3080 | if(s->version>0){ | |
3081 | av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version); | |
3082 | return -1; | |
3083 | } | |
28869757 | 3084 | s->always_reset= get_rac(&s->c, s->header_state); |
791e7b83 MN |
3085 | s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0); |
3086 | s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0); | |
3087 | s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0); | |
3088 | s->colorspace_type= get_symbol(&s->c, s->header_state, 0); | |
791e7b83 MN |
3089 | s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0); |
3090 | s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0); | |
28869757 MN |
3091 | s->spatial_scalability= get_rac(&s->c, s->header_state); |
3092 | // s->rate_scalability= get_rac(&s->c, s->header_state); | |
791e7b83 MN |
3093 | |
3094 | for(plane_index=0; plane_index<3; plane_index++){ | |
3095 | for(level=0; level<s->spatial_decomposition_count; level++){ | |
3096 | for(orientation=level ? 1:0; orientation<4; orientation++){ | |
3097 | int q; | |
3098 | if (plane_index==2) q= s->plane[1].band[level][orientation].qlog; | |
3099 | else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog; | |
3100 | else q= get_symbol(&s->c, s->header_state, 1); | |
3101 | s->plane[plane_index].band[level][orientation].qlog= q; | |
3102 | } | |
3103 | } | |
3104 | } | |
3105 | } | |
3106 | ||
3107 | s->spatial_decomposition_type= get_symbol(&s->c, s->header_state, 0); | |
3108 | if(s->spatial_decomposition_type > 2){ | |
3109 | av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type); | |
3110 | return -1; | |
3111 | } | |
3112 | ||
3113 | s->qlog= get_symbol(&s->c, s->header_state, 1); | |
3114 | s->mv_scale= get_symbol(&s->c, s->header_state, 0); | |
3115 | s->qbias= get_symbol(&s->c, s->header_state, 1); | |
155ec6ed | 3116 | s->block_max_depth= get_symbol(&s->c, s->header_state, 0); |
791e7b83 MN |
3117 | |
3118 | return 0; | |
3119 | } | |
3120 | ||
c97de57c MN |
3121 | static void init_qexp(){ |
3122 | int i; | |
3123 | double v=128; | |
3124 | ||
3125 | for(i=0; i<QROOT; i++){ | |
3126 | qexp[i]= lrintf(v); | |
3127 | v *= pow(2, 1.0 / QROOT); | |
3128 | } | |
3129 | } | |
3130 | ||
791e7b83 MN |
3131 | static int common_init(AVCodecContext *avctx){ |
3132 | SnowContext *s = avctx->priv_data; | |
3133 | int width, height; | |
3134 | int level, orientation, plane_index, dec; | |
3135 | ||
3136 | s->avctx= avctx; | |
3137 | ||
3138 | dsputil_init(&s->dsp, avctx); | |
3139 | ||
3140 | #define mcf(dx,dy)\ | |
3141 | s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\ | |
3142 | s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\ | |
d92b5807 MN |
3143 | s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\ |
3144 | s->dsp.put_qpel_pixels_tab [1][dy+dx/4]=\ | |
3145 | s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\ | |
3146 | s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4]; | |
791e7b83 MN |
3147 | |
3148 | mcf( 0, 0) | |
3149 | mcf( 4, 0) | |
3150 | mcf( 8, 0) | |
3151 | mcf(12, 0) | |
3152 | mcf( 0, 4) | |
3153 | mcf( 4, 4) | |
3154 | mcf( 8, 4) | |
3155 | mcf(12, 4) | |
3156 | mcf( 0, 8) | |
3157 | mcf( 4, 8) | |
3158 | mcf( 8, 8) | |
3159 | mcf(12, 8) | |
3160 | mcf( 0,12) | |
3161 | mcf( 4,12) | |
3162 | mcf( 8,12) | |
3163 | mcf(12,12) | |
3164 | ||
3165 | #define mcfh(dx,dy)\ | |
3166 | s->dsp.put_pixels_tab [0][dy/4+dx/8]=\ | |
3167 | s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\ | |
d92b5807 MN |
3168 | mc_block_hpel ## dx ## dy ## 16;\ |
3169 | s->dsp.put_pixels_tab [1][dy/4+dx/8]=\ | |
3170 | s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\ | |
3171 | mc_block_hpel ## dx ## dy ## 8; | |
791e7b83 MN |
3172 | |
3173 | mcfh(0, 0) | |
3174 | mcfh(8, 0) | |
3175 | mcfh(0, 8) | |
3176 | mcfh(8, 8) | |
c97de57c MN |
3177 | |
3178 | if(!qexp[0]) | |
3179 | init_qexp(); | |
3180 | ||
791e7b83 MN |
3181 | dec= s->spatial_decomposition_count= 5; |
3182 | s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type | |
3183 | ||
3184 | s->chroma_h_shift= 1; //FIXME XXX | |
3185 | s->chroma_v_shift= 1; | |
3186 | ||
3187 | // dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift); | |
3188 | ||
155ec6ed MN |
3189 | width= s->avctx->width; |
3190 | height= s->avctx->height; | |
3191 | ||
3192 | s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); | |
791e7b83 MN |
3193 | |
3194 | s->mv_scale= (s->avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4; | |
155ec6ed | 3195 | s->block_max_depth= (s->avctx->flags & CODEC_FLAG_4MV) ? 1 : 0; |
791e7b83 MN |
3196 | |
3197 | for(plane_index=0; plane_index<3; plane_index++){ | |
3198 | int w= s->avctx->width; | |
3199 | int h= s->avctx->height; | |
3200 | ||
3201 | if(plane_index){ | |
3202 | w>>= s->chroma_h_shift; | |
3203 | h>>= s->chroma_v_shift; | |
3204 | } | |
3205 | s->plane[plane_index].width = w; | |
3206 | s->plane[plane_index].height= h; | |
3bb9f096 | 3207 | //av_log(NULL, AV_LOG_DEBUG, "%d %d\n", w, h); |
791e7b83 MN |
3208 | for(level=s->spatial_decomposition_count-1; level>=0; level--){ |
3209 | for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
3210 | SubBand *b= &s->plane[plane_index].band[level][orientation]; | |
3211 | ||
3212 | b->buf= s->spatial_dwt_buffer; | |
3213 | b->level= level; | |
3214 | b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level); | |
3215 | b->width = (w + !(orientation&1))>>1; | |
3216 | b->height= (h + !(orientation>1))>>1; | |
3217 | ||
a0d1931c Y |
3218 | b->stride_line = 1 << (s->spatial_decomposition_count - level); |
3219 | b->buf_x_offset = 0; | |
3220 | b->buf_y_offset = 0; | |
3221 | ||
3222 | if(orientation&1){ | |
3223 | b->buf += (w+1)>>1; | |
3224 | b->buf_x_offset = (w+1)>>1; | |
3225 | } | |
3226 | if(orientation>1){ | |
3227 | b->buf += b->stride>>1; | |
3228 | b->buf_y_offset = b->stride_line >> 1; | |
3229 | } | |
791e7b83 MN |
3230 | |
3231 | if(level) | |
3232 | b->parent= &s->plane[plane_index].band[level-1][orientation]; | |
a0d1931c | 3233 | b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff)); |
791e7b83 MN |
3234 | } |
3235 | w= (w+1)>>1; | |
3236 | h= (h+1)>>1; | |
3237 | } | |
3238 | } | |
3239 | ||
791e7b83 MN |
3240 | reset_contexts(s); |
3241 | /* | |
3242 | width= s->width= avctx->width; | |
3243 | height= s->height= avctx->height; | |
3244 | ||
3245 | assert(width && height); | |
3246 | */ | |
3247 | s->avctx->get_buffer(s->avctx, &s->mconly_picture); | |
3248 | ||
3249 | return 0; | |
3250 | } | |
3251 | ||
3252 | ||
3253 | static void calculate_vissual_weight(SnowContext *s, Plane *p){ | |
3254 | int width = p->width; | |
3255 | int height= p->height; | |
39c61bbb | 3256 | int level, orientation, x, y; |
791e7b83 MN |
3257 | |
3258 | for(level=0; level<s->spatial_decomposition_count; level++){ | |
3259 | for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
3260 | SubBand *b= &p->band[level][orientation]; | |
3261 | DWTELEM *buf= b->buf; | |
3262 | int64_t error=0; | |
3263 | ||
3264 | memset(s->spatial_dwt_buffer, 0, sizeof(int)*width*height); | |
3265 | buf[b->width/2 + b->height/2*b->stride]= 256*256; | |
46c281e8 | 3266 | ff_spatial_idwt(s->spatial_dwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count); |
791e7b83 MN |
3267 | for(y=0; y<height; y++){ |
3268 | for(x=0; x<width; x++){ | |
3269 | int64_t d= s->spatial_dwt_buffer[x + y*width]; | |
3270 | error += d*d; | |
3271 | } | |
3272 | } | |
3273 | ||
3274 | b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5); | |
46c281e8 | 3275 | // av_log(NULL, AV_LOG_DEBUG, "%d %d %d\n", level, orientation, b->qlog/*, sqrt(error)*/); |
791e7b83 MN |
3276 | } |
3277 | } | |
3278 | } | |
3279 | ||
3280 | static int encode_init(AVCodecContext *avctx) | |
3281 | { | |
3282 | SnowContext *s = avctx->priv_data; | |
39c61bbb | 3283 | int plane_index; |
791e7b83 | 3284 | |
2ff9ff5b MN |
3285 | if(avctx->strict_std_compliance >= 0){ |
3286 | av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it wont be decodeable with future versions!!!\n" | |
53513831 | 3287 | "use vstrict=-1 / -strict -1 to use it anyway\n"); |
2ff9ff5b MN |
3288 | return -1; |
3289 | } | |
3290 | ||
791e7b83 | 3291 | common_init(avctx); |
155ec6ed | 3292 | alloc_blocks(s); |
791e7b83 MN |
3293 | |
3294 | s->version=0; | |
3295 | ||
3296 | s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t)); | |
3297 | s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t)); | |
3298 | s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t)); | |
791e7b83 MN |
3299 | h263_encode_init(&s->m); //mv_penalty |
3300 | ||
3301 | for(plane_index=0; plane_index<3; plane_index++){ | |
3302 | calculate_vissual_weight(s, &s->plane[plane_index]); | |
3303 | } | |
3304 | ||
3305 | ||
3306 | avctx->coded_frame= &s->current_picture; | |
3307 | switch(avctx->pix_fmt){ | |
3308 | // case PIX_FMT_YUV444P: | |
3309 | // case PIX_FMT_YUV422P: | |
3310 | case PIX_FMT_YUV420P: | |
3311 | case PIX_FMT_GRAY8: | |
3312 | // case PIX_FMT_YUV411P: | |
3313 | // case PIX_FMT_YUV410P: | |
3314 | s->colorspace_type= 0; | |
3315 | break; | |
3316 | /* case PIX_FMT_RGBA32: | |
3317 | s->colorspace= 1; | |
3318 | break;*/ | |
3319 | default: | |
3320 | av_log(avctx, AV_LOG_ERROR, "format not supported\n"); | |
3321 | return -1; | |
3322 | } | |
3323 | // avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift); | |
3324 | s->chroma_h_shift= 1; | |
3325 | s->chroma_v_shift= 1; | |
3326 | return 0; | |
3327 | } | |
3328 | ||
3329 | static int frame_start(SnowContext *s){ | |
3330 | AVFrame tmp; | |
64886072 MN |
3331 | int w= s->avctx->width; //FIXME round up to x16 ? |
3332 | int h= s->avctx->height; | |
791e7b83 | 3333 | |
64886072 MN |
3334 | if(s->current_picture.data[0]){ |
3335 | draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w , h , EDGE_WIDTH ); | |
3336 | draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2); | |
3337 | draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2); | |
3338 | } | |
3339 | ||
791e7b83 MN |
3340 | tmp= s->last_picture; |
3341 | s->last_picture= s->current_picture; | |
3342 | s->current_picture= tmp; | |
3343 | ||
3344 | s->current_picture.reference= 1; | |
3345 | if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){ | |
3346 | av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n"); | |
3347 | return -1; | |
3348 | } | |
3349 | ||
3350 | return 0; | |
3351 | } | |
3352 | ||
3353 | static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){ | |
3354 | SnowContext *s = avctx->priv_data; | |
28869757 | 3355 | RangeCoder * const c= &s->c; |
791e7b83 MN |
3356 | AVFrame *pict = data; |
3357 | const int width= s->avctx->width; | |
3358 | const int height= s->avctx->height; | |
39c61bbb | 3359 | int level, orientation, plane_index; |
791e7b83 | 3360 | |
28869757 MN |
3361 | ff_init_range_encoder(c, buf, buf_size); |
3362 | ff_build_rac_states(c, 0.05*(1LL<<32), 256-8); | |
791e7b83 MN |
3363 | |
3364 | s->input_picture = *pict; | |
3365 | ||
791e7b83 MN |
3366 | s->keyframe=avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0; |
3367 | pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE; | |
3368 | ||
93fbdb5a MN |
3369 | if(pict->quality){ |
3370 | s->qlog= rint(QROOT*log(pict->quality / (float)FF_QP2LAMBDA)/log(2)); | |
3371 | //<64 >60 | |
c97de57c | 3372 | s->qlog += 61*QROOT/8; |
93fbdb5a MN |
3373 | }else{ |
3374 | s->qlog= LOSSLESS_QLOG; | |
3375 | } | |
791e7b83 | 3376 | |
791e7b83 | 3377 | frame_start(s); |
19aa028d | 3378 | s->current_picture.key_frame= s->keyframe; |
791e7b83 MN |
3379 | |
3380 | if(pict->pict_type == P_TYPE){ | |
3381 | int block_width = (width +15)>>4; | |
3382 | int block_height= (height+15)>>4; | |
3383 | int stride= s->current_picture.linesize[0]; | |
791e7b83 MN |
3384 | |
3385 | assert(s->current_picture.data[0]); | |
3386 | assert(s->last_picture.data[0]); | |
3387 | ||
3388 | s->m.avctx= s->avctx; | |
3389 | s->m.current_picture.data[0]= s->current_picture.data[0]; | |
3390 | s->m. last_picture.data[0]= s-> last_picture.data[0]; | |
3391 | s->m. new_picture.data[0]= s-> input_picture.data[0]; | |
3392 | s->m.current_picture_ptr= &s->m.current_picture; | |
3393 | s->m. last_picture_ptr= &s->m. last_picture; | |
3394 | s->m.linesize= | |
3395 | s->m. last_picture.linesize[0]= | |
3396 | s->m. new_picture.linesize[0]= | |
3397 | s->m.current_picture.linesize[0]= stride; | |
155ec6ed | 3398 | s->m.uvlinesize= s->current_picture.linesize[1]; |
791e7b83 MN |
3399 | s->m.width = width; |
3400 | s->m.height= height; | |
3401 | s->m.mb_width = block_width; | |
3402 | s->m.mb_height= block_height; | |
3403 | s->m.mb_stride= s->m.mb_width+1; | |
3404 | s->m.b8_stride= 2*s->m.mb_width+1; | |
3405 | s->m.f_code=1; | |
3406 | s->m.pict_type= pict->pict_type; | |
3407 | s->m.me_method= s->avctx->me_method; | |
3408 | s->m.me.scene_change_score=0; | |
3409 | s->m.flags= s->avctx->flags; | |
3410 | s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0; | |
3411 | s->m.out_format= FMT_H263; | |
3412 | s->m.unrestricted_mv= 1; | |
3413 | ||
155ec6ed | 3414 | s->lambda = s->m.lambda= pict->quality * 3/2; //FIXME bug somewhere else |
791e7b83 | 3415 | s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7); |
155ec6ed | 3416 | s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT; |
791e7b83 | 3417 | |
791e7b83 MN |
3418 | s->m.dsp= s->dsp; //move |
3419 | ff_init_me(&s->m); | |
791e7b83 | 3420 | } |
791e7b83 | 3421 | |
155ec6ed MN |
3422 | redo_frame: |
3423 | ||
791e7b83 MN |
3424 | s->qbias= pict->pict_type == P_TYPE ? 2 : 0; |
3425 | ||
3426 | encode_header(s); | |
155ec6ed MN |
3427 | encode_blocks(s); |
3428 | ||
791e7b83 MN |
3429 | for(plane_index=0; plane_index<3; plane_index++){ |
3430 | Plane *p= &s->plane[plane_index]; | |
3431 | int w= p->width; | |
3432 | int h= p->height; | |
3433 | int x, y; | |
39c61bbb | 3434 | // int bits= put_bits_count(&s->c.pb); |
791e7b83 MN |
3435 | |
3436 | //FIXME optimize | |
791e7b83 MN |
3437 | if(pict->data[plane_index]) //FIXME gray hack |
3438 | for(y=0; y<h; y++){ | |
3439 | for(x=0; x<w; x++){ | |
034aff03 | 3440 | s->spatial_dwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS; |
791e7b83 MN |
3441 | } |
3442 | } | |
3443 | predict_plane(s, s->spatial_dwt_buffer, plane_index, 0); | |
155ec6ed MN |
3444 | |
3445 | if( plane_index==0 | |
3446 | && pict->pict_type == P_TYPE | |
3447 | && s->m.me.scene_change_score > s->avctx->scenechange_threshold){ | |
28869757 MN |
3448 | ff_init_range_encoder(c, buf, buf_size); |
3449 | ff_build_rac_states(c, 0.05*(1LL<<32), 256-8); | |
155ec6ed MN |
3450 | pict->pict_type= FF_I_TYPE; |
3451 | s->keyframe=1; | |
3452 | reset_contexts(s); | |
3453 | goto redo_frame; | |
3454 | } | |
3455 | ||
93fbdb5a MN |
3456 | if(s->qlog == LOSSLESS_QLOG){ |
3457 | for(y=0; y<h; y++){ | |
3458 | for(x=0; x<w; x++){ | |
3cff4572 | 3459 | s->spatial_dwt_buffer[y*w + x]= (s->spatial_dwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS; |
93fbdb5a MN |
3460 | } |
3461 | } | |
3462 | } | |
791e7b83 | 3463 | |
46c281e8 | 3464 | ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count); |
93fbdb5a | 3465 | |
791e7b83 MN |
3466 | for(level=0; level<s->spatial_decomposition_count; level++){ |
3467 | for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
3468 | SubBand *b= &p->band[level][orientation]; | |
3469 | ||
3470 | quantize(s, b, b->buf, b->stride, s->qbias); | |
3471 | if(orientation==0) | |
3472 | decorrelate(s, b, b->buf, b->stride, pict->pict_type == P_TYPE, 0); | |
3473 | encode_subband(s, b, b->buf, b->parent ? b->parent->buf : NULL, b->stride, orientation); | |
3474 | assert(b->parent==NULL || b->parent->stride == b->stride*2); | |
3475 | if(orientation==0) | |
3476 | correlate(s, b, b->buf, b->stride, 1, 0); | |
3477 | } | |
3478 | } | |
3479 | // av_log(NULL, AV_LOG_DEBUG, "plane:%d bits:%d\n", plane_index, put_bits_count(&s->c.pb) - bits); | |
3480 | ||
3481 | for(level=0; level<s->spatial_decomposition_count; level++){ | |
3482 | for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
3483 | SubBand *b= &p->band[level][orientation]; | |
3484 | ||
3485 | dequantize(s, b, b->buf, b->stride); | |
3486 | } | |
3487 | } | |
93fbdb5a | 3488 | |
46c281e8 | 3489 | ff_spatial_idwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count); |
93fbdb5a MN |
3490 | if(s->qlog == LOSSLESS_QLOG){ |
3491 | for(y=0; y<h; y++){ | |
3492 | for(x=0; x<w; x++){ | |
034aff03 | 3493 | s->spatial_dwt_buffer[y*w + x]<<=FRAC_BITS; |
93fbdb5a MN |
3494 | } |
3495 | } | |
3496 | } | |
715a97f0 | 3497 | {START_TIMER |
791e7b83 | 3498 | predict_plane(s, s->spatial_dwt_buffer, plane_index, 1); |
715a97f0 | 3499 | STOP_TIMER("pred-conv")} |
791e7b83 MN |
3500 | if(s->avctx->flags&CODEC_FLAG_PSNR){ |
3501 | int64_t error= 0; | |
3502 | ||
3503 | if(pict->data[plane_index]) //FIXME gray hack | |
3504 | for(y=0; y<h; y++){ | |
3505 | for(x=0; x<w; x++){ | |
93fbdb5a | 3506 | int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x]; |
791e7b83 MN |
3507 | error += d*d; |
3508 | } | |
3509 | } | |
791e7b83 | 3510 | s->avctx->error[plane_index] += error; |
bd368b56 | 3511 | s->current_picture.error[plane_index] = error; |
791e7b83 MN |
3512 | } |
3513 | } | |
3514 | ||
3515 | if(s->last_picture.data[0]) | |
3516 | avctx->release_buffer(avctx, &s->last_picture); | |
3517 | ||
3518 | emms_c(); | |
3519 | ||
28869757 | 3520 | return ff_rac_terminate(c); |
791e7b83 MN |
3521 | } |
3522 | ||
3523 | static void common_end(SnowContext *s){ | |
7b49c309 MN |
3524 | int plane_index, level, orientation; |
3525 | ||
791e7b83 | 3526 | av_freep(&s->spatial_dwt_buffer); |
791e7b83 MN |
3527 | |
3528 | av_freep(&s->m.me.scratchpad); | |
3529 | av_freep(&s->m.me.map); | |
3530 | av_freep(&s->m.me.score_map); | |
155ec6ed MN |
3531 | |
3532 | av_freep(&s->block); | |
7b49c309 MN |
3533 | |
3534 | for(plane_index=0; plane_index<3; plane_index++){ | |
3535 | for(level=s->spatial_decomposition_count-1; level>=0; level--){ | |
3536 | for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
3537 | SubBand *b= &s->plane[plane_index].band[level][orientation]; | |
3538 | ||
a0d1931c | 3539 | av_freep(&b->x_coeff); |
7b49c309 MN |
3540 | } |
3541 | } | |
3542 | } | |
791e7b83 MN |
3543 | } |
3544 | ||
3545 | static int encode_end(AVCodecContext *avctx) | |
3546 | { | |
3547 | SnowContext *s = avctx->priv_data; | |
3548 | ||
3549 | common_end(s); | |
3550 | ||
3551 | return 0; | |
3552 | } | |
3553 | ||
3554 | static int decode_init(AVCodecContext *avctx) | |
3555 | { | |
a0d1931c Y |
3556 | SnowContext *s = avctx->priv_data; |
3557 | int block_size; | |
791e7b83 MN |
3558 | |
3559 | common_init(avctx); | |
3560 | ||
a0d1931c Y |
3561 | block_size = MB_SIZE >> s->block_max_depth; |
3562 | /* FIXME block_size * 2 is determined empirically. block_size * 1.5 is definitely needed, but I (Robert) cannot figure out why more than that is needed. Perhaps there is a bug, or perhaps I overlooked some demands that are placed on the buffer. */ | |
3563 | /* FIXME The formula is WRONG. For height > 480, the buffer will overflow. */ | |
3564 | /* FIXME For now, I will use a full frame of lines. Fortunately, this should not materially effect cache performance because lines are allocated using a stack, so if in fact only 50 out of 496 lines are needed at a time, the other 446 will sit allocated but never accessed. */ | |
3565 | // slice_buffer_init(s->plane[0].sb, s->plane[0].height, (block_size * 2) + (s->spatial_decomposition_count * s->spatial_decomposition_count), s->plane[0].width, s->spatial_dwt_buffer); | |
3566 | slice_buffer_init(&s->sb, s->plane[0].height, s->plane[0].height, s->plane[0].width, s->spatial_dwt_buffer); | |
3567 | ||
791e7b83 MN |
3568 | return 0; |
3569 | } | |
3570 | ||
3571 | static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){ | |
3572 | SnowContext *s = avctx->priv_data; | |
28869757 | 3573 | RangeCoder * const c= &s->c; |
791e7b83 MN |
3574 | int bytes_read; |
3575 | AVFrame *picture = data; | |
39c61bbb | 3576 | int level, orientation, plane_index; |
791e7b83 | 3577 | |
28869757 MN |
3578 | ff_init_range_decoder(c, buf, buf_size); |
3579 | ff_build_rac_states(c, 0.05*(1LL<<32), 256-8); | |
791e7b83 | 3580 | |
791e7b83 MN |
3581 | s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P |
3582 | decode_header(s); | |
155ec6ed | 3583 | if(!s->block) alloc_blocks(s); |
791e7b83 MN |
3584 | |
3585 | frame_start(s); | |
3586 | //keyframe flag dupliaction mess FIXME | |
3587 | if(avctx->debug&FF_DEBUG_PICT_INFO) | |
3588 | av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog); | |
3589 | ||
155ec6ed | 3590 | decode_blocks(s); |
791e7b83 MN |
3591 | |
3592 | for(plane_index=0; plane_index<3; plane_index++){ | |
3593 | Plane *p= &s->plane[plane_index]; | |
3594 | int w= p->width; | |
3595 | int h= p->height; | |
3596 | int x, y; | |
a0d1931c Y |
3597 | int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */ |
3598 | SubBand * correlate_band; | |
791e7b83 MN |
3599 | |
3600 | if(s->avctx->debug&2048){ | |
3601 | memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h); | |
3602 | predict_plane(s, s->spatial_dwt_buffer, plane_index, 1); | |
3603 | ||
3604 | for(y=0; y<h; y++){ | |
3605 | for(x=0; x<w; x++){ | |
715a97f0 | 3606 | int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]; |
791e7b83 MN |
3607 | s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v; |
3608 | } | |
3609 | } | |
3610 | } | |
791e7b83 | 3611 | |
a0d1931c Y |
3612 | { START_TIMER |
3613 | for(level=0; level<s->spatial_decomposition_count; level++){ | |
3614 | for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
3615 | SubBand *b= &p->band[level][orientation]; | |
3616 | unpack_coeffs(s, b, b->parent, orientation); | |
791e7b83 | 3617 | } |
a0d1931c Y |
3618 | } |
3619 | STOP_TIMER("unpack coeffs"); | |
3620 | } | |
3621 | ||
3622 | /* Handle level 0, orientation 0 specially. It is particularly resistant to slicing but fortunately quite small, so process it in one pass. */ | |
3623 | correlate_band = &p->band[0][0]; | |
3624 | decode_subband_slice_buffered(s, correlate_band, &s->sb, 0, correlate_band->height, decode_state[0][0]); | |
3625 | correlate_buffered(s, &s->sb, correlate_band, correlate_band->buf, correlate_band->stride, 1, 0); | |
3626 | dequantize_buffered(s, &s->sb, correlate_band, correlate_band->buf, correlate_band->stride); | |
791e7b83 | 3627 | |
f9e6ebf7 LM |
3628 | {START_TIMER |
3629 | const int mb_h= s->b_height << s->block_max_depth; | |
3630 | const int block_size = MB_SIZE >> s->block_max_depth; | |
3631 | const int block_w = plane_index ? block_size/2 : block_size; | |
3632 | int mb_y; | |
3633 | dwt_compose_t cs[MAX_DECOMPOSITIONS]; | |
3634 | int yd=0, yq=0; | |
a0d1931c Y |
3635 | int y; |
3636 | int end_y; | |
f9e6ebf7 | 3637 | |
a0d1931c | 3638 | ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count); |
f9e6ebf7 | 3639 | for(mb_y=0; mb_y<=mb_h; mb_y++){ |
a0d1931c Y |
3640 | |
3641 | const int slice_starty = block_w*mb_y; | |
f9e6ebf7 | 3642 | const int slice_h = block_w*(mb_y+1); |
f9e6ebf7 | 3643 | |
a0d1931c Y |
3644 | { |
3645 | START_TIMER | |
3646 | for(level=0; level<s->spatial_decomposition_count; level++){ | |
3647 | for(orientation=level ? 1 : 1; orientation<4; orientation++){ | |
3648 | SubBand *b= &p->band[level][orientation]; | |
3649 | int start_y; | |
3650 | int end_y; | |
3651 | int our_mb_start = mb_y; | |
3652 | int our_mb_end = (mb_y + 1); | |
3653 | start_y = FFMIN(b->height, (mb_y ? ((block_w * our_mb_start - 4) >> (s->spatial_decomposition_count - level)) + 5 : 0)); | |
3654 | end_y = FFMIN(b->height, (((block_w * our_mb_end - 4) >> (s->spatial_decomposition_count - level)) + 5)); | |
3655 | ||
3656 | if (start_y != end_y) | |
3657 | decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]); | |
3658 | } | |
3659 | } | |
3660 | STOP_TIMER("decode_subband_slice"); | |
3661 | } | |
3662 | ||
3663 | { START_TIMER | |
3664 | for(; yd<slice_h; yd+=4){ | |
3665 | ff_spatial_idwt_buffered_slice(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd); | |
3666 | } | |
3667 | STOP_TIMER("idwt slice");} | |
3668 | ||
3669 | ||
93fbdb5a | 3670 | if(s->qlog == LOSSLESS_QLOG){ |
f9e6ebf7 | 3671 | for(; yq<slice_h && yq<h; yq++){ |
a0d1931c | 3672 | DWTELEM * line = slice_buffer_get_line(&s->sb, yq); |
93fbdb5a | 3673 | for(x=0; x<w; x++){ |
a0d1931c | 3674 | line[x] <<= FRAC_BITS; |
93fbdb5a MN |
3675 | } |
3676 | } | |
3677 | } | |
f9e6ebf7 | 3678 | |
a0d1931c Y |
3679 | predict_slice_buffered(s, &s->sb, s->spatial_dwt_buffer, plane_index, 1, mb_y); |
3680 | ||
3681 | /* Nasty hack based empirically on how predict_slice_buffered() hits the buffer. */ | |
3682 | /* FIXME If possible, make predict_slice fit into the slice. As of now, it works on some previous lines (up to slice_height / 2) if the condition on the next line is false. */ | |
3683 | if (s->keyframe || (s->avctx->debug&512)){ | |
3684 | y = FFMIN(p->height, slice_starty); | |
3685 | end_y = FFMIN(p->height, slice_h); | |
3686 | } | |
3687 | else{ | |
3688 | y = FFMAX(0, FFMIN(p->height, slice_starty - (block_w >> 1))); | |
3689 | end_y = FFMAX(0, FFMIN(p->height, slice_h - (block_w >> 1))); | |
3690 | } | |
3691 | while(y < end_y) | |
3692 | slice_buffer_release(&s->sb, y++); | |
f9e6ebf7 | 3693 | } |
a0d1931c Y |
3694 | |
3695 | slice_buffer_flush(&s->sb); | |
3696 | ||
f9e6ebf7 | 3697 | STOP_TIMER("idwt + predict_slices")} |
791e7b83 MN |
3698 | } |