Add ff_ prefix to data symbols of encoders, decoders, hwaccel, parsers, bsf.
[libav.git] / libavcodec / snow.c
1 /*
2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include "libavutil/intmath.h"
22 #include "avcodec.h"
23 #include "dsputil.h"
24 #include "dwt.h"
25 #include "snow.h"
26
27 #include "rangecoder.h"
28 #include "mathops.h"
29
30 #include "mpegvideo.h"
31 #include "h263.h"
32
33 #undef NDEBUG
34 #include <assert.h>
35
/*
 * 3-level lookup table over byte values: entries 2..127 are +1,
 * 128..254 are -1, and 0, 1 and 255 map to 0.
 * NOTE(review): the layout suggests the index is a signed value reduced
 * to 8 bits (& 0xFF) -- confirm at the call sites.
 */
#define SNOW_ROW16(v) v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v
static const int8_t quant3[256]={
    /*   0.. 15 */  0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /*  16..127 */ SNOW_ROW16( 1), SNOW_ROW16( 1), SNOW_ROW16( 1), SNOW_ROW16( 1),
                   SNOW_ROW16( 1), SNOW_ROW16( 1), SNOW_ROW16( 1),
    /* 128..239 */ SNOW_ROW16(-1), SNOW_ROW16(-1), SNOW_ROW16(-1), SNOW_ROW16(-1),
                   SNOW_ROW16(-1), SNOW_ROW16(-1), SNOW_ROW16(-1),
    /* 240..255 */ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
};
#undef SNOW_ROW16
/*
 * Variant of the 3-level table: entry 0 is 0, entries 1..127 are +1 and
 * every entry in 128..255 is -1.
 */
#define SNOW_ROW16(v) v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v
static const int8_t quant3b[256]={
    /*   0.. 15 */  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /*  16..127 */ SNOW_ROW16( 1), SNOW_ROW16( 1), SNOW_ROW16( 1), SNOW_ROW16( 1),
                   SNOW_ROW16( 1), SNOW_ROW16( 1), SNOW_ROW16( 1),
    /* 128..255 */ SNOW_ROW16(-1), SNOW_ROW16(-1), SNOW_ROW16(-1), SNOW_ROW16(-1),
                   SNOW_ROW16(-1), SNOW_ROW16(-1), SNOW_ROW16(-1), SNOW_ROW16(-1),
};
#undef SNOW_ROW16
/*
 * Alternating-sign table: entries 0 and 1 are 0; from index 2 onwards every
 * even index holds +1 and every odd index holds -1.  unpack_coeffs() indexes
 * it with a coefficient value masked by 0xFF.
 */
#define SNOW_PLUSMINUS8 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1
static const int8_t quant3bA[256]={
    /*   0.. 15 */ 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
    /*  16.. 63 */ SNOW_PLUSMINUS8, SNOW_PLUSMINUS8, SNOW_PLUSMINUS8,
    /*  64..111 */ SNOW_PLUSMINUS8, SNOW_PLUSMINUS8, SNOW_PLUSMINUS8,
    /* 112..159 */ SNOW_PLUSMINUS8, SNOW_PLUSMINUS8, SNOW_PLUSMINUS8,
    /* 160..207 */ SNOW_PLUSMINUS8, SNOW_PLUSMINUS8, SNOW_PLUSMINUS8,
    /* 208..255 */ SNOW_PLUSMINUS8, SNOW_PLUSMINUS8, SNOW_PLUSMINUS8,
};
#undef SNOW_PLUSMINUS8
/*
 * 5-level lookup table over byte values (outputs -2..2).
 * 0 -> 0, 1..3 -> 1, 4..127 -> 2, 128..252 -> -2, 253..255 -> -1.
 */
#define SNOW_ROW16(v) v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v
static const int8_t quant5[256]={
    /*   0.. 15 */  0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /*  16..127 */ SNOW_ROW16( 2), SNOW_ROW16( 2), SNOW_ROW16( 2), SNOW_ROW16( 2),
                   SNOW_ROW16( 2), SNOW_ROW16( 2), SNOW_ROW16( 2),
    /* 128..239 */ SNOW_ROW16(-2), SNOW_ROW16(-2), SNOW_ROW16(-2), SNOW_ROW16(-2),
                   SNOW_ROW16(-2), SNOW_ROW16(-2), SNOW_ROW16(-2),
    /* 240..255 */ -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
};
#undef SNOW_ROW16
/*
 * 7-level lookup table over byte values (outputs -3..3).
 * 0 -> 0, 1..2 -> 1, 3..39 -> 2, 40..127 -> 3,
 * 128..216 -> -3, 217..253 -> -2, 254..255 -> -1.
 */
#define SNOW_ROW16(v) v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v
static const int8_t quant7[256]={
    /*   0.. 15 */  0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /*  16.. 31 */ SNOW_ROW16( 2),
    /*  32.. 47 */  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
    /*  48..127 */ SNOW_ROW16( 3), SNOW_ROW16( 3), SNOW_ROW16( 3), SNOW_ROW16( 3),
                   SNOW_ROW16( 3),
    /* 128..207 */ SNOW_ROW16(-3), SNOW_ROW16(-3), SNOW_ROW16(-3), SNOW_ROW16(-3),
                   SNOW_ROW16(-3),
    /* 208..223 */ -3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
    /* 224..239 */ SNOW_ROW16(-2),
    /* 240..255 */ -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
};
#undef SNOW_ROW16
/*
 * 9-level lookup table over byte values (outputs -4..4).
 * 0 -> 0, 1..2 -> 1, 3..6 -> 2, 7..20 -> 3, 21..127 -> 4,
 * 128..235 -> -4, 236..249 -> -3, 250..253 -> -2, 254..255 -> -1.
 */
#define SNOW_ROW16(v) v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v
static const int8_t quant9[256]={
    /*   0.. 15 */  0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    /*  16.. 31 */  3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    /*  32..127 */ SNOW_ROW16( 4), SNOW_ROW16( 4), SNOW_ROW16( 4),
                   SNOW_ROW16( 4), SNOW_ROW16( 4), SNOW_ROW16( 4),
    /* 128..223 */ SNOW_ROW16(-4), SNOW_ROW16(-4), SNOW_ROW16(-4),
                   SNOW_ROW16(-4), SNOW_ROW16(-4), SNOW_ROW16(-4),
    /* 224..239 */ -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
    /* 240..255 */ -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
};
#undef SNOW_ROW16
/*
 * 11-level lookup table over byte values (outputs -5..5).
 * 0 -> 0, 1 -> 1, 2..4 -> 2, 5..11 -> 3, 12..34 -> 4, 35..127 -> 5,
 * 128..221 -> -5, 222..244 -> -4, 245..251 -> -3, 252..254 -> -2, 255 -> -1.
 */
#define SNOW_ROW16(v) v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v
static const int8_t quant11[256]={
    /*   0.. 15 */  0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
    /*  16.. 31 */ SNOW_ROW16( 4),
    /*  32.. 47 */  4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    /*  48..127 */ SNOW_ROW16( 5), SNOW_ROW16( 5), SNOW_ROW16( 5), SNOW_ROW16( 5),
                   SNOW_ROW16( 5),
    /* 128..207 */ SNOW_ROW16(-5), SNOW_ROW16(-5), SNOW_ROW16(-5), SNOW_ROW16(-5),
                   SNOW_ROW16(-5),
    /* 208..223 */ -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
    /* 224..239 */ SNOW_ROW16(-4),
    /* 240..255 */ -4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
};
#undef SNOW_ROW16
/*
 * 13-level lookup table over byte values (outputs -6..6).
 * 0 -> 0, 1 -> 1, 2..3 -> 2, 4..7 -> 3, 8..16 -> 4, 17..49 -> 5, 50..127 -> 6,
 * 128..206 -> -6, 207..239 -> -5, 240..248 -> -4, 249..252 -> -3,
 * 253..254 -> -2, 255 -> -1.
 */
#define SNOW_ROW16(v) v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v
static const int8_t quant13[256]={
    /*   0.. 15 */  0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
    /*  16.. 31 */  4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    /*  32.. 47 */ SNOW_ROW16( 5),
    /*  48.. 63 */  5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
    /*  64..127 */ SNOW_ROW16( 6), SNOW_ROW16( 6), SNOW_ROW16( 6), SNOW_ROW16( 6),
    /* 128..191 */ SNOW_ROW16(-6), SNOW_ROW16(-6), SNOW_ROW16(-6), SNOW_ROW16(-6),
    /* 192..207 */ -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
    /* 208..239 */ SNOW_ROW16(-5), SNOW_ROW16(-5),
    /* 240..255 */ -4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
};
#undef SNOW_ROW16
180
/*
 * Block weighting windows, "64*linear" variant.
 *
 * obmc32 is a 32x32 and obmc16 a 16x16 row-major table of 8-bit weights.
 * Both are symmetric: every row is a palindrome and row r equals row
 * (size-1-r).
 *
 * Only this linear variant was ever compiled (it sat under "#elif 1"); the
 * "64*cubic" (#if 0) and "64*cos" (#else) alternatives were unreachable and
 * are no longer carried here.
 */
static const uint8_t obmc32[1024]={
      0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,
      0,  4,  4,  4,  8,  8,  8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12,  8,  8,  8,  4,  4,  4,  0,
      0,  4,  8,  8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12,  8,  8,  4,  0,
      0,  4,  8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12,  8,  4,  0,
      4,  8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12,  8,  4,
      4,  8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12,  8,  4,
      4,  8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16,  8,  4,
      4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12,  4,
      4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12,  4,
      4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16,  4,
      4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16,  4,
      4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16,  4,
      8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20,  8,
      8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20,  8,
      8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20,  8,
      8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24,  8,
      8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24,  8,
      8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20,  8,
      8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20,  8,
      8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20,  8,
      4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16,  4,
      4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16,  4,
      4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16,  4,
      4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12,  4,
      4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12,  4,
      4,  8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16,  8,  4,
      4,  8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12,  8,  4,
      4,  8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12,  8,  4,
      0,  4,  8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12,  8,  4,  0,
      0,  4,  8,  8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12,  8,  8,  4,  0,
      0,  4,  4,  4,  8,  8,  8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12,  8,  8,  8,  4,  4,  4,  0,
      0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,
 //error:0.000020
};
static const uint8_t obmc16[256]={
      0,  4,  4,  8,  8, 12, 12, 16, 16, 12, 12,  8,  8,  4,  4,  0,
      4,  8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16,  8,  4,
      4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16,  4,
      8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20,  8,
      8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28,  8,
     12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
     12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
     16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
     16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
     12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
     12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
      8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28,  8,
      8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20,  8,
      4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16,  4,
      4,  8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16,  8,  4,
      0,  4,  4,  8,  8, 12, 12, 16, 16, 12, 12,  8,  8,  4,  4,  0,
//error:0.000015
};
347
/*
 * 8x8 block weighting window, linear *64, row-major.
 * Each entry equals 4 * k[row] * k[col] with k = {1,3,5,7,7,5,3,1}.
 */
static const uint8_t obmc8[64]={
    /* row 0 */   4,  12,  20,  28,  28,  20,  12,   4,
    /* row 1 */  12,  36,  60,  84,  84,  60,  36,  12,
    /* row 2 */  20,  60, 100, 140, 140, 100,  60,  20,
    /* row 3 */  28,  84, 140, 196, 196, 140,  84,  28,
    /* row 4 */  28,  84, 140, 196, 196, 140,  84,  28,
    /* row 5 */  20,  60, 100, 140, 140, 100,  60,  20,
    /* row 6 */  12,  36,  60,  84,  84,  60,  36,  12,
    /* row 7 */   4,  12,  20,  28,  28,  20,  12,   4,
    //error:0.000000
};
360
/*
 * 4x4 block weighting window, linear *64, row-major.
 * Each entry equals 16 * k[row] * k[col] with k = {1,3,3,1}.
 */
static const uint8_t obmc4[16]={
    /* row 0 */  16,  48,  48,  16,
    /* row 1 */  48, 144, 144,  48,
    /* row 2 */  48, 144, 144,  48,
    /* row 3 */  16,  48,  48,  16,
    //error:0.000000
};
369
370 static const uint8_t * const obmc_tab[4]={
371 obmc32, obmc16, obmc8, obmc4
372 };
373
374 static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
375
/* Bit flags stored in BlockNode.type. */
#define BLOCK_INTRA   1
#define BLOCK_OPT     2

/**
 * Per-block side information.
 * NOTE(review): mx/my look like a motion vector and ref like a reference
 * frame index -- confirm against the block coding functions.
 */
typedef struct BlockNode{
    int16_t mx, my;
    uint8_t ref;
    uint8_t color[3];   ///< one value per plane; null_block uses 128 for all three
    uint8_t type;       ///< combination of BLOCK_INTRA / BLOCK_OPT
    uint8_t level;      //FIXME merge into type?
}BlockNode;
388
389 static const BlockNode null_block= { //FIXME add border maybe
390 .color= {128,128,128},
391 .mx= 0,
392 .my= 0,
393 .ref= 0,
394 .type= 0,
395 .level= 0,
396 };
397
/* Block-size and filter-size constants. */
#define LOG2_MB_SIZE        4                       /* log2 of MB_SIZE */
#define MB_SIZE             (1<<LOG2_MB_SIZE)       /* = 16 */
#define ENCODER_EXTRA_BITS  4
#define HTAPS_MAX           8                       /* Plane.hcoeff holds HTAPS_MAX/2 entries */
402
/**
 * One entry of a subband's sparse coefficient list.
 * unpack_coeffs() writes these; an entry with x == width+1 marks the end of
 * a row.  decode_subband_slice_buffered() reads coeff as (magnitude<<1)|sign.
 */
typedef struct x_and_coeff{
    int16_t  x;         ///< column of the coefficient, or the end marker
    uint16_t coeff;     ///< packed coefficient value
} x_and_coeff;
407
408 typedef struct SubBand{
409 int level;
410 int stride;
411 int width;
412 int height;
413 int qlog; ///< log(qscale)/log[2^(1/6)]
414 DWTELEM *buf;
415 IDWTELEM *ibuf;
416 int buf_x_offset;
417 int buf_y_offset;
418 int stride_line; ///< Stride measured in lines, not pixels.
419 x_and_coeff * x_coeff;
420 struct SubBand *parent;
421 uint8_t state[/*7*2*/ 7 + 512][32];
422 }SubBand;
423
424 typedef struct Plane{
425 int width;
426 int height;
427 SubBand band[MAX_DECOMPOSITIONS][4];
428
429 int htaps;
430 int8_t hcoeff[HTAPS_MAX/2];
431 int diag_mc;
432 int fast_mc;
433
434 int last_htaps;
435 int8_t last_hcoeff[HTAPS_MAX/2];
436 int last_diag_mc;
437 }Plane;
438
439 typedef struct SnowContext{
440
441 AVCodecContext *avctx;
442 RangeCoder c;
443 DSPContext dsp;
444 DWTContext dwt;
445 AVFrame new_picture;
446 AVFrame input_picture; ///< new_picture with the internal linesizes
447 AVFrame current_picture;
448 AVFrame last_picture[MAX_REF_FRAMES];
449 uint8_t *halfpel_plane[MAX_REF_FRAMES][4][4];
450 AVFrame mconly_picture;
451 // uint8_t q_context[16];
452 uint8_t header_state[32];
453 uint8_t block_state[128 + 32*128];
454 int keyframe;
455 int always_reset;
456 int version;
457 int spatial_decomposition_type;
458 int last_spatial_decomposition_type;
459 int temporal_decomposition_type;
460 int spatial_decomposition_count;
461 int last_spatial_decomposition_count;
462 int temporal_decomposition_count;
463 int max_ref_frames;
464 int ref_frames;
465 int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
466 uint32_t *ref_scores[MAX_REF_FRAMES];
467 DWTELEM *spatial_dwt_buffer;
468 IDWTELEM *spatial_idwt_buffer;
469 int colorspace_type;
470 int chroma_h_shift;
471 int chroma_v_shift;
472 int spatial_scalability;
473 int qlog;
474 int last_qlog;
475 int lambda;
476 int lambda2;
477 int pass1_rc;
478 int mv_scale;
479 int last_mv_scale;
480 int qbias;
481 int last_qbias;
482 #define QBIAS_SHIFT 3
483 int b_width;
484 int b_height;
485 int block_max_depth;
486 int last_block_max_depth;
487 Plane plane[MAX_PLANES];
488 BlockNode *block;
489 #define ME_CACHE_SIZE 1024
490 int me_cache[ME_CACHE_SIZE];
491 int me_cache_generation;
492 slice_buffer sb;
493
494 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX)
495
496 uint8_t *scratchbuf;
497 }SnowContext;
498
499 #ifdef __sgi
500 // Avoid a name clash on SGI IRIX
501 #undef qexp
502 #endif
503 #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
504 static uint8_t qexp[QROOT];
505
506 static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
507 int i;
508
509 if(v){
510 const int a= FFABS(v);
511 const int e= av_log2(a);
512 #if 1
513 const int el= FFMIN(e, 10);
514 put_rac(c, state+0, 0);
515
516 for(i=0; i<el; i++){
517 put_rac(c, state+1+i, 1); //1..10
518 }
519 for(; i<e; i++){
520 put_rac(c, state+1+9, 1); //1..10
521 }
522 put_rac(c, state+1+FFMIN(i,9), 0);
523
524 for(i=e-1; i>=el; i--){
525 put_rac(c, state+22+9, (a>>i)&1); //22..31
526 }
527 for(; i>=0; i--){
528 put_rac(c, state+22+i, (a>>i)&1); //22..31
529 }
530
531 if(is_signed)
532 put_rac(c, state+11 + el, v < 0); //11..21
533 #else
534
535 put_rac(c, state+0, 0);
536 if(e<=9){
537 for(i=0; i<e; i++){
538 put_rac(c, state+1+i, 1); //1..10
539 }
540 put_rac(c, state+1+i, 0);
541
542 for(i=e-1; i>=0; i--){
543 put_rac(c, state+22+i, (a>>i)&1); //22..31
544 }
545
546 if(is_signed)
547 put_rac(c, state+11 + e, v < 0); //11..21
548 }else{
549 for(i=0; i<e; i++){
550 put_rac(c, state+1+FFMIN(i,9), 1); //1..10
551 }
552 put_rac(c, state+1+9, 0);
553
554 for(i=e-1; i>=0; i--){
555 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
556 }
557
558 if(is_signed)
559 put_rac(c, state+11 + 10, v < 0); //11..21
560 }
561 #endif /* 1 */
562 }else{
563 put_rac(c, state+0, 1);
564 }
565 }
566
567 static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
568 if(get_rac(c, state+0))
569 return 0;
570 else{
571 int i, e, a;
572 e= 0;
573 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
574 e++;
575 }
576
577 a= 1;
578 for(i=e-1; i>=0; i--){
579 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
580 }
581
582 e= -(is_signed && get_rac(c, state+11 + FFMIN(e,10))); //11..21
583 return (a^e)-e;
584 }
585 }
586
587 static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
588 int i;
589 int r= log2>=0 ? 1<<log2 : 1;
590
591 assert(v>=0);
592 assert(log2>=-4);
593
594 while(v >= r){
595 put_rac(c, state+4+log2, 1);
596 v -= r;
597 log2++;
598 if(log2>0) r+=r;
599 }
600 put_rac(c, state+4+log2, 0);
601
602 for(i=log2-1; i>=0; i--){
603 put_rac(c, state+31-i, (v>>i)&1);
604 }
605 }
606
607 static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
608 int i;
609 int r= log2>=0 ? 1<<log2 : 1;
610 int v=0;
611
612 assert(log2>=-4);
613
614 while(get_rac(c, state+4+log2)){
615 v+= r;
616 log2++;
617 if(log2>0) r+=r;
618 }
619
620 for(i=log2-1; i>=0; i--){
621 v+= get_rac(c, state+31-i)<<i;
622 }
623
624 return v;
625 }
626
627 static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
628 const int w= b->width;
629 const int h= b->height;
630 int x,y;
631
632 int run, runs;
633 x_and_coeff *xc= b->x_coeff;
634 x_and_coeff *prev_xc= NULL;
635 x_and_coeff *prev2_xc= xc;
636 x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
637 x_and_coeff *prev_parent_xc= parent_xc;
638
639 runs= get_symbol2(&s->c, b->state[30], 0);
640 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
641 else run= INT_MAX;
642
643 for(y=0; y<h; y++){
644 int v=0;
645 int lt=0, t=0, rt=0;
646
647 if(y && prev_xc->x == 0){
648 rt= prev_xc->coeff;
649 }
650 for(x=0; x<w; x++){
651 int p=0;
652 const int l= v;
653
654 lt= t; t= rt;
655
656 if(y){
657 if(prev_xc->x <= x)
658 prev_xc++;
659 if(prev_xc->x == x + 1)
660 rt= prev_xc->coeff;
661 else
662 rt=0;
663 }
664 if(parent_xc){
665 if(x>>1 > parent_xc->x){
666 parent_xc++;
667 }
668 if(x>>1 == parent_xc->x){
669 p= parent_xc->coeff;
670 }
671 }
672 if(/*ll|*/l|lt|t|rt|p){
673 int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
674
675 v=get_rac(&s->c, &b->state[0][context]);
676 if(v){
677 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
678 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
679
680 xc->x=x;
681 (xc++)->coeff= v;
682 }
683 }else{
684 if(!run){
685 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
686 else run= INT_MAX;
687 v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
688 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
689
690 xc->x=x;
691 (xc++)->coeff= v;
692 }else{
693 int max_run;
694 run--;
695 v=0;
696
697 if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
698 else max_run= FFMIN(run, w-x-1);
699 if(parent_xc)
700 max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
701 x+= max_run;
702 run-= max_run;
703 }
704 }
705 }
706 (xc++)->x= w+1; //end marker
707 prev_xc= prev2_xc;
708 prev2_xc= xc;
709
710 if(parent_xc){
711 if(y&1){
712 while(parent_xc->x != parent->width+1)
713 parent_xc++;
714 parent_xc++;
715 prev_parent_xc= parent_xc;
716 }else{
717 parent_xc= prev_parent_xc;
718 }
719 }
720 }
721
722 (xc++)->x= w+1; //end marker
723 }
724
725 static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
726 const int w= b->width;
727 int y;
728 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
729 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
730 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
731 int new_index = 0;
732
733 if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){
734 qadd= 0;
735 qmul= 1<<QEXPSHIFT;
736 }
737
738 /* If we are on the second or later slice, restore our index. */
739 if (start_y != 0)
740 new_index = save_state[0];
741
742
743 for(y=start_y; y<h; y++){
744 int x = 0;
745 int v;
746 IDWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
747 memset(line, 0, b->width*sizeof(IDWTELEM));
748 v = b->x_coeff[new_index].coeff;
749 x = b->x_coeff[new_index++].x;
750 while(x < w){
751 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
752 register int u= -(v&1);
753 line[x] = (t^u) - u;
754
755 v = b->x_coeff[new_index].coeff;
756 x = b->x_coeff[new_index++].x;
757 }
758 }
759
760 /* Save our variables for the next slice. */
761 save_state[0] = new_index;
762
763 return;
764 }
765
766 static void reset_contexts(SnowContext *s){ //FIXME better initial contexts
767 int plane_index, level, orientation;
768
769 for(plane_index=0; plane_index<3; plane_index++){
770 for(level=0; level<MAX_DECOMPOSITIONS; level++){
771 for(orientation=level ? 1:0; orientation<4; orientation++){
772 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
773 }
774 }
775 }
776 memset(s->header_state, MID_STATE, sizeof(s->header_state));
777 memset(s->block_state, MID_STATE, sizeof(s->block_state));
778 }
779
780 static int alloc_blocks(SnowContext *s){
781 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
782 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
783
784 s->b_width = w;
785 s->b_height= h;
786
787 av_free(s->block);
788 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
789 return 0;
790 }
791
792 static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
793 uint8_t *bytestream= d->bytestream;
794 uint8_t *bytestream_start= d->bytestream_start;
795 *d= *s;
796 d->bytestream= bytestream;
797 d->bytestream_start= bytestream_start;
798 }
799
800 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
801 const int w= s->b_width << s->block_max_depth;
802 const int rem_depth= s->block_max_depth - level;
803 const int index= (x + y*w) << rem_depth;
804 const int block_w= 1<<rem_depth;
805 BlockNode block;
806 int i,j;
807
808 block.color[0]= l;
809 block.color[1]= cb;
810 block.color[2]= cr;
811 block.mx= mx;
812 block.my= my;
813 block.ref= ref;
814 block.type= type;
815 block.level= level;
816
817 for(j=0; j<block_w; j++){
818 for(i=0; i<block_w; i++){
819 s->block[index + i + j*w]= block;
820 }
821 }
822 }
823
824 static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
825 const int offset[3]= {
826 y*c-> stride + x,
827 ((y*c->uvstride + x)>>1),
828 ((y*c->uvstride + x)>>1),
829 };
830 int i;
831 for(i=0; i<3; i++){
832 c->src[0][i]= src [i];
833 c->ref[0][i]= ref [i] + offset[i];
834 }
835 assert(!ref_index);
836 }
837
838 static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
839 const BlockNode *left, const BlockNode *top, const BlockNode *tr){
840 if(s->ref_frames == 1){
841 *mx = mid_pred(left->mx, top->mx, tr->mx);
842 *my = mid_pred(left->my, top->my, tr->my);
843 }else{
844 const int *scale = scale_mv_ref[ref];
845 *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8,
846 (top ->mx * scale[top ->ref] + 128) >>8,
847 (tr ->mx * scale[tr ->ref] + 128) >>8);
848 *my = mid_pred((left->my * scale[left->ref] + 128) >>8,
849 (top ->my * scale[top ->ref] + 128) >>8,
850 (tr ->my * scale[tr ->ref] + 128) >>8);
851 }
852 }
853
854 static av_always_inline int same_block(BlockNode *a, BlockNode *b){
855 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
856 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
857 }else{
858 return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
859 }
860 }
861
862 static void decode_q_branch(SnowContext *s, int level, int x, int y){
863 const int w= s->b_width << s->block_max_depth;
864 const int rem_depth= s->block_max_depth - level;
865 const int index= (x + y*w) << rem_depth;
866 int trx= (x+1)<<rem_depth;
867 const BlockNode *left = x ? &s->block[index-1] : &null_block;
868 const BlockNode *top = y ? &s->block[index-w] : &null_block;
869 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
870 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
871 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
872
873 if(s->keyframe){
874 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
875 return;
876 }
877
878 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
879 int type, mx, my;
880 int l = left->color[0];
881 int cb= left->color[1];
882 int cr= left->color[2];
883 int ref = 0;
884 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
885 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
886 int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
887
888 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
889
890 if(type){
891 pred_mv(s, &mx, &my, 0, left, top, tr);
892 l += get_symbol(&s->c, &s->block_state[32], 1);
893 cb+= get_symbol(&s->c, &s->block_state[64], 1);
894 cr+= get_symbol(&s->c, &s->block_state[96], 1);
895 }else{
896 if(s->ref_frames > 1)
897 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
898 pred_mv(s, &mx, &my, ref, left, top, tr);
899 mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
900 my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
901 }
902 set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
903 }else{
904 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
905 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
906 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
907 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
908 }
909 }
910
911 static void decode_blocks(SnowContext *s){
912 int x, y;
913 int w= s->b_width;
914 int h= s->b_height;
915
916 for(y=0; y<h; y++){
917 for(x=0; x<w; x++){
918 decode_q_branch(s, 0, x, y);
919 }
920 }
921 }
922
923 static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, int stride, int b_w, int b_h, int dx, int dy){
924 static const uint8_t weight[64]={
925 8,7,6,5,4,3,2,1,
926 7,7,0,0,0,0,0,1,
927 6,0,6,0,0,0,2,0,
928 5,0,0,5,0,3,0,0,
929 4,0,0,0,4,0,0,0,
930 3,0,0,5,0,3,0,0,
931 2,0,6,0,0,0,2,0,
932 1,7,0,0,0,0,0,1,
933 };
934
935 static const uint8_t brane[256]={
936 0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
937 0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
938 0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
939 0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
940 0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
941 0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
942 0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
943 0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
944 0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
945 0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
946 0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
947 0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
948 0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
949 0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
950 0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
951 0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
952 };
953
954 static const uint8_t needs[16]={
955 0,1,0,0,
956 2,4,2,0,
957 0,1,0,0,
958 15
959 };
960
961 int x, y, b, r, l;
962 int16_t tmpIt [64*(32+HTAPS_MAX)];
963 uint8_t tmp2t[3][stride*(32+HTAPS_MAX)];
964 int16_t *tmpI= tmpIt;
965 uint8_t *tmp2= tmp2t[0];
966 const uint8_t *hpel[11];
967 assert(dx<16 && dy<16);
968 r= brane[dx + 16*dy]&15;
969 l= brane[dx + 16*dy]>>4;
970
971 b= needs[l] | needs[r];
972 if(p && !p->diag_mc)
973 b= 15;
974
975 if(b&5){
976 for(y=0; y < b_h+HTAPS_MAX-1; y++){
977 for(x=0; x < b_w; x++){
978 int a_1=src[x + HTAPS_MAX/2-4];
979 int a0= src[x + HTAPS_MAX/2-3];
980 int a1= src[x + HTAPS_MAX/2-2];
981 int a2= src[x + HTAPS_MAX/2-1];
982 int a3= src[x + HTAPS_MAX/2+0];
983 int a4= src[x + HTAPS_MAX/2+1];
984 int a5= src[x + HTAPS_MAX/2+2];
985 int a6= src[x + HTAPS_MAX/2+3];
986 int am=0;
987 if(!p || p->fast_mc){
988 am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
989 tmpI[x]= am;
990 am= (am+16)>>5;
991 }else{
992 am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
993 tmpI[x]= am;
994 am= (am+32)>>6;
995 }
996
997 if(am&(~255)) am= ~(am>>31);
998 tmp2[x]= am;
999 }
1000 tmpI+= 64;
1001 tmp2+= stride;
1002 src += stride;
1003 }
1004 src -= stride*y;
1005 }
1006 src += HTAPS_MAX/2 - 1;
1007 tmp2= tmp2t[1];
1008
1009 if(b&2){
1010 for(y=0; y < b_h; y++){
1011 for(x=0; x < b_w+1; x++){
1012 int a_1=src[x + (HTAPS_MAX/2-4)*stride];
1013 int a0= src[x + (HTAPS_MAX/2-3)*stride];
1014 int a1= src[x + (HTAPS_MAX/2-2)*stride];
1015 int a2= src[x + (HTAPS_MAX/2-1)*stride];
1016 int a3= src[x + (HTAPS_MAX/2+0)*stride];
1017 int a4= src[x + (HTAPS_MAX/2+1)*stride];
1018 int a5= src[x + (HTAPS_MAX/2+2)*stride];
1019 int a6= src[x + (HTAPS_MAX/2+3)*stride];
1020 int am=0;
1021 if(!p || p->fast_mc)
1022 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
1023 else
1024 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
1025
1026 if(am&(~255)) am= ~(am>>31);
1027 tmp2[x]= am;
1028 }
1029 src += stride;
1030 tmp2+= stride;
1031 }
1032 src -= stride*y;
1033 }
1034 src += stride*(HTAPS_MAX/2 - 1);
1035 tmp2= tmp2t[2];
1036 tmpI= tmpIt;
1037 if(b&4){
1038 for(y=0; y < b_h; y++){
1039 for(x=0; x < b_w; x++){
1040 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
1041 int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
1042 int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
1043 int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
1044 int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
1045 int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
1046 int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
1047 int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
1048 int am=0;
1049 if(!p || p->fast_mc)
1050 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
1051 else
1052 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
1053 if(am&(~255)) am= ~(am>>31);
1054 tmp2[x]= am;
1055 }
1056 tmpI+= 64;
1057 tmp2+= stride;
1058 }
1059 }
1060
1061 hpel[ 0]= src;
1062 hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1);
1063 hpel[ 2]= src + 1;
1064
1065 hpel[ 4]= tmp2t[1];
1066 hpel[ 5]= tmp2t[2];
1067 hpel[ 6]= tmp2t[1] + 1;
1068
1069 hpel[ 8]= src + stride;
1070 hpel[ 9]= hpel[1] + stride;
1071 hpel[10]= hpel[8] + 1;
1072
1073 if(b==15){
1074 const uint8_t *src1= hpel[dx/8 + dy/8*4 ];
1075 const uint8_t *src2= hpel[dx/8 + dy/8*4+1];
1076 const uint8_t *src3= hpel[dx/8 + dy/8*4+4];
1077 const uint8_t *src4= hpel[dx/8 + dy/8*4+5];
1078 dx&=7;
1079 dy&=7;
1080 for(y=0; y < b_h; y++){
1081 for(x=0; x < b_w; x++){
1082 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
1083 (8-dx)* dy *src3[x] + dx* dy *src4[x]+32)>>6;
1084 }
1085 src1+=stride;
1086 src2+=stride;
1087 src3+=stride;
1088 src4+=stride;
1089 dst +=stride;
1090 }
1091 }else{
1092 const uint8_t *src1= hpel[l];
1093 const uint8_t *src2= hpel[r];
1094 int a= weight[((dx&7) + (8*(dy&7)))];
1095 int b= 8-a;
1096 for(y=0; y < b_h; y++){
1097 for(x=0; x < b_w; x++){
1098 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
1099 }
1100 src1+=stride;
1101 src2+=stride;
1102 dst +=stride;
1103 }
1104 }
1105 }
1106
1107 #define mca(dx,dy,b_w)\
1108 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
1109 assert(h==b_w);\
1110 mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, stride, b_w, b_w, dx, dy);\
1111 }
1112
1113 mca( 0, 0,16)
1114 mca( 8, 0,16)
1115 mca( 0, 8,16)
1116 mca( 8, 8,16)
1117 mca( 0, 0,8)
1118 mca( 8, 0,8)
1119 mca( 0, 8,8)
1120 mca( 8, 8,8)
1121
1122 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
1123 if(block->type & BLOCK_INTRA){
1124 int x, y;
1125 const int color = block->color[plane_index];
1126 const int color4= color*0x01010101;
1127 if(b_w==32){
1128 for(y=0; y < b_h; y++){
1129 *(uint32_t*)&dst[0 + y*stride]= color4;
1130 *(uint32_t*)&dst[4 + y*stride]= color4;
1131 *(uint32_t*)&dst[8 + y*stride]= color4;
1132 *(uint32_t*)&dst[12+ y*stride]= color4;
1133 *(uint32_t*)&dst[16+ y*stride]= color4;
1134 *(uint32_t*)&dst[20+ y*stride]= color4;
1135 *(uint32_t*)&dst[24+ y*stride]= color4;
1136 *(uint32_t*)&dst[28+ y*stride]= color4;
1137 }
1138 }else if(b_w==16){
1139 for(y=0; y < b_h; y++){
1140 *(uint32_t*)&dst[0 + y*stride]= color4;
1141 *(uint32_t*)&dst[4 + y*stride]= color4;
1142 *(uint32_t*)&dst[8 + y*stride]= color4;
1143 *(uint32_t*)&dst[12+ y*stride]= color4;
1144 }
1145 }else if(b_w==8){
1146 for(y=0; y < b_h; y++){
1147 *(uint32_t*)&dst[0 + y*stride]= color4;
1148 *(uint32_t*)&dst[4 + y*stride]= color4;
1149 }
1150 }else if(b_w==4){
1151 for(y=0; y < b_h; y++){
1152 *(uint32_t*)&dst[0 + y*stride]= color4;
1153 }
1154 }else{
1155 for(y=0; y < b_h; y++){
1156 for(x=0; x < b_w; x++){
1157 dst[x + y*stride]= color;
1158 }
1159 }
1160 }
1161 }else{
1162 uint8_t *src= s->last_picture[block->ref].data[plane_index];
1163 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
1164 int mx= block->mx*scale;
1165 int my= block->my*scale;
1166 const int dx= mx&15;
1167 const int dy= my&15;
1168 const int tab_index= 3 - (b_w>>2) + (b_w>>4);
1169 sx += (mx>>4) - (HTAPS_MAX/2-1);
1170 sy += (my>>4) - (HTAPS_MAX/2-1);
1171 src += sx + sy*stride;
1172 if( (unsigned)sx >= w - b_w - (HTAPS_MAX-2)
1173 || (unsigned)sy >= h - b_h - (HTAPS_MAX-2)){
1174 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h);
1175 src= tmp + MB_SIZE;
1176 }
1177 // assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
1178 // assert(!(b_w&(b_w-1)));
1179 assert(b_w>1 && b_h>1);
1180 assert((tab_index>=0 && tab_index<4) || b_w==32);
1181 if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc )
1182 mc_block(&s->plane[plane_index], dst, src, stride, b_w, b_h, dx, dy);
1183 else if(b_w==32){
1184 int y;
1185 for(y=0; y<b_h; y+=16){
1186 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
1187 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
1188 }
1189 }else if(b_w==b_h)
1190 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
1191 else if(b_w==2*b_h){
1192 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 3 + 3*stride,stride);
1193 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
1194 }else{
1195 assert(2*b_w==b_h);
1196 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 3 + 3*stride ,stride);
1197 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
1198 }
1199 }
1200 }
1201
1202 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
1203 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
1204 int y, x;
1205 IDWTELEM * dst;
1206 for(y=0; y<b_h; y++){
1207 //FIXME ugly misuse of obmc_stride
1208 const uint8_t *obmc1= obmc + y*obmc_stride;
1209 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
1210 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
1211 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
1212 dst = slice_buffer_get_line(sb, src_y + y);
1213 for(x=0; x<b_w; x++){
1214 int v= obmc1[x] * block[3][x + y*src_stride]
1215 +obmc2[x] * block[2][x + y*src_stride]
1216 +obmc3[x] * block[1][x + y*src_stride]
1217 +obmc4[x] * block[0][x + y*src_stride];
1218
1219 v <<= 8 - LOG2_OBMC_MAX;
1220 if(FRAC_BITS != 8){
1221 v >>= 8 - FRAC_BITS;
1222 }
1223 if(add){
1224 v += dst[x + src_x];
1225 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
1226 if(v&(~255)) v= ~(v>>31);
1227 dst8[x + y*src_stride] = v;
1228 }else{
1229 dst[x + src_x] -= v;
1230 }
1231 }
1232 }
1233 }
1234
1235 //FIXME name cleanup (b_w, block_w, b_width stuff)
1236 static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
1237 const int b_width = s->b_width << s->block_max_depth;
1238 const int b_height= s->b_height << s->block_max_depth;
1239 const int b_stride= b_width;
1240 BlockNode *lt= &s->block[b_x + b_y*b_stride];
1241 BlockNode *rt= lt+1;
1242 BlockNode *lb= lt+b_stride;
1243 BlockNode *rb= lb+1;
1244 uint8_t *block[4];
1245 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
1246 uint8_t *tmp = s->scratchbuf;
1247 uint8_t *ptmp;
1248 int x,y;
1249
1250 if(b_x<0){
1251 lt= rt;
1252 lb= rb;
1253 }else if(b_x + 1 >= b_width){
1254 rt= lt;
1255 rb= lb;
1256 }
1257 if(b_y<0){
1258 lt= lb;
1259 rt= rb;
1260 }else if(b_y + 1 >= b_height){
1261 lb= lt;
1262 rb= rt;
1263 }
1264
1265 if(src_x<0){ //FIXME merge with prev & always round internal width up to *16
1266 obmc -= src_x;
1267 b_w += src_x;
1268 if(!sliced && !offset_dst)
1269 dst -= src_x;
1270 src_x=0;
1271 }else if(src_x + b_w > w){
1272 b_w = w - src_x;
1273 }
1274 if(src_y<0){
1275 obmc -= src_y*obmc_stride;
1276 b_h += src_y;
1277 if(!sliced && !offset_dst)
1278 dst -= src_y*dst_stride;
1279 src_y=0;
1280 }else if(src_y + b_h> h){
1281 b_h = h - src_y;
1282 }
1283
1284 if(b_w<=0 || b_h<=0) return;
1285
1286 assert(src_stride > 2*MB_SIZE + 5);
1287
1288 if(!sliced && offset_dst)
1289 dst += src_x + src_y*dst_stride;
1290 dst8+= src_x + src_y*src_stride;
1291 // src += src_x + src_y*src_stride;
1292
1293 ptmp= tmp + 3*tmp_step;
1294 block[0]= ptmp;
1295 ptmp+=tmp_step;
1296 pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
1297
1298 if(same_block(lt, rt)){
1299 block[1]= block[0];
1300 }else{
1301 block[1]= ptmp;
1302 ptmp+=tmp_step;
1303 pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
1304 }
1305
1306 if(same_block(lt, lb)){
1307 block[2]= block[0];
1308 }else if(same_block(rt, lb)){
1309 block[2]= block[1];
1310 }else{
1311 block[2]= ptmp;
1312 ptmp+=tmp_step;
1313 pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
1314 }
1315
1316 if(same_block(lt, rb) ){
1317 block[3]= block[0];
1318 }else if(same_block(rt, rb)){
1319 block[3]= block[1];
1320 }else if(same_block(lb, rb)){
1321 block[3]= block[2];
1322 }else{
1323 block[3]= ptmp;
1324 pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
1325 }
1326 #if 0
1327 for(y=0; y<b_h; y++){
1328 for(x=0; x<b_w; x++){
1329 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
1330 if(add) dst[x + y*dst_stride] += v;
1331 else dst[x + y*dst_stride] -= v;
1332 }
1333 }
1334 for(y=0; y<b_h; y++){
1335 uint8_t *obmc2= obmc + (obmc_stride>>1);
1336 for(x=0; x<b_w; x++){
1337 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
1338 if(add) dst[x + y*dst_stride] += v;
1339 else dst[x + y*dst_stride] -= v;
1340 }
1341 }
1342 for(y=0; y<b_h; y++){
1343 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
1344 for(x=0; x<b_w; x++){
1345 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
1346 if(add) dst[x + y*dst_stride] += v;
1347 else dst[x + y*dst_stride] -= v;
1348 }
1349 }
1350 for(y=0; y<b_h; y++){
1351 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
1352 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
1353 for(x=0; x<b_w; x++){
1354 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
1355 if(add) dst[x + y*dst_stride] += v;
1356 else dst[x + y*dst_stride] -= v;
1357 }
1358 }
1359 #else
1360 if(sliced){
1361 s->dwt.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
1362 }else{
1363 for(y=0; y<b_h; y++){
1364 //FIXME ugly misuse of obmc_stride
1365 const uint8_t *obmc1= obmc + y*obmc_stride;
1366 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
1367 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
1368 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
1369 for(x=0; x<b_w; x++){
1370 int v= obmc1[x] * block[3][x + y*src_stride]
1371 +obmc2[x] * block[2][x + y*src_stride]
1372 +obmc3[x] * block[1][x + y*src_stride]
1373 +obmc4[x] * block[0][x + y*src_stride];
1374
1375 v <<= 8 - LOG2_OBMC_MAX;
1376 if(FRAC_BITS != 8){
1377 v >>= 8 - FRAC_BITS;
1378 }
1379 if(add){
1380 v += dst[x + y*dst_stride];
1381 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
1382 if(v&(~255)) v= ~(v>>31);
1383 dst8[x + y*src_stride] = v;
1384 }else{
1385 dst[x + y*dst_stride] -= v;
1386 }
1387 }
1388 }
1389 }
1390 #endif /* 0 */
1391 }
1392
1393 static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){
1394 Plane *p= &s->plane[plane_index];
1395 const int mb_w= s->b_width << s->block_max_depth;
1396 const int mb_h= s->b_height << s->block_max_depth;
1397 int x, y, mb_x;
1398 int block_size = MB_SIZE >> s->block_max_depth;
1399 int block_w = plane_index ? block_size/2 : block_size;
1400 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
1401 int obmc_stride= plane_index ? block_size : 2*block_size;
1402 int ref_stride= s->current_picture.linesize[plane_index];
1403 uint8_t *dst8= s->current_picture.data[plane_index];
1404 int w= p->width;
1405 int h= p->height;
1406
1407 if(s->keyframe || (s->avctx->debug&512)){
1408 if(mb_y==mb_h)
1409 return;
1410
1411 if(add){
1412 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
1413 // DWTELEM * line = slice_buffer_get_line(sb, y);
1414 IDWTELEM * line = sb->line[y];
1415 for(x=0; x<w; x++){
1416 // int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
1417 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
1418 v >>= FRAC_BITS;
1419 if(v&(~255)) v= ~(v>>31);
1420 dst8[x + y*ref_stride]= v;
1421 }
1422 }
1423 }else{
1424 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
1425 // DWTELEM * line = slice_buffer_get_line(sb, y);
1426 IDWTELEM * line = sb->line[y];
1427 for(x=0; x<w; x++){
1428 line[x] -= 128 << FRAC_BITS;
1429 // buf[x + y*w]-= 128<<FRAC_BITS;
1430 }
1431 }
1432 }
1433
1434 return;
1435 }
1436
1437 for(mb_x=0; mb_x<=mb_w; mb_x++){
1438 add_yblock(s, 1, sb, old_buffer, dst8, obmc,
1439 block_w*mb_x - block_w/2,
1440 block_w*mb_y - block_w/2,
1441 block_w, block_w,
1442 w, h,
1443 w, ref_stride, obmc_stride,
1444 mb_x - 1, mb_y - 1,
1445 add, 0, plane_index);
1446 }
1447 }
1448
1449 static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){
1450 Plane *p= &s->plane[plane_index];
1451 const int mb_w= s->b_width << s->block_max_depth;
1452 const int mb_h= s->b_height << s->block_max_depth;
1453 int x, y, mb_x;
1454 int block_size = MB_SIZE >> s->block_max_depth;
1455 int block_w = plane_index ? block_size/2 : block_size;
1456 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
1457 const int obmc_stride= plane_index ? block_size : 2*block_size;
1458 int ref_stride= s->current_picture.linesize[plane_index];
1459 uint8_t *dst8= s->current_picture.data[plane_index];
1460 int w= p->width;
1461 int h= p->height;
1462
1463 if(s->keyframe || (s->avctx->debug&512)){
1464 if(mb_y==mb_h)
1465 return;
1466
1467 if(add){
1468 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
1469 for(x=0; x<w; x++){
1470 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
1471 v >>= FRAC_BITS;
1472 if(v&(~255)) v= ~(v>>31);
1473 dst8[x + y*ref_stride]= v;
1474 }
1475 }
1476 }else{
1477 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
1478 for(x=0; x<w; x++){
1479 buf[x + y*w]-= 128<<FRAC_BITS;
1480 }
1481 }
1482 }
1483
1484 return;
1485 }
1486
1487 for(mb_x=0; mb_x<=mb_w; mb_x++){
1488 add_yblock(s, 0, NULL, buf, dst8, obmc,
1489 block_w*mb_x - block_w/2,
1490 block_w*mb_y - block_w/2,
1491 block_w, block_w,
1492 w, h,
1493 w, ref_stride, obmc_stride,
1494 mb_x - 1, mb_y - 1,
1495 add, 1, plane_index);
1496 }
1497 }
1498
1499 static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){
1500 const int mb_h= s->b_height << s->block_max_depth;
1501 int mb_y;
1502 for(mb_y=0; mb_y<=mb_h; mb_y++)
1503 predict_slice(s, buf, plane_index, add, mb_y);
1504 }
1505
1506 static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){
1507 const int w= b->width;
1508 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
1509 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1510 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1511 int x,y;
1512
1513 if(s->qlog == LOSSLESS_QLOG) return;
1514
1515 for(y=start_y; y<end_y; y++){
1516 // DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
1517 IDWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
1518 for(x=0; x<w; x++){
1519 int i= line[x];
1520 if(i<0){
1521 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
1522 }else if(i>0){
1523 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT));
1524 }
1525 }
1526 }
1527 }
1528
1529 static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
1530 const int w= b->width;
1531 int x,y;
1532
1533 IDWTELEM * line=0; // silence silly "could be used without having been initialized" warning
1534 IDWTELEM * prev;
1535
1536 if (start_y != 0)
1537 line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
1538
1539 for(y=start_y; y<end_y; y++){
1540 prev = line;
1541 // line = slice_buffer_get_line_from_address(sb, src + (y * stride));
1542 line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
1543 for(x=0; x<w; x++){
1544 if(x){
1545 if(use_median){
1546 if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
1547 else line[x] += line[x - 1];
1548 }else{
1549 if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
1550 else line[x] += line[x - 1];
1551 }
1552 }else{
1553 if(y) line[x] += prev[x];
1554 }
1555 }
1556 }
1557 }
1558
1559 static void decode_qlogs(SnowContext *s){
1560 int plane_index, level, orientation;
1561
1562 for(plane_index=0; plane_index<3; plane_index++){
1563 for(level=0; level<s->spatial_decomposition_count; level++){
1564 for(orientation=level ? 1:0; orientation<4; orientation++){
1565 int q;
1566 if (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
1567 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
1568 else q= get_symbol(&s->c, s->header_state, 1);
1569 s->plane[plane_index].band[level][orientation].qlog= q;
1570 }
1571 }
1572 }
1573 }
1574
/*
 * Read an unsigned header symbol into the caller's local `tmp`, validate it
 * with the expression `check` (which refers to `tmp`) and store it in `dst`.
 * On a failed check an error is logged and the enclosing function returns -1.
 *
 * Expands to several statements and is invoked without a trailing semicolon,
 * so it must not be used as the unbraced body of an if/else/loop.
 */
#define GET_S(dst, check) \
    tmp = get_symbol(&s->c, s->header_state, 0);\
    if (!(check)) {\
        av_log(s->avctx, AV_LOG_ERROR, "Error " #dst " is %d\n", tmp);\
        return -1;\
    }\
    dst = tmp;
1582
1583 static int decode_header(SnowContext *s){
1584 int plane_index, tmp;
1585 uint8_t kstate[32];
1586
1587 memset(kstate, MID_STATE, sizeof(kstate));
1588
1589 s->keyframe= get_rac(&s->c, kstate);
1590 if(s->keyframe || s->always_reset){
1591 reset_contexts(s);
1592 s->spatial_decomposition_type=
1593 s->qlog=
1594 s->qbias=
1595 s->mv_scale=
1596 s->block_max_depth= 0;
1597 }
1598 if(s->keyframe){
1599 GET_S(s->version, tmp <= 0U)
1600 s->always_reset= get_rac(&s->c, s->header_state);
1601 s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
1602 s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
1603 GET_S(s->spatial_decomposition_count, 0 < tmp && tmp <= MAX_DECOMPOSITIONS)
1604 s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
1605 s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
1606 s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
1607 s->spatial_scalability= get_rac(&s->c, s->header_state);
1608 // s->rate_scalability= get_rac(&s->c, s->header_state);
1609 GET_S(s->max_ref_frames, tmp < (unsigned)MAX_REF_FRAMES)
1610 s->max_ref_frames++;
1611
1612 decode_qlogs(s);
1613 }
1614
1615 if(!s->keyframe){
1616 if(get_rac(&s->c, s->header_state)){
1617 for(plane_index=0; plane_index<2; plane_index++){
1618 int htaps, i, sum=0;
1619 Plane *p= &s->plane[plane_index];
1620 p->diag_mc= get_rac(&s->c, s->header_state);
1621 htaps= get_symbol(&s->c, s->header_state, 0)*2 + 2;
1622 if((unsigned)htaps > HTAPS_MAX || htaps==0)
1623 return -1;
1624 p->htaps= htaps;
1625 for(i= htaps/2; i; i--){
1626 p->hcoeff[i]= get_symbol(&s->c, s->header_state, 0) * (1-2*(i&1));
1627 sum += p->hcoeff[i];
1628 }
1629 p->hcoeff[0]= 32-sum;
1630 }
1631 s->plane[2].diag_mc= s->plane[1].diag_mc;
1632 s->plane[2].htaps = s->plane[1].htaps;
1633 memcpy(s->plane[2].hcoeff, s->plane[1].hcoeff, sizeof(s->plane[1].hcoeff));
1634 }
1635 if(get_rac(&s->c, s->header_state)){
1636 GET_S(s->spatial_decomposition_count, 0 < tmp && tmp <= MAX_DECOMPOSITIONS)
1637 decode_qlogs(s);
1638 }
1639 }
1640
1641 s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1);
1642 if(s->spatial_decomposition_type > 1U){
1643 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
1644 return -1;
1645 }
1646 if(FFMIN(s->avctx-> width>>s->chroma_h_shift,
1647 s->avctx->height>>s->chroma_v_shift) >> (s->spatial_decomposition_count-1) <= 0){
1648 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_count %d too large for size", s->spatial_decomposition_count);
1649 return -1;
1650 }
1651
1652 s->qlog += get_symbol(&s->c, s->header_state, 1);
1653 s->mv_scale += get_symbol(&s->c, s->header_state, 1);
1654 s->qbias += get_symbol(&s->c, s->header_state, 1);
1655 s->block_max_depth+= get_symbol(&s->c, s->header_state, 1);
1656 if(s->block_max_depth > 1 || s->block_max_depth < 0){
1657 av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
1658 s->block_max_depth= 0;
1659 return -1;
1660 }
1661
1662 return 0;
1663 }
1664
1665 static void init_qexp(void){
1666 int i;
1667 double v=128;
1668
1669 for(i=0; i<QROOT; i++){
1670 qexp[i]= lrintf(v);
1671 v *= pow(2, 1.0 / QROOT);
1672 }
1673 }
1674
1675 static av_cold int common_init(AVCodecContext *avctx){
1676 SnowContext *s = avctx->priv_data;
1677 int width, height;
1678 int i, j;
1679
1680 s->avctx= avctx;
1681 s->max_ref_frames=1; //just make sure its not an invalid value in case of no initial keyframe
1682
1683 dsputil_init(&s->dsp, avctx);
1684 ff_dwt_init(&s->dwt);
1685
1686 #define mcf(dx,dy)\
1687 s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\
1688 s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
1689 s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
1690 s->dsp.put_qpel_pixels_tab [1][dy+dx/4]=\
1691 s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
1692 s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
1693
1694 mcf( 0, 0)
1695 mcf( 4, 0)
1696 mcf( 8, 0)
1697 mcf(12, 0)
1698 mcf( 0, 4)
1699 mcf( 4, 4)
1700 mcf( 8, 4)
1701 mcf(12, 4)
1702 mcf( 0, 8)
1703 mcf( 4, 8)
1704 mcf( 8, 8)
1705 mcf(12, 8)
1706 mcf( 0,12)
1707 mcf( 4,12)
1708 mcf( 8,12)
1709 mcf(12,12)
1710
1711 #define mcfh(dx,dy)\
1712 s->dsp.put_pixels_tab [0][dy/4+dx/8]=\
1713 s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
1714 mc_block_hpel ## dx ## dy ## 16;\
1715 s->dsp.put_pixels_tab [1][dy/4+dx/8]=\
1716 s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
1717 mc_block_hpel ## dx ## dy ## 8;
1718
1719 mcfh(0, 0)
1720 mcfh(8, 0)
1721 mcfh(0, 8)
1722 mcfh(8, 8)
1723
1724 if(!qexp[0])
1725 init_qexp();
1726
1727 // dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
1728
1729 width= s->avctx->width;
1730 height= s->avctx->height;
1731
1732 s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM));
1733 s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this does not belong here
1734
1735 for(i=0; i<MAX_REF_FRAMES; i++)
1736 for(j=0; j<MAX_REF_FRAMES; j++)
1737 scale_mv_ref[i][j] = 256*(i+1)/(j+1);
1738
1739 s->avctx->get_buffer(s->avctx, &s->mconly_picture);
1740 s->scratchbuf = av_malloc(s->mconly_picture.linesize[0]*7*MB_SIZE);
1741
1742 return 0;
1743 }
1744
1745 static int common_init_after_header(AVCodecContext *avctx){
1746 SnowContext *s = avctx->priv_data;
1747 int plane_index, level, orientation;
1748
1749 for(plane_index=0; plane_index<3; plane_index++){
1750 int w= s->avctx->width;
1751 int h= s->avctx->height;
1752
1753 if(plane_index){
1754 w>>= s->chroma_h_shift;
1755 h>>= s->chroma_v_shift;
1756 }
1757 s->plane[plane_index].width = w;
1758 s->plane[plane_index].height= h;
1759
1760 for(level=s->spatial_decomposition_count-1; level>=0; level--){
1761 for(orientation=level ? 1 : 0; orientation<4; orientation++){
1762 SubBand *b= &s->plane[plane_index].band[level][orientation];
1763
1764 b->buf= s->spatial_dwt_buffer;
1765 b->level= level;
1766 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
1767 b->width = (w + !(orientation&1))>>1;
1768 b->height= (h + !(orientation>1))>>1;
1769
1770 b->stride_line = 1 << (s->spatial_decomposition_count - level);
1771 b->buf_x_offset = 0;
1772 b->buf_y_offset = 0;
1773
1774 if(orientation&1){
1775 b->buf += (w+1)>>1;
1776 b->buf_x_offset = (w+1)>>1;
1777 }
1778 if(orientation>1){
1779 b->buf += b->stride>>1;
1780 b->buf_y_offset = b->stride_line >> 1;
1781 }
1782 b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);
1783
1784 if(level)
1785 b->parent= &s->plane[plane_index].band[level-1][orientation];
1786 //FIXME avoid this realloc
1787 av_freep(&b->x_coeff);
1788 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
1789 }
1790 w= (w+1)>>1;
1791 h= (h+1)>>1;
1792 }
1793 }
1794
1795 return 0;
1796 }
1797
1798 #define QUANTIZE2 0
1799
1800 #if QUANTIZE2==1
1801 #define Q2_STEP 8
1802
1803 static void find_sse(SnowContext *s, Plane *p, int *score, int score_stride, IDWTELEM *r0, IDWTELEM *r1, int level, int orientation){
1804 SubBand *b= &p->band[level][orientation];
1805 int x, y;
1806 int xo=0;
1807 int yo=0;
1808 int step= 1 << (s->spatial_decomposition_count - level);
1809
1810 if(orientation&1)
1811 xo= step>>1;
1812 if(orientation&2)
1813 yo= step>>1;
1814
1815 //FIXME bias for nonzero ?
1816 //FIXME optimize
1817 memset(score, 0, sizeof(*score)*score_stride*((p->height + Q2_STEP-1)/Q2_STEP));
1818 for(y=0; y<p->height; y++){
1819 for(x=0; x<p->width; x++){
1820 int sx= (x-xo + step/2) / step / Q2_STEP;
1821 int sy= (y-yo + step/2) / step / Q2_STEP;
1822 int v= r0[x + y*p->width] - r1[x + y*p->width];
1823 assert(sx>=0 && sy>=0 && sx < score_stride);
1824 v= ((v+8)>>4)<<4;
1825 score[sx + sy*score_stride] += v*v;
1826 assert(score[sx + sy*score_stride] >= 0);
1827 }
1828 }
1829 }
1830
1831 static void dequantize_all(SnowContext *s, Plane *p, IDWTELEM *buffer, int width, int height){
1832 int level, orientation;
1833
1834 for(level=0; level<s->spatial_decomposition_count; level++){
1835 for(orientation=level ? 1 : 0; orientation<4; orientation++){
1836 SubBand *b= &p->band[level][orientation];
1837 IDWTELEM *dst= buffer + (b->ibuf - s->spatial_idwt_buffer);
1838
1839 dequantize(s, b, dst, b->stride);
1840 }
1841 }
1842 }
1843
1844 static void dwt_quantize(SnowContext *s, Plane *p, DWTELEM *buffer, int width, int height, int stride, int type){
1845 int level, orientation, ys, xs, x, y, pass;
1846 IDWTELEM best_dequant[height * stride];
1847 IDWTELEM idwt2_buffer[height * stride];
1848 const int score_stride= (width + 10)/Q2_STEP;
1849 int best_score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
1850 int score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
1851 int threshold= (s->m.lambda * s->m.lambda) >> 6;
1852
1853 //FIXME pass the copy cleanly ?
1854
1855 // memcpy(dwt_buffer, buffer, height * stride * sizeof(DWTELEM));
1856 ff_spatial_dwt(buffer, width, height, stride, type, s->spatial_decomposition_count);
1857
1858 for(level=0; level<s->spatial_decomposition_count; level++){
1859 for(orientation=level ? 1 : 0; orientation<4; orientation++){
1860 SubBand *b= &p->band[level][orientation];
1861 IDWTELEM *dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
1862 DWTELEM *src= buffer + (b-> buf - s->spatial_dwt_buffer);
1863 assert(src == b->buf); // code does not depend on this but it is true currently
1864
1865 quantize(s, b, dst, src, b->stride, s->qbias);
1866 }
1867 }
1868 for(pass=0; pass<1; pass++){
1869 if(s->qbias == 0) //keyframe
1870 continue;
1871 for(level=0; level<s->spatial_decomposition_count; level++){
1872 for(orientation=level ? 1 : 0; orientation<4; orientation++){
1873 SubBand *b= &p->band[level][orientation];
1874 IDWTELEM *dst= idwt2_buffer + (b->ibuf - s->spatial_idwt_buffer);
1875 IDWTELEM *best_dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
1876
1877 for(ys= 0; ys<Q2_STEP; ys++){
1878 for(xs= 0; xs<Q2_STEP; xs++){
1879 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
1880 dequantize_all(s, p, idwt2_buffer, width, height);
1881 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
1882 find_sse(s, p, best_score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
1883 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
1884 for(y=ys; y<b->height; y+= Q2_STEP){
1885 for(x=xs; x<b->width; x+= Q2_STEP){
1886 if(dst[x + y*b->stride]<0) dst[x + y*b->stride]++;
1887 if(dst[x + y*b->stride]>0) dst[x + y*b->stride]--;
1888 //FIXME try more than just --
1889 }
1890 }
1891 dequantize_all(s, p, idwt2_buffer, width, height);
1892 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
1893 find_sse(s, p, score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
1894 for(y=ys; y<b->height; y+= Q2_STEP){
1895 for(x=xs; x<b->width; x+= Q2_STEP){
1896 int score_idx= x/Q2_STEP + (y/Q2_STEP)*score_stride;
1897 if(score[score_idx] <= best_score[score_idx] + threshold){
1898 best_score[score_idx]= score[score_idx];
1899 if(best_dst[x + y*b->stride]<0) best_dst[x + y*b->stride]++;
1900 if(best_dst[x + y*b->stride]>0) best_dst[x + y*b->stride]--;
1901 //FIXME copy instead
1902 }
1903 }
1904 }
1905 }
1906 }
1907 }
1908 }
1909 }
1910 memcpy(s->spatial_idwt_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); //FIXME work with that directly instead of copy at the end
1911 }
1912
1913 #endif /* QUANTIZE2==1 */
1914
1915 #define USE_HALFPEL_PLANE 0
1916
1917 static void halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){
1918 int p,x,y;
1919
1920 assert(!(s->avctx->flags & CODEC_FLAG_EMU_EDGE));
1921
1922 for(p=0; p<3; p++){
1923 int is_chroma= !!p;
1924 int w= s->avctx->width >>is_chroma;
1925 int h= s->avctx->height >>is_chroma;
1926 int ls= frame->linesize[p];
1927 uint8_t *src= frame->data[p];
1928
1929 halfpel[1][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
1930 halfpel[2][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
1931 halfpel[3][p]= (uint8_t