Correct x/ymin to avoid segfault due to out of picture reads.
[libav.git] / libavcodec / snow.c
1 /*
2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include "avcodec.h"
22 #include "dsputil.h"
23 #include "snow.h"
24
25 #include "rangecoder.h"
26 #include "mathops.h"
27
28 #include "mpegvideo.h"
29
30 #undef NDEBUG
31 #include <assert.h>
32
/* 256-entry lookup table producing a 3-level result (-1, 0, +1).
 * Indices 0 and 1 map to 0, 2..127 map to +1, 128..254 map to -1 and
 * 255 maps to 0.
 * NOTE(review): this looks like it is indexed with a signed difference
 * masked to 8 bits (so 128..255 stand for negative inputs) - confirm
 * against the callers. */
33 static const int8_t quant3[256]={
34 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
35 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
36 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
37 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
43 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
44 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
45 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
46 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
47 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
48 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
49 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
50 };
/* 256-entry lookup table producing -1, 0 or +1.
 * Only index 0 maps to 0; 1..127 map to +1 and 128..255 map to -1,
 * i.e. there is no dead zone around zero other than index 0 itself.
 * NOTE(review): presumably indexed with a signed value masked to
 * 8 bits - confirm against the callers. */
51 static const int8_t quant3b[256]={
52 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
53 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
54 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
55 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
57 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
60 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
61 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
62 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
63 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
64 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
65 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
66 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
68 };
/* 256-entry lookup table producing -1, 0 or +1 from the index parity.
 * Indices 0 and 1 map to 0; from index 2 onwards even indices map to +1
 * and odd indices map to -1.
 * NOTE(review): the parity pattern suggests the index carries a sign in
 * its lowest bit - confirm against the callers. */
69 static const int8_t quant3bA[256]={
70 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
71 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
72 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
73 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
74 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
75 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
76 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
77 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
78 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
79 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
80 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
81 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
82 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
83 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
84 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
85 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
86 };
/* 256-entry lookup table producing a 5-level result in [-2, 2].
 * Index 0 maps to 0, 1..3 to +1, 4..127 to +2, 128..252 to -2 and
 * 253..255 to -1.
 * NOTE(review): presumably indexed with a signed value masked to
 * 8 bits (128..255 standing for negative inputs) - confirm. */
87 static const int8_t quant5[256]={
88 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
89 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
90 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
91 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
92 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
93 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
94 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
95 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
96 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
97 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
98 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
99 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
100 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
101 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
102 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
103 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
104 };
/* 256-entry lookup table producing a 7-level result in [-3, 3].
 * The lower half of the index range (0..127) yields 0..+3 and the upper
 * half (128..255) yields -3..-1; the steps are narrow close to index 0
 * and close to index 255 and wide in between.
 * NOTE(review): presumably indexed with a signed value masked to
 * 8 bits - confirm against the callers. */
105 static const int8_t quant7[256]={
106 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
107 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
108 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
109 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
110 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
111 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
112 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
113 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
114 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
115 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
116 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
117 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
118 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
119 -3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
120 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
121 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
122 };
/* 256-entry lookup table producing a 9-level result in [-4, 4].
 * Indices 0..127 yield 0..+4 and indices 128..255 yield -4..-1, with
 * finer steps close to index 0 and close to index 255.
 * NOTE(review): presumably indexed with a signed value masked to
 * 8 bits - confirm against the callers. */
123 static const int8_t quant9[256]={
124 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
125 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
126 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
127 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
128 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
129 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
130 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
131 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
132 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
133 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
134 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
135 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
136 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
137 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
138 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
139 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
140 };
/* 256-entry lookup table producing an 11-level result in [-5, 5].
 * Indices 0..127 yield 0..+5 and indices 128..255 yield -5..-1, with
 * finer steps close to index 0 and close to index 255.
 * NOTE(review): presumably indexed with a signed value masked to
 * 8 bits - confirm against the callers. */
141 static const int8_t quant11[256]={
142 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
143 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
144 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
145 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
146 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
147 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
148 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
149 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
150 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
151 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
152 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
153 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
154 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
155 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
156 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
157 -4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
158 };
/* 256-entry lookup table producing a 13-level result in [-6, 6].
 * Indices 0..127 yield 0..+6 and indices 128..255 yield -6..-1, with
 * finer steps close to index 0 and close to index 255.
 * NOTE(review): presumably indexed with a signed value masked to
 * 8 bits - confirm against the callers. */
159 static const int8_t quant13[256]={
160 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
161 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
162 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
163 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
164 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
165 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
166 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
167 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
168 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
169 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
170 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
171 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
172 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
173 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
174 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
175 -4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
176 };
177
178 #if 0 //64*cubic
/* "Cubic" variant of the 32x32 weight window (1024 entries, written as
 * 32 rows of 32 values). This copy sits in the `#if 0` branch and is
 * therefore not compiled.
 * NOTE(review): "obmc" presumably stands for overlapped block motion
 * compensation - confirm. */
179 static const uint8_t obmc32[1024]={
180 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
181 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
182 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
183 0, 0, 4, 4, 8, 8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12, 8, 8, 4, 4, 0, 0,
184 0, 0, 4, 8, 8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12, 8, 8, 4, 0, 0,
185 0, 4, 4, 8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12, 8, 4, 4, 0,
186 0, 4, 4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12, 4, 4, 0,
187 0, 4, 8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16, 8, 4, 0,
188 0, 4, 8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16, 8, 4, 0,
189 0, 4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12, 4, 0,
190 0, 4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12, 4, 0,
191 0, 4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12, 4, 0,
192 0, 4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16, 4, 0,
193 0, 8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16, 8, 0,
194 0, 4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16, 4, 0,
195 1, 8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16, 8, 1,
196 1, 8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16, 8, 1,
197 0, 4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16, 4, 0,
198 0, 8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16, 8, 0,
199 0, 4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16, 4, 0,
200 0, 4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12, 4, 0,
201 0, 4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12, 4, 0,
202 0, 4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12, 4, 0,
203 0, 4, 8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16, 8, 4, 0,
204 0, 4, 8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16, 8, 4, 0,
205 0, 4, 4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12, 4, 4, 0,
206 0, 4, 4, 8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12, 8, 4, 4, 0,
207 0, 0, 4, 8, 8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12, 8, 8, 4, 0, 0,
208 0, 0, 4, 4, 8, 8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12, 8, 8, 4, 4, 0, 0,
209 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
210 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
211 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
212 //error:0.000022
213 };
/* "Cubic" variant of the 16x16 weight window (256 entries, written as
 * 16 rows of 16 values). Part of the `#if 0` branch, so not compiled. */
214 static const uint8_t obmc16[256]={
215 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
216 0, 4, 4, 8, 16, 20, 20, 24, 24, 20, 20, 16, 8, 4, 4, 0,
217 0, 4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16, 4, 0,
218 0, 8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24, 8, 0,
219 0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16, 0,
220 0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20, 0,
221 4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20, 4,
222 4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24, 4,
223 4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24, 4,
224 4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20, 4,
225 0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20, 0,
226 0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16, 0,
227 0, 8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24, 8, 0,
228 0, 4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16, 4, 0,
229 0, 4, 4, 8, 16, 20, 20, 24, 24, 20, 20, 16, 8, 4, 4, 0,
230 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
231 //error:0.000033
232 };
233 #elif 1 // 64*linear
/* "Linear" variant of the 32x32 weight window (1024 entries, written as
 * 32 rows of 32 values). This is the copy that is actually compiled,
 * because it sits in the `#elif 1` branch. The values are symmetric
 * both left/right and top/bottom and peak at 240 in the centre.
 * NOTE(review): "obmc" presumably stands for overlapped block motion
 * compensation - confirm. */
234 static const uint8_t obmc32[1024]={
235 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
236 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
237 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
238 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
239 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
240 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
241 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
242 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
243 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
244 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
245 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
246 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
247 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
248 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
249 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
250 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
251 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
252 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
253 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
254 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
255 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
256 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
257 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
258 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
259 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
260 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
261 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
262 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
263 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
264 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
265 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
266 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
267 //error:0.000020
268 };
/* "Linear" variant of the 16x16 weight window (256 entries, written as
 * 16 rows of 16 values). This is the compiled copy (`#elif 1` branch);
 * the values are symmetric in both directions and peak at 224. */
269 static const uint8_t obmc16[256]={
270 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
271 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
272 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
273 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
274 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
275 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
276 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
277 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
278 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
279 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
280 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
281 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
282 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
283 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
284 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
285 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
286 //error:0.000015
287 };
288 #else //64*cos
/* "Cos" variant of the 32x32 weight window (1024 entries, written as
 * 32 rows of 32 values). It sits in the final `#else` branch, which is
 * unreachable while the preceding `#elif 1` is in place, so it is not
 * compiled. */
289 static const uint8_t obmc32[1024]={
290 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
291 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
292 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
293 0, 0, 4, 4, 4, 8, 8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12, 8, 8, 4, 4, 4, 0, 0,
294 0, 0, 4, 4, 8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12, 8, 4, 4, 0, 0,
295 0, 0, 4, 8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12, 8, 4, 0, 0,
296 0, 4, 4, 8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16, 8, 4, 4, 0,
297 0, 4, 8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12, 8, 4, 0,
298 0, 4, 8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16, 8, 4, 0,
299 0, 4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12, 4, 0,
300 0, 4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12, 4, 0,
301 0, 4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12, 4, 0,
302 0, 4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12, 4, 0,
303 0, 4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12, 4, 0,
304 0, 4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16, 4, 0,
305 1, 4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16, 4, 1,
306 1, 4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16, 4, 1,
307 0, 4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16, 4, 0,
308 0, 4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12, 4, 0,
309 0, 4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12, 4, 0,
310 0, 4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12, 4, 0,
311 0, 4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12, 4, 0,
312 0, 4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12, 4, 0,
313 0, 4, 8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16, 8, 4, 0,
314 0, 4, 8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12, 8, 4, 0,
315 0, 4, 4, 8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16, 8, 4, 4, 0,
316 0, 0, 4, 8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12, 8, 4, 0, 0,
317 0, 0, 4, 4, 8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12, 8, 4, 4, 0, 0,
318 0, 0, 4, 4, 4, 8, 8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12, 8, 8, 4, 4, 4, 0, 0,
319 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
320 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
321 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
322 //error:0.000022
323 };
/* "Cos" variant of the 16x16 weight window (256 entries, written as
 * 16 rows of 16 values). Part of the final `#else` branch, which is not
 * compiled while the preceding `#elif 1` is in place. */
324 static const uint8_t obmc16[256]={
325 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
326 0, 0, 4, 8, 12, 16, 20, 20, 20, 20, 16, 12, 8, 4, 0, 0,
327 0, 4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12, 4, 0,
328 0, 8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24, 8, 0,
329 0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12, 0,
330 4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16, 4,
331 4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20, 4,
332 0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20, 0,
333 0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20, 0,
334 4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20, 4,
335 4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16, 4,
336 0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12, 0,
337 0, 8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24, 8, 0,
338 0, 4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12, 4, 0,
339 0, 0, 4, 8, 12, 16, 20, 20, 20, 20, 16, 12, 8, 4, 0, 0,
340 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
341 //error:0.000022
342 };
343 #endif /* 0 */
344
345 //linear *64
/* 8x8 weight window (64 entries, written as 8 rows of 8 values).
 * Unlike obmc32/obmc16 there is only this one variant; the values are
 * symmetric in both directions and peak at 196. */
346 static const uint8_t obmc8[64]={
347 4, 12, 20, 28, 28, 20, 12, 4,
348 12, 36, 60, 84, 84, 60, 36, 12,
349 20, 60,100,140,140,100, 60, 20,
350 28, 84,140,196,196,140, 84, 28,
351 28, 84,140,196,196,140, 84, 28,
352 20, 60,100,140,140,100, 60, 20,
353 12, 36, 60, 84, 84, 60, 36, 12,
354 4, 12, 20, 28, 28, 20, 12, 4,
355 //error:0.000000
356 };
357
358 //linear *64
/* 4x4 weight window (16 entries, written as 4 rows of 4 values);
 * symmetric in both directions with a peak of 144. */
359 static const uint8_t obmc4[16]={
360 16, 48, 48, 16,
361 48,144,144, 48,
362 48,144,144, 48,
363 16, 48, 48, 16,
364 //error:0.000000
365 };
366
/* Weight windows ordered from largest to smallest: index 0 is the 32x32
 * table, index 3 the 4x4 table (each step halves the window side). */
367 static const uint8_t * const obmc_tab[4]={
368 obmc32, obmc16, obmc8, obmc4
369 };
370
/* Square table indexed by two reference-frame numbers; it is filled in
 * and read outside this part of the file.
 * NOTE(review): the name suggests motion-vector scale factors between
 * pairs of reference frames - confirm where it is initialised. */
371 static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
372
/* Per-block record. The BLOCK_* macros defined inside the struct body
 * are bit flags (values 1 and 2) meant for the `type` field.
 * NOTE(review): mx/my are presumably the motion vector components and
 * color[] one value per plane - confirm against the users. */
373 typedef struct BlockNode{
374 int16_t mx;
375 int16_t my;
376 uint8_t ref;
377 uint8_t color[3];
378 uint8_t type;
379 //#define TYPE_SPLIT 1
380 #define BLOCK_INTRA 1
381 #define BLOCK_OPT 2
382 //#define TYPE_NOCOLOR 4
383 uint8_t level; //FIXME merge into type?
384 }BlockNode;
385
/* Constant default block: every color component is 128 and all other
 * fields (mx, my, ref, type, level) are zero.
 * NOTE(review): presumably substituted for neighbours that lie outside
 * the picture - confirm against the users. */
386 static const BlockNode null_block= { //FIXME add border maybe
387 .color= {128,128,128},
388 .mx= 0,
389 .my= 0,
390 .ref= 0,
391 .type= 0,
392 .level= 0,
393 };
394
/* Size constants. MB_SIZE expands to 1<<4 == 16.
 * NOTE(review): "MB" presumably means macroblock - confirm. */
395 #define LOG2_MB_SIZE 4
396 #define MB_SIZE (1<<LOG2_MB_SIZE)
397 #define ENCODER_EXTRA_BITS 4
/* Plane.hcoeff and Plane.last_hcoeff are sized HTAPS_MAX/2. */
398 #define HTAPS_MAX 8
399
/* Pair of a signed 16-bit position and an unsigned 16-bit coefficient;
 * SubBand.x_coeff points at an array of these.
 * NOTE(review): presumably a sparse (x, value) list per line - confirm
 * against the code that fills it. */
400 typedef struct x_and_coeff{
401 int16_t x;
402 uint16_t coeff;
403 } x_and_coeff;
404
/* Description of one subband: its geometry, quantizer setting, pointers
 * to its coefficient storage and the adaptive coder states used for it.
 * NOTE(review): buf/ibuf presumably point into the shared forward and
 * inverse transform buffers of the context - confirm. */
405 typedef struct SubBand{
406 int level;
407 int stride;
408 int width;
409 int height;
410 int qlog; ///< log(qscale)/log[2^(1/6)]
411 DWTELEM *buf;
412 IDWTELEM *ibuf;
413 int buf_x_offset;
414 int buf_y_offset;
415 int stride_line; ///< Stride measured in lines, not pixels.
416 x_and_coeff * x_coeff;
417 struct SubBand *parent;
418 uint8_t state[/*7*2*/ 7 + 512][32];
419 }SubBand;
420
/* One picture plane: its dimensions, its subbands (4 per decomposition
 * level) and the interpolation filter settings. Each filter setting has
 * a last_* twin field.
 * NOTE(review): the last_* fields presumably hold the values from the
 * previous frame header - confirm. */
421 typedef struct Plane{
422 int width;
423 int height;
424 SubBand band[MAX_DECOMPOSITIONS][4];
425
426 int htaps;
427 int8_t hcoeff[HTAPS_MAX/2];
428 int diag_mc;
429 int fast_mc;
430
431 int last_htaps;
432 int8_t last_hcoeff[HTAPS_MAX/2];
433 int last_diag_mc;
434 }Plane;
435
/* Codec-wide state: the range coder, picture buffers, header parameters
 * (several of them with a last_* twin), per-plane data, the block array
 * and encoder-side helpers.
 * NOTE(review): the last_* twins presumably keep the previously coded
 * header values - confirm against the header code. */
436 typedef struct SnowContext{
437 // MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX)
438
439 AVCodecContext *avctx;
440 RangeCoder c;
441 DSPContext dsp;
442 AVFrame new_picture;
443 AVFrame input_picture; ///< new_picture with the internal linesizes
444 AVFrame current_picture;
445 AVFrame last_picture[MAX_REF_FRAMES];
446 uint8_t *halfpel_plane[MAX_REF_FRAMES][4][4];
447 AVFrame mconly_picture;
448 // uint8_t q_context[16];
/* Adaptive coder state arrays. */
449 uint8_t header_state[32];
450 uint8_t block_state[128 + 32*128];
451 int keyframe;
452 int always_reset;
453 int version;
454 int spatial_decomposition_type;
455 int last_spatial_decomposition_type;
456 int temporal_decomposition_type;
457 int spatial_decomposition_count;
458 int last_spatial_decomposition_count;
459 int temporal_decomposition_count;
460 int max_ref_frames;
461 int ref_frames;
462 int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
463 uint32_t *ref_scores[MAX_REF_FRAMES];
464 DWTELEM *spatial_dwt_buffer;
465 IDWTELEM *spatial_idwt_buffer;
466 int colorspace_type;
467 int chroma_h_shift;
468 int chroma_v_shift;
469 int spatial_scalability;
470 int qlog;
471 int last_qlog;
472 int lambda;
473 int lambda2;
474 int pass1_rc;
475 int mv_scale;
476 int last_mv_scale;
477 int qbias;
478 int last_qbias;
479 #define QBIAS_SHIFT 3
/* NOTE(review): b_width/b_height are presumably the dimensions of the
 * `block` array, counted in blocks - confirm. */
480 int b_width;
481 int b_height;
482 int block_max_depth;
483 int last_block_max_depth;
484 Plane plane[MAX_PLANES];
485 BlockNode *block;
486 #define ME_CACHE_SIZE 1024
487 int me_cache[ME_CACHE_SIZE];
488 int me_cache_generation;
489 slice_buffer sb;
490
491 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX)
492
493 uint8_t *scratchbuf;
494 }SnowContext;
495
/* Four line pointers plus a line counter.
 * NOTE(review): presumably the rolling state of an incremental
 * (line-by-line) inverse wavelet transform - confirm against the
 * compose functions that use it. */
496 typedef struct {
497 IDWTELEM *b0;
498 IDWTELEM *b1;
499 IDWTELEM *b2;
500 IDWTELEM *b3;
501 int y;
502 } DWTCompose;
503
/* Return the buffer attached to line `line_num`, attaching one through
 * slice_buffer_load_line() only when the slot is still empty.
 * Both macro arguments are expanded more than once, so they must not
 * have side effects. */
504 #define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
505 //#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
506
/* Forward declaration; the definition is not in this part of the file. */
507 static void iterative_me(SnowContext *s);
508
509 static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM * base_buffer)
510 {
511 int i;
512
513 buf->base_buffer = base_buffer;
514 buf->line_count = line_count;
515 buf->line_width = line_width;
516 buf->data_count = max_allocated_lines;
517 buf->line = av_mallocz (sizeof(IDWTELEM *) * line_count);
518 buf->data_stack = av_malloc (sizeof(IDWTELEM *) * max_allocated_lines);
519
520 for(i = 0; i < max_allocated_lines; i++){
521 buf->data_stack[i] = av_malloc (sizeof(IDWTELEM) * line_width);
522 }
523
524 buf->data_stack_top = max_allocated_lines - 1;
525 }
526
527 static IDWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
528 {
529 IDWTELEM * buffer;
530
531 assert(buf->data_stack_top >= 0);
532 // assert(!buf->line[line]);
533 if (buf->line[line])
534 return buf->line[line];
535
536 buffer = buf->data_stack[buf->data_stack_top];
537 buf->data_stack_top--;
538 buf->line[line] = buffer;
539
540 return buffer;
541 }
542
543 static void slice_buffer_release(slice_buffer * buf, int line)
544 {
545 IDWTELEM * buffer;
546
547 assert(line >= 0 && line < buf->line_count);
548 assert(buf->line[line]);
549
550 buffer = buf->line[line];
551 buf->data_stack_top++;
552 buf->data_stack[buf->data_stack_top] = buffer;
553 buf->line[line] = NULL;
554 }
555
556 static void slice_buffer_flush(slice_buffer * buf)
557 {
558 int i;
559 for(i = 0; i < buf->line_count; i++){
560 if (buf->line[i])
561 slice_buffer_release(buf, i);
562 }
563 }
564
565 static void slice_buffer_destroy(slice_buffer * buf)
566 {
567 int i;
568 slice_buffer_flush(buf);
569
570 for(i = buf->data_count - 1; i >= 0; i--){
571 av_freep(&buf->data_stack[i]);
572 }
573 av_freep(&buf->data_stack);
574 av_freep(&buf->line);
575 }
576
577 #ifdef __sgi
578 // Avoid a name clash on SGI IRIX
579 #undef qexp
580 #endif
/* Shift that goes with the qexp[] table; evaluates to 15-FRAC_BITS. */
581 #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
/* Table with QROOT entries; it is filled in outside this part of the
 * file.
 * NOTE(review): presumably holds the fractional powers of two used to
 * turn qlog into a quantizer scale - confirm at the initialisation. */
582 static uint8_t qexp[QROOT];
583
/**
 * Reflect a coordinate back into the range [0, m].
 *
 * Values below 0 are mirrored around 0 and values above m are mirrored
 * around m, repeatedly, until the result lies inside the range.
 *
 * @param v coordinate, possibly outside [0, m]
 * @param m highest valid coordinate
 * @return the mirrored coordinate; 0 when m is 0
 */
static inline int mirror(int v, int m){
    /* With m == 0 the reflection below only flips the sign of v and would
     * never terminate; the single valid coordinate is 0. */
    if(!m)
        return 0;

    /* the unsigned compare catches v < 0 and v > m in one test */
    while((unsigned)v > (unsigned)m){
        v=-v;
        if(v<0) v+= 2*m;
    }
    return v;
}
591
/**
 * Write an integer with the adaptive binary range coder.
 *
 * Layout of the emitted decisions (active "#if 1" branch):
 *  - state[0]: 1 when v is zero (nothing else follows), 0 otherwise;
 *  - e one-bits followed by a terminating zero-bit, where e = av_log2(|v|);
 *    contexts state[1..10], with positions beyond the tenth sharing
 *    state[10];
 *  - the e bits of |v| below its leading one, most significant first;
 *    contexts state[22..31], bit positions above 9 sharing state[31];
 *  - when is_signed is set, one sign bit (1 for negative) with context
 *    state[11 + min(e, 10)].
 *
 * @param c         range coder to write to
 * @param state     context array; indices 0..31 are used
 * @param v         value to write
 * @param is_signed non-zero to also write the sign of v
 */
592 static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
593 int i;
594
595 if(v){
596 const int a= FFABS(v);
597 const int e= av_log2(a);
598 #if 1
/* el: number of exponent bits that get a context of their own */
599 const int el= FFMIN(e, 10);
600 put_rac(c, state+0, 0);
601
602 for(i=0; i<el; i++){
603 put_rac(c, state+1+i, 1); //1..10
604 }
/* remaining exponent bits (only when e > 10) share state[10] */
605 for(; i<e; i++){
606 put_rac(c, state+1+9, 1); //1..10
607 }
/* terminator of the unary exponent */
608 put_rac(c, state+1+FFMIN(i,9), 0);
609
/* mantissa bits at positions >= 10 share state[31] */
610 for(i=e-1; i>=el; i--){
611 put_rac(c, state+22+9, (a>>i)&1); //22..31
612 }
613 for(; i>=0; i--){
614 put_rac(c, state+22+i, (a>>i)&1); //22..31
615 }
616
617 if(is_signed)
618 put_rac(c, state+11 + el, v < 0); //11..21
619 #else
620
/* Disabled alternative: the same kind of layout, written with separate
 * code paths for e <= 9 and e > 9. */
621 put_rac(c, state+0, 0);
622 if(e<=9){
623 for(i=0; i<e; i++){
624 put_rac(c, state+1+i, 1); //1..10
625 }
626 put_rac(c, state+1+i, 0);
627
628 for(i=e-1; i>=0; i--){
629 put_rac(c, state+22+i, (a>>i)&1); //22..31
630 }
631
632 if(is_signed)
633 put_rac(c, state+11 + e, v < 0); //11..21
634 }else{
635 for(i=0; i<e; i++){
636 put_rac(c, state+1+FFMIN(i,9), 1); //1..10
637 }
638 put_rac(c, state+1+9, 0);
639
640 for(i=e-1; i>=0; i--){
641 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
642 }
643
644 if(is_signed)
645 put_rac(c, state+11 + 10, v < 0); //11..21
646 }
647 #endif /* 1 */
648 }else{
/* v == 0 is signalled by a single 1 decision */
649 put_rac(c, state+0, 1);
650 }
651 }
652
653 static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
654 if(get_rac(c, state+0))
655 return 0;
656 else{
657 int i, e, a;
658 e= 0;
659 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
660 e++;
661 }
662
663 a= 1;
664 for(i=e-1; i>=0; i--){
665 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
666 }
667
668 e= -(is_signed && get_rac(c, state+11 + FFMIN(e,10))); //11..21
669 return (a^e)-e;
670 }
671 }
672
673 static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
674 int i;
675 int r= log2>=0 ? 1<<log2 : 1;
676
677 assert(v>=0);
678 assert(log2>=-4);
679
680 while(v >= r){
681 put_rac(c, state+4+log2, 1);
682 v -= r;
683 log2++;
684 if(log2>0) r+=r;
685 }
686 put_rac(c, state+4+log2, 0);
687
688 for(i=log2-1; i>=0; i--){
689 put_rac(c, state+31-i, (v>>i)&1);
690 }
691 }
692
693 static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
694 int i;
695 int r= log2>=0 ? 1<<log2 : 1;
696 int v=0;
697
698 assert(log2>=-4);
699
700 while(get_rac(c, state+4+log2)){
701 v+= r;
702 log2++;
703 if(log2>0) r+=r;
704 }
705
706 for(i=log2-1; i>=0; i--){
707 v+= get_rac(c, state+31-i)<<i;
708 }
709
710 return v;
711 }
712
713 static av_always_inline void
714 lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
715 int dst_step, int src_step, int ref_step,
716 int width, int mul, int add, int shift,
717 int highpass, int inverse){
718 const int mirror_left= !highpass;
719 const int mirror_right= (width&1) ^ highpass;
720 const int w= (width>>1) - 1 + (highpass & width);
721 int i;
722
723 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
724 if(mirror_left){
725 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
726 dst += dst_step;
727 src += src_step;
728 }
729
730 for(i=0; i<w; i++){
731 dst[i*dst_step] =
732 LIFT(src[i*src_step],
733 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
734 inverse);
735 }
736
737 if(mirror_right){
738 dst[w*dst_step] =
739 LIFT(src[w*src_step],
740 ((mul*2*ref[w*ref_step]+add)>>shift),
741 inverse);
742 }
743 }
744
745 static av_always_inline void
746 inv_lift(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
747 int dst_step, int src_step, int ref_step,
748 int width, int mul, int add, int shift,
749 int highpass, int inverse){
750 const int mirror_left= !highpass;
751 const int mirror_right= (width&1) ^ highpass;
752 const int w= (width>>1) - 1 + (highpass & width);
753 int i;
754
755 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
756 if(mirror_left){
757 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
758 dst += dst_step;
759 src += src_step;
760 }
761
762 for(i=0; i<w; i++){
763 dst[i*dst_step] =
764 LIFT(src[i*src_step],
765 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
766 inverse);
767 }
768
769 if(mirror_right){
770 dst[w*dst_step] =
771 LIFT(src[w*src_step],
772 ((mul*2*ref[w*ref_step]+add)>>shift),
773 inverse);
774 }
775 }
776
777 #ifndef liftS
778 static av_always_inline void
779 liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
780 int dst_step, int src_step, int ref_step,
781 int width, int mul, int add, int shift,
782 int highpass, int inverse){
783 const int mirror_left= !highpass;
784 const int mirror_right= (width&1) ^ highpass;
785 const int w= (width>>1) - 1 + (highpass & width);
786 int i;
787
788 assert(shift == 4);
789 #define LIFTS(src, ref, inv) \
790 ((inv) ? \
791 (src) + (((ref) + 4*(src))>>shift): \
792 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
793 if(mirror_left){
794 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
795 dst += dst_step;
796 src += src_step;
797 }
798
799 for(i=0; i<w; i++){
800 dst[i*dst_step] =
801 LIFTS(src[i*src_step],
802 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
803 inverse);
804 }
805
806 if(mirror_right){
807 dst[w*dst_step] =
808 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
809 }
810 }
811 static av_always_inline void
812 inv_liftS(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
813 int dst_step, int src_step, int ref_step,
814 int width, int mul, int add, int shift,
815 int highpass, int inverse){
816 const int mirror_left= !highpass;
817 const int mirror_right= (width&1) ^ highpass;
818 const int w= (width>>1) - 1 + (highpass & width);
819 int i;
820
821 assert(shift == 4);
822 #define LIFTS(src, ref, inv) \
823 ((inv) ? \
824 (src) + (((ref) + 4*(src))>>shift): \
825 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
826 if(mirror_left){
827 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
828 dst += dst_step;
829 src += src_step;
830 }
831
832 for(i=0; i<w; i++){
833 dst[i*dst_step] =
834 LIFTS(src[i*src_step],
835 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
836 inverse);
837 }
838
839 if(mirror_right){
840 dst[w*dst_step] =
841 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
842 }
843 }
844 #endif /* ! liftS */
845
846 static void horizontal_decompose53i(DWTELEM *b, int width){
847 DWTELEM temp[width];
848 const int width2= width>>1;
849 int x;
850 const int w2= (width+1)>>1;
851
852 for(x=0; x<width2; x++){
853 temp[x ]= b[2*x ];
854 temp[x+w2]= b[2*x + 1];
855 }
856 if(width&1)
857 temp[x ]= b[2*x ];
858 #if 0
859 {
860 int A1,A2,A3,A4;
861 A2= temp[1 ];
862 A4= temp[0 ];
863 A1= temp[0+width2];
864 A1 -= (A2 + A4)>>1;
865 A4 += (A1 + 1)>>1;
866 b[0+width2] = A1;
867 b[0 ] = A4;
868 for(x=1; x+1<width2; x+=2){
869 A3= temp[x+width2];
870 A4= temp[x+1 ];
871 A3 -= (A2 + A4)>>1;
872 A2 += (A1 + A3 + 2)>>2;
873 b[x+width2] = A3;
874 b[x ] = A2;
875
876 A1= temp[x+1+width2];
877 A2= temp[x+2 ];
878 A1 -= (A2 + A4)>>1;
879 A4 += (A1 + A3 + 2)>>2;
880 b[x+1+width2] = A1;
881 b[x+1 ] = A4;
882 }
883 A3= temp[width-1];
884 A3 -= A2;
885 A2 += (A1 + A3 + 2)>>2;
886 b[width -1] = A3;
887 b[width2-1] = A2;
888 }
889 #else
890 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
891 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0);
892 #endif /* 0 */
893 }
894
895 static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
896 int i;
897
898 for(i=0; i<width; i++){
899 b1[i] -= (b0[i] + b2[i])>>1;
900 }
901 }
902
903 static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
904 int i;
905
906 for(i=0; i<width; i++){
907 b1[i] += (b0[i] + b2[i] + 2)>>2;
908 }
909 }
910
911 static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
912 int y;
913 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
914 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride;
915
916 for(y=-2; y<height; y+=2){
917 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
918 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
919
920 if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
921 if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
922
923 if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
924 if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
925
926 b0=b2;
927 b1=b3;
928 }
929 }
930
931 static void horizontal_decompose97i(DWTELEM *b, int width){
932 DWTELEM temp[width];
933 const int w2= (width+1)>>1;
934
935 lift (temp+w2, b +1, b , 1, 2, 2, width, W_AM, W_AO, W_AS, 1, 1);
936 liftS(temp , b , temp+w2, 1, 2, 1, width, W_BM, W_BO, W_BS, 0, 0);
937 lift (b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
938 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
939 }
940
941
942 static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
943 int i;
944
945 for(i=0; i<width; i++){
946 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
947 }
948 }
949
950 static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
951 int i;
952
953 for(i=0; i<width; i++){
954 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
955 }
956 }
957
958 static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
959 int i;
960
961 for(i=0; i<width; i++){
962 #ifdef liftS
963 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
964 #else
965 b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + W_BO*5 + (5<<27)) / (5*16) - (1<<23);
966 #endif
967 }
968 }
969
970 static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
971 int i;
972
973 for(i=0; i<width; i++){
974 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
975 }
976 }
977
978 static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
979 int y;
980 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
981 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride;
982 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
983 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
984
985 for(y=-4; y<height; y+=2){
986 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
987 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
988
989 if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
990 if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
991
992 if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
993 if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
994 if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
995 if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
996
997 b0=b2;
998 b1=b3;
999 b2=b4;
1000 b3=b5;
1001 }
1002 }
1003
1004 void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1005 int level;
1006
1007 for(level=0; level<decomposition_count; level++){
1008 switch(type){
1009 case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1010 case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1011 }
1012 }
1013 }
1014
1015 static void horizontal_compose53i(IDWTELEM *b, int width){
1016 IDWTELEM temp[width];
1017 const int width2= width>>1;
1018 const int w2= (width+1)>>1;
1019 int x;
1020
1021 #if 0
1022 int A1,A2,A3,A4;
1023 A2= temp[1 ];
1024 A4= temp[0 ];
1025 A1= temp[0+width2];
1026 A1 -= (A2 + A4)>>1;
1027 A4 += (A1 + 1)>>1;
1028 b[0+width2] = A1;
1029 b[0 ] = A4;
1030 for(x=1; x+1<width2; x+=2){
1031 A3= temp[x+width2];
1032 A4= temp[x+1 ];
1033 A3 -= (A2 + A4)>>1;
1034 A2 += (A1 + A3 + 2)>>2;
1035 b[x+width2] = A3;
1036 b[x ] = A2;
1037
1038 A1= temp[x+1+width2];
1039 A2= temp[x+2 ];
1040 A1 -= (A2 + A4)>>1;
1041 A4 += (A1 + A3 + 2)>>2;
1042 b[x+1+width2] = A1;
1043 b[x+1 ] = A4;
1044 }
1045 A3= temp[width-1];
1046 A3 -= A2;
1047 A2 += (A1 + A3 + 2)>>2;
1048 b[width -1] = A3;
1049 b[width2-1] = A2;
1050 #else
1051 inv_lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1);
1052 inv_lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1053 #endif /* 0 */
1054 for(x=0; x<width2; x++){
1055 b[2*x ]= temp[x ];
1056 b[2*x + 1]= temp[x+w2];
1057 }
1058 if(width&1)
1059 b[2*x ]= temp[x ];
1060 }
1061
1062 static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1063 int i;
1064
1065 for(i=0; i<width; i++){
1066 b1[i] += (b0[i] + b2[i])>>1;
1067 }
1068 }
1069
1070 static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1071 int i;
1072
1073 for(i=0; i<width; i++){
1074 b1[i] -= (b0[i] + b2[i] + 2)>>2;
1075 }
1076 }
1077
1078 static void spatial_compose53i_buffered_init(DWTCompose *cs, slice_buffer * sb, int height, int stride_line){
1079 cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1080 cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line);
1081 cs->y = -1;
1082 }
1083
1084 static void spatial_compose53i_init(DWTCompose *cs, IDWTELEM *buffer, int height, int stride){
1085 cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1086 cs->b1 = buffer + mirror(-1 , height-1)*stride;
1087 cs->y = -1;
1088 }
1089
1090 static void spatial_compose53i_dy_buffered(DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line){
1091 int y= cs->y;
1092
1093 IDWTELEM *b0= cs->b0;
1094 IDWTELEM *b1= cs->b1;
1095 IDWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
1096 IDWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
1097
1098 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1099 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1100
1101 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1102 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1103
1104 cs->b0 = b2;
1105 cs->b1 = b3;
1106 cs->y += 2;
1107 }
1108
1109 static void spatial_compose53i_dy(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride){
1110 int y= cs->y;
1111 IDWTELEM *b0= cs->b0;
1112 IDWTELEM *b1= cs->b1;
1113 IDWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1114 IDWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1115
1116 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1117 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1118
1119 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1120 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1121
1122 cs->b0 = b2;
1123 cs->b1 = b3;
1124 cs->y += 2;
1125 }
1126
1127 static void av_unused spatial_compose53i(IDWTELEM *buffer, int width, int height, int stride){
1128 DWTCompose cs;
1129 spatial_compose53i_init(&cs, buffer, height, stride);
1130 while(cs.y <= height)
1131 spatial_compose53i_dy(&cs, buffer, width, height, stride);
1132 }
1133
1134
1135 void ff_snow_horizontal_compose97i(IDWTELEM *b, int width){
1136 IDWTELEM temp[width];
1137 const int w2= (width+1)>>1;
1138
1139 inv_lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
1140 inv_lift (temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
1141 inv_liftS(b , temp , temp+w2, 2, 1, 1, width, W_BM, W_BO, W_BS, 0, 1);
1142 inv_lift (b+1 , temp+w2, b , 2, 1, 2, width, W_AM, W_AO, W_AS, 1, 0);
1143 }
1144
1145 static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1146 int i;
1147
1148 for(i=0; i<width; i++){
1149 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1150 }
1151 }
1152
1153 static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1154 int i;
1155
1156 for(i=0; i<width; i++){
1157 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1158 }
1159 }
1160
1161 static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1162 int i;
1163
1164 for(i=0; i<width; i++){
1165 #ifdef liftS
1166 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1167 #else
1168 b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
1169 #endif
1170 }
1171 }
1172
1173 static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1174 int i;
1175
1176 for(i=0; i<width; i++){
1177 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1178 }
1179 }
1180
1181 void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
1182 int i;
1183
1184 for(i=0; i<width; i++){
1185 b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
1186 b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
1187 #ifdef liftS
1188 b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
1189 #else
1190 b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
1191 #endif
1192 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1193 }
1194 }
1195
1196 static void spatial_compose97i_buffered_init(DWTCompose *cs, slice_buffer * sb, int height, int stride_line){
1197 cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1198 cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line);
1199 cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1200 cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
1201 cs->y = -3;
1202 }
1203
1204 static void spatial_compose97i_init(DWTCompose *cs, IDWTELEM *buffer, int height, int stride){
1205 cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1206 cs->b1 = buffer + mirror(-3 , height-1)*stride;
1207 cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1208 cs->b3 = buffer + mirror(-3+2, height-1)*stride;
1209 cs->y = -3;
1210 }
1211
1212 static void spatial_compose97i_dy_buffered(DSPContext *dsp, DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line){
1213 int y = cs->y;
1214
1215 IDWTELEM *b0= cs->b0;
1216 IDWTELEM *b1= cs->b1;
1217 IDWTELEM *b2= cs->b2;
1218 IDWTELEM *b3= cs->b3;
1219 IDWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
1220 IDWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
1221
1222 if(y>0 && y+4<height){
1223 dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
1224 }else{
1225 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1226 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1227 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1228 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1229 }
1230
1231 if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width);
1232 if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width);
1233
1234 cs->b0=b2;
1235 cs->b1=b3;
1236 cs->b2=b4;
1237 cs->b3=b5;
1238 cs->y += 2;
1239 }
1240
1241 static void spatial_compose97i_dy(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride){
1242 int y = cs->y;
1243 IDWTELEM *b0= cs->b0;
1244 IDWTELEM *b1= cs->b1;
1245 IDWTELEM *b2= cs->b2;
1246 IDWTELEM *b3= cs->b3;
1247 IDWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1248 IDWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1249
1250 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1251 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1252 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1253 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1254
1255 if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width);
1256 if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width);
1257
1258 cs->b0=b2;
1259 cs->b1=b3;
1260 cs->b2=b4;
1261 cs->b3=b5;
1262 cs->y += 2;
1263 }
1264
1265 static void av_unused spatial_compose97i(IDWTELEM *buffer, int width, int height, int stride){
1266 DWTCompose cs;
1267 spatial_compose97i_init(&cs, buffer, height, stride);
1268 while(cs.y <= height)
1269 spatial_compose97i_dy(&cs, buffer, width, height, stride);
1270 }
1271
1272 static void ff_spatial_idwt_buffered_init(DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
1273 int level;
1274 for(level=decomposition_count-1; level>=0; level--){
1275 switch(type){
1276 case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1277 case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1278 }
1279 }
1280 }
1281
1282 static void ff_spatial_idwt_init(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1283 int level;
1284 for(level=decomposition_count-1; level>=0; level--){
1285 switch(type){
1286 case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1287 case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
1288 }
1289 }
1290 }
1291
1292 static void ff_spatial_idwt_slice(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
1293 const int support = type==1 ? 3 : 5;
1294 int level;
1295 if(type==2) return;
1296
1297 for(level=decomposition_count-1; level>=0; level--){
1298 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1299 switch(type){
1300 case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1301 break;
1302 case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1303 break;
1304 }
1305 }
1306 }
1307 }
1308
1309 static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, DWTCompose *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
1310 const int support = type==1 ? 3 : 5;
1311 int level;
1312 if(type==2) return;
1313
1314 for(level=decomposition_count-1; level>=0; level--){
1315 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1316 switch(type){
1317 case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1318 break;
1319 case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1320 break;
1321 }
1322 }
1323 }
1324 }
1325
1326 static void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1327 DWTCompose cs[MAX_DECOMPOSITIONS];
1328 int y;
1329 ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1330 for(y=0; y<height; y+=4)
1331 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
1332 }
1333
1334 static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
1335 const int w= b->width;
1336 const int h= b->height;
1337 int x, y;
1338
1339 if(1){
1340 int run=0;
1341 int runs[w*h];
1342 int run_index=0;
1343 int max_index;
1344
1345 for(y=0; y<h; y++){
1346 for(x=0; x<w; x++){
1347 int v, p=0;
1348 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1349 v= src[x + y*stride];
1350
1351 if(y){
1352 t= src[x + (y-1)*stride];
1353 if(x){
1354 lt= src[x - 1 + (y-1)*stride];
1355 }
1356 if(x + 1 < w){
1357 rt= src[x + 1 + (y-1)*stride];
1358 }
1359 }
1360 if(x){
1361 l= src[x - 1 + y*stride];
1362 /*if(x > 1){
1363 if(orientation==1) ll= src[y + (x-2)*stride];
1364 else ll= src[x - 2 + y*stride];
1365 }*/
1366 }
1367 if(parent){
1368 int px= x>>1;
1369 int py= y>>1;
1370 if(px<b->parent->width && py<b->parent->height)
1371 p= parent[px + py*2*stride];
1372 }
1373 if(!(/*ll|*/l|lt|t|rt|p)){
1374 if(v){
1375 runs[run_index++]= run;
1376 run=0;
1377 }else{
1378 run++;
1379 }
1380 }
1381 }
1382 }
1383 max_index= run_index;
1384 runs[run_index++]= run;
1385 run_index=0;
1386 run= runs[run_index++];
1387
1388 put_symbol2(&s->c, b->state[30], max_index, 0);
1389 if(run_index <= max_index)
1390 put_symbol2(&s->c, b->state[1], run, 3);
1391
1392 for(y=0; y<h; y++){
1393 if(s->c.bytestream_end - s->c.bytestream < w*40){
1394 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1395 return -1;
1396 }
1397 for(x=0; x<w; x++){
1398 int v, p=0;
1399 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1400 v= src[x + y*stride];
1401
1402 if(y){
1403 t= src[x + (y-1)*stride];
1404 if(x){
1405 lt= src[x - 1 + (y-1)*stride];
1406 }
1407 if(x + 1 < w){
1408 rt= src[x + 1 + (y-1)*stride];
1409 }
1410 }
1411 if(x){
1412 l= src[x - 1 + y*stride];
1413 /*if(x > 1){
1414 if(orientation==1) ll= src[y + (x-2)*stride];
1415 else ll= src[x - 2 + y*stride];
1416 }*/
1417 }
1418 if(parent){
1419 int px= x>>1;
1420 int py= y>>1;
1421 if(px<b->parent->width && py<b->parent->height)
1422 p= parent[px + py*2*stride];
1423 }
1424 if(/*ll|*/l|lt|t|rt|p){
1425 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1426
1427 put_rac(&s->c, &b->state[0][context], !!v);
1428 }else{
1429 if(!run){
1430 run= runs[run_index++];
1431
1432 if(run_index <= max_index)
1433 put_symbol2(&s->c, b->state[1], run, 3);
1434 assert(v);
1435 }else{
1436 run--;
1437 assert(!v);
1438 }
1439 }
1440 if(v){
1441 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1442 int l2= 2*FFABS(l) + (l<0);
1443 int t2= 2*FFABS(t) + (t<0);
1444
1445 put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
1446 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
1447 }
1448 }
1449 }
1450 }
1451 return 0;
1452 }
1453
1454 static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
1455 // encode_subband_qtree(s, b, src, parent, stride, orientation);
1456 // encode_subband_z0run(s, b, src, parent, stride, orientation);
1457 return encode_subband_c0run(s, b, src, parent, stride, orientation);
1458 // encode_subband_dzr(s, b, src, parent, stride, orientation);
1459 }
1460
1461 static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
1462 const int w= b->width;
1463 const int h= b->height;
1464 int x,y;
1465
1466 if(1){
1467 int run, runs;
1468 x_and_coeff *xc= b->x_coeff;
1469 x_and_coeff *prev_xc= NULL;
1470 x_and_coeff *prev2_xc= xc;
1471 x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
1472 x_and_coeff *prev_parent_xc= parent_xc;
1473
1474 runs= get_symbol2(&s->c, b->state[30], 0);
1475 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1476 else run= INT_MAX;
1477
1478 for(y=0; y<h; y++){
1479 int v=0;
1480 int lt=0, t=0, rt=0;
1481
1482 if(y && prev_xc->x == 0){
1483 rt= prev_xc->coeff;
1484 }
1485 for(x=0; x<w; x++){
1486 int p=0;
1487 const int l= v;
1488
1489 lt= t; t= rt;
1490
1491 if(y){
1492 if(prev_xc->x <= x)
1493 prev_xc++;
1494 if(prev_xc->x == x + 1)
1495 rt= prev_xc->coeff;
1496 else
1497 rt=0;
1498 }
1499 if(parent_xc){
1500 if(x>>1 > parent_xc->x){
1501 parent_xc++;
1502 }
1503 if(x>>1 == parent_xc->x){
1504 p= parent_xc->coeff;
1505 }
1506 }
1507 if(/*ll|*/l|lt|t|rt|p){
1508 int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
1509
1510 v=get_rac(&s->c, &b->state[0][context]);
1511 if(v){
1512 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
1513 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
1514
1515 xc->x=x;
1516 (xc++)->coeff= v;
1517 }
1518 }else{
1519 if(!run){
1520 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1521 else run= INT_MAX;
1522 v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
1523 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
1524
1525 xc->x=x;
1526 (xc++)->coeff= v;
1527 }else{
1528 int max_run;
1529 run--;
1530 v=0;
1531
1532 if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
1533 else max_run= FFMIN(run, w-x-1);
1534 if(parent_xc)
1535 max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
1536 x+= max_run;
1537 run-= max_run;
1538 }
1539 }
1540 }
1541 (xc++)->x= w+1; //end marker
1542 prev_xc= prev2_xc;
1543 prev2_xc= xc;
1544
1545 if(parent_xc){
1546 if(y&1){
1547 while(parent_xc->x != parent->width+1)
1548 parent_xc++;
1549 parent_xc++;
1550 prev_parent_xc= parent_xc;
1551 }else{
1552 parent_xc= prev_parent_xc;
1553 }
1554 }
1555 }
1556
1557 (xc++)->x= w+1; //end marker
1558 }
1559 }
1560
1561 static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1562 const int w= b->width;
1563 int y;
1564 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
1565 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1566 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1567 int new_index = 0;
1568
1569 if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){
1570 qadd= 0;
1571 qmul= 1<<QEXPSHIFT;
1572 }
1573
1574 /* If we are on the second or later slice, restore our index. */
1575 if (start_y != 0)
1576 new_index = save_state[0];
1577
1578
1579 for(y=start_y; y<h; y++){
1580 int x = 0;
1581 int v;
1582 IDWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1583 memset(line, 0, b->width*sizeof(IDWTELEM));
1584 v = b->x_coeff[new_index].coeff;
1585 x = b->x_coeff[new_index++].x;
1586 while(x < w){
1587 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1588 register int u= -(v&1);
1589 line[x] = (t^u) - u;
1590
1591 v = b->x_coeff[new_index].coeff;
1592 x = b->x_coeff[new_index++].x;
1593 }
1594 }
1595
1596 /* Save our variables for the next slice. */
1597 save_state[0] = new_index;
1598
1599 return;
1600 }
1601
1602 static void reset_contexts(SnowContext *s){ //FIXME better initial contexts
1603 int plane_index, level, orientation;
1604
1605 for(plane_index=0; plane_index<3; plane_index++){
1606 for(level=0; level<MAX_DECOMPOSITIONS; level++){
1607 for(orientation=level ? 1:0; orientation<4; orientation++){
1608 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
1609 }
1610 }
1611 }
1612 memset(s->header_state, MID_STATE, sizeof(s->header_state));
1613 memset(s->block_state, MID_STATE, sizeof(s->block_state));
1614 }
1615
1616 static int alloc_blocks(SnowContext *s){
1617 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1618 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1619
1620 s->b_width = w;
1621 s->b_height= h;
1622
1623 av_free(s->block);
1624 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
1625 return 0;
1626 }
1627
1628 static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1629 uint8_t *bytestream= d->bytestream;
1630 uint8_t *bytestream_start= d->bytestream_start;
1631 *d= *s;
1632 d->bytestream= bytestream;
1633 d->bytestream_start= bytestream_start;
1634 }
1635
//near copy & paste from dsputil, FIXME
/**
 * Sum of all pixels in a w x w block.
 *
 * @param pix       top-left pixel of the block
 * @param line_size distance between lines, in bytes
 * @param w         width and height of the block
 * @return          the sum of the w*w pixel values
 */
static int pix_sum(uint8_t * pix, int line_size, int w)
{
    int total = 0;
    int row, col;

    for (row = 0; row < w; row++) {
        const uint8_t *line = pix + row * line_size;

        for (col = 0; col < w; col++)
            total += line[col];
    }
    return total;
}
1651
1652 //near copy & paste from dsputil, FIXME
1653 static int pix_norm1(uint8_t * pix, int line_size, int w)
1654 {
1655 int s, i, j;
1656 uint32_t *sq = ff_squareTbl + 256;
1657
1658 s = 0;
1659 for (i = 0; i < w; i++) {
1660 for (j = 0; j < w; j ++) {
1661 s += sq[pix[0]];
1662 pix ++;
1663 }
1664 pix += line_size - w;
1665 }
1666 return s;
1667 }
1668
1669 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
1670 const int w= s->b_width << s->block_max_depth;
1671 const int rem_depth= s->block_max_depth - level;
1672 const int index= (x + y*w) << rem_depth;
1673 const int block_w= 1<<rem_depth;
1674 BlockNode block;
1675 int i,j;
1676
1677 block.color[0]= l;
1678 block.color[1]= cb;
1679 block.color[2]= cr;
1680 block.mx= mx;
1681 block.my= my;
1682 block.ref= ref;
1683 block.type= type;
1684 block.level= level;
1685
1686 for(j=0; j<block_w; j++){
1687 for(i=0; i<block_w; i++){
1688 s->block[index + i + j*w]= block;
1689 }
1690 }
1691 }
1692
1693 static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
1694 const int offset[3]= {
1695 y*c-> stride + x,
1696 ((y*c->uvstride + x)>>1),
1697 ((y*c->uvstride + x)>>1),
1698 };
1699 int i;
1700 for(i=0; i<3; i++){
1701 c->src[0][i]= src [i];
1702 c->ref[0][i]= ref [i] + offset[i];
1703 }
1704 assert(!ref_index);
1705 }
1706
1707 static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
1708 const BlockNode *left, const BlockNode *top, const BlockNode *tr){
1709 if(s->ref_frames == 1){
1710 *mx = mid_pred(left->mx, top->mx, tr->mx);
1711 *my = mid_pred(left->my, top->my, tr->my);
1712 }else{
1713 const int *scale = scale_mv_ref[ref];
1714 *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8,
1715 (top ->mx * scale[top ->ref] + 128) >>8,
1716 (tr ->mx * scale[tr ->ref] + 128) >>8);
1717 *my = mid_pred((left->my * scale[left->ref] + 128) >>8,
1718 (top ->my * scale[top ->ref] + 128) >>8,
1719 (tr ->my * scale[tr ->ref] + 128) >>8);
1720 }
1721 }
1722
1723 //FIXME copy&paste
/* Named rows of a predictor table called P that must be in scope where the
 * macros are used; encode_q_branch declares it as int P[10][2] and stores
 * the x component in [0] and the y component in [1] of each row. */
1724 #define P_LEFT P[1]
1725 #define P_TOP P[2]
1726 #define P_TOPRIGHT P[3]
1727 #define P_MEDIAN P[4]
1728 #define P_MV1 P[9]
1729 #define FLAG_QPEL 1 //must be 1
1730
/**
 * Choose and encode the coding mode of one block of the block quadtree.
 *
 * On a keyframe the block is just stored as intra with the left neighbour's
 * colour; nothing is written to the range coder here and 0 is returned.
 * Otherwise up to three candidates are costed:
 *  - inter: motion search over every reference frame,
 *  - intra: one mean value per plane,
 *  - split: four blocks of level+1, handled by recursion (only when level is
 *    not yet s->block_max_depth).
 * The inter and intra candidates are written with private copies of the
 * range coder into scratch buffers; the winner is copied back into s->c and
 * recorded with set_blocks(). A split wins only when it is strictly cheaper
 * than both other candidates; intra wins over inter only when strictly cheaper.
 *
 * @param s     encoder context
 * @param level quadtree depth of this block; its width is 1<<(LOG2_MB_SIZE - level)
 * @param x     horizontal block index at this level
 * @param y     vertical block index at this level
 * @return score of the chosen candidate (0 on keyframes)
 */
1731 static int encode_q_branch(SnowContext *s, int level, int x, int y){
// scratch bitstreams and context states for the inter (p_*) and intra (i_*) trials
1732 uint8_t p_buffer[1024];
1733 uint8_t i_buffer[1024];
1734 uint8_t p_state[sizeof(s->block_state)];
1735 uint8_t i_state[sizeof(s->block_state)];
1736 RangeCoder pc, ic;
// output position of s->c on entry; the winning trial is copied back to it
1737 uint8_t *pbbak= s->c.bytestream;
1738 uint8_t *pbbak_start= s->c.bytestream_start;
1739 int score, score2, iscore, i_len, p_len, block_s, sum, base_bits;
// s->block is indexed on the grid of the deepest level, which is w x h blocks
1740 const int w= s->b_width << s->block_max_depth;
1741 const int h= s->b_height << s->block_max_depth;
1742 const int rem_depth= s->block_max_depth - level;
1743 const int index= (x + y*w) << rem_depth;
1744 const int block_w= 1<<(LOG2_MB_SIZE - level);
1745 int trx= (x+1)<<rem_depth;
1746 int try= (y+1)<<rem_depth;
// neighbours; null_block replaces those outside the grid, tl falls back to
// left and tr falls back to tl when they are not available
1747 const BlockNode *left = x ? &s->block[index-1] : &null_block;
1748 const BlockNode *top = y ? &s->block[index-w] : &null_block;
1749 const BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
1750 const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
1751 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
1752 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
// colour predictors: the left neighbour's colour
1753 int pl = left->color[0];
1754 int pcb= left->color[1];
1755 int pcr= left->color[2];
1756 int pmx, pmy;
1757 int mx=0, my=0;
1758 int l,cr,cb;
1759 const int stride= s->current_picture.linesize[0];
1760 const int uvstride= s->current_picture.linesize[1];
// per-plane pointers to this block in the input picture (planes 1 and 2 at half the offset)
1761 uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w,
1762 s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
1763 s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
// motion vector candidate table, rows addressed through the P_* macros
1764 int P[10][2];
1765 int16_t last_mv[3][2];
1766 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
1767 const int shift= 1+qpel;
1768 MotionEstContext *c= &s->m.me;
// range coder context selectors derived from the neighbouring blocks
1769 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
1770 int mx_context= av_log2(2*FFABS(left->mx - top->mx));
1771 int my_context= av_log2(2*FFABS(left->my - top->my));
1772 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1773 int ref, best_ref, ref_score, ref_mx, ref_my;
1774
1775 assert(sizeof(s->block_state) >= 256);
// keyframes: no search and no bits, the block inherits the left colour as intra
1776 if(s->keyframe){
1777 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
1778 return 0;
1779 }
1780
1781 // clip predictors / edge ?
1782
// spatial candidates for the motion search: vectors of the left, top and top-right neighbours
1783 P_LEFT[0]= left->mx;
1784 P_LEFT[1]= left->my;
1785 P_TOP [0]= top->mx;
1786 P_TOP [1]= top->my;
1787 P_TOPRIGHT[0]= tr->mx;
1788 P_TOPRIGHT[1]= tr->my;
1789
// further candidates: vectors currently stored in s->block for this block and
// for its right and bottom neighbours
1790 last_mv[0][0]= s->block[index].mx;
1791 last_mv[0][1]= s->block[index].my;
1792 last_mv[1][0]= right->mx;
1793 last_mv[1][1]= right->my;
1794 last_mv[2][0]= bottom->mx;
1795 last_mv[2][1]= bottom->my;
1796
// NOTE(review): presumably makes the shared motion estimator treat this block
// as a lone macroblock at (0,0) - confirm against ff_epzs_motion_search()
1797 s->m.mb_stride=2;
1798 s->m.mb_x=
1799 s->m.mb_y= 0;
1800 c->skip= 0;
1801
1802 assert(c-> stride == stride);
1803 assert(c->uvstride == uvstride);
1804
1805 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
1806 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
1807 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
1808 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
1809
// search window relative to the block position (x*block_w, y*block_w).
// By the arithmetic a vector may point up to 16-3 units past each edge of an
// area of (w,h)<<(LOG2_MB_SIZE - block_max_depth).
// NOTE(review): presumably full pixels and the picture size - confirm that the
// reference frames have enough edge padding for this margin
1810 c->xmin = - x*block_w - 16+3;
1811 c->ymin = - y*block_w - 16+3;
1812 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-3;
1813 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-3;
1814
// clip the spatial candidates against the window; the window limits are scaled
// by <<shift to match the candidates. Only the bounds listed here are enforced.
1815 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
1816 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
1817 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
1818 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
1819 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
1820 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
1821 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
1822
1823 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
1824 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
1825
// in the first block row the search is predicted from the left vector alone,
// elsewhere from the median
1826 if (!y) {
1827 c->pred_x= P_LEFT[0];
1828 c->pred_y= P_LEFT[1];
1829 } else {
1830 c->pred_x = P_MEDIAN[0];
1831 c->pred_y = P_MEDIAN[1];
1832 }
1833
// try every reference frame and keep the one with the lowest score
1834 score= INT_MAX;
1835 best_ref= 0;
1836 for(ref=0; ref<s->ref_frames; ref++){
1837 init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);
1838
1839 ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
1840 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
1841
1842 assert(ref_mx >= c->xmin);
1843 assert(ref_mx <= c->xmax);
1844 assert(ref_my >= c->ymin);
1845 assert(ref_my <= c->ymax);
1846
// refine the vector, then let ff_get_mb_score() produce the score that is
// actually compared (it replaces the value returned by the refinement)
1847 ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
1848 ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
// extra cost that grows with the reference index
1849 ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
// record the per-reference result when a table exists for this reference
1850 if(s->ref_mvs[ref]){
1851 s->ref_mvs[ref][index][0]= ref_mx;
1852 s->ref_mvs[ref][index][1]= ref_my;
1853 s->ref_scores[ref][index]= ref_score;
1854 }
1855 if(score > ref_score){
1856 score= ref_score;
1857 best_ref= ref;
1858 mx= ref_mx;
1859 my= ref_my;
1860 }
1861 }
1862 //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2
1863
1864 // subpel search
// (what follows is the trial encoding of the inter candidate, not a search)
// base_bits is the reference point for counting the bits each trial adds.
// NOTE(review): presumably the bits pending in s->c that are not yet flushed
// to whole bytes - confirm against get_rac_count()
1865 base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start);
// inter trial: copy of the current coder state that writes into p_buffer
1866 pc= s->c;
1867 pc.bytestream_start=
1868 pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
1869 memcpy(p_state, s->block_state, sizeof(s->block_state));
1870
// 1 on context 4+s_context = "not split" (the split path below writes 0 there)
1871 if(level!=s->block_max_depth)
1872 put_rac(&pc, &p_state[4 + s_context], 1);
// block type bit: 0 = inter (the intra trial writes 1 on the same context)
1873 put_rac(&pc, &p_state[1 + left->type + top->type], 0);
// the reference index is only coded when there is a choice
1874 if(s->ref_frames > 1)
1875 put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
// the vector is coded as the difference to its prediction
1876 pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
1877 put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
1878 put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
1879 p_len= pc.bytestream - pc.bytestream_start;
// add the rate term: bits spent by the trial, weighted by lambda2
1880 score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT;
1881
// intra candidate: l is the rounded mean of the first plane over the block.
// NOTE(review): iscore equals the sum of squared differences to l if
// pix_norm1() returns the sum of squared samples - confirm
1882 block_s= block_w*block_w;
1883 sum = pix_sum(current_data[0], stride, block_w);
1884 l= (sum + block_s/2)/block_s;
1885 iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
1886
// rounded means of the two half-size planes; their error is not added to
// iscore (the corresponding lines are commented out)
1887 block_s= block_w*block_w>>2;
1888 sum = pix_sum(current_data[1], uvstride, block_w>>1);
1889 cb= (sum + block_s/2)/block_s;
1890 // iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
1891 sum = pix_sum(current_data[2], uvstride, block_w>>1);
1892 cr= (sum + block_s/2)/block_s;
1893 // iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
1894
// intra trial: second copy of the current coder state, writing into i_buffer
1895 ic= s->c;
1896 ic.bytestream_start=
1897 ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
1898 memcpy(i_state, s->block_state, sizeof(s->block_state));
1899 if(level!=s->block_max_depth)
1900 put_rac(&ic, &i_state[4 + s_context], 1);
1901 put_rac(&ic, &i_state[1 + left->type + top->type], 1);
// the three means are coded as differences to the left neighbour's colour
1902 put_symbol(&ic, &i_state[32], l-pl , 1);
1903 put_symbol(&ic, &i_state[64], cb-pcb, 1);
1904 put_symbol(&ic, &i_state[96], cr-pcr, 1);
1905 i_len= ic.bytestream - ic.bytestream_start;
1906 iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT;
1907
1908 // assert(score==256*256*256*64-1);
1909 assert(iscore < 255*255*256 + s->lambda2*10);
1910 assert(iscore >= 0);
1911 assert(l>=0 && l<=255);
1912 assert(pl>=0 && pl<=255);
1913
// only top-level blocks contribute to the scene change score
1914 if(level==0){
1915 int varc= iscore >> 8;
1916 int vard= score >> 8;
1917 if (vard <= 64 || vard < varc)
1918 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
1919 else
1920 c->scene_change_score+= s->m.qscale;
1921 }
1922
// split candidate: written directly to the real coder s->c - the split
// decision (0) followed by the four children, each encoded recursively
1923 if(level!=s->block_max_depth){
1924 put_rac(&s->c, &s->block_state[4 + s_context], 0);
1925 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
1926 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
1927 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
1928 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
1929 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
1930
// the split stays in s->c only when strictly cheaper than both other candidates
1931 if(score2 < score && score2 < iscore)
1932 return score2;
1933 }
1934
// no split: overwrite everything written to s->c since entry with the winning
// trial, i.e. its bytes, its coder state and its context states
1935 if(iscore < score){
// an intra block still stores a vector: the prediction for reference 0
1936 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
1937 memcpy(pbbak, i_buffer, i_len);
1938 s->c= ic;
1939 s->c.bytestream_start= pbbak_start;
1940 s->c.bytestream= pbbak + i_len;
1941 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
1942 memcpy(s->block_state, i_state, sizeof(s->block_state));
1943 return iscore;
1944 }else{
// an inter block stores the predicted (left neighbour's) colour with its vector
1945 memcpy(pbbak, p_buffer, p_len);
1946 s->c= pc;
1947 s->c.bytestream_start= pbbak_start;
1948 s->c.bytestream= pbbak + p_len;
1949 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
1950 memcpy(s->block_state, p_state, sizeof(s->block_state));
1951 return score;
1952 }
1953 }
1954
1955 static av_always_inline int same_block(BlockNode *a, BlockNode *b){
1956 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
1957 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
1958 }else{
1959 return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
1960 }
1961 }
1962
1963 static void encode_q_branch2(SnowContext *s, int level, int x, int y){
1964 const int w= s->b_width << s->block_max_depth;
1965 const int rem_depth= s->block_max_depth - level;
1966 const int index= (x + y*w) << rem_depth;
1967 int trx= (x+1)<<rem_depth;
1968 BlockNode *b= &s->block[index];
1969 const BlockNode *left = x ? &s->block[index-1] : &null_block;
1970 const BlockNode *top = y ? &s->block[index-w] : &null_block;
1971 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
1972 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
1973 int pl = left->color[0];
1974 int pcb= left->color[1];
1975 int pcr= left->color[2];
1976 int pmx, pmy;
1977 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
1978 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref;
1979 int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref;
1980 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1981
1982 if(s->keyframe){
1983 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
1984 return;
1985 }
1986
1987 if(level!=s->block_max_depth){
1988 if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
1989 put_rac(&s->c, &s->block_state[4 + s_context], 1);
1990 }else{
1991 put_rac(&s->c, &s->block_state[4 + s_context], 0);
1992 encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
1993 encode_q_branch2(s, level+1, 2*x