Move ff_sqrt() to libavutil/intmath.h
[libav.git] / libavcodec / snow.c
1 /*
2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include "libavutil/intmath.h"
22 #include "avcodec.h"
23 #include "dsputil.h"
24 #include "snow.h"
25
26 #include "rangecoder.h"
27 #include "mathops.h"
28
29 #include "mpegvideo.h"
30 #include "h263.h"
31
32 #undef NDEBUG
33 #include <assert.h>
34
/**
 * 256-entry lookup table with outputs in {-1, 0, +1}.
 * Indices 0, 1 and 255 map to 0, indices 2..127 map to +1 and
 * indices 128..254 map to -1.
 */
static const int8_t quant3[256] = {
     0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
};
/**
 * 256-entry lookup table with outputs in {-1, 0, +1}.
 * Index 0 maps to 0, indices 1..127 map to +1 and indices 128..255 map
 * to -1 (i.e. the sign of the index when read as a signed 8-bit value).
 */
static const int8_t quant3b[256] = {
     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
};
/**
 * 256-entry lookup table with outputs in {-1, 0, +1}.
 * Indices 0 and 1 map to 0; every other even index maps to +1 and every
 * other odd index maps to -1.
 */
static const int8_t quant3bA[256] = {
     0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
     1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
     1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
     1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
     1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
     1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
     1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
     1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
     1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
     1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
     1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
     1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
     1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
     1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
     1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
     1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
};
/**
 * 256-entry lookup table with outputs in -2..+2 (five levels).
 * The first half (indices 0..127) holds the non-negative levels, the
 * second half (128..255) the negative ones, with the smallest magnitudes
 * next to index 0 and index 255.
 */
static const int8_t quant5[256] = {
     0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
    -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
    -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
    -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
    -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
    -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
    -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
    -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
};
/**
 * 256-entry lookup table with outputs in -3..+3 (seven levels).
 * The first half (indices 0..127) holds the non-negative levels, the
 * second half (128..255) the negative ones, with the smallest magnitudes
 * next to index 0 and index 255.
 */
static const int8_t quant7[256] = {
     0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
     2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
    -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
    -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
    -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
    -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
    -3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
    -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
    -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
};
/**
 * 256-entry lookup table with outputs in -4..+4 (nine levels).
 * The first half (indices 0..127) holds the non-negative levels, the
 * second half (128..255) the negative ones, with the smallest magnitudes
 * next to index 0 and index 255.
 */
static const int8_t quant9[256] = {
     0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
     3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
    -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
    -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
    -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
    -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
    -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
    -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
    -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
};
/**
 * 256-entry lookup table with outputs in -5..+5 (eleven levels).
 * The first half (indices 0..127) holds the non-negative levels, the
 * second half (128..255) the negative ones, with the smallest magnitudes
 * next to index 0 and index 255.
 */
static const int8_t quant11[256] = {
     0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
     4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
     5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
     5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
     5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
     5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
     5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
    -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
    -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
    -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
    -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
    -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
    -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
    -4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
};
/**
 * 256-entry lookup table with outputs in -6..+6 (thirteen levels).
 * The first half (indices 0..127) holds the non-negative levels, the
 * second half (128..255) the negative ones, with the smallest magnitudes
 * next to index 0 and index 255.
 */
static const int8_t quant13[256] = {
     0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
     4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
     5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
     5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
     6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
     6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
     6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
     6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
    -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
    -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
    -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
    -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
    -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
    -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
    -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
    -4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
};
179
/*
 * Overlapped-block weighting windows, 32x32 and 16x16, linear shape
 * ("64*linear" in the original selector).
 *
 * The cubic and cosine variants that used to sit next to these tables were
 * permanently disabled by the preprocessor (#if 0 / #else of an #elif 1)
 * and have been removed; only the tables that were actually compiled remain.
 *
 * For both windows the four weights that coincide when the window is tiled
 * at half its size add up to 256.
 */
static const uint8_t obmc32[1024]={
  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,
  0,  4,  4,  4,  8,  8,  8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12,  8,  8,  8,  4,  4,  4,  0,
  0,  4,  8,  8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12,  8,  8,  4,  0,
  0,  4,  8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12,  8,  4,  0,
  4,  8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12,  8,  4,
  4,  8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12,  8,  4,
  4,  8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16,  8,  4,
  4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12,  4,
  4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12,  4,
  4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16,  4,
  4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16,  4,
  4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16,  4,
  8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20,  8,
  8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20,  8,
  8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20,  8,
  8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24,  8,
  8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24,  8,
  8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20,  8,
  8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20,  8,
  8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20,  8,
  4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16,  4,
  4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16,  4,
  4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16,  4,
  4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12,  4,
  4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12,  4,
  4,  8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16,  8,  4,
  4,  8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12,  8,  4,
  4,  8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12,  8,  4,
  0,  4,  8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12,  8,  4,  0,
  0,  4,  8,  8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12,  8,  8,  4,  0,
  0,  4,  4,  4,  8,  8,  8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12,  8,  8,  8,  4,  4,  4,  0,
  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,
 //error:0.000020
};
static const uint8_t obmc16[256]={
  0,  4,  4,  8,  8, 12, 12, 16, 16, 12, 12,  8,  8,  4,  4,  0,
  4,  8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16,  8,  4,
  4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16,  4,
  8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20,  8,
  8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28,  8,
 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
  8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28,  8,
  8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20,  8,
  4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16,  4,
  4,  8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16,  8,  4,
  0,  4,  4,  8,  8, 12, 12, 16, 16, 12, 12,  8,  8,  4,  4,  0,
//error:0.000015
};
346
/**
 * 8x8 overlapped-block weighting window (linear *64).
 * The table is symmetric in both directions; the four weights that coincide
 * when the window is tiled every 4 samples add up to 256.
 */
static const uint8_t obmc8[64] = {
      4,  12,  20,  28,  28,  20,  12,   4,
     12,  36,  60,  84,  84,  60,  36,  12,
     20,  60, 100, 140, 140, 100,  60,  20,
     28,  84, 140, 196, 196, 140,  84,  28,
     28,  84, 140, 196, 196, 140,  84,  28,
     20,  60, 100, 140, 140, 100,  60,  20,
     12,  36,  60,  84,  84,  60,  36,  12,
      4,  12,  20,  28,  28,  20,  12,   4,
    //error:0.000000
};
359
/**
 * 4x4 overlapped-block weighting window (linear *64).
 * The four weights that coincide when the window is tiled every 2 samples
 * add up to 256.
 */
static const uint8_t obmc4[16] = {
     16,  48,  48,  16,
     48, 144, 144,  48,
     48, 144, 144,  48,
     16,  48,  48,  16,
    //error:0.000000
};
368
369 static const uint8_t * const obmc_tab[4]={
370 obmc32, obmc16, obmc8, obmc4
371 };
372
373 static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
374
/**
 * Per-block record: a motion vector, a reference index, three colour
 * bytes, a flag byte and a level.
 */
typedef struct BlockNode{
    int16_t mx;         ///< motion vector, x component
    int16_t my;         ///< motion vector, y component
    uint8_t ref;        ///< reference index
    uint8_t color[3];   ///< one byte per colour component
    uint8_t type;       ///< bit mask built from the BLOCK_* flags below
    /* flag values 1 (TYPE_SPLIT) and 4 (TYPE_NOCOLOR) were once planned
     * here but are not defined */
#define BLOCK_INTRA   1
#define BLOCK_OPT     2
    uint8_t level;      //FIXME merge into type?
}BlockNode;
387
388 static const BlockNode null_block= { //FIXME add border maybe
389 .color= {128,128,128},
390 .mx= 0,
391 .my= 0,
392 .ref= 0,
393 .type= 0,
394 .level= 0,
395 };
396
/* Size constants. MB_SIZE is derived from its base-2 logarithm so the two
 * can never disagree. */
#define LOG2_MB_SIZE 4
#define MB_SIZE (1<<LOG2_MB_SIZE)
#define ENCODER_EXTRA_BITS 4
/* Upper bound on filter taps; Plane stores HTAPS_MAX/2 coefficients. */
#define HTAPS_MAX 8
401
/** Pairs a signed 16-bit x position with an unsigned 16-bit coefficient. */
typedef struct x_and_coeff{
    int16_t  x;
    uint16_t coeff;
} x_and_coeff;
406
407 typedef struct SubBand{
408 int level;
409 int stride;
410 int width;
411 int height;
412 int qlog; ///< log(qscale)/log[2^(1/6)]
413 DWTELEM *buf;
414 IDWTELEM *ibuf;
415 int buf_x_offset;
416 int buf_y_offset;
417 int stride_line; ///< Stride measured in lines, not pixels.
418 x_and_coeff * x_coeff;
419 struct SubBand *parent;
420 uint8_t state[/*7*2*/ 7 + 512][32];
421 }SubBand;
422
423 typedef struct Plane{
424 int width;
425 int height;
426 SubBand band[MAX_DECOMPOSITIONS][4];
427
428 int htaps;
429 int8_t hcoeff[HTAPS_MAX/2];
430 int diag_mc;
431 int fast_mc;
432
433 int last_htaps;
434 int8_t last_hcoeff[HTAPS_MAX/2];
435 int last_diag_mc;
436 }Plane;
437
438 typedef struct SnowContext{
439
440 AVCodecContext *avctx;
441 RangeCoder c;
442 DSPContext dsp;
443 AVFrame new_picture;
444 AVFrame input_picture; ///< new_picture with the internal linesizes
445 AVFrame current_picture;
446 AVFrame last_picture[MAX_REF_FRAMES];
447 uint8_t *halfpel_plane[MAX_REF_FRAMES][4][4];
448 AVFrame mconly_picture;
449 // uint8_t q_context[16];
450 uint8_t header_state[32];
451 uint8_t block_state[128 + 32*128];
452 int keyframe;
453 int always_reset;
454 int version;
455 int spatial_decomposition_type;
456 int last_spatial_decomposition_type;
457 int temporal_decomposition_type;
458 int spatial_decomposition_count;
459 int last_spatial_decomposition_count;
460 int temporal_decomposition_count;
461 int max_ref_frames;
462 int ref_frames;
463 int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
464 uint32_t *ref_scores[MAX_REF_FRAMES];
465 DWTELEM *spatial_dwt_buffer;
466 IDWTELEM *spatial_idwt_buffer;
467 int colorspace_type;
468 int chroma_h_shift;
469 int chroma_v_shift;
470 int spatial_scalability;
471 int qlog;
472 int last_qlog;
473 int lambda;
474 int lambda2;
475 int pass1_rc;
476 int mv_scale;
477 int last_mv_scale;
478 int qbias;
479 int last_qbias;
480 #define QBIAS_SHIFT 3
481 int b_width;
482 int b_height;
483 int block_max_depth;
484 int last_block_max_depth;
485 Plane plane[MAX_PLANES];
486 BlockNode *block;
487 #define ME_CACHE_SIZE 1024
488 int me_cache[ME_CACHE_SIZE];
489 int me_cache_generation;
490 slice_buffer sb;
491
492 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX)
493
494 uint8_t *scratchbuf;
495 }SnowContext;
496
497 typedef struct {
498 IDWTELEM *b0;
499 IDWTELEM *b1;
500 IDWTELEM *b2;
501 IDWTELEM *b3;
502 int y;
503 } DWTCompose;
504
/*
 * Return the storage for row line_num of slice_buf. The cached pointer is
 * used when the row is already mapped; otherwise slice_buffer_load_line()
 * maps it. Both arguments are evaluated more than once, so they must not
 * have side effects.
 */
#define slice_buffer_get_line(slice_buf, line_num)              \
    ((slice_buf)->line[line_num]                                \
        ? (slice_buf)->line[line_num]                           \
        : slice_buffer_load_line((slice_buf), (line_num)))
507
508 static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM * base_buffer)
509 {
510 int i;
511
512 buf->base_buffer = base_buffer;
513 buf->line_count = line_count;
514 buf->line_width = line_width;
515 buf->data_count = max_allocated_lines;
516 buf->line = av_mallocz (sizeof(IDWTELEM *) * line_count);
517 buf->data_stack = av_malloc (sizeof(IDWTELEM *) * max_allocated_lines);
518
519 for(i = 0; i < max_allocated_lines; i++){
520 buf->data_stack[i] = av_malloc (sizeof(IDWTELEM) * line_width);
521 }
522
523 buf->data_stack_top = max_allocated_lines - 1;
524 }
525
526 static IDWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
527 {
528 IDWTELEM * buffer;
529
530 assert(buf->data_stack_top >= 0);
531 // assert(!buf->line[line]);
532 if (buf->line[line])
533 return buf->line[line];
534
535 buffer = buf->data_stack[buf->data_stack_top];
536 buf->data_stack_top--;
537 buf->line[line] = buffer;
538
539 return buffer;
540 }
541
542 static void slice_buffer_release(slice_buffer * buf, int line)
543 {
544 IDWTELEM * buffer;
545
546 assert(line >= 0 && line < buf->line_count);
547 assert(buf->line[line]);
548
549 buffer = buf->line[line];
550 buf->data_stack_top++;
551 buf->data_stack[buf->data_stack_top] = buffer;
552 buf->line[line] = NULL;
553 }
554
555 static void slice_buffer_flush(slice_buffer * buf)
556 {
557 int i;
558 for(i = 0; i < buf->line_count; i++){
559 if (buf->line[i])
560 slice_buffer_release(buf, i);
561 }
562 }
563
564 static void slice_buffer_destroy(slice_buffer * buf)
565 {
566 int i;
567 slice_buffer_flush(buf);
568
569 for(i = buf->data_count - 1; i >= 0; i--){
570 av_freep(&buf->data_stack[i]);
571 }
572 av_freep(&buf->data_stack);
573 av_freep(&buf->line);
574 }
575
576 #ifdef __sgi
577 // Avoid a name clash on SGI IRIX
578 #undef qexp
579 #endif
580 #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
581 static uint8_t qexp[QROOT];
582
/**
 * Reflect v into the range [0, m] by mirroring at both ends.
 *
 * For m == 0 the only valid position is 0, which is returned directly;
 * without this guard the reflection loop never terminates for v != 0
 * because adding 2*m leaves v unchanged.
 *
 * @param v position to reflect, may be negative or larger than m
 * @param m last valid position
 * @return reflected position in [0, m]
 */
static inline int mirror(int v, int m){
    if(!m)
        return 0;

    while((unsigned)v > (unsigned)m){
        v=-v;
        if(v<0) v+= 2*m;
    }
    return v;
}
590
591 static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
592 int i;
593
594 if(v){
595 const int a= FFABS(v);
596 const int e= av_log2(a);
597 #if 1
598 const int el= FFMIN(e, 10);
599 put_rac(c, state+0, 0);
600
601 for(i=0; i<el; i++){
602 put_rac(c, state+1+i, 1); //1..10
603 }
604 for(; i<e; i++){
605 put_rac(c, state+1+9, 1); //1..10
606 }
607 put_rac(c, state+1+FFMIN(i,9), 0);
608
609 for(i=e-1; i>=el; i--){
610 put_rac(c, state+22+9, (a>>i)&1); //22..31
611 }
612 for(; i>=0; i--){
613 put_rac(c, state+22+i, (a>>i)&1); //22..31
614 }
615
616 if(is_signed)
617 put_rac(c, state+11 + el, v < 0); //11..21
618 #else
619
620 put_rac(c, state+0, 0);
621 if(e<=9){
622 for(i=0; i<e; i++){
623 put_rac(c, state+1+i, 1); //1..10
624 }
625 put_rac(c, state+1+i, 0);
626
627 for(i=e-1; i>=0; i--){
628 put_rac(c, state+22+i, (a>>i)&1); //22..31
629 }
630
631 if(is_signed)
632 put_rac(c, state+11 + e, v < 0); //11..21
633 }else{
634 for(i=0; i<e; i++){
635 put_rac(c, state+1+FFMIN(i,9), 1); //1..10
636 }
637 put_rac(c, state+1+9, 0);
638
639 for(i=e-1; i>=0; i--){
640 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
641 }
642
643 if(is_signed)
644 put_rac(c, state+11 + 10, v < 0); //11..21
645 }
646 #endif /* 1 */
647 }else{
648 put_rac(c, state+0, 1);
649 }
650 }
651
652 static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
653 if(get_rac(c, state+0))
654 return 0;
655 else{
656 int i, e, a;
657 e= 0;
658 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
659 e++;
660 }
661
662 a= 1;
663 for(i=e-1; i>=0; i--){
664 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
665 }
666
667 e= -(is_signed && get_rac(c, state+11 + FFMIN(e,10))); //11..21
668 return (a^e)-e;
669 }
670 }
671
672 static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
673 int i;
674 int r= log2>=0 ? 1<<log2 : 1;
675
676 assert(v>=0);
677 assert(log2>=-4);
678
679 while(v >= r){
680 put_rac(c, state+4+log2, 1);
681 v -= r;
682 log2++;
683 if(log2>0) r+=r;
684 }
685 put_rac(c, state+4+log2, 0);
686
687 for(i=log2-1; i>=0; i--){
688 put_rac(c, state+31-i, (v>>i)&1);
689 }
690 }
691
692 static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
693 int i;
694 int r= log2>=0 ? 1<<log2 : 1;
695 int v=0;
696
697 assert(log2>=-4);
698
699 while(get_rac(c, state+4+log2)){
700 v+= r;
701 log2++;
702 if(log2>0) r+=r;
703 }
704
705 for(i=log2-1; i>=0; i--){
706 v+= get_rac(c, state+31-i)<<i;
707 }
708
709 return v;
710 }
711
712 static av_always_inline void
713 lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
714 int dst_step, int src_step, int ref_step,
715 int width, int mul, int add, int shift,
716 int highpass, int inverse){
717 const int mirror_left= !highpass;
718 const int mirror_right= (width&1) ^ highpass;
719 const int w= (width>>1) - 1 + (highpass & width);
720 int i;
721
722 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
723 if(mirror_left){
724 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
725 dst += dst_step;
726 src += src_step;
727 }
728
729 for(i=0; i<w; i++){
730 dst[i*dst_step] =
731 LIFT(src[i*src_step],
732 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
733 inverse);
734 }
735
736 if(mirror_right){
737 dst[w*dst_step] =
738 LIFT(src[w*src_step],
739 ((mul*2*ref[w*ref_step]+add)>>shift),
740 inverse);
741 }
742 }
743
744 static av_always_inline void
745 inv_lift(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
746 int dst_step, int src_step, int ref_step,
747 int width, int mul, int add, int shift,
748 int highpass, int inverse){
749 const int mirror_left= !highpass;
750 const int mirror_right= (width&1) ^ highpass;
751 const int w= (width>>1) - 1 + (highpass & width);
752 int i;
753
754 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
755 if(mirror_left){
756 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
757 dst += dst_step;
758 src += src_step;
759 }
760
761 for(i=0; i<w; i++){
762 dst[i*dst_step] =
763 LIFT(src[i*src_step],
764 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
765 inverse);
766 }
767
768 if(mirror_right){
769 dst[w*dst_step] =
770 LIFT(src[w*src_step],
771 ((mul*2*ref[w*ref_step]+add)>>shift),
772 inverse);
773 }
774 }
775
776 #ifndef liftS
777 static av_always_inline void
778 liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
779 int dst_step, int src_step, int ref_step,
780 int width, int mul, int add, int shift,
781 int highpass, int inverse){
782 const int mirror_left= !highpass;
783 const int mirror_right= (width&1) ^ highpass;
784 const int w= (width>>1) - 1 + (highpass & width);
785 int i;
786
787 assert(shift == 4);
788 #define LIFTS(src, ref, inv) \
789 ((inv) ? \
790 (src) + (((ref) + 4*(src))>>shift): \
791 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
792 if(mirror_left){
793 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
794 dst += dst_step;
795 src += src_step;
796 }
797
798 for(i=0; i<w; i++){
799 dst[i*dst_step] =
800 LIFTS(src[i*src_step],
801 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
802 inverse);
803 }
804
805 if(mirror_right){
806 dst[w*dst_step] =
807 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
808 }
809 }
810 static av_always_inline void
811 inv_liftS(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
812 int dst_step, int src_step, int ref_step,
813 int width, int mul, int add, int shift,
814 int highpass, int inverse){
815 const int mirror_left= !highpass;
816 const int mirror_right= (width&1) ^ highpass;
817 const int w= (width>>1) - 1 + (highpass & width);
818 int i;
819
820 assert(shift == 4);
821 #define LIFTS(src, ref, inv) \
822 ((inv) ? \
823 (src) + (((ref) + 4*(src))>>shift): \
824 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
825 if(mirror_left){
826 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
827 dst += dst_step;
828 src += src_step;
829 }
830
831 for(i=0; i<w; i++){
832 dst[i*dst_step] =
833 LIFTS(src[i*src_step],
834 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
835 inverse);
836 }
837
838 if(mirror_right){
839 dst[w*dst_step] =
840 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
841 }
842 }
843 #endif /* ! liftS */
844
845 static void horizontal_decompose53i(DWTELEM *b, int width){
846 DWTELEM temp[width];
847 const int width2= width>>1;
848 int x;
849 const int w2= (width+1)>>1;
850
851 for(x=0; x<width2; x++){
852 temp[x ]= b[2*x ];
853 temp[x+w2]= b[2*x + 1];
854 }
855 if(width&1)
856 temp[x ]= b[2*x ];
857 #if 0
858 {
859 int A1,A2,A3,A4;
860 A2= temp[1 ];
861 A4= temp[0 ];
862 A1= temp[0+width2];
863 A1 -= (A2 + A4)>>1;
864 A4 += (A1 + 1)>>1;
865 b[0+width2] = A1;
866 b[0 ] = A4;
867 for(x=1; x+1<width2; x+=2){
868 A3= temp[x+width2];
869 A4= temp[x+1 ];
870 A3 -= (A2 + A4)>>1;
871 A2 += (A1 + A3 + 2)>>2;
872 b[x+width2] = A3;
873 b[x ] = A2;
874
875 A1= temp[x+1+width2];
876 A2= temp[x+2 ];
877 A1 -= (A2 + A4)>>1;
878 A4 += (A1 + A3 + 2)>>2;
879 b[x+1+width2] = A1;
880 b[x+1 ] = A4;
881 }
882 A3= temp[width-1];
883 A3 -= A2;
884 A2 += (A1 + A3 + 2)>>2;
885 b[width -1] = A3;
886 b[width2-1] = A2;
887 }
888 #else
889 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
890 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0);
891 #endif /* 0 */
892 }
893
894 static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
895 int i;
896
897 for(i=0; i<width; i++){
898 b1[i] -= (b0[i] + b2[i])>>1;
899 }
900 }
901
902 static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
903 int i;
904
905 for(i=0; i<width; i++){
906 b1[i] += (b0[i] + b2[i] + 2)>>2;
907 }
908 }
909
910 static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
911 int y;
912 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
913 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride;
914
915 for(y=-2; y<height; y+=2){
916 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
917 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
918
919 if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
920 if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
921
922 if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
923 if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
924
925 b0=b2;
926 b1=b3;
927 }
928 }
929
930 static void horizontal_decompose97i(DWTELEM *b, int width){
931 DWTELEM temp[width];
932 const int w2= (width+1)>>1;
933
934 lift (temp+w2, b +1, b , 1, 2, 2, width, W_AM, W_AO, W_AS, 1, 1);
935 liftS(temp , b , temp+w2, 1, 2, 1, width, W_BM, W_BO, W_BS, 0, 0);
936 lift (b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
937 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
938 }
939
940
941 static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
942 int i;
943
944 for(i=0; i<width; i++){
945 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
946 }
947 }
948
949 static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
950 int i;
951
952 for(i=0; i<width; i++){
953 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
954 }
955 }
956
957 static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
958 int i;
959
960 for(i=0; i<width; i++){
961 #ifdef liftS
962 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
963 #else
964 b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + W_BO*5 + (5<<27)) / (5*16) - (1<<23);
965 #endif
966 }
967 }
968
969 static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
970 int i;
971
972 for(i=0; i<width; i++){
973 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
974 }
975 }
976
977 static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
978 int y;
979 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
980 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride;
981 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
982 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
983
984 for(y=-4; y<height; y+=2){
985 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
986 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
987
988 if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
989 if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
990
991 if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
992 if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
993 if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
994 if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
995
996 b0=b2;
997 b1=b3;
998 b2=b4;
999 b3=b5;
1000 }
1001 }
1002
1003 void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1004 int level;
1005
1006 for(level=0; level<decomposition_count; level++){
1007 switch(type){
1008 case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1009 case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1010 }
1011 }
1012 }
1013
1014 static void horizontal_compose53i(IDWTELEM *b, int width){
1015 IDWTELEM temp[width];
1016 const int width2= width>>1;
1017 const int w2= (width+1)>>1;
1018 int x;
1019
1020 for(x=0; x<width2; x++){
1021 temp[2*x ]= b[x ];
1022 temp[2*x + 1]= b[x+w2];
1023 }
1024 if(width&1)
1025 temp[2*x ]= b[x ];
1026
1027 b[0] = temp[0] - ((temp[1]+1)>>1);
1028 for(x=2; x<width-1; x+=2){
1029 b[x ] = temp[x ] - ((temp[x-1] + temp[x+1]+2)>>2);
1030 b[x-1] = temp[x-1] + ((b [x-2] + b [x ]+1)>>1);
1031 }
1032 if(width&1){
1033 b[x ] = temp[x ] - ((temp[x-1]+1)>>1);
1034 b[x-1] = temp[x-1] + ((b [x-2] + b [x ]+1)>>1);
1035 }else
1036 b[x-1] = temp[x-1] + b[x-2];
1037 }
1038
1039 static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1040 int i;
1041
1042 for(i=0; i<width; i++){
1043 b1[i] += (b0[i] + b2[i])>>1;
1044 }
1045 }
1046
1047 static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1048 int i;
1049
1050 for(i=0; i<width; i++){
1051 b1[i] -= (b0[i] + b2[i] + 2)>>2;
1052 }
1053 }
1054
1055 static void spatial_compose53i_buffered_init(DWTCompose *cs, slice_buffer * sb, int height, int stride_line){
1056 cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1057 cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line);
1058 cs->y = -1;
1059 }
1060
1061 static void spatial_compose53i_init(DWTCompose *cs, IDWTELEM *buffer, int height, int stride){
1062 cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1063 cs->b1 = buffer + mirror(-1 , height-1)*stride;
1064 cs->y = -1;
1065 }
1066
1067 static void spatial_compose53i_dy_buffered(DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line){
1068 int y= cs->y;
1069
1070 IDWTELEM *b0= cs->b0;
1071 IDWTELEM *b1= cs->b1;
1072 IDWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
1073 IDWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
1074
1075 if(y+1<(unsigned)height && y<(unsigned)height){
1076 int x;
1077
1078 for(x=0; x<width; x++){
1079 b2[x] -= (b1[x] + b3[x] + 2)>>2;
1080 b1[x] += (b0[x] + b2[x])>>1;
1081 }
1082 }else{
1083 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1084 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1085 }
1086
1087 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1088 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1089
1090 cs->b0 = b2;
1091 cs->b1 = b3;
1092 cs->y += 2;
1093 }
1094
1095 static void spatial_compose53i_dy(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride){
1096 int y= cs->y;
1097 IDWTELEM *b0= cs->b0;
1098 IDWTELEM *b1= cs->b1;
1099 IDWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1100 IDWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1101
1102 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1103 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1104
1105 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1106 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1107
1108 cs->b0 = b2;
1109 cs->b1 = b3;
1110 cs->y += 2;
1111 }
1112
1113 static void av_unused spatial_compose53i(IDWTELEM *buffer, int width, int height, int stride){
1114 DWTCompose cs;
1115 spatial_compose53i_init(&cs, buffer, height, stride);
1116 while(cs.y <= height)
1117 spatial_compose53i_dy(&cs, buffer, width, height, stride);
1118 }
1119
1120
1121 void ff_snow_horizontal_compose97i(IDWTELEM *b, int width){
1122 IDWTELEM temp[width];
1123 const int w2= (width+1)>>1;
1124
1125 #if 0 //maybe more understadable but slower
1126 inv_lift (temp , b , b +w2, 2, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
1127 inv_lift (temp+1 , b +w2, temp , 2, 1, 2, width, W_CM, W_CO, W_CS, 1, 1);
1128
1129 inv_liftS(b , temp , temp+1 , 2, 2, 2, width, W_BM, W_BO, W_BS, 0, 1);
1130 inv_lift (b+1 , temp+1 , b , 2, 2, 2, width, W_AM, W_AO, W_AS, 1, 0);
1131 #else
1132 int x;
1133 temp[0] = b[0] - ((3*b[w2]+2)>>2);
1134 for(x=1; x<(width>>1); x++){
1135 temp[2*x ] = b[x ] - ((3*(b [x+w2-1] + b[x+w2])+4)>>3);
1136 temp[2*x-1] = b[x+w2-1] - temp[2*x-2] - temp[2*x];
1137 }
1138 if(width&1){
1139 temp[2*x ] = b[x ] - ((3*b [x+w2-1]+2)>>2);
1140 temp[2*x-1] = b[x+w2-1] - temp[2*x-2] - temp[2*x];
1141 }else
1142 temp[2*x-1] = b[x+w2-1] - 2*temp[2*x-2];
1143
1144 b[0] = temp[0] + ((2*temp[0] + temp[1]+4)>>3);
1145 for(x=2; x<width-1; x+=2){
1146 b[x ] = temp[x ] + ((4*temp[x ] + temp[x-1] + temp[x+1]+8)>>4);
1147 b[x-1] = temp[x-1] + ((3*(b [x-2] + b [x ] ))>>1);
1148 }
1149 if(width&1){
1150 b[x ] = temp[x ] + ((2*temp[x ] + temp[x-1]+4)>>3);
1151 b[x-1] = temp[x-1] + ((3*(b [x-2] + b [x ] ))>>1);
1152 }else
1153 b[x-1] = temp[x-1] + 3*b [x-2];
1154 #endif
1155 }
1156
1157 static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1158 int i;
1159
1160 for(i=0; i<width; i++){
1161 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1162 }
1163 }
1164
1165 static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1166 int i;
1167
1168 for(i=0; i<width; i++){
1169 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1170 }
1171 }
1172
1173 static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1174 int i;
1175
1176 for(i=0; i<width; i++){
1177 #ifdef liftS
1178 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1179 #else
1180 b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
1181 #endif
1182 }
1183 }
1184
1185 static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1186 int i;
1187
1188 for(i=0; i<width; i++){
1189 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1190 }
1191 }
1192
1193 void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
1194 int i;
1195
1196 for(i=0; i<width; i++){
1197 b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
1198 b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
1199 #ifdef liftS
1200 b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
1201 #else
1202 b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
1203 #endif
1204 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1205 }
1206 }
1207
1208 static void spatial_compose97i_buffered_init(DWTCompose *cs, slice_buffer * sb, int height, int stride_line){
1209 cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1210 cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line);
1211 cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1212 cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
1213 cs->y = -3;
1214 }
1215
1216 static void spatial_compose97i_init(DWTCompose *cs, IDWTELEM *buffer, int height, int stride){
1217 cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1218 cs->b1 = buffer + mirror(-3 , height-1)*stride;
1219 cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1220 cs->b3 = buffer + mirror(-3+2, height-1)*stride;
1221 cs->y = -3;
1222 }
1223
1224 static void spatial_compose97i_dy_buffered(DSPContext *dsp, DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line){
1225 int y = cs->y;
1226
1227 IDWTELEM *b0= cs->b0;
1228 IDWTELEM *b1= cs->b1;
1229 IDWTELEM *b2= cs->b2;
1230 IDWTELEM *b3= cs->b3;
1231 IDWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
1232 IDWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
1233
1234 if(y>0 && y+4<height){
1235 dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
1236 }else{
1237 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1238 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1239 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1240 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1241 }
1242
1243 if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width);
1244 if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width);
1245
1246 cs->b0=b2;
1247 cs->b1=b3;
1248 cs->b2=b4;
1249 cs->b3=b5;
1250 cs->y += 2;
1251 }
1252
1253 static void spatial_compose97i_dy(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride){
1254 int y = cs->y;
1255 IDWTELEM *b0= cs->b0;
1256 IDWTELEM *b1= cs->b1;
1257 IDWTELEM *b2= cs->b2;
1258 IDWTELEM *b3= cs->b3;
1259 IDWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1260 IDWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1261
1262 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1263 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1264 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1265 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1266
1267 if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width);
1268 if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width);
1269
1270 cs->b0=b2;
1271 cs->b1=b3;
1272 cs->b2=b4;
1273 cs->b3=b5;
1274 cs->y += 2;
1275 }
1276
1277 static void av_unused spatial_compose97i(IDWTELEM *buffer, int width, int height, int stride){
1278 DWTCompose cs;
1279 spatial_compose97i_init(&cs, buffer, height, stride);
1280 while(cs.y <= height)
1281 spatial_compose97i_dy(&cs, buffer, width, height, stride);
1282 }
1283
1284 static void ff_spatial_idwt_buffered_init(DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
1285 int level;
1286 for(level=decomposition_count-1; level>=0; level--){
1287 switch(type){
1288 case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1289 case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1290 }
1291 }
1292 }
1293
1294 static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, DWTCompose *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
1295 const int support = type==1 ? 3 : 5;
1296 int level;
1297 if(type==2) return;
1298
1299 for(level=decomposition_count-1; level>=0; level--){
1300 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1301 switch(type){
1302 case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1303 break;
1304 case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1305 break;
1306 }
1307 }
1308 }
1309 }
1310
1311 static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
1312 const int w= b->width;
1313 const int h= b->height;
1314 int x,y;
1315
1316 int run, runs;
1317 x_and_coeff *xc= b->x_coeff;
1318 x_and_coeff *prev_xc= NULL;
1319 x_and_coeff *prev2_xc= xc;
1320 x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
1321 x_and_coeff *prev_parent_xc= parent_xc;
1322
1323 runs= get_symbol2(&s->c, b->state[30], 0);
1324 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1325 else run= INT_MAX;
1326
1327 for(y=0; y<h; y++){
1328 int v=0;
1329 int lt=0, t=0, rt=0;
1330
1331 if(y && prev_xc->x == 0){
1332 rt= prev_xc->coeff;
1333 }
1334 for(x=0; x<w; x++){
1335 int p=0;
1336 const int l= v;
1337
1338 lt= t; t= rt;
1339
1340 if(y){
1341 if(prev_xc->x <= x)
1342 prev_xc++;
1343 if(prev_xc->x == x + 1)
1344 rt= prev_xc->coeff;
1345 else
1346 rt=0;
1347 }
1348 if(parent_xc){
1349 if(x>>1 > parent_xc->x){
1350 parent_xc++;
1351 }
1352 if(x>>1 == parent_xc->x){
1353 p= parent_xc->coeff;
1354 }
1355 }
1356 if(/*ll|*/l|lt|t|rt|p){
1357 int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
1358
1359 v=get_rac(&s->c, &b->state[0][context]);
1360 if(v){
1361 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
1362 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
1363
1364 xc->x=x;
1365 (xc++)->coeff= v;
1366 }
1367 }else{
1368 if(!run){
1369 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1370 else run= INT_MAX;
1371 v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
1372 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
1373
1374 xc->x=x;
1375 (xc++)->coeff= v;
1376 }else{
1377 int max_run;
1378 run--;
1379 v=0;
1380
1381 if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
1382 else max_run= FFMIN(run, w-x-1);
1383 if(parent_xc)
1384 max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
1385 x+= max_run;
1386 run-= max_run;
1387 }
1388 }
1389 }
1390 (xc++)->x= w+1; //end marker
1391 prev_xc= prev2_xc;
1392 prev2_xc= xc;
1393
1394 if(parent_xc){
1395 if(y&1){
1396 while(parent_xc->x != parent->width+1)
1397 parent_xc++;
1398 parent_xc++;
1399 prev_parent_xc= parent_xc;
1400 }else{
1401 parent_xc= prev_parent_xc;
1402 }
1403 }
1404 }
1405
1406 (xc++)->x= w+1; //end marker
1407 }
1408
1409 static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1410 const int w= b->width;
1411 int y;
1412 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
1413 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1414 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1415 int new_index = 0;
1416
1417 if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){
1418 qadd= 0;
1419 qmul= 1<<QEXPSHIFT;
1420 }
1421
1422 /* If we are on the second or later slice, restore our index. */
1423 if (start_y != 0)
1424 new_index = save_state[0];
1425
1426
1427 for(y=start_y; y<h; y++){
1428 int x = 0;
1429 int v;
1430 IDWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1431 memset(line, 0, b->width*sizeof(IDWTELEM));
1432 v = b->x_coeff[new_index].coeff;
1433 x = b->x_coeff[new_index++].x;
1434 while(x < w){
1435 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1436 register int u= -(v&1);
1437 line[x] = (t^u) - u;
1438
1439 v = b->x_coeff[new_index].coeff;
1440 x = b->x_coeff[new_index++].x;
1441 }
1442 }
1443
1444 /* Save our variables for the next slice. */
1445 save_state[0] = new_index;
1446
1447 return;
1448 }
1449
1450 static void reset_contexts(SnowContext *s){ //FIXME better initial contexts
1451 int plane_index, level, orientation;
1452
1453 for(plane_index=0; plane_index<3; plane_index++){
1454 for(level=0; level<MAX_DECOMPOSITIONS; level++){
1455 for(orientation=level ? 1:0; orientation<4; orientation++){
1456 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
1457 }
1458 }
1459 }
1460 memset(s->header_state, MID_STATE, sizeof(s->header_state));
1461 memset(s->block_state, MID_STATE, sizeof(s->block_state));
1462 }
1463
1464 static int alloc_blocks(SnowContext *s){
1465 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1466 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1467
1468 s->b_width = w;
1469 s->b_height= h;
1470
1471 av_free(s->block);
1472 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
1473 return 0;
1474 }
1475
1476 static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1477 uint8_t *bytestream= d->bytestream;
1478 uint8_t *bytestream_start= d->bytestream_start;
1479 *d= *s;
1480 d->bytestream= bytestream;
1481 d->bytestream_start= bytestream_start;
1482 }
1483
1484 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
1485 const int w= s->b_width << s->block_max_depth;
1486 const int rem_depth= s->block_max_depth - level;
1487 const int index= (x + y*w) << rem_depth;
1488 const int block_w= 1<<rem_depth;
1489 BlockNode block;
1490 int i,j;
1491
1492 block.color[0]= l;
1493 block.color[1]= cb;
1494 block.color[2]= cr;
1495 block.mx= mx;
1496 block.my= my;
1497 block.ref= ref;
1498 block.type= type;
1499 block.level= level;
1500
1501 for(j=0; j<block_w; j++){
1502 for(i=0; i<block_w; i++){
1503 s->block[index + i + j*w]= block;
1504 }
1505 }
1506 }
1507
1508 static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
1509 const int offset[3]= {
1510 y*c-> stride + x,
1511 ((y*c->uvstride + x)>>1),
1512 ((y*c->uvstride + x)>>1),
1513 };
1514 int i;
1515 for(i=0; i<3; i++){
1516 c->src[0][i]= src [i];
1517 c->ref[0][i]= ref [i] + offset[i];
1518 }
1519 assert(!ref_index);
1520 }
1521
1522 static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
1523 const BlockNode *left, const BlockNode *top, const BlockNode *tr){
1524 if(s->ref_frames == 1){
1525 *mx = mid_pred(left->mx, top->mx, tr->mx);
1526 *my = mid_pred(left->my, top->my, tr->my);
1527 }else{
1528 const int *scale = scale_mv_ref[ref];
1529 *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8,
1530 (top ->mx * scale[top ->ref] + 128) >>8,
1531 (tr ->mx * scale[tr ->ref] + 128) >>8);
1532 *my = mid_pred((left->my * scale[left->ref] + 128) >>8,
1533 (top ->my * scale[top ->ref] + 128) >>8,
1534 (tr ->my * scale[tr ->ref] + 128) >>8);
1535 }
1536 }
1537
1538 static av_always_inline int same_block(BlockNode *a, BlockNode *b){
1539 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
1540 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
1541 }else{
1542 return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
1543 }
1544 }
1545
1546 static void decode_q_branch(SnowContext *s, int level, int x, int y){
1547 const int w= s->b_width << s->block_max_depth;
1548 const int rem_depth= s->block_max_depth - level;
1549 const int index= (x + y*w) << rem_depth;
1550 int trx= (x+1)<<rem_depth;
1551 const BlockNode *left = x ? &s->block[index-1] : &null_block;
1552 const BlockNode *top = y ? &s->block[index-w] : &null_block;
1553 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
1554 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
1555 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1556
1557 if(s->keyframe){
1558 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
1559 return;
1560 }
1561
1562 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
1563 int type, mx, my;
1564 int l = left->color[0];
1565 int cb= left->color[1];
1566 int cr= left->color[2];
1567 int ref = 0;
1568 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
1569 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
1570 int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
1571
1572 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
1573
1574 if(type){
1575 pred_mv(s, &mx, &my, 0, left, top, tr);
1576 l += get_symbol(&s->c, &s->block_state[32], 1);
1577 cb+= get_symbol(&s->c, &s->block_state[64], 1);
1578 cr+= get_symbol(&s->c, &s->block_state[96], 1);
1579 }else{
1580 if(s->ref_frames > 1)
1581 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
1582 pred_mv(s, &mx, &my, ref, left, top, tr);
1583 mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
1584 my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
1585 }
1586 set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
1587 }else{
1588 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
1589 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
1590 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
1591 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
1592 }
1593 }
1594
1595 static void decode_blocks(SnowContext *s){
1596 int x, y;
1597 int w= s->b_width;
1598 int h= s->b_height;
1599
1600 for(y=0; y<h; y++){
1601 for(x=0; x<w; x++){
1602 decode_q_branch(s, 0, x, y);
1603 }
1604 }
1605 }
1606
1607 static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
1608 static const uint8_t weight[64]={
1609 8,7,6,5,4,3,2,1,
1610 7,7,0,0,0,0,0,1,
1611 6,0,6,0,0,0,2,0,
1612 5,0,0,5,0,3,0,0,
1613 4,0,0,0,4,0,0,0,
1614 3,0,0,5,0,3,0,0,
1615 2,0,6,0,0,0,2,0,
1616 1,7,0,0,0,0,0,1,
1617 };
1618
1619 static const uint8_t brane[256]={
1620 0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
1621 0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
1622 0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
1623 0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
1624 0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
1625 0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
1626 0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
1627 0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
1628 0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
1629 0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
1630 0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
1631 0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
1632 0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
1633 0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
1634 0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
1635 0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
1636 };
1637
1638 static const uint8_t needs[16]={
1639 0,1,0,0,
1640 2,4,2,0,
1641 0,1,0,0,
1642 15
1643 };
1644
1645 int x, y, b, r, l;
1646 int16_t tmpIt [64*(32+HTAPS_MAX)];
1647 uint8_t tmp2t[3][stride*(32+HTAPS_MAX)];
1648 int16_t *tmpI= tmpIt;
1649 uint8_t *tmp2= tmp2t[0];
1650 const uint8_t *hpel[11];
1651 assert(dx<16 && dy<16);
1652 r= brane[dx + 16*dy]&15;
1653 l= brane[dx + 16*dy]>>4;
1654
1655 b= needs[l] | needs[r];
1656 if(p && !p->diag_mc)
1657 b= 15;
1658
1659 if(b&5){
1660 for(y=0; y < b_h+HTAPS_MAX-1; y++){
1661 for(x=0; x < b_w; x++){
1662 int a_1=src[x + HTAPS_MAX/2-4];
1663 int a0= src[x + HTAPS_MAX/2-3];
1664 int a1= src[x + HTAPS_MAX/2-2];
1665 int a2= src[x + HTAPS_MAX/2-1];
1666 int a3= src[x + HTAPS_MAX/2+0];
1667 int a4= src[x + HTAPS_MAX/2+1];
1668 int a5= src[x + HTAPS_MAX/2+2];
1669 int a6= src[x + HTAPS_MAX/2+3];
1670 int am=0;
1671 if(!p || p->fast_mc){
1672 am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
1673 tmpI[x]= am;
1674 am= (am+16)>>5;
1675 }else{
1676 am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
1677 tmpI[x]= am;
1678 am= (am+32)>>6;
1679 }
1680
1681 if(am&(~255)) am= ~(am>>31);
1682 tmp2[x]= am;
1683 }
1684 tmpI+= 64;
1685 tmp2+= stride;
1686 src += stride;
1687 }
1688 src -= stride*y;
1689 }
1690 src += HTAPS_MAX/2 - 1;
1691 tmp2= tmp2t[1];
1692
1693 if(b&2){
1694 for(y=0; y < b_h; y++){
1695 for(x=0; x < b_w+1; x++){
1696 int a_1=src[x + (HTAPS_MAX/2-4)*stride];
1697 int a0= src[x + (HTAPS_MAX/2-3)*stride];
1698 int a1= src[x + (HTAPS_MAX/2-2)*stride];
1699 int a2= src[x + (HTAPS_MAX/2-1)*stride];
1700 int a3= src[x + (HTAPS_MAX/2+0)*stride];
1701 int a4= src[x + (HTAPS_MAX/2+1)*stride];
1702 int a5= src[x + (HTAPS_MAX/2+2)*stride];
1703 int a6= src[x + (HTAPS_MAX/2+3)*stride];
1704 int am=0;
1705 if(!p || p->fast_mc)
1706 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
1707 else
1708 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
1709
1710 if(am&(~255)) am= ~(am>>31);
1711 tmp2[x]= am;
1712 }
1713 src += stride;
1714 tmp2+= stride;
1715 }
1716 src -= stride*y;
1717 }
1718 src += stride*(HTAPS_MAX/2 - 1);
1719 tmp2= tmp2t[2];
1720 tmpI= tmpIt;
1721 if(b&4){
1722 for(y=0; y < b_h; y++){
1723 for(x=0; x < b_w; x++){
1724 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
1725 int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
1726 int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
1727 int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
1728 int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
1729 int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
1730 int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
1731 int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
1732 int am=0;
1733 if(!p || p->fast_mc)
1734 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
1735 else
1736 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
1737 if(am&(~255)) am= ~(am>>31);
1738 tmp2[x]= am;
1739 }
1740 tmpI+= 64;
1741 tmp2+= stride;
1742 }
1743 }
1744
1745 hpel[ 0]= src;
1746 hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1);
1747 hpel[ 2]= src + 1;
1748
1749 hpel[ 4]= tmp2t[1];
1750 hpel[ 5]= tmp2t[2];
1751 hpel[ 6]= tmp2t[1] + 1;
1752
1753 hpel[ 8]= src + stride;
1754 hpel[ 9]= hpel[1] + stride;
1755 hpel[10]= hpel[8] + 1;
1756
1757 if(b==15){
1758 const uint8_t *src1= hpel[dx/8 + dy/8*4 ];
1759 const uint8_t *src2= hpel[dx/8 + dy/8*4+1];
1760 const uint8_t *src3= hpel[dx/8 + dy/8*4+4];
1761 const uint8_t *src4= hpel[dx/8 + dy/8*4+5];
1762 dx&=7;
1763 dy&=7;
1764 for(y=0; y < b_h; y++){
1765 for(x=0; x < b_w; x++){
1766 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
1767 (8-dx)* dy *src3[x] + dx* dy *src4[x]+32)>>6;
1768 }
1769 src1+=stride;
1770 src2+=stride;
1771 src3+=stride;
1772 src4+=stride;
1773 dst +=stride;
1774 }
1775 }else{
1776 const uint8_t *src1= hpel[l];
1777 const uint8_t *src2= hpel[r];
1778 int a= weight[((dx&7) + (8*(dy&7)))];
1779 int b= 8-a;
1780 for(y=0; y < b_h; y++){
1781 for(x=0; x < b_w; x++){
1782 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
1783 }
1784 src1+=stride;
1785 src2+=stride;
1786 dst +=stride;
1787 }
1788 }
1789 }
1790
1791 #define mca(dx,dy,b_w)\
1792 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
1793 uint8_t tmp[stride*(b_w+HTAPS_MAX-1)];\
1794 assert(h==b_w);\
1795 mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, tmp, stride, b_w, b_w, dx, dy);\
1796 }
1797
1798 mca( 0, 0,16)
1799 mca( 8, 0,16)
1800 mca( 0, 8,16)
1801 mca( 8, 8,16)
1802 mca( 0, 0,8)
1803 mca( 8, 0,8)
1804 mca( 0, 8,8)
1805 mca( 8, 8,8)
1806
1807 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
1808 if(block->type & BLOCK_INTRA){
1809 int x, y;
1810 const int color = block->color[plane_index];
1811 const int color4= color*0x01010101;
1812 if(b_w==32){
1813 for(y=0; y < b_h; y++){
1814 *(uint32_t*)&dst[0 + y*stride]= color4;
1815 *(uint32_t*)&dst[4 + y*stride]= color4;
1816 *(uint32_t*)&dst[8 + y*stride]= color4;
1817 *(uint32_t*)&dst[12+ y*stride]= color4;
1818 *(uint32_t*)&dst[16+ y*stride]= color4;
1819 *(uint32_t*)&dst[20+ y*stride]= color4;
1820 *(uint32_t*)&dst[24+ y*stride]= color4;
1821 *(uint32_t*)&dst[28+ y*stride]= color4;
1822 }
1823 }else if(b_w==16){
1824 for(y=0; y < b_h; y++){
1825 *(uint32_t*)&dst[0 + y*stride]= color4;
1826 *(uint32_t*)&dst[4 + y*stride]= color4;
1827 *(uint32_t*)&dst[8 + y*stride]= color4;
1828 *(uint32_t*)&dst[12+ y*stride]= color4;
1829 }
1830 }else if(b_w==8){
1831 for(y=0; y < b_h; y++){
1832 *(uint32_t*)&dst[0 + y*stride]= color4;
1833 *(uint32_t*)&dst[4 + y*stride]= color4;
1834 }
1835 }else if(b_w==4){
1836 for(y=0; y < b_h; y++){
1837 *(uint32_t*)&dst[0 + y*stride]= color4;
1838 }
1839 }else{
1840 for(y=0; y < b_h; y++){
1841 for(x=0; x < b_w; x++){
1842 dst[x + y*stride]= color;
1843 }
1844 }
1845 }
1846 }else{
1847 uint8_t *src= s->last_picture[block->ref].data[plane_index];
1848 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
1849 int mx= block->mx*scale;
1850 int my= block->my*scale;
1851 const int dx= mx&15;
1852 const int dy= my&15;
1853 const int tab_index= 3 - (b_w>>2) + (b_w>>4);
1854 sx += (mx>>4) - (HTAPS_MAX/2-1);
1855 sy += (my>>4) - (HTAPS_MAX/2-1);
1856 src += sx + sy*stride;
1857 if( (unsigned)sx >= w - b_w - (HTAPS_MAX-2)
1858 || (unsigned)sy >= h - b_h - (HTAPS_MAX-2)){
1859 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h);
1860 src= tmp + MB_SIZE;
1861 }
1862 // assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
1863 // assert(!(b_w&(b_w-1)));
1864 assert(b_w>1 && b_h>1);
1865 assert((tab_index>=0 && tab_index<4) || b_w==32);
1866 if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc )
1867 mc_block(&s->plane[plane_index], dst, src, tmp, stride, b_w, b_h, dx, dy);
1868 else if(b_w==32){
1869 int y;
1870 for(y=0; y<b_h; y+=16){
1871 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
1872 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
1873 }
1874 }else if(b_w==b_h)
1875 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
1876 else if(b_w==2*b_h){
1877 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 3 + 3*stride,stride);
1878 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
1879 }else{
1880 assert(2*b_w==b_h);
1881 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 3 + 3*stride ,stride);
1882 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
1883 }
1884 }
1885 }
1886
1887 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
1888 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
1889 int y, x;
1890 IDWTELEM * dst;
1891 for(y=0; y<b_h; y++){
1892 //FIXME ugly misuse of obmc_stride
1893 const uint8_t *obmc1= obmc + y*obmc_stride;
1894 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
1895 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
1896 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
1897 dst = slice_buffer_get_line(sb, src_y + y);
1898 for(x=0; x<b_w; x++){
1899 int v= obmc1[x] * block[3][x + y*src_stride]
1900 +obmc2[x] * block[2][x + y*src_stride]
1901 +obmc3[x] * block[1][x + y*src_stride]
1902 +obmc4[x] * block[0][x + y*src_stride];
1903
1904 v <<= 8 - LOG2_OBMC_MAX;
1905 if(FRAC_BITS != 8){
1906 v >>= 8 - FRAC_BITS;
1907 }
1908 if(add){
1909 v += dst[x + src_x];
1910 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
1911 if(v&(~255)) v= ~(v>>31);
1912 dst8[x + y*src_stride] = v;
1913 }else{
1914 dst[x + src_x] -= v;
1915 }
1916 }
1917 }
1918 }
1919
1920 //FIXME name cleanup (b_w, block_w, b_width stuff)
1921 static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
1922 const int b_width = s->b_width << s->block_max_depth;