2ff0d43462c967537311e2919bb71176d9837a15
[libav.git] / libavcodec / snow.c
1 /*
2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include "avcodec.h"
22 #include "dsputil.h"
23 #include "snow.h"
24
25 #include "rangecoder.h"
26
27 #include "mpegvideo.h"
28
29 #undef NDEBUG
30 #include <assert.h>
31
/**
 * 3-level sign quantizer, indexed by a byte.
 * Entries 0, 1 and 255 map to 0; 2..127 map to +1; 128..254 map to -1.
 * The 128..255 half is presumably reached with a negative difference masked
 * to 8 bits (the lookup site is not part of this table's definition).
 */
static const int8_t quant3[256] = {
     0, 0, 1, 1,   1, 1, 1, 1,   1, 1, 1, 1,   1, 1, 1, 1, /*   0.. 15 */
     1, 1, 1, 1,   1, 1, 1, 1,   1, 1, 1, 1,   1, 1, 1, 1, /*  16.. 31 */
     1, 1, 1, 1,   1, 1, 1, 1,   1, 1, 1, 1,   1, 1, 1, 1, /*  32.. 47 */
     1, 1, 1, 1,   1, 1, 1, 1,   1, 1, 1, 1,   1, 1, 1, 1, /*  48.. 63 */
     1, 1, 1, 1,   1, 1, 1, 1,   1, 1, 1, 1,   1, 1, 1, 1, /*  64.. 79 */
     1, 1, 1, 1,   1, 1, 1, 1,   1, 1, 1, 1,   1, 1, 1, 1, /*  80.. 95 */
     1, 1, 1, 1,   1, 1, 1, 1,   1, 1, 1, 1,   1, 1, 1, 1, /*  96..111 */
     1, 1, 1, 1,   1, 1, 1, 1,   1, 1, 1, 1,   1, 1, 1, 1, /* 112..127 */
    -1,-1,-1,-1,  -1,-1,-1,-1,  -1,-1,-1,-1,  -1,-1,-1,-1, /* 128..143 */
    -1,-1,-1,-1,  -1,-1,-1,-1,  -1,-1,-1,-1,  -1,-1,-1,-1, /* 144..159 */
    -1,-1,-1,-1,  -1,-1,-1,-1,  -1,-1,-1,-1,  -1,-1,-1,-1, /* 160..175 */
    -1,-1,-1,-1,  -1,-1,-1,-1,  -1,-1,-1,-1,  -1,-1,-1,-1, /* 176..191 */
    -1,-1,-1,-1,  -1,-1,-1,-1,  -1,-1,-1,-1,  -1,-1,-1,-1, /* 192..207 */
    -1,-1,-1,-1,  -1,-1,-1,-1,  -1,-1,-1,-1,  -1,-1,-1,-1, /* 208..223 */
    -1,-1,-1,-1,  -1,-1,-1,-1,  -1,-1,-1,-1,  -1,-1,-1,-1, /* 224..239 */
    -1,-1,-1,-1,  -1,-1,-1,-1,  -1,-1,-1,-1,  -1,-1,-1, 0, /* 240..255 */
};
/**
 * 3-level sign quantizer without a dead zone, indexed by a byte.
 * Entry 0 maps to 0; 1..127 map to +1; 128..255 map to -1.
 */
static const int8_t quant3b[256] = {
     0, 1, 1, 1,   1, 1, 1, 1,   1, 1, 1, 1,   1, 1, 1, 1, /*   0.. 15 */
     1, 1, 1, 1,   1, 1, 1, 1,   1, 1, 1, 1,   1, 1, 1, 1, /*  16.. 31 */
     1, 1, 1, 1,   1, 1, 1, 1,   1, 1, 1, 1,   1, 1, 1, 1, /*  32.. 47 */
     1, 1, 1, 1,   1, 1, 1, 1,   1, 1, 1, 1,   1, 1, 1, 1, /*  48.. 63 */
     1, 1, 1, 1,   1, 1, 1, 1,   1, 1, 1, 1,   1, 1, 1, 1, /*  64.. 79 */
     1, 1, 1, 1,   1, 1, 1, 1,   1, 1, 1, 1,   1, 1, 1, 1, /*  80.. 95 */
     1, 1, 1, 1,   1, 1, 1, 1,   1, 1, 1, 1,   1, 1, 1, 1, /*  96..111 */
     1, 1, 1, 1,   1, 1, 1, 1,   1, 1, 1, 1,   1, 1, 1, 1, /* 112..127 */
    -1,-1,-1,-1,  -1,-1,-1,-1,  -1,-1,-1,-1,  -1,-1,-1,-1, /* 128..143 */
    -1,-1,-1,-1,  -1,-1,-1,-1,  -1,-1,-1,-1,  -1,-1,-1,-1, /* 144..159 */
    -1,-1,-1,-1,  -1,-1,-1,-1,  -1,-1,-1,-1,  -1,-1,-1,-1, /* 160..175 */
    -1,-1,-1,-1,  -1,-1,-1,-1,  -1,-1,-1,-1,  -1,-1,-1,-1, /* 176..191 */
    -1,-1,-1,-1,  -1,-1,-1,-1,  -1,-1,-1,-1,  -1,-1,-1,-1, /* 192..207 */
    -1,-1,-1,-1,  -1,-1,-1,-1,  -1,-1,-1,-1,  -1,-1,-1,-1, /* 208..223 */
    -1,-1,-1,-1,  -1,-1,-1,-1,  -1,-1,-1,-1,  -1,-1,-1,-1, /* 224..239 */
    -1,-1,-1,-1,  -1,-1,-1,-1,  -1,-1,-1,-1,  -1,-1,-1,-1, /* 240..255 */
};
/**
 * Alternating-sign lookup, indexed by a byte.
 * Entries 0 and 1 map to 0; from index 2 on, even indices map to +1 and
 * odd indices map to -1.
 */
static const int8_t quant3bA[256] = {
     0, 0, 1,-1,   1,-1, 1,-1,   1,-1, 1,-1,   1,-1, 1,-1, /*   0.. 15 */
     1,-1, 1,-1,   1,-1, 1,-1,   1,-1, 1,-1,   1,-1, 1,-1, /*  16.. 31 */
     1,-1, 1,-1,   1,-1, 1,-1,   1,-1, 1,-1,   1,-1, 1,-1, /*  32.. 47 */
     1,-1, 1,-1,   1,-1, 1,-1,   1,-1, 1,-1,   1,-1, 1,-1, /*  48.. 63 */
     1,-1, 1,-1,   1,-1, 1,-1,   1,-1, 1,-1,   1,-1, 1,-1, /*  64.. 79 */
     1,-1, 1,-1,   1,-1, 1,-1,   1,-1, 1,-1,   1,-1, 1,-1, /*  80.. 95 */
     1,-1, 1,-1,   1,-1, 1,-1,   1,-1, 1,-1,   1,-1, 1,-1, /*  96..111 */
     1,-1, 1,-1,   1,-1, 1,-1,   1,-1, 1,-1,   1,-1, 1,-1, /* 112..127 */
     1,-1, 1,-1,   1,-1, 1,-1,   1,-1, 1,-1,   1,-1, 1,-1, /* 128..143 */
     1,-1, 1,-1,   1,-1, 1,-1,   1,-1, 1,-1,   1,-1, 1,-1, /* 144..159 */
     1,-1, 1,-1,   1,-1, 1,-1,   1,-1, 1,-1,   1,-1, 1,-1, /* 160..175 */
     1,-1, 1,-1,   1,-1, 1,-1,   1,-1, 1,-1,   1,-1, 1,-1, /* 176..191 */
     1,-1, 1,-1,   1,-1, 1,-1,   1,-1, 1,-1,   1,-1, 1,-1, /* 192..207 */
     1,-1, 1,-1,   1,-1, 1,-1,   1,-1, 1,-1,   1,-1, 1,-1, /* 208..223 */
     1,-1, 1,-1,   1,-1, 1,-1,   1,-1, 1,-1,   1,-1, 1,-1, /* 224..239 */
     1,-1, 1,-1,   1,-1, 1,-1,   1,-1, 1,-1,   1,-1, 1,-1, /* 240..255 */
};
/**
 * 5-level quantizer, indexed by a byte.
 * 0 -> 0, 1..3 -> 1, 4..127 -> 2; the upper half is the negated mirror
 * (entry 256-i holds minus entry i), and entry 128 is -2.
 */
static const int8_t quant5[256] = {
     0, 1, 1, 1,   2, 2, 2, 2,   2, 2, 2, 2,   2, 2, 2, 2, /*   0.. 15 */
     2, 2, 2, 2,   2, 2, 2, 2,   2, 2, 2, 2,   2, 2, 2, 2, /*  16.. 31 */
     2, 2, 2, 2,   2, 2, 2, 2,   2, 2, 2, 2,   2, 2, 2, 2, /*  32.. 47 */
     2, 2, 2, 2,   2, 2, 2, 2,   2, 2, 2, 2,   2, 2, 2, 2, /*  48.. 63 */
     2, 2, 2, 2,   2, 2, 2, 2,   2, 2, 2, 2,   2, 2, 2, 2, /*  64.. 79 */
     2, 2, 2, 2,   2, 2, 2, 2,   2, 2, 2, 2,   2, 2, 2, 2, /*  80.. 95 */
     2, 2, 2, 2,   2, 2, 2, 2,   2, 2, 2, 2,   2, 2, 2, 2, /*  96..111 */
     2, 2, 2, 2,   2, 2, 2, 2,   2, 2, 2, 2,   2, 2, 2, 2, /* 112..127 */
    -2,-2,-2,-2,  -2,-2,-2,-2,  -2,-2,-2,-2,  -2,-2,-2,-2, /* 128..143 */
    -2,-2,-2,-2,  -2,-2,-2,-2,  -2,-2,-2,-2,  -2,-2,-2,-2, /* 144..159 */
    -2,-2,-2,-2,  -2,-2,-2,-2,  -2,-2,-2,-2,  -2,-2,-2,-2, /* 160..175 */
    -2,-2,-2,-2,  -2,-2,-2,-2,  -2,-2,-2,-2,  -2,-2,-2,-2, /* 176..191 */
    -2,-2,-2,-2,  -2,-2,-2,-2,  -2,-2,-2,-2,  -2,-2,-2,-2, /* 192..207 */
    -2,-2,-2,-2,  -2,-2,-2,-2,  -2,-2,-2,-2,  -2,-2,-2,-2, /* 208..223 */
    -2,-2,-2,-2,  -2,-2,-2,-2,  -2,-2,-2,-2,  -2,-2,-2,-2, /* 224..239 */
    -2,-2,-2,-2,  -2,-2,-2,-2,  -2,-2,-2,-2,  -2,-1,-1,-1, /* 240..255 */
};
/**
 * 7-level quantizer, indexed by a byte.
 * 0 -> 0, 1..2 -> 1, 3..39 -> 2, 40..127 -> 3; the upper half is the
 * negated mirror (entry 256-i holds minus entry i), and entry 128 is -3.
 */
static const int8_t quant7[256] = {
     0, 1, 1, 2,   2, 2, 2, 2,   2, 2, 2, 2,   2, 2, 2, 2, /*   0.. 15 */
     2, 2, 2, 2,   2, 2, 2, 2,   2, 2, 2, 2,   2, 2, 2, 2, /*  16.. 31 */
     2, 2, 2, 2,   2, 2, 2, 2,   3, 3, 3, 3,   3, 3, 3, 3, /*  32.. 47 */
     3, 3, 3, 3,   3, 3, 3, 3,   3, 3, 3, 3,   3, 3, 3, 3, /*  48.. 63 */
     3, 3, 3, 3,   3, 3, 3, 3,   3, 3, 3, 3,   3, 3, 3, 3, /*  64.. 79 */
     3, 3, 3, 3,   3, 3, 3, 3,   3, 3, 3, 3,   3, 3, 3, 3, /*  80.. 95 */
     3, 3, 3, 3,   3, 3, 3, 3,   3, 3, 3, 3,   3, 3, 3, 3, /*  96..111 */
     3, 3, 3, 3,   3, 3, 3, 3,   3, 3, 3, 3,   3, 3, 3, 3, /* 112..127 */
    -3,-3,-3,-3,  -3,-3,-3,-3,  -3,-3,-3,-3,  -3,-3,-3,-3, /* 128..143 */
    -3,-3,-3,-3,  -3,-3,-3,-3,  -3,-3,-3,-3,  -3,-3,-3,-3, /* 144..159 */
    -3,-3,-3,-3,  -3,-3,-3,-3,  -3,-3,-3,-3,  -3,-3,-3,-3, /* 160..175 */
    -3,-3,-3,-3,  -3,-3,-3,-3,  -3,-3,-3,-3,  -3,-3,-3,-3, /* 176..191 */
    -3,-3,-3,-3,  -3,-3,-3,-3,  -3,-3,-3,-3,  -3,-3,-3,-3, /* 192..207 */
    -3,-3,-3,-3,  -3,-3,-3,-3,  -3,-2,-2,-2,  -2,-2,-2,-2, /* 208..223 */
    -2,-2,-2,-2,  -2,-2,-2,-2,  -2,-2,-2,-2,  -2,-2,-2,-2, /* 224..239 */
    -2,-2,-2,-2,  -2,-2,-2,-2,  -2,-2,-2,-2,  -2,-2,-1,-1, /* 240..255 */
};
/**
 * 9-level quantizer, indexed by a byte.
 * 0 -> 0, 1..2 -> 1, 3..6 -> 2, 7..20 -> 3, 21..127 -> 4; the upper half
 * is the negated mirror (entry 256-i holds minus entry i), entry 128 is -4.
 */
static const int8_t quant9[256] = {
     0, 1, 1, 2,   2, 2, 2, 3,   3, 3, 3, 3,   3, 3, 3, 3, /*   0.. 15 */
     3, 3, 3, 3,   3, 4, 4, 4,   4, 4, 4, 4,   4, 4, 4, 4, /*  16.. 31 */
     4, 4, 4, 4,   4, 4, 4, 4,   4, 4, 4, 4,   4, 4, 4, 4, /*  32.. 47 */
     4, 4, 4, 4,   4, 4, 4, 4,   4, 4, 4, 4,   4, 4, 4, 4, /*  48.. 63 */
     4, 4, 4, 4,   4, 4, 4, 4,   4, 4, 4, 4,   4, 4, 4, 4, /*  64.. 79 */
     4, 4, 4, 4,   4, 4, 4, 4,   4, 4, 4, 4,   4, 4, 4, 4, /*  80.. 95 */
     4, 4, 4, 4,   4, 4, 4, 4,   4, 4, 4, 4,   4, 4, 4, 4, /*  96..111 */
     4, 4, 4, 4,   4, 4, 4, 4,   4, 4, 4, 4,   4, 4, 4, 4, /* 112..127 */
    -4,-4,-4,-4,  -4,-4,-4,-4,  -4,-4,-4,-4,  -4,-4,-4,-4, /* 128..143 */
    -4,-4,-4,-4,  -4,-4,-4,-4,  -4,-4,-4,-4,  -4,-4,-4,-4, /* 144..159 */
    -4,-4,-4,-4,  -4,-4,-4,-4,  -4,-4,-4,-4,  -4,-4,-4,-4, /* 160..175 */
    -4,-4,-4,-4,  -4,-4,-4,-4,  -4,-4,-4,-4,  -4,-4,-4,-4, /* 176..191 */
    -4,-4,-4,-4,  -4,-4,-4,-4,  -4,-4,-4,-4,  -4,-4,-4,-4, /* 192..207 */
    -4,-4,-4,-4,  -4,-4,-4,-4,  -4,-4,-4,-4,  -4,-4,-4,-4, /* 208..223 */
    -4,-4,-4,-4,  -4,-4,-4,-4,  -4,-4,-4,-4,  -3,-3,-3,-3, /* 224..239 */
    -3,-3,-3,-3,  -3,-3,-3,-3,  -3,-3,-2,-2,  -2,-2,-1,-1, /* 240..255 */
};
/**
 * 11-level quantizer, indexed by a byte.
 * 0 -> 0, 1 -> 1, 2..4 -> 2, 5..11 -> 3, 12..34 -> 4, 35..127 -> 5; the
 * upper half is the negated mirror (entry 256-i holds minus entry i),
 * entry 128 is -5.
 */
static const int8_t quant11[256] = {
     0, 1, 2, 2,   2, 3, 3, 3,   3, 3, 3, 3,   4, 4, 4, 4, /*   0.. 15 */
     4, 4, 4, 4,   4, 4, 4, 4,   4, 4, 4, 4,   4, 4, 4, 4, /*  16.. 31 */
     4, 4, 4, 5,   5, 5, 5, 5,   5, 5, 5, 5,   5, 5, 5, 5, /*  32.. 47 */
     5, 5, 5, 5,   5, 5, 5, 5,   5, 5, 5, 5,   5, 5, 5, 5, /*  48.. 63 */
     5, 5, 5, 5,   5, 5, 5, 5,   5, 5, 5, 5,   5, 5, 5, 5, /*  64.. 79 */
     5, 5, 5, 5,   5, 5, 5, 5,   5, 5, 5, 5,   5, 5, 5, 5, /*  80.. 95 */
     5, 5, 5, 5,   5, 5, 5, 5,   5, 5, 5, 5,   5, 5, 5, 5, /*  96..111 */
     5, 5, 5, 5,   5, 5, 5, 5,   5, 5, 5, 5,   5, 5, 5, 5, /* 112..127 */
    -5,-5,-5,-5,  -5,-5,-5,-5,  -5,-5,-5,-5,  -5,-5,-5,-5, /* 128..143 */
    -5,-5,-5,-5,  -5,-5,-5,-5,  -5,-5,-5,-5,  -5,-5,-5,-5, /* 144..159 */
    -5,-5,-5,-5,  -5,-5,-5,-5,  -5,-5,-5,-5,  -5,-5,-5,-5, /* 160..175 */
    -5,-5,-5,-5,  -5,-5,-5,-5,  -5,-5,-5,-5,  -5,-5,-5,-5, /* 176..191 */
    -5,-5,-5,-5,  -5,-5,-5,-5,  -5,-5,-5,-5,  -5,-5,-5,-5, /* 192..207 */
    -5,-5,-5,-5,  -5,-5,-5,-5,  -5,-5,-5,-5,  -5,-5,-4,-4, /* 208..223 */
    -4,-4,-4,-4,  -4,-4,-4,-4,  -4,-4,-4,-4,  -4,-4,-4,-4, /* 224..239 */
    -4,-4,-4,-4,  -4,-3,-3,-3,  -3,-3,-3,-3,  -2,-2,-2,-1, /* 240..255 */
};
/**
 * 13-level quantizer, indexed by a byte.
 * 0 -> 0, 1 -> 1, 2..3 -> 2, 4..7 -> 3, 8..16 -> 4, 17..49 -> 5,
 * 50..127 -> 6; the upper half is the negated mirror (entry 256-i holds
 * minus entry i), entry 128 is -6.
 */
static const int8_t quant13[256] = {
     0, 1, 2, 2,   3, 3, 3, 3,   4, 4, 4, 4,   4, 4, 4, 4, /*   0.. 15 */
     4, 5, 5, 5,   5, 5, 5, 5,   5, 5, 5, 5,   5, 5, 5, 5, /*  16.. 31 */
     5, 5, 5, 5,   5, 5, 5, 5,   5, 5, 5, 5,   5, 5, 5, 5, /*  32.. 47 */
     5, 5, 6, 6,   6, 6, 6, 6,   6, 6, 6, 6,   6, 6, 6, 6, /*  48.. 63 */
     6, 6, 6, 6,   6, 6, 6, 6,   6, 6, 6, 6,   6, 6, 6, 6, /*  64.. 79 */
     6, 6, 6, 6,   6, 6, 6, 6,   6, 6, 6, 6,   6, 6, 6, 6, /*  80.. 95 */
     6, 6, 6, 6,   6, 6, 6, 6,   6, 6, 6, 6,   6, 6, 6, 6, /*  96..111 */
     6, 6, 6, 6,   6, 6, 6, 6,   6, 6, 6, 6,   6, 6, 6, 6, /* 112..127 */
    -6,-6,-6,-6,  -6,-6,-6,-6,  -6,-6,-6,-6,  -6,-6,-6,-6, /* 128..143 */
    -6,-6,-6,-6,  -6,-6,-6,-6,  -6,-6,-6,-6,  -6,-6,-6,-6, /* 144..159 */
    -6,-6,-6,-6,  -6,-6,-6,-6,  -6,-6,-6,-6,  -6,-6,-6,-6, /* 160..175 */
    -6,-6,-6,-6,  -6,-6,-6,-6,  -6,-6,-6,-6,  -6,-6,-6,-6, /* 176..191 */
    -6,-6,-6,-6,  -6,-6,-6,-6,  -6,-6,-6,-6,  -6,-6,-6,-5, /* 192..207 */
    -5,-5,-5,-5,  -5,-5,-5,-5,  -5,-5,-5,-5,  -5,-5,-5,-5, /* 208..223 */
    -5,-5,-5,-5,  -5,-5,-5,-5,  -5,-5,-5,-5,  -5,-5,-5,-5, /* 224..239 */
    -4,-4,-4,-4,  -4,-4,-4,-4,  -4,-3,-3,-3,  -3,-2,-2,-1, /* 240..255 */
};
176
/*
 * OBMC weight windows, "64*linear" variant.
 *
 * Only these linear tables were ever compiled: the cubic and cosine
 * alternatives sat in preprocessor branches that could not be selected, so
 * they are not carried here.
 *
 * Both windows are symmetric under horizontal mirroring, vertical mirroring
 * and transposition. The four weights that land on the same pixel when a
 * window is shifted by half its size in x and/or y always add up to 256.
 */
static const uint8_t obmc32[1024]={
  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,
  0,  4,  4,  4,  8,  8,  8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12,  8,  8,  8,  4,  4,  4,  0,
  0,  4,  8,  8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12,  8,  8,  4,  0,
  0,  4,  8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12,  8,  4,  0,
  4,  8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12,  8,  4,
  4,  8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12,  8,  4,
  4,  8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16,  8,  4,
  4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12,  4,
  4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12,  4,
  4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16,  4,
  4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16,  4,
  4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16,  4,
  8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20,  8,
  8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20,  8,
  8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20,  8,
  8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24,  8,
  8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24,  8,
  8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20,  8,
  8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20,  8,
  8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20,  8,
  4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16,  4,
  4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16,  4,
  4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16,  4,
  4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12,  4,
  4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12,  4,
  4,  8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16,  8,  4,
  4,  8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12,  8,  4,
  4,  8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12,  8,  4,
  0,  4,  8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12,  8,  4,  0,
  0,  4,  8,  8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12,  8,  8,  4,  0,
  0,  4,  4,  4,  8,  8,  8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12,  8,  8,  8,  4,  4,  4,  0,
  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,
 //error:0.000020
};
static const uint8_t obmc16[256]={
  0,  4,  4,  8,  8, 12, 12, 16, 16, 12, 12,  8,  8,  4,  4,  0,
  4,  8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16,  8,  4,
  4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16,  4,
  8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20,  8,
  8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28,  8,
 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
  8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28,  8,
  8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20,  8,
  4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16,  4,
  4,  8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16,  8,  4,
  0,  4,  4,  8,  8, 12, 12, 16, 16, 12, 12,  8,  8,  4,  4,  0,
//error:0.000015
};
343
/*
 * 8x8 OBMC weight window (linear *64).
 * Each entry is the product of the row and column weights taken from
 * {2, 6, 10, 14, 14, 10, 6, 2}.
 */
static const uint8_t obmc8[64] = {
      4,  12,  20,  28,  28,  20,  12,   4,
     12,  36,  60,  84,  84,  60,  36,  12,
     20,  60, 100, 140, 140, 100,  60,  20,
     28,  84, 140, 196, 196, 140,  84,  28,
     28,  84, 140, 196, 196, 140,  84,  28,
     20,  60, 100, 140, 140, 100,  60,  20,
     12,  36,  60,  84,  84,  60,  36,  12,
      4,  12,  20,  28,  28,  20,  12,   4,
    //error:0.000000
};
356
/*
 * 4x4 OBMC weight window (linear *64).
 * Each entry is the product of the row and column weights taken from
 * {4, 12, 12, 4}.
 */
static const uint8_t obmc4[16] = {
     16,  48,  48,  16,
     48, 144, 144,  48,
     48, 144, 144,  48,
     16,  48,  48,  16,
    //error:0.000000
};
365
366 static const uint8_t *obmc_tab[4]={
367 obmc32, obmc16, obmc8, obmc4
368 };
369
370 static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
371
/* Bit flags stored in BlockNode.type. */
#define BLOCK_INTRA 1
#define BLOCK_OPT   2

/**
 * Per-block record: a vector (mx, my), a reference index, three colour
 * bytes, a flag byte and a level.
 */
typedef struct BlockNode {
    int16_t mx;        ///< x component of the block's vector
    int16_t my;        ///< y component of the block's vector
    uint8_t ref;       ///< reference index
    uint8_t color[3];  ///< one byte per colour component
    uint8_t type;      ///< bitwise OR of BLOCK_INTRA / BLOCK_OPT
    uint8_t level;     //FIXME merge into type?
} BlockNode;
384
385 static const BlockNode null_block= { //FIXME add border maybe
386 .color= {128,128,128},
387 .mx= 0,
388 .my= 0,
389 .ref= 0,
390 .type= 0,
391 .level= 0,
392 };
393
#define LOG2_MB_SIZE       4                     /* log2 of MB_SIZE */
#define MB_SIZE            (1 << LOG2_MB_SIZE)   /* evaluates to 16 */
#define ENCODER_EXTRA_BITS 4
#define HTAPS_MAX          8                     /* Plane.hcoeff[] holds HTAPS_MAX/2 entries */
398
/** A signed 16-bit x value paired with an unsigned 16-bit coefficient. */
typedef struct x_and_coeff {
    int16_t  x;
    uint16_t coeff;
} x_and_coeff;
403
404 typedef struct SubBand{
405 int level;
406 int stride;
407 int width;
408 int height;
409 int qlog; ///< log(qscale)/log[2^(1/6)]
410 DWTELEM *buf;
411 IDWTELEM *ibuf;
412 int buf_x_offset;
413 int buf_y_offset;
414 int stride_line; ///< Stride measured in lines, not pixels.
415 x_and_coeff * x_coeff;
416 struct SubBand *parent;
417 uint8_t state[/*7*2*/ 7 + 512][32];
418 }SubBand;
419
420 typedef struct Plane{
421 int width;
422 int height;
423 SubBand band[MAX_DECOMPOSITIONS][4];
424
425 int htaps;
426 int8_t hcoeff[HTAPS_MAX/2];
427 int diag_mc;
428 int fast_mc;
429
430 int last_htaps;
431 int8_t last_hcoeff[HTAPS_MAX/2];
432 int last_diag_mc;
433 }Plane;
434
435 typedef struct SnowContext{
436 // MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
437
438 AVCodecContext *avctx;
439 RangeCoder c;
440 DSPContext dsp;
441 AVFrame new_picture;
442 AVFrame input_picture; ///< new_picture with the internal linesizes
443 AVFrame current_picture;
444 AVFrame last_picture[MAX_REF_FRAMES];
445 uint8_t *halfpel_plane[MAX_REF_FRAMES][4][4];
446 AVFrame mconly_picture;
447 // uint8_t q_context[16];
448 uint8_t header_state[32];
449 uint8_t block_state[128 + 32*128];
450 int keyframe;
451 int always_reset;
452 int version;
453 int spatial_decomposition_type;
454 int last_spatial_decomposition_type;
455 int temporal_decomposition_type;
456 int spatial_decomposition_count;
457 int last_spatial_decomposition_count;
458 int temporal_decomposition_count;
459 int max_ref_frames;
460 int ref_frames;
461 int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
462 uint32_t *ref_scores[MAX_REF_FRAMES];
463 DWTELEM *spatial_dwt_buffer;
464 IDWTELEM *spatial_idwt_buffer;
465 int colorspace_type;
466 int chroma_h_shift;
467 int chroma_v_shift;
468 int spatial_scalability;
469 int qlog;
470 int last_qlog;
471 int lambda;
472 int lambda2;
473 int pass1_rc;
474 int mv_scale;
475 int last_mv_scale;
476 int qbias;
477 int last_qbias;
478 #define QBIAS_SHIFT 3
479 int b_width;
480 int b_height;
481 int block_max_depth;
482 int last_block_max_depth;
483 Plane plane[MAX_PLANES];
484 BlockNode *block;
485 #define ME_CACHE_SIZE 1024
486 int me_cache[ME_CACHE_SIZE];
487 int me_cache_generation;
488 slice_buffer sb;
489
490 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
491 }SnowContext;
492
493 typedef struct {
494 IDWTELEM *b0;
495 IDWTELEM *b1;
496 IDWTELEM *b2;
497 IDWTELEM *b3;
498 int y;
499 } dwt_compose_t;
500
501 #define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
502 //#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
503
504 static void iterative_me(SnowContext *s);
505
506 static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM * base_buffer)
507 {
508 int i;
509
510 buf->base_buffer = base_buffer;
511 buf->line_count = line_count;
512 buf->line_width = line_width;
513 buf->data_count = max_allocated_lines;
514 buf->line = av_mallocz (sizeof(IDWTELEM *) * line_count);
515 buf->data_stack = av_malloc (sizeof(IDWTELEM *) * max_allocated_lines);
516
517 for (i = 0; i < max_allocated_lines; i++)
518 {
519 buf->data_stack[i] = av_malloc (sizeof(IDWTELEM) * line_width);
520 }
521
522 buf->data_stack_top = max_allocated_lines - 1;
523 }
524
525 static IDWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
526 {
527 int offset;
528 IDWTELEM * buffer;
529
530 // av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line);
531
532 assert(buf->data_stack_top >= 0);
533 // assert(!buf->line[line]);
534 if (buf->line[line])
535 return buf->line[line];
536
537 offset = buf->line_width * line;
538 buffer = buf->data_stack[buf->data_stack_top];
539 buf->data_stack_top--;
540 buf->line[line] = buffer;
541
542 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
543
544 return buffer;
545 }
546
547 static void slice_buffer_release(slice_buffer * buf, int line)
548 {
549 int offset;
550 IDWTELEM * buffer;
551
552 assert(line >= 0 && line < buf->line_count);
553 assert(buf->line[line]);
554
555 offset = buf->line_width * line;
556 buffer = buf->line[line];
557 buf->data_stack_top++;
558 buf->data_stack[buf->data_stack_top] = buffer;
559 buf->line[line] = NULL;
560
561 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_release: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
562 }
563
564 static void slice_buffer_flush(slice_buffer * buf)
565 {
566 int i;
567 for (i = 0; i < buf->line_count; i++)
568 {
569 if (buf->line[i])
570 {
571 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_flush: line: %d \n", i);
572 slice_buffer_release(buf, i);
573 }
574 }
575 }
576
577 static void slice_buffer_destroy(slice_buffer * buf)
578 {
579 int i;
580 slice_buffer_flush(buf);
581
582 for (i = buf->data_count - 1; i >= 0; i--)
583 {
584 av_freep(&buf->data_stack[i]);
585 }
586 av_freep(&buf->data_stack);
587 av_freep(&buf->line);
588 }
589
590 #ifdef __sgi
591 // Avoid a name clash on SGI IRIX
592 #undef qexp
593 #endif
594 #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
595 static uint8_t qexp[QROOT];
596
/**
 * Reflect a coordinate into the range [0, m] by mirroring at both ends.
 *
 * @param v  coordinate, may lie outside [0, m]
 * @param m  upper bound of the valid range (expected to be >= 0)
 * @return   the mirrored coordinate; 0 when m is 0
 */
static inline int mirror(int v, int m){
    /* With m == 0 the reflection below never makes progress for v != 0
     * (v just flips sign forever); the only valid coordinate is 0. */
    if(!m)
        return 0;

    while((unsigned)v > (unsigned)m){
        v=-v;
        if(v<0) v+= 2*m;
    }
    return v;
}
604
605 static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
606 int i;
607
608 if(v){
609 const int a= FFABS(v);
610 const int e= av_log2(a);
611 #if 1
612 const int el= FFMIN(e, 10);
613 put_rac(c, state+0, 0);
614
615 for(i=0; i<el; i++){
616 put_rac(c, state+1+i, 1); //1..10
617 }
618 for(; i<e; i++){
619 put_rac(c, state+1+9, 1); //1..10
620 }
621 put_rac(c, state+1+FFMIN(i,9), 0);
622
623 for(i=e-1; i>=el; i--){
624 put_rac(c, state+22+9, (a>>i)&1); //22..31
625 }
626 for(; i>=0; i--){
627 put_rac(c, state+22+i, (a>>i)&1); //22..31
628 }
629
630 if(is_signed)
631 put_rac(c, state+11 + el, v < 0); //11..21
632 #else
633
634 put_rac(c, state+0, 0);
635 if(e<=9){
636 for(i=0; i<e; i++){
637 put_rac(c, state+1+i, 1); //1..10
638 }
639 put_rac(c, state+1+i, 0);
640
641 for(i=e-1; i>=0; i--){
642 put_rac(c, state+22+i, (a>>i)&1); //22..31
643 }
644
645 if(is_signed)
646 put_rac(c, state+11 + e, v < 0); //11..21
647 }else{
648 for(i=0; i<e; i++){
649 put_rac(c, state+1+FFMIN(i,9), 1); //1..10
650 }
651 put_rac(c, state+1+FFMIN(i,9), 0);
652
653 for(i=e-1; i>=0; i--){
654 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
655 }
656
657 if(is_signed)
658 put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
659 }
660 #endif
661 }else{
662 put_rac(c, state+0, 1);
663 }
664 }
665
666 static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
667 if(get_rac(c, state+0))
668 return 0;
669 else{
670 int i, e, a;
671 e= 0;
672 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
673 e++;
674 }
675
676 a= 1;
677 for(i=e-1; i>=0; i--){
678 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
679 }
680
681 if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
682 return -a;
683 else
684 return a;
685 }
686 }
687
688 static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
689 int i;
690 int r= log2>=0 ? 1<<log2 : 1;
691
692 assert(v>=0);
693 assert(log2>=-4);
694
695 while(v >= r){
696 put_rac(c, state+4+log2, 1);
697 v -= r;
698 log2++;
699 if(log2>0) r+=r;
700 }
701 put_rac(c, state+4+log2, 0);
702
703 for(i=log2-1; i>=0; i--){
704 put_rac(c, state+31-i, (v>>i)&1);
705 }
706 }
707
708 static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
709 int i;
710 int r= log2>=0 ? 1<<log2 : 1;
711 int v=0;
712
713 assert(log2>=-4);
714
715 while(get_rac(c, state+4+log2)){
716 v+= r;
717 log2++;
718 if(log2>0) r+=r;
719 }
720
721 for(i=log2-1; i>=0; i--){
722 v+= get_rac(c, state+31-i)<<i;
723 }
724
725 return v;
726 }
727
728 static av_always_inline void
729 lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
730 int dst_step, int src_step, int ref_step,
731 int width, int mul, int add, int shift,
732 int highpass, int inverse){
733 const int mirror_left= !highpass;
734 const int mirror_right= (width&1) ^ highpass;
735 const int w= (width>>1) - 1 + (highpass & width);
736 int i;
737
738 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
739 if(mirror_left){
740 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
741 dst += dst_step;
742 src += src_step;
743 }
744
745 for(i=0; i<w; i++){
746 dst[i*dst_step] =
747 LIFT(src[i*src_step],
748 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
749 inverse);
750 }
751
752 if(mirror_right){
753 dst[w*dst_step] =
754 LIFT(src[w*src_step],
755 ((mul*2*ref[w*ref_step]+add)>>shift),
756 inverse);
757 }
758 }
759
760 static av_always_inline void
761 inv_lift(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
762 int dst_step, int src_step, int ref_step,
763 int width, int mul, int add, int shift,
764 int highpass, int inverse){
765 const int mirror_left= !highpass;
766 const int mirror_right= (width&1) ^ highpass;
767 const int w= (width>>1) - 1 + (highpass & width);
768 int i;
769
770 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
771 if(mirror_left){
772 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
773 dst += dst_step;
774 src += src_step;
775 }
776
777 for(i=0; i<w; i++){
778 dst[i*dst_step] =
779 LIFT(src[i*src_step],
780 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
781 inverse);
782 }
783
784 if(mirror_right){
785 dst[w*dst_step] =
786 LIFT(src[w*src_step],
787 ((mul*2*ref[w*ref_step]+add)>>shift),
788 inverse);
789 }
790 }
791
792 #ifndef liftS
793 static av_always_inline void
794 liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
795 int dst_step, int src_step, int ref_step,
796 int width, int mul, int add, int shift,
797 int highpass, int inverse){
798 const int mirror_left= !highpass;
799 const int mirror_right= (width&1) ^ highpass;
800 const int w= (width>>1) - 1 + (highpass & width);
801 int i;
802
803 assert(shift == 4);
804 #define LIFTS(src, ref, inv) \
805 ((inv) ? \
806 (src) + (((ref) + 4*(src))>>shift): \
807 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
808 if(mirror_left){
809 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
810 dst += dst_step;
811 src += src_step;
812 }
813
814 for(i=0; i<w; i++){
815 dst[i*dst_step] =
816 LIFTS(src[i*src_step],
817 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
818 inverse);
819 }
820
821 if(mirror_right){
822 dst[w*dst_step] =
823 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
824 }
825 }
826 static av_always_inline void
827 inv_liftS(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
828 int dst_step, int src_step, int ref_step,
829 int width, int mul, int add, int shift,
830 int highpass, int inverse){
831 const int mirror_left= !highpass;
832 const int mirror_right= (width&1) ^ highpass;
833 const int w= (width>>1) - 1 + (highpass & width);
834 int i;
835
836 assert(shift == 4);
837 #define LIFTS(src, ref, inv) \
838 ((inv) ? \
839 (src) + (((ref) + 4*(src))>>shift): \
840 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
841 if(mirror_left){
842 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
843 dst += dst_step;
844 src += src_step;
845 }
846
847 for(i=0; i<w; i++){
848 dst[i*dst_step] =
849 LIFTS(src[i*src_step],
850 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
851 inverse);
852 }
853
854 if(mirror_right){
855 dst[w*dst_step] =
856 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
857 }
858 }
859 #endif
860
861 static void horizontal_decompose53i(DWTELEM *b, int width){
862 DWTELEM temp[width];
863 const int width2= width>>1;
864 int x;
865 const int w2= (width+1)>>1;
866
867 for(x=0; x<width2; x++){
868 temp[x ]= b[2*x ];
869 temp[x+w2]= b[2*x + 1];
870 }
871 if(width&1)
872 temp[x ]= b[2*x ];
873 #if 0
874 {
875 int A1,A2,A3,A4;
876 A2= temp[1 ];
877 A4= temp[0 ];
878 A1= temp[0+width2];
879 A1 -= (A2 + A4)>>1;
880 A4 += (A1 + 1)>>1;
881 b[0+width2] = A1;
882 b[0 ] = A4;
883 for(x=1; x+1<width2; x+=2){
884 A3= temp[x+width2];
885 A4= temp[x+1 ];
886 A3 -= (A2 + A4)>>1;
887 A2 += (A1 + A3 + 2)>>2;
888 b[x+width2] = A3;
889 b[x ] = A2;
890
891 A1= temp[x+1+width2];
892 A2= temp[x+2 ];
893 A1 -= (A2 + A4)>>1;
894 A4 += (A1 + A3 + 2)>>2;
895 b[x+1+width2] = A1;
896 b[x+1 ] = A4;
897 }
898 A3= temp[width-1];
899 A3 -= A2;
900 A2 += (A1 + A3 + 2)>>2;
901 b[width -1] = A3;
902 b[width2-1] = A2;
903 }
904 #else
905 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
906 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0);
907 #endif
908 }
909
910 static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
911 int i;
912
913 for(i=0; i<width; i++){
914 b1[i] -= (b0[i] + b2[i])>>1;
915 }
916 }
917
918 static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
919 int i;
920
921 for(i=0; i<width; i++){
922 b1[i] += (b0[i] + b2[i] + 2)>>2;
923 }
924 }
925
926 static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
927 int y;
928 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
929 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride;
930
931 for(y=-2; y<height; y+=2){
932 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
933 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
934
935 {START_TIMER
936 if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
937 if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
938 STOP_TIMER("horizontal_decompose53i")}
939
940 {START_TIMER
941 if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
942 if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
943 STOP_TIMER("vertical_decompose53i*")}
944
945 b0=b2;
946 b1=b3;
947 }
948 }
949
950 static void horizontal_decompose97i(DWTELEM *b, int width){
951 DWTELEM temp[width];
952 const int w2= (width+1)>>1;
953
954 lift (temp+w2, b +1, b , 1, 2, 2, width, W_AM, W_AO, W_AS, 1, 1);
955 liftS(temp , b , temp+w2, 1, 2, 1, width, W_BM, W_BO, W_BS, 0, 0);
956 lift (b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
957 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
958 }
959
960
961 static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
962 int i;
963
964 for(i=0; i<width; i++){
965 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
966 }
967 }
968
969 static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
970 int i;
971
972 for(i=0; i<width; i++){
973 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
974 }
975 }
976
977 static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
978 int i;
979
980 for(i=0; i<width; i++){
981 #ifdef liftS
982 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
983 #else
984 b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + W_BO*5 + (5<<27)) / (5*16) - (1<<23);
985 #endif
986 }
987 }
988
989 static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
990 int i;
991
992 for(i=0; i<width; i++){
993 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
994 }
995 }
996
997 static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
998 int y;
999 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
1000 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride;
1001 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
1002 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
1003
1004 for(y=-4; y<height; y+=2){
1005 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1006 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1007
1008 {START_TIMER
1009 if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
1010 if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
1011 if(width>400){
1012 STOP_TIMER("horizontal_decompose97i")
1013 }}
1014
1015 {START_TIMER
1016 if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
1017 if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
1018 if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
1019 if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
1020
1021 if(width>400){
1022 STOP_TIMER("vertical_decompose97i")
1023 }}
1024
1025 b0=b2;
1026 b1=b3;
1027 b2=b4;
1028 b3=b5;
1029 }
1030 }
1031
1032 void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1033 int level;
1034
1035 for(level=0; level<decomposition_count; level++){
1036 switch(type){
1037 case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1038 case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1039 }
1040 }
1041 }
1042
1043 static void horizontal_compose53i(IDWTELEM *b, int width){
1044 IDWTELEM temp[width];
1045 const int width2= width>>1;
1046 const int w2= (width+1)>>1;
1047 int x;
1048
1049 #if 0
1050 int A1,A2,A3,A4;
1051 A2= temp[1 ];
1052 A4= temp[0 ];
1053 A1= temp[0+width2];
1054 A1 -= (A2 + A4)>>1;
1055 A4 += (A1 + 1)>>1;
1056 b[0+width2] = A1;
1057 b[0 ] = A4;
1058 for(x=1; x+1<width2; x+=2){
1059 A3= temp[x+width2];
1060 A4= temp[x+1 ];
1061 A3 -= (A2 + A4)>>1;
1062 A2 += (A1 + A3 + 2)>>2;
1063 b[x+width2] = A3;
1064 b[x ] = A2;
1065
1066 A1= temp[x+1+width2];
1067 A2= temp[x+2 ];
1068 A1 -= (A2 + A4)>>1;
1069 A4 += (A1 + A3 + 2)>>2;
1070 b[x+1+width2] = A1;
1071 b[x+1 ] = A4;
1072 }
1073 A3= temp[width-1];
1074 A3 -= A2;
1075 A2 += (A1 + A3 + 2)>>2;
1076 b[width -1] = A3;
1077 b[width2-1] = A2;
1078 #else
1079 inv_lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1);
1080 inv_lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1081 #endif
1082 for(x=0; x<width2; x++){
1083 b[2*x ]= temp[x ];
1084 b[2*x + 1]= temp[x+w2];
1085 }
1086 if(width&1)
1087 b[2*x ]= temp[x ];
1088 }
1089
1090 static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1091 int i;
1092
1093 for(i=0; i<width; i++){
1094 b1[i] += (b0[i] + b2[i])>>1;
1095 }
1096 }
1097
1098 static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1099 int i;
1100
1101 for(i=0; i<width; i++){
1102 b1[i] -= (b0[i] + b2[i] + 2)>>2;
1103 }
1104 }
1105
1106 static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1107 cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1108 cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line);
1109 cs->y = -1;
1110 }
1111
1112 static void spatial_compose53i_init(dwt_compose_t *cs, IDWTELEM *buffer, int height, int stride){
1113 cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1114 cs->b1 = buffer + mirror(-1 , height-1)*stride;
1115 cs->y = -1;
1116 }
1117
1118 static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1119 int y= cs->y;
1120
1121 IDWTELEM *b0= cs->b0;
1122 IDWTELEM *b1= cs->b1;
1123 IDWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
1124 IDWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
1125
1126 {START_TIMER
1127 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1128 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1129 STOP_TIMER("vertical_compose53i*")}
1130
1131 {START_TIMER
1132 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1133 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1134 STOP_TIMER("horizontal_compose53i")}
1135
1136 cs->b0 = b2;
1137 cs->b1 = b3;
1138 cs->y += 2;
1139 }
1140
1141 static void spatial_compose53i_dy(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride){
1142 int y= cs->y;
1143 IDWTELEM *b0= cs->b0;
1144 IDWTELEM *b1= cs->b1;
1145 IDWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1146 IDWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1147
1148 {START_TIMER
1149 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1150 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1151 STOP_TIMER("vertical_compose53i*")}
1152
1153 {START_TIMER
1154 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1155 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1156 STOP_TIMER("horizontal_compose53i")}
1157
1158 cs->b0 = b2;
1159 cs->b1 = b3;
1160 cs->y += 2;
1161 }
1162
1163 static void spatial_compose53i(IDWTELEM *buffer, int width, int height, int stride){
1164 dwt_compose_t cs;
1165 spatial_compose53i_init(&cs, buffer, height, stride);
1166 while(cs.y <= height)
1167 spatial_compose53i_dy(&cs, buffer, width, height, stride);
1168 }
1169
1170
1171 void ff_snow_horizontal_compose97i(IDWTELEM *b, int width){
1172 IDWTELEM temp[width];
1173 const int w2= (width+1)>>1;
1174
1175 inv_lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
1176 inv_lift (temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
1177 inv_liftS(b , temp , temp+w2, 2, 1, 1, width, W_BM, W_BO, W_BS, 0, 1);
1178 inv_lift (b+1 , temp+w2, b , 2, 1, 2, width, W_AM, W_AO, W_AS, 1, 0);
1179 }
1180
1181 static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1182 int i;
1183
1184 for(i=0; i<width; i++){
1185 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1186 }
1187 }
1188
1189 static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1190 int i;
1191
1192 for(i=0; i<width; i++){
1193 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1194 }
1195 }
1196
1197 static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1198 int i;
1199
1200 for(i=0; i<width; i++){
1201 #ifdef liftS
1202 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1203 #else
1204 b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
1205 #endif
1206 }
1207 }
1208
1209 static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1210 int i;
1211
1212 for(i=0; i<width; i++){
1213 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1214 }
1215 }
1216
1217 void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
1218 int i;
1219
1220 for(i=0; i<width; i++){
1221 b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
1222 b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
1223 #ifdef liftS
1224 b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
1225 #else
1226 b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
1227 #endif
1228 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1229 }
1230 }
1231
1232 static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1233 cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1234 cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line);
1235 cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1236 cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
1237 cs->y = -3;
1238 }
1239
1240 static void spatial_compose97i_init(dwt_compose_t *cs, IDWTELEM *buffer, int height, int stride){
1241 cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1242 cs->b1 = buffer + mirror(-3 , height-1)*stride;
1243 cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1244 cs->b3 = buffer + mirror(-3+2, height-1)*stride;
1245 cs->y = -3;
1246 }
1247
1248 static void spatial_compose97i_dy_buffered(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1249 int y = cs->y;
1250
1251 IDWTELEM *b0= cs->b0;
1252 IDWTELEM *b1= cs->b1;
1253 IDWTELEM *b2= cs->b2;
1254 IDWTELEM *b3= cs->b3;
1255 IDWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
1256 IDWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
1257
1258 {START_TIMER
1259 if(y>0 && y+4<height){
1260 dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
1261 }else{
1262 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1263 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1264 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1265 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1266 }
1267 if(width>400){
1268 STOP_TIMER("vertical_compose97i")}}
1269
1270 {START_TIMER
1271 if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width);
1272 if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width);
1273 if(width>400 && y+0<(unsigned)height){
1274 STOP_TIMER("horizontal_compose97i")}}
1275
1276 cs->b0=b2;
1277 cs->b1=b3;
1278 cs->b2=b4;
1279 cs->b3=b5;
1280 cs->y += 2;
1281 }
1282
1283 static void spatial_compose97i_dy(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride){
1284 int y = cs->y;
1285 IDWTELEM *b0= cs->b0;
1286 IDWTELEM *b1= cs->b1;
1287 IDWTELEM *b2= cs->b2;
1288 IDWTELEM *b3= cs->b3;
1289 IDWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1290 IDWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1291
1292 {START_TIMER
1293 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1294 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1295 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1296 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1297 if(width>400){
1298 STOP_TIMER("vertical_compose97i")}}
1299
1300 {START_TIMER
1301 if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width);
1302 if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width);
1303 if(width>400 && b0 <= b2){
1304 STOP_TIMER("horizontal_compose97i")}}
1305
1306 cs->b0=b2;
1307 cs->b1=b3;
1308 cs->b2=b4;
1309 cs->b3=b5;
1310 cs->y += 2;
1311 }
1312
1313 static void spatial_compose97i(IDWTELEM *buffer, int width, int height, int stride){
1314 dwt_compose_t cs;
1315 spatial_compose97i_init(&cs, buffer, height, stride);
1316 while(cs.y <= height)
1317 spatial_compose97i_dy(&cs, buffer, width, height, stride);
1318 }
1319
1320 static void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
1321 int level;
1322 for(level=decomposition_count-1; level>=0; level--){
1323 switch(type){
1324 case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1325 case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1326 }
1327 }
1328 }
1329
1330 static void ff_spatial_idwt_init(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1331 int level;
1332 for(level=decomposition_count-1; level>=0; level--){
1333 switch(type){
1334 case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1335 case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
1336 }
1337 }
1338 }
1339
1340 static void ff_spatial_idwt_slice(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
1341 const int support = type==1 ? 3 : 5;
1342 int level;
1343 if(type==2) return;
1344
1345 for(level=decomposition_count-1; level>=0; level--){
1346 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1347 switch(type){
1348 case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1349 break;
1350 case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1351 break;
1352 }
1353 }
1354 }
1355 }
1356
1357 static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
1358 const int support = type==1 ? 3 : 5;
1359 int level;
1360 if(type==2) return;
1361
1362 for(level=decomposition_count-1; level>=0; level--){
1363 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1364 switch(type){
1365 case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1366 break;
1367 case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1368 break;
1369 }
1370 }
1371 }
1372 }
1373
1374 static void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1375 dwt_compose_t cs[MAX_DECOMPOSITIONS];
1376 int y;
1377 ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1378 for(y=0; y<height; y+=4)
1379 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
1380 }
1381
1382 static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
1383 const int w= b->width;
1384 const int h= b->height;
1385 int x, y;
1386
1387 if(1){
1388 int run=0;
1389 int runs[w*h];
1390 int run_index=0;
1391 int max_index;
1392
1393 for(y=0; y<h; y++){
1394 for(x=0; x<w; x++){
1395 int v, p=0;
1396 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1397 v= src[x + y*stride];
1398
1399 if(y){
1400 t= src[x + (y-1)*stride];
1401 if(x){
1402 lt= src[x - 1 + (y-1)*stride];
1403 }
1404 if(x + 1 < w){
1405 rt= src[x + 1 + (y-1)*stride];
1406 }
1407 }
1408 if(x){
1409 l= src[x - 1 + y*stride];
1410 /*if(x > 1){
1411 if(orientation==1) ll= src[y + (x-2)*stride];
1412 else ll= src[x - 2 + y*stride];
1413 }*/
1414 }
1415 if(parent){
1416 int px= x>>1;
1417 int py= y>>1;
1418 if(px<b->parent->width && py<b->parent->height)
1419 p= parent[px + py*2*stride];
1420 }
1421 if(!(/*ll|*/l|lt|t|rt|p)){
1422 if(v){
1423 runs[run_index++]= run;
1424 run=0;
1425 }else{
1426 run++;
1427 }
1428 }
1429 }
1430 }
1431 max_index= run_index;
1432 runs[run_index++]= run;
1433 run_index=0;
1434 run= runs[run_index++];
1435
1436 put_symbol2(&s->c, b->state[30], max_index, 0);
1437 if(run_index <= max_index)
1438 put_symbol2(&s->c, b->state[1], run, 3);
1439
1440 for(y=0; y<h; y++){
1441 if(s->c.bytestream_end - s->c.bytestream < w*40){
1442 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1443 return -1;
1444 }
1445 for(x=0; x<w; x++){
1446 int v, p=0;
1447 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1448 v= src[x + y*stride];
1449
1450 if(y){
1451 t= src[x + (y-1)*stride];
1452 if(x){
1453 lt= src[x - 1 + (y-1)*stride];
1454 }
1455 if(x + 1 < w){
1456 rt= src[x + 1 + (y-1)*stride];
1457 }
1458 }
1459 if(x){
1460 l= src[x - 1 + y*stride];
1461 /*if(x > 1){
1462 if(orientation==1) ll= src[y + (x-2)*stride];
1463 else ll= src[x - 2 + y*stride];
1464 }*/
1465 }
1466 if(parent){
1467 int px= x>>1;
1468 int py= y>>1;
1469 if(px<b->parent->width && py<b->parent->height)
1470 p= parent[px + py*2*stride];
1471 }
1472 if(/*ll|*/l|lt|t|rt|p){
1473 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1474
1475 put_rac(&s->c, &b->state[0][context], !!v);
1476 }else{
1477 if(!run){
1478 run= runs[run_index++];
1479
1480 if(run_index <= max_index)
1481 put_symbol2(&s->c, b->state[1], run, 3);
1482 assert(v);
1483 }else{
1484 run--;
1485 assert(!v);
1486 }
1487 }
1488 if(v){
1489 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1490 int l2= 2*FFABS(l) + (l<0);
1491 int t2= 2*FFABS(t) + (t<0);
1492
1493 put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
1494 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
1495 }
1496 }
1497 }
1498 }
1499 return 0;
1500 }
1501
1502 static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
1503 // encode_subband_qtree(s, b, src, parent, stride, orientation);
1504 // encode_subband_z0run(s, b, src, parent, stride, orientation);
1505 return encode_subband_c0run(s, b, src, parent, stride, orientation);
1506 // encode_subband_dzr(s, b, src, parent, stride, orientation);
1507 }
1508
/**
 * Decode the coefficients of one subband into its sparse list b->x_coeff.
 *
 * Every nonzero coefficient is stored as an (x, coeff) pair, with coeff
 * holding 2*(decoded magnitude) plus the sign bit in bit 0.  Each row is
 * closed by an entry whose x is w+1, and one more such marker is appended
 * after the last row.
 *
 * @param s           codec context; s->c is the range coder read from
 * @param b           subband to fill
 * @param parent      parent subband whose x_coeff list supplies the parent
 *                    context, or NULL
 * @param orientation unused here
 */
static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
    const int w= b->width;
    const int h= b->height;
    int x,y;

    if(1){
        int run, runs;
        x_and_coeff *xc= b->x_coeff;            // write cursor
        x_and_coeff *prev_xc= NULL;             // read cursor in the previous row's entries
        x_and_coeff *prev2_xc= xc;              // start of the row being written
        x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL; // read cursor in the parent's entries
        x_and_coeff *prev_parent_xc= parent_xc; // start of the current parent row

        // number of coded runs, then the first run length; INT_MAX when none is left
        runs= get_symbol2(&s->c, b->state[30], 0);
        if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
        else run= INT_MAX;

        for(y=0; y<h; y++){
            int v=0;
            int lt=0, t=0, rt=0;

            // prime rt with the previous row's coefficient at x==0, if any
            if(y && prev_xc->x == 0){
                rt= prev_xc->coeff;
            }
            for(x=0; x<w; x++){
                int p=0;
                const int l= v; // the value decoded at x-1

                // shift the top neighbours one column to the left
                lt= t; t= rt;

                if(y){
                    if(prev_xc->x <= x)
                        prev_xc++;
                    if(prev_xc->x == x + 1)
                        rt= prev_xc->coeff;
                    else
                        rt=0;
                }
                if(parent_xc){
                    // the parent band is indexed with x>>1
                    if(x>>1 > parent_xc->x){
                        parent_xc++;
                    }
                    if(x>>1 == parent_xc->x){
                        p= parent_xc->coeff;
                    }
                }
                if(/*ll|*/l|lt|t|rt|p){
                    // stored values are 2*magnitude+sign, so >>1 gives the magnitude and &~1 twice the magnitude
                    int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));

                    v=get_rac(&s->c, &b->state[0][context]);
                    if(v){
                        v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
                        v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);

                        xc->x=x;
                        (xc++)->coeff= v;
                    }
                }else{
                    if(!run){
                        // run exhausted: a nonzero coefficient sits here; fetch the next run length first
                        if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
                        else run= INT_MAX;
                        v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
                        v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);

                        xc->x=x;
                        (xc++)->coeff= v;
                    }else{
                        int max_run;
                        run--;
                        v=0;

                        // skip ahead over further zeros, limited by the run itself, by the next
                        // entry of the previous row (or the row end) and by the next parent entry
                        if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
                        else max_run= FFMIN(run, w-x-1);
                        if(parent_xc)
                            max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
                        x+= max_run;
                        run-= max_run;
                    }
                }
            }
            (xc++)->x= w+1; //end marker
            prev_xc= prev2_xc;
            prev2_xc= xc;

            if(parent_xc){
                if(y&1){
                    // odd row done: move past the end marker of the current parent row
                    while(parent_xc->x != parent->width+1)
                        parent_xc++;
                    parent_xc++;
                    prev_parent_xc= parent_xc;
                }else{
                    // even row done: the next row reuses the same parent row
                    parent_xc= prev_parent_xc;
                }
            }
        }

        (xc++)->x= w+1; //end marker
    }
}
1608
1609 static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1610 const int w= b->width;
1611 int y;
1612 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
1613 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1614 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1615 int new_index = 0;
1616
1617 START_TIMER
1618
1619 if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){
1620 qadd= 0;
1621 qmul= 1<<QEXPSHIFT;
1622 }
1623
1624 /* If we are on the second or later slice, restore our index. */
1625 if (start_y != 0)
1626 new_index = save_state[0];
1627
1628
1629 for(y=start_y; y<h; y++){
1630 int x = 0;
1631 int v;
1632 IDWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1633 memset(line, 0, b->width*sizeof(IDWTELEM));
1634 v = b->x_coeff[new_index].coeff;
1635 x = b->x_coeff[new_index++].x;
1636 while(x < w)
1637 {
1638 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1639 register int u= -(v&1);
1640 line[x] = (t^u) - u;
1641
1642 v = b->x_coeff[new_index].coeff;
1643 x = b->x_coeff[new_index++].x;
1644 }
1645 }
1646 if(w > 200 && start_y != 0/*level+1 == s->spatial_decomposition_count*/){
1647 STOP_TIMER("decode_subband")
1648 }
1649
1650 /* Save our variables for the next slice. */
1651 save_state[0] = new_index;
1652
1653 return;
1654 }
1655
1656 static void reset_contexts(SnowContext *s){ //FIXME better initial contexts
1657 int plane_index, level, orientation;
1658
1659 for(plane_index=0; plane_index<3; plane_index++){
1660 for(level=0; level<MAX_DECOMPOSITIONS; level++){
1661 for(orientation=level ? 1:0; orientation<4; orientation++){
1662 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
1663 }
1664 }
1665 }
1666 memset(s->header_state, MID_STATE, sizeof(s->header_state));
1667 memset(s->block_state, MID_STATE, sizeof(s->block_state));
1668 }
1669
1670 static int alloc_blocks(SnowContext *s){
1671 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1672 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1673
1674 s->b_width = w;
1675 s->b_height= h;
1676
1677 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
1678 return 0;
1679 }
1680
1681 static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1682 uint8_t *bytestream= d->bytestream;
1683 uint8_t *bytestream_start= d->bytestream_start;
1684 *d= *s;
1685 d->bytestream= bytestream;
1686 d->bytestream_start= bytestream_start;
1687 }
1688
//near copy & paste from dsputil, FIXME
/**
 * Sum all pixels of a w x w square.
 *
 * @param pix       top-left pixel of the square
 * @param line_size distance between consecutive rows, in bytes
 * @param w         edge length of the square
 * @return the sum of the w*w pixel values
 */
static int pix_sum(uint8_t * pix, int line_size, int w)
{
    int total = 0;
    int row, col;

    for (row = 0; row < w; row++) {
        for (col = 0; col < w; col++)
            total += pix[col];
        pix += line_size;
    }
    return total;
}
1704
1705 //near copy & paste from dsputil, FIXME
1706 static int pix_norm1(uint8_t * pix, int line_size, int w)
1707 {
1708 int s, i, j;
1709 uint32_t *sq = ff_squareTbl + 256;
1710
1711 s = 0;
1712 for (i = 0; i < w; i++) {
1713 for (j = 0; j < w; j ++) {
1714 s += sq[pix[0]];
1715 pix ++;
1716 }
1717 pix += line_size - w;
1718 }
1719 return s;
1720 }
1721
1722 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
1723 const int w= s->b_width << s->block_max_depth;
1724 const int rem_depth= s->block_max_depth - level;
1725 const int index= (x + y*w) << rem_depth;
1726 const int block_w= 1<<rem_depth;
1727 BlockNode block;
1728 int i,j;
1729
1730 block.color[0]= l;
1731 block.color[1]= cb;
1732 block.color[2]= cr;
1733 block.mx= mx;
1734 block.my= my;
1735 block.ref= ref;
1736 block.type= type;
1737 block.level= level;
1738
1739 for(j=0; j<block_w; j++){
1740 for(i=0; i<block_w; i++){
1741 s->block[index + i + j*w]= block;
1742 }
1743 }
1744 }
1745
1746 static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
1747 const int offset[3]= {
1748 y*c-> stride + x,
1749 ((y*c->uvstride + x)>>1),
1750 ((y*c->uvstride + x)>>1),
1751 };
1752 int i;
1753 for(i=0; i<3; i++){
1754 c->src[0][i]= src [i];
1755 c->ref[0][i]= ref [i] + offset[i];
1756 }
1757 assert(!ref_index);
1758 }
1759
1760 static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
1761 const BlockNode *left, const BlockNode *top, const BlockNode *tr){
1762 if(s->ref_frames == 1){
1763 *mx = mid_pred(left->mx, top->mx, tr->mx);
1764 *my = mid_pred(left->my, top->my, tr->my);
1765 }else{
1766 const int *scale = scale_mv_ref[ref];
1767 *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8,
1768 (top ->mx * scale[top ->ref] + 128) >>8,
1769 (tr ->mx * scale[tr ->ref] + 128) >>8);
1770 *my = mid_pred((left->my * scale[left->ref] + 128) >>8,
1771 (top ->my * scale[top ->ref] + 128) >>8,
1772 (tr ->my * scale[tr ->ref] + 128) >>8);
1773 }
1774 }
1775
1776 //FIXME copy&paste
// Names for the rows of the predictor table "int P[10][2]" that is local to
// encode_q_branch(). In each row element [0] holds the x component and
// element [1] the y component of a motion vector.
// P_LEFT, P_TOP and P_TOPRIGHT are filled from the left, top and top-right
// neighbour blocks; P_MEDIAN is their component-wise mid_pred().
1777 #define P_LEFT P[1]
1778 #define P_TOP P[2]
1779 #define P_TOPRIGHT P[3]
1780 #define P_MEDIAN P[4]
// NOTE(review): P_MV1 and FLAG_QPEL are not referenced in the part of the
// file reviewed here; presumably they are kept for the shared motion
// estimation code this was copied from -- confirm before removing.
1781 #define P_MV1 P[9]
1782 #define FLAG_QPEL 1 //must be 1
1783
/**
 * Decides how to code one block of the block quadtree and writes the result.
 *
 * On keyframes the covered BlockNodes are simply set to intra with the colors
 * of the left neighbour and 0 is returned. Otherwise up to three alternatives
 * are compared by their score:
 *  - an inter block, found by a motion search over all s->ref_frames
 *    reference pictures,
 *  - an intra block using the rounded mean color of the block,
 *  - unless level == s->block_max_depth, a split into four blocks of level+1,
 *    handled by recursive calls.
 * The inter and intra candidates are trial-coded into p_buffer / i_buffer with
 * copies of the range coder and of s->block_state. If the split is not kept,
 * the winning candidate is copied over whatever was written to the real
 * stream since function entry.
 *
 * @param s     codec context; s->c, s->block_state and s->block are updated
 * @param level quadtree level, level 0 has the largest block size
 * @param x     horizontal position in units of blocks of this level
 * @param y     vertical position in units of blocks of this level
 * @return score of the alternative that was kept, 0 on keyframes
 */
1784 static int encode_q_branch(SnowContext *s, int level, int x, int y){
// scratch output buffers and coder state copies for the inter (p_) and intra (i_) candidates
1785 uint8_t p_buffer[1024];
1786 uint8_t i_buffer[1024];
1787 uint8_t p_state[sizeof(s->block_state)];
1788 uint8_t i_state[sizeof(s->block_state)];
1789 RangeCoder pc, ic;
// position of the real output stream on entry, needed to rewind to it later
1790 uint8_t *pbbak= s->c.bytestream;
1791 uint8_t *pbbak_start= s->c.bytestream_start;
1792 int score, score2, iscore, i_len, p_len, block_s, sum, base_bits;
// w, h: dimensions of s->block, which is stored at block_max_depth resolution
1793 const int w= s->b_width << s->block_max_depth;
1794 const int h= s->b_height << s->block_max_depth;
1795 const int rem_depth= s->block_max_depth - level;
// index of the top-left BlockNode covered by this block
1796 const int index= (x + y*w) << rem_depth;
// width and height of the block in luma samples
1797 const int block_w= 1<<(LOG2_MB_SIZE - level);
1798 int trx= (x+1)<<rem_depth;
1799 int try= (y+1)<<rem_depth;
// neighbouring nodes; null_block replaces neighbours outside the block array
1800 const BlockNode *left = x ? &s->block[index-1] : &null_block;
1801 const BlockNode *top = y ? &s->block[index-w] : &null_block;
1802 const BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
1803 const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
1804 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
// the top-right node is only used for even x or level 0, otherwise top-left is substituted
1805 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
// colors are predicted from the left neighbour
1806 int pl = left->color[0];
1807 int pcb= left->color[1];
1808 int pcr= left->color[2];
1809 int pmx, pmy;
1810 int mx=0, my=0;
1811 int l,cr,cb;
1812 const int stride= s->current_picture.linesize[0];
1813 const int uvstride= s->current_picture.linesize[1];
// top-left sample of this block in each plane of the input picture (chroma at half resolution)
1814 uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w,
1815 s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
1816 s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
1817 int P[10][2];
1818 int16_t last_mv[3][2];
1819 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
1820 const int shift= 1+qpel;
1821 MotionEstContext *c= &s->m.me;
// the *_context values are used below as offsets into the range coder state arrays
1822 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
1823 int mx_context= av_log2(2*FFABS(left->mx - top->mx));
1824 int my_context= av_log2(2*FFABS(left->my - top->my));
1825 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1826 int ref, best_ref, ref_score, ref_mx, ref_my;
1827
1828 assert(sizeof(s->block_state) >= 256);
// keyframes carry no block information: mark the block intra with the predicted colors
1829 if(s->keyframe){
1830 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
1831 return 0;
1832 }
1833
1834 // clip predictors / edge ?
1835
// candidate predictors handed to the motion search
1836 P_LEFT[0]= left->mx;
1837 P_LEFT[1]= left->my;
1838 P_TOP [0]= top->mx;
1839 P_TOP [1]= top->my;
1840 P_TOPRIGHT[0]= tr->mx;
1841 P_TOPRIGHT[1]= tr->my;
1842
// vectors currently stored for this block, its right and its bottom neighbour
1843 last_mv[0][0]= s->block[index].mx;
1844 last_mv[0][1]= s->block[index].my;
1845 last_mv[1][0]= right->mx;
1846 last_mv[1][1]= right->my;
1847 last_mv[2][0]= bottom->mx;
1848 last_mv[2][1]= bottom->my;
1849
// NOTE(review): presumably placeholder values for fields the shared motion
// estimation code reads from the MpegEncContext -- confirm against motion_est
1850 s->m.mb_stride=2;
1851 s->m.mb_x=
1852 s->m.mb_y= 0;
1853 c->skip= 0;
1854
1855 assert(c-> stride == stride);
1856 assert(c->uvstride == uvstride);
1857
1858 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
1859 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
1860 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
// note: the index expression also sets s->m.f_code to 1
1861 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
1862
// search range relative to the block position: the block may move at most
// 16-2 samples outside the area covered by the block array
1863 c->xmin = - x*block_w - 16+2;
1864 c->ymin = - y*block_w - 16+2;
1865 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
1866 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
1867
// clip the predictors to the search range (predictors are in 1<<shift sub-sample units)
1868 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
1869 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
1870 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
1871 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
1872 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
1873 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
1874 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
1875
1876 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
1877 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
1878
// the first block row uses the left predictor, all other rows the median
1879 if (!y) {
1880 c->pred_x= P_LEFT[0];
1881 c->pred_y= P_LEFT[1];
1882 } else {
1883 c->pred_x = P_MEDIAN[0];
1884 c->pred_y = P_MEDIAN[1];
1885 }
1886
// motion search in every reference picture, the lowest score wins
1887 score= INT_MAX;
1888 best_ref= 0;
1889 for(ref=0; ref<s->ref_frames; ref++){
1890 init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);
1891
1892 ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
1893 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
1894
1895 assert(ref_mx >= c->xmin);
1896 assert(ref_mx <= c->xmax);
1897 assert(ref_my >= c->ymin);
1898 assert(ref_my <= c->ymax);
1899
// the sub-sample search refines ref_mx/ref_my; its score is replaced by ff_get_mb_score()
1900 ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
1901 ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
// penalty that grows with the reference index
1902 ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
// keep the per-reference result when the corresponding array exists
1903 if(s->ref_mvs[ref]){
1904 s->ref_mvs[ref][index][0]= ref_mx;
1905 s->ref_mvs[ref][index][1]= ref_my;
1906 s->ref_scores[ref][index]= ref_score;
1907 }
1908 if(score > ref_score){
1909 score= ref_score;
1910 best_ref= ref;
1911 mx= ref_mx;
1912 my= ref_my;
1913 }
1914 }
1915 //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2
1916
1917 // subpel search
// NOTE(review): base_bits is presumably the bit position of the coder before
// the trial encodes, so that only the added bits are charged -- confirm get_rac_count()
1918 base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start);
// trial encode of the inter candidate into p_buffer
1919 pc= s->c;
1920 pc.bytestream_start=
1921 pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
1922 memcpy(p_state, s->block_state, sizeof(s->block_state));
1923
// "not split" flag (1), only coded when a split would be possible at this level
1924 if(level!=s->block_max_depth)
1925 put_rac(&pc, &p_state[4 + s_context], 1);
// block type flag: 0 = inter
1926 put_rac(&pc, &p_state[1 + left->type + top->type], 0);
1927 if(s->ref_frames > 1)
1928 put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
// the motion vector is coded as difference to its prediction
1929 pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
1930 put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
1931 put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
1932 p_len= pc.bytestream - pc.bytestream_start;
// add the rate term, weighted by lambda2
1933 score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT;
1934
// intra candidate: l is the rounded mean of the luma block; iscore expands
// sum((p-l)^2) as sum(p^2) - 2*l*sum(p) + l*l*N (assuming pix_norm1() yields sum(p^2))
1935 block_s= block_w*block_w;
1936 sum = pix_sum(current_data[0], stride, block_w);
1937 l= (sum + block_s/2)/block_s;
1938 iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
1939
// rounded chroma means; the chroma blocks are block_w/2 wide and do not contribute to iscore
1940 block_s= block_w*block_w>>2;
1941 sum = pix_sum(current_data[1], uvstride, block_w>>1);
1942 cb= (sum + block_s/2)/block_s;
1943 // iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
1944 sum = pix_sum(current_data[2], uvstride, block_w>>1);
1945 cr= (sum + block_s/2)/block_s;
1946 // iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
1947
// trial encode of the intra candidate into i_buffer
1948 ic= s->c;
1949 ic.bytestream_start=
1950 ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
1951 memcpy(i_state, s->block_state, sizeof(s->block_state));
1952 if(level!=s->block_max_depth)
1953 put_rac(&ic, &i_state[4 + s_context], 1);
// block type flag: 1 = intra, followed by the color differences to the left neighbour
1954 put_rac(&ic, &i_state[1 + left->type + top->type], 1);
1955 put_symbol(&ic, &i_state[32], l-pl , 1);
1956 put_symbol(&ic, &i_state[64], cb-pcb, 1);
1957 put_symbol(&ic, &i_state[96], cr-pcr, 1);
1958 i_len= ic.bytestream - ic.bytestream_start;
1959 iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT;
1960
1961 // assert(score==256*256*256*64-1);
1962 assert(iscore < 255*255*256 + s->lambda2*10);
1963 assert(iscore >= 0);
1964 assert(l>=0 && l<=255);
1965 assert(pl>=0 && pl<=255);
1966
// only the largest blocks contribute to the scene change statistic
1967 if(level==0){
1968 int varc= iscore >> 8;
1969 int vard= score >> 8;
1970 if (vard <= 64 || vard < varc)
1971 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
1972 else
1973 c->scene_change_score+= s->m.qscale;
1974 }
1975
// split candidate: the "split" flag (0) and the four children are written
// directly to the real stream s->c
1976 if(level!=s->block_max_depth){
1977 put_rac(&s->c, &s->block_state[4 + s_context], 0);
1978 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
1979 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
1980 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
1981 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
1982 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
1983
// the split wins: what the children wrote stays in place
1984 if(score2 < score && score2 < iscore)
1985 return score2;
1986 }
1987
// otherwise copy the better trial encode to the stream position saved on
// entry (overwriting any output of the children) and adopt its coder state,
// context states and block parameters
1988 if(iscore < score){
1989 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
1990 memcpy(pbbak, i_buffer, i_len);
1991 s->c= ic;
1992 s->c.bytestream_start= pbbak_start;
1993 s->c.bytestream= pbbak + i_len;
1994 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
1995 memcpy(s->block_state, i_state, sizeof(s->block_state));
1996 return iscore;
1997 }else{
1998 memcpy(pbbak, p_buffer, p_len);
1999 s->c= pc;
2000 s->c.bytestream_start= pbbak_start;
2001 s->c.bytestream= pbbak + p_len;
2002 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
2003 memcpy(s->block_state, p_state, sizeof(s->block_state));
2004 return score;
2005 }
2006 }
2007
2008 static av_always_inline int same_block(BlockNode *a, BlockNode *b){
2009 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
2010 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
2011 }else{
2012 return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
2013 }
2014 }
2015
2016 static void encode_q_branch2(SnowContext *s, int level, int x, int y){
2017 const int w= s->b_width << s->block_max_depth;
2018 const int rem_depth= s->block_max_depth - level;
2019 const int index= (x + y*w) << rem_depth;
2020 int trx= (x+1)<<rem_depth;
2021 BlockNode *b= &s->block[index];
2022 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2023 const BlockNode *top = y ? &s->block[index-w] : &null_block;
2024 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2025 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2026 int pl = left->color[0];
2027 int pcb= left->color[1];
2028 int pcr= left->color[2];
2029 int pmx, pmy;
2030 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2031 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref;
2032 int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref;
2033 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2034
2035 if(s->keyframe){
2036 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
2037 return;
2038 }
2039
2040 if(level!=s->block_max_depth){
2041 if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
2042 put_rac(&s->c, &s->block_state[4 + s_context], 1);
2043 }else{
2044 put_rac(&s->c, &s->block_state[4 + s_context], 0);
2045 encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
2046 encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
2047 encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
2048 encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
2049 return;
2050 }
2051 }
2052 if(b->type & BLOCK_INTRA){
2053 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
2054 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
2055 put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
2056 put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
2057 put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
2058 set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
2059 }else{
2060 pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
2061 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
2062 if(s->ref_frames > 1)
2063 put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
2064 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
2065 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
2066 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
2067 }
2068 }
2069
2070 static void decode_q_branch(SnowContext *s, int level, int x, int y){
2071 const int w= s->b_width << s->block_max_depth;
2072 const int rem_depth= s->block_max_depth - level;
2073 const int index= (x + y*w) << rem_depth;
2074 int trx= (x+1)<<rem_depth;
2075 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2076 const BlockNode *top = y ? &s->block[index-w] : &null_block;
2077 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2078 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2079 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2080
2081 if(s->keyframe){
2082 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
2083 return;
2084 }
2085
2086 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
2087 int type, mx, my;
2088 int l = left->color[0];
2089 int cb= left->color[1];
2090 int cr= left->color[2];
2091 int ref = 0;
2092 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2093 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
2094 int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
2095
2096 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
2097
2098 if(type){
2099 pred_mv(s, &mx, &my, 0, left, top, tr);
2100 l += get_symbol(&s->c, &s->block_state[32], 1);
2101 cb+= get_symbol(&s->c, &s->block_state[64], 1);
2102 cr+= get_symbol(&s->c, &s->block_state[96], 1);
2103 }else{
2104 if(s->ref_frames > 1)
2105 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
2106 pred_mv(s, &mx, &my, ref, left, top, tr);
2107 mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
2108 my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
2109 }
2110 set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
2111 }else{
2112 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
2113 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
2114 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
2115 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
2116 }
2117 }
2118
2119 static void encode_blocks(SnowContext *s, int search){
2120 int x, y;
2121 int w= s->b_width;
2122 int h= s->b_height;
2123
2124 if(s->avctx->me_method == ME_ITER && !s->keyframe && search)
2125 iterative_me(s);
2126
2127 for(y=0; y<h; y++){
2128 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
2129 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2130 return;
2131 }
2132 for(x=0; x<w; x++){
2133 if(s->avctx->me_method == ME_ITER || !search)
2134 encode_q_branch2(s, 0, x, y);
2135 else
2136 encode_q_branch (s, 0, x, y);
2137 }
2138 }
2139 }
2140
2141 static void decode_blocks(SnowContext *s){
2142 int x, y;
2143 int w= s->b_width;
2144 int h= s->b_height;
2145
2146 for(y=0; y<h; y++){
2147 for(x=0; x<w; x++){
2148 decode_q_branch(s, 0, x, y);
2149 }
2150 }
2151 }
2152
2153 static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
2154 const static uint8_t weight[64]={
2155 8,7,6,5,4,3,2,1,
2156 7,7,0,0,0,0,0,1,
2157 6,0,6,0,0,0,2,0,
2158 5,0,0,5,0,3,0,0,
2159 4,0,0,0,4,0,0,0,
2160 3,0,0,5,0,3,0,0,
2161 2,0,6,0,0,0,2,0,
2162 1,7,0,0,0,0,0,1,
2163 };
2164
2165 const static uint8_t brane[256]={
2166 0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
2167 0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
2168 0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
2169 0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
2170 0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
2171 0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
2172 0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
2173 0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
2174 0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
2175 0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
2176 0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
2177 0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
2178 0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
2179 0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
2180 0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
2181 0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
2182 };
2183
2184 const static uint8_t needs[16]={
2185 0,1,0,0,
2186 2,4,2,0,
2187 0,1,0,0,
2188 15
2189 };
2190
2191 int x, y, b, r, l;
2192 int16_t tmpIt [64*(32+HTAPS_MAX)];
2193 uint8_t tmp2t[3][stride*(32+HTAPS_MAX)];
2194 int16_t *tmpI= tmpIt;
2195 uint8_t *tmp2= tmp2t[0];
2196 uint8_t *hpel[11];
2197 START_TIMER
2198 assert(dx<16 && dy<16);