rename always_inline to av_always_inline and move to common.h
[libav.git] / libavcodec / snow.c
1 /*
2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include "avcodec.h"
22 #include "common.h"
23 #include "dsputil.h"
24 #include "snow.h"
25
26 #include "rangecoder.h"
27
28 #include "mpegvideo.h"
29
30 #undef NDEBUG
31 #include <assert.h>
32
/**
 * 256-entry lookup table with three output levels (-1, 0, +1).
 * Entries 0, 1 and 255 give 0, entries 2..127 give +1, entries 128..254 give -1.
 * NOTE(review): presumably indexed by a value reduced to 8 bits (two's
 * complement) -- confirm at the use sites.
 */
static const int8_t quant3[256] = {
     0,  0,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  0,
};
/**
 * 256-entry lookup table with three output levels (-1, 0, +1).
 * Only entry 0 gives 0; entries 1..127 give +1, entries 128..255 give -1.
 * NOTE(review): presumably indexed by a value reduced to 8 bits (two's
 * complement) -- confirm at the use sites.
 */
static const int8_t quant3b[256] = {
     0,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
};
/**
 * 256-entry lookup table with three output levels (-1, 0, +1).
 * Entries 0 and 1 give 0; from entry 2 on, even indices give +1 and odd
 * indices give -1.
 */
static const int8_t quant3bA[256] = {
     0,  0,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,
     1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,
     1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,
     1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,
     1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,
     1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,
     1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,
     1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,
     1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,
     1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,
     1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,
     1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,
     1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,
     1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,
     1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,
     1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,
};
/**
 * 256-entry lookup table with five output levels (-2..+2).
 * The lower half (0..127) holds the non-negative levels, the upper half
 * (128..255) the negative ones; entry 256-i is the negation of entry i.
 */
static const int8_t quant5[256] = {
     0,  1,  1,  1,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
     2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
     2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
     2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
     2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
     2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
     2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
     2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
    -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
    -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
    -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
    -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
    -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
    -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
    -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
    -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -1, -1, -1,
};
/**
 * 256-entry lookup table with seven output levels (-3..+3).
 * The lower half (0..127) holds the non-negative levels, the upper half
 * (128..255) the negative ones; entry 256-i is the negation of entry i.
 */
static const int8_t quant7[256] = {
     0,  1,  1,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
     2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
     2,  2,  2,  2,  2,  2,  2,  2,  3,  3,  3,  3,  3,  3,  3,  3,
     3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
     3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
     3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
     3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
     3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
    -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3,
    -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3,
    -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3,
    -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3,
    -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3,
    -3, -3, -3, -3, -3, -3, -3, -3, -3, -2, -2, -2, -2, -2, -2, -2,
    -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
    -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -1, -1,
};
/**
 * 256-entry lookup table with nine output levels (-4..+4).
 * The lower half (0..127) holds the non-negative levels, the upper half
 * (128..255) the negative ones; entry 256-i is the negation of entry i.
 */
static const int8_t quant9[256] = {
     0,  1,  1,  2,  2,  2,  2,  3,  3,  3,  3,  3,  3,  3,  3,  3,
     3,  3,  3,  3,  3,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
     4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
     4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
     4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
     4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
     4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
     4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
    -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
    -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
    -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
    -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
    -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
    -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
    -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -3, -3, -3, -3,
    -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -2, -2, -2, -2, -1, -1,
};
/**
 * 256-entry lookup table with eleven output levels (-5..+5).
 * The lower half (0..127) holds the non-negative levels, the upper half
 * (128..255) the negative ones; entry 256-i is the negation of entry i.
 */
static const int8_t quant11[256] = {
     0,  1,  2,  2,  2,  3,  3,  3,  3,  3,  3,  3,  4,  4,  4,  4,
     4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
     4,  4,  4,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
     5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
     5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
     5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
     5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
     5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
    -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5,
    -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5,
    -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5,
    -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5,
    -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5,
    -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -4, -4,
    -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
    -4, -4, -4, -4, -4, -3, -3, -3, -3, -3, -3, -3, -2, -2, -2, -1,
};
/**
 * 256-entry lookup table with thirteen output levels (-6..+6).
 * The lower half (0..127) holds the non-negative levels, the upper half
 * (128..255) the negative ones; entry 256-i is the negation of entry i.
 */
static const int8_t quant13[256] = {
     0,  1,  2,  2,  3,  3,  3,  3,  4,  4,  4,  4,  4,  4,  4,  4,
     4,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
     5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
     5,  5,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
     6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
     6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
     6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
     6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
    -6, -6, -6, -6, -6, -6, -6, -6, -6, -6, -6, -6, -6, -6, -6, -6,
    -6, -6, -6, -6, -6, -6, -6, -6, -6, -6, -6, -6, -6, -6, -6, -6,
    -6, -6, -6, -6, -6, -6, -6, -6, -6, -6, -6, -6, -6, -6, -6, -6,
    -6, -6, -6, -6, -6, -6, -6, -6, -6, -6, -6, -6, -6, -6, -6, -6,
    -6, -6, -6, -6, -6, -6, -6, -6, -6, -6, -6, -6, -6, -6, -6, -5,
    -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5,
    -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5,
    -4, -4, -4, -4, -4, -4, -4, -4, -4, -3, -3, -3, -3, -2, -2, -1,
};
177
178 #if 0 //64*cubic
179 static const uint8_t obmc32[1024]={
180 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
181 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
182 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
183 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
184 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
185 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
186 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
187 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
188 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
189 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
190 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
191 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
192 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
193 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
194 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
195 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
196 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
197 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
198 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
199 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
200 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
201 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
202 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
203 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
204 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
205 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
206 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
207 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
208 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
209 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
210 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
211 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
212 //error:0.000022
213 };
214 static const uint8_t obmc16[256]={
215 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
216 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
217 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
218 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
219 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
220 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
221 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
222 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
223 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
224 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
225 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
226 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
227 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
228 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
229 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
230 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
231 //error:0.000033
232 };
233 #elif 1 // 64*linear
/**
 * 32x32 weight table (row-major) of the "64*linear" variant, the branch
 * selected by the surrounding #elif 1. The table is mirror-symmetric both
 * horizontally and vertically.
 */
static const uint8_t obmc32[1024] = {
      0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,
      0,  4,  4,  4,  8,  8,  8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12,  8,  8,  8,  4,  4,  4,  0,
      0,  4,  8,  8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12,  8,  8,  4,  0,
      0,  4,  8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12,  8,  4,  0,
      4,  8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12,  8,  4,
      4,  8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12,  8,  4,
      4,  8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16,  8,  4,
      4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12,  4,
      4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12,  4,
      4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16,  4,
      4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16,  4,
      4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16,  4,
      8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20,  8,
      8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20,  8,
      8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20,  8,
      8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24,  8,
      8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24,  8,
      8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20,  8,
      8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20,  8,
      8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20,  8,
      4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16,  4,
      4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16,  4,
      4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16,  4,
      4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12,  4,
      4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12,  4,
      4,  8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16,  8,  4,
      4,  8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12,  8,  4,
      4,  8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12,  8,  4,
      0,  4,  8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12,  8,  4,  0,
      0,  4,  8,  8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12,  8,  8,  4,  0,
      0,  4,  4,  4,  8,  8,  8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12,  8,  8,  8,  4,  4,  4,  0,
      0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,
//error:0.000020
};
/**
 * 16x16 weight table (row-major) of the "64*linear" variant, the branch
 * selected by the surrounding #elif 1. The table is mirror-symmetric both
 * horizontally and vertically.
 */
static const uint8_t obmc16[256] = {
      0,  4,  4,  8,  8, 12, 12, 16, 16, 12, 12,  8,  8,  4,  4,  0,
      4,  8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16,  8,  4,
      4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16,  4,
      8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20,  8,
      8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28,  8,
     12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
     12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
     16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
     16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
     12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
     12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
      8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28,  8,
      8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20,  8,
      4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16,  4,
      4,  8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16,  8,  4,
      0,  4,  4,  8,  8, 12, 12, 16, 16, 12, 12,  8,  8,  4,  4,  0,
//error:0.000015
};
288 #else //64*cos
289 static const uint8_t obmc32[1024]={
290 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
291 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
292 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
293 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
294 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
295 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
296 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
297 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
298 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
299 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
300 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
301 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
302 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
303 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
304 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
305 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
306 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
307 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
308 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
309 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
310 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
311 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
312 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
313 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
314 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
315 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
316 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
317 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
318 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
319 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
320 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
321 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
322 //error:0.000022
323 };
324 static const uint8_t obmc16[256]={
325 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
326 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
327 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
328 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
329 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
330 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
331 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
332 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
333 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
334 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
335 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
336 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
337 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
338 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
339 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
340 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
341 //error:0.000022
342 };
343 #endif
344
345 //linear *64
/** 8x8 weight table (row-major), mirror-symmetric in both directions. */
static const uint8_t obmc8[64] = {
      4, 12, 20, 28, 28, 20, 12,  4,
     12, 36, 60, 84, 84, 60, 36, 12,
     20, 60,100,140,140,100, 60, 20,
     28, 84,140,196,196,140, 84, 28,
     28, 84,140,196,196,140, 84, 28,
     20, 60,100,140,140,100, 60, 20,
     12, 36, 60, 84, 84, 60, 36, 12,
      4, 12, 20, 28, 28, 20, 12,  4,
//error:0.000000
};
357
358 //linear *64
/** 4x4 weight table (row-major), mirror-symmetric in both directions. */
static const uint8_t obmc4[16] = {
     16, 48, 48, 16,
     48,144,144, 48,
     48,144,144, 48,
     16, 48, 48, 16,
//error:0.000000
};
366
367 static const uint8_t *obmc_tab[4]={
368 obmc32, obmc16, obmc8, obmc4
369 };
370
371 static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
372
/**
 * Per-block record.
 * NOTE(review): mx/my look like a motion vector and ref like a reference
 * frame index -- confirm against the code that fills these in.
 */
typedef struct BlockNode{
    int16_t mx;                 /* signed 16-bit x component */
    int16_t my;                 /* signed 16-bit y component */
    uint8_t ref;
    uint8_t color[3];           /* three 8-bit values; null_block sets each to 128 */
    uint8_t type;               /* holds the BLOCK_* bits defined below */
//#define TYPE_SPLIT 1
#define BLOCK_INTRA   1
#define BLOCK_OPT     2
//#define TYPE_NOCOLOR 4
    uint8_t level;              //FIXME merge into type?
}BlockNode;
385
386 static const BlockNode null_block= { //FIXME add border maybe
387 .color= {128,128,128},
388 .mx= 0,
389 .my= 0,
390 .ref= 0,
391 .type= 0,
392 .level= 0,
393 };
394
/* MB_SIZE is 2^LOG2_MB_SIZE, i.e. 16. */
#define LOG2_MB_SIZE 4
#define MB_SIZE      (1 << LOG2_MB_SIZE)
397
/** Pairs a signed 16-bit x value with an unsigned 16-bit coefficient. */
typedef struct x_and_coeff{
    int16_t  x;
    uint16_t coeff;
} x_and_coeff;
402
403 typedef struct SubBand{
404 int level;
405 int stride;
406 int width;
407 int height;
408 int qlog; ///< log(qscale)/log[2^(1/6)]
409 DWTELEM *buf;
410 int buf_x_offset;
411 int buf_y_offset;
412 int stride_line; ///< Stride measured in lines, not pixels.
413 x_and_coeff * x_coeff;
414 struct SubBand *parent;
415 uint8_t state[/*7*2*/ 7 + 512][32];
416 }SubBand;
417
418 typedef struct Plane{
419 int width;
420 int height;
421 SubBand band[MAX_DECOMPOSITIONS][4];
422 }Plane;
423
424 typedef struct SnowContext{
425 // MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
426
427 AVCodecContext *avctx;
428 RangeCoder c;
429 DSPContext dsp;
430 AVFrame new_picture;
431 AVFrame input_picture; ///< new_picture with the internal linesizes
432 AVFrame current_picture;
433 AVFrame last_picture[MAX_REF_FRAMES];
434 AVFrame mconly_picture;
435 // uint8_t q_context[16];
436 uint8_t header_state[32];
437 uint8_t block_state[128 + 32*128];
438 int keyframe;
439 int always_reset;
440 int version;
441 int spatial_decomposition_type;
442 int temporal_decomposition_type;
443 int spatial_decomposition_count;
444 int temporal_decomposition_count;
445 int max_ref_frames;
446 int ref_frames;
447 int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
448 uint32_t *ref_scores[MAX_REF_FRAMES];
449 DWTELEM *spatial_dwt_buffer;
450 int colorspace_type;
451 int chroma_h_shift;
452 int chroma_v_shift;
453 int spatial_scalability;
454 int qlog;
455 int lambda;
456 int lambda2;
457 int pass1_rc;
458 int mv_scale;
459 int qbias;
460 #define QBIAS_SHIFT 3
461 int b_width;
462 int b_height;
463 int block_max_depth;
464 Plane plane[MAX_PLANES];
465 BlockNode *block;
466 #define ME_CACHE_SIZE 1024
467 int me_cache[ME_CACHE_SIZE];
468 int me_cache_generation;
469 slice_buffer sb;
470
471 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
472 }SnowContext;
473
474 typedef struct {
475 DWTELEM *b0;
476 DWTELEM *b1;
477 DWTELEM *b2;
478 DWTELEM *b3;
479 int y;
480 } dwt_compose_t;
481
/*
 * Yield the buffer mapped to line_num; when none is mapped yet, map one with
 * slice_buffer_load_line(). Both arguments are evaluated more than once, so
 * they must be free of side effects.
 */
#define slice_buffer_get_line(slice_buf, line_num) \
    ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] \
                                 : slice_buffer_load_line((slice_buf), (line_num)))
//#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
484
485 static void iterative_me(SnowContext *s);
486
487 static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, DWTELEM * base_buffer)
488 {
489 int i;
490
491 buf->base_buffer = base_buffer;
492 buf->line_count = line_count;
493 buf->line_width = line_width;
494 buf->data_count = max_allocated_lines;
495 buf->line = (DWTELEM * *) av_mallocz (sizeof(DWTELEM *) * line_count);
496 buf->data_stack = (DWTELEM * *) av_malloc (sizeof(DWTELEM *) * max_allocated_lines);
497
498 for (i = 0; i < max_allocated_lines; i++)
499 {
500 buf->data_stack[i] = (DWTELEM *) av_malloc (sizeof(DWTELEM) * line_width);
501 }
502
503 buf->data_stack_top = max_allocated_lines - 1;
504 }
505
506 static DWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
507 {
508 int offset;
509 DWTELEM * buffer;
510
511 // av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line);
512
513 assert(buf->data_stack_top >= 0);
514 // assert(!buf->line[line]);
515 if (buf->line[line])
516 return buf->line[line];
517
518 offset = buf->line_width * line;
519 buffer = buf->data_stack[buf->data_stack_top];
520 buf->data_stack_top--;
521 buf->line[line] = buffer;
522
523 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
524
525 return buffer;
526 }
527
528 static void slice_buffer_release(slice_buffer * buf, int line)
529 {
530 int offset;
531 DWTELEM * buffer;
532
533 assert(line >= 0 && line < buf->line_count);
534 assert(buf->line[line]);
535
536 offset = buf->line_width * line;
537 buffer = buf->line[line];
538 buf->data_stack_top++;
539 buf->data_stack[buf->data_stack_top] = buffer;
540 buf->line[line] = NULL;
541
542 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_release: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
543 }
544
545 static void slice_buffer_flush(slice_buffer * buf)
546 {
547 int i;
548 for (i = 0; i < buf->line_count; i++)
549 {
550 if (buf->line[i])
551 {
552 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_flush: line: %d \n", i);
553 slice_buffer_release(buf, i);
554 }
555 }
556 }
557
558 static void slice_buffer_destroy(slice_buffer * buf)
559 {
560 int i;
561 slice_buffer_flush(buf);
562
563 for (i = buf->data_count - 1; i >= 0; i--)
564 {
565 assert(buf->data_stack[i]);
566 av_freep(&buf->data_stack[i]);
567 }
568 assert(buf->data_stack);
569 av_freep(&buf->data_stack);
570 assert(buf->line);
571 av_freep(&buf->line);
572 }
573
574 #ifdef __sgi
575 // Avoid a name clash on SGI IRIX
576 #undef qexp
577 #endif
578 #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
579 static uint8_t qexp[QROOT];
580
/**
 * Reflect v into the range [0, m] by repeatedly mirroring it at 0 and at m.
 *
 * @param v value to fold
 * @param m upper bound of the range
 * @return the folded value; for m == 0 the only valid result, 0
 */
static inline int mirror(int v, int m){
    /* With m == 0 the loop below would flip v between +v and -v forever. */
    if(m == 0)
        return 0;

    while((unsigned)v > (unsigned)m){
        v= -v;              /* mirror at 0 */
        if(v<0) v+= 2*m;    /* still negative: the value was above m, mirror at m */
    }
    return v;
}
588
589 static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
590 int i;
591
592 if(v){
593 const int a= FFABS(v);
594 const int e= av_log2(a);
595 #if 1
596 const int el= FFMIN(e, 10);
597 put_rac(c, state+0, 0);
598
599 for(i=0; i<el; i++){
600 put_rac(c, state+1+i, 1); //1..10
601 }
602 for(; i<e; i++){
603 put_rac(c, state+1+9, 1); //1..10
604 }
605 put_rac(c, state+1+FFMIN(i,9), 0);
606
607 for(i=e-1; i>=el; i--){
608 put_rac(c, state+22+9, (a>>i)&1); //22..31
609 }
610 for(; i>=0; i--){
611 put_rac(c, state+22+i, (a>>i)&1); //22..31
612 }
613
614 if(is_signed)
615 put_rac(c, state+11 + el, v < 0); //11..21
616 #else
617
618 put_rac(c, state+0, 0);
619 if(e<=9){
620 for(i=0; i<e; i++){
621 put_rac(c, state+1+i, 1); //1..10
622 }
623 put_rac(c, state+1+i, 0);
624
625 for(i=e-1; i>=0; i--){
626 put_rac(c, state+22+i, (a>>i)&1); //22..31
627 }
628
629 if(is_signed)
630 put_rac(c, state+11 + e, v < 0); //11..21
631 }else{
632 for(i=0; i<e; i++){
633 put_rac(c, state+1+FFMIN(i,9), 1); //1..10
634 }
635 put_rac(c, state+1+FFMIN(i,9), 0);
636
637 for(i=e-1; i>=0; i--){
638 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
639 }
640
641 if(is_signed)
642 put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
643 }
644 #endif
645 }else{
646 put_rac(c, state+0, 1);
647 }
648 }
649
650 static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
651 if(get_rac(c, state+0))
652 return 0;
653 else{
654 int i, e, a;
655 e= 0;
656 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
657 e++;
658 }
659
660 a= 1;
661 for(i=e-1; i>=0; i--){
662 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
663 }
664
665 if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
666 return -a;
667 else
668 return a;
669 }
670 }
671
672 static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
673 int i;
674 int r= log2>=0 ? 1<<log2 : 1;
675
676 assert(v>=0);
677 assert(log2>=-4);
678
679 while(v >= r){
680 put_rac(c, state+4+log2, 1);
681 v -= r;
682 log2++;
683 if(log2>0) r+=r;
684 }
685 put_rac(c, state+4+log2, 0);
686
687 for(i=log2-1; i>=0; i--){
688 put_rac(c, state+31-i, (v>>i)&1);
689 }
690 }
691
692 static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
693 int i;
694 int r= log2>=0 ? 1<<log2 : 1;
695 int v=0;
696
697 assert(log2>=-4);
698
699 while(get_rac(c, state+4+log2)){
700 v+= r;
701 log2++;
702 if(log2>0) r+=r;
703 }
704
705 for(i=log2-1; i>=0; i--){
706 v+= get_rac(c, state+31-i)<<i;
707 }
708
709 return v;
710 }
711
712 static av_always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
713 const int mirror_left= !highpass;
714 const int mirror_right= (width&1) ^ highpass;
715 const int w= (width>>1) - 1 + (highpass & width);
716 int i;
717
718 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
719 if(mirror_left){
720 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
721 dst += dst_step;
722 src += src_step;
723 }
724
725 for(i=0; i<w; i++){
726 dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse);
727 }
728
729 if(mirror_right){
730 dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse);
731 }
732 }
733
734 #ifndef lift5
735 static av_always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
736 const int mirror_left= !highpass;
737 const int mirror_right= (width&1) ^ highpass;
738 const int w= (width>>1) - 1 + (highpass & width);
739 int i;
740
741 if(mirror_left){
742 int r= 3*2*ref[0];
743 r += r>>4;
744 r += r>>8;
745 dst[0] = LIFT(src[0], ((r+add)>>shift), inverse);
746 dst += dst_step;
747 src += src_step;
748 }
749
750 for(i=0; i<w; i++){
751 int r= 3*(ref[i*ref_step] + ref[(i+1)*ref_step]);
752 r += r>>4;
753 r += r>>8;
754 dst[i*dst_step] = LIFT(src[i*src_step], ((r+add)>>shift), inverse);
755 }
756
757 if(mirror_right){
758 int r= 3*2*ref[w*ref_step];
759 r += r>>4;
760 r += r>>8;
761 dst[w*dst_step] = LIFT(src[w*src_step], ((r+add)>>shift), inverse);
762 }
763 }
764 #endif
765
766 #ifndef liftS
767 static av_always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
768 const int mirror_left= !highpass;
769 const int mirror_right= (width&1) ^ highpass;
770 const int w= (width>>1) - 1 + (highpass & width);
771 int i;
772
773 assert(shift == 4);
774 #define LIFTS(src, ref, inv) ((inv) ? (src) - (((ref) - 4*(src))>>shift): (16*4*(src) + 4*(ref) + 8 + (5<<27))/(5*16) - (1<<23))
775 if(mirror_left){
776 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
777 dst += dst_step;
778 src += src_step;
779 }
780
781 for(i=0; i<w; i++){
782 dst[i*dst_step] = LIFTS(src[i*src_step], mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add, inverse);
783 }
784
785 if(mirror_right){
786 dst[w*dst_step] = LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
787 }
788 }
789 #endif
790
791
792 static void inplace_lift(DWTELEM *dst, int width, int *coeffs, int n, int shift, int start, int inverse){
793 int x, i;
794
795 for(x=start; x<width; x+=2){
796 int64_t sum=0;
797
798 for(i=0; i<n; i++){
799 int x2= x + 2*i - n + 1;
800 if (x2< 0) x2= -x2;
801 else if(x2>=width) x2= 2*width-x2-2;
802 sum += coeffs[i]*(int64_t)dst[x2];
803 }
804 if(inverse) dst[x] -= (sum + (1<<shift)/2)>>shift;
805 else dst[x] += (sum + (1<<shift)/2)>>shift;
806 }
807 }
808
809 static void inplace_liftV(DWTELEM *dst, int width, int height, int stride, int *coeffs, int n, int shift, int start, int inverse){
810 int x, y, i;
811 for(y=start; y<height; y+=2){
812 for(x=0; x<width; x++){
813 int64_t sum=0;
814
815 for(i=0; i<n; i++){
816 int y2= y + 2*i - n + 1;
817 if (y2< 0) y2= -y2;
818 else if(y2>=height) y2= 2*height-y2-2;
819 sum += coeffs[i]*(int64_t)dst[x + y2*stride];
820 }
821 if(inverse) dst[x + y*stride] -= (sum + (1<<shift)/2)>>shift;
822 else dst[x + y*stride] += (sum + (1<<shift)/2)>>shift;
823 }
824 }
825 }
826
/*
 * Compile-time configuration of the generic lifting wavelet used by the
 * *X functions (horizontal_decomposeX(), spatial_composeX(), ...).
 *
 * For lifting step i (1..4):
 *   Ni      number of filter taps (0 makes the step a no-op),
 *   SHIFTi  right shift applied to the filter sum,
 *   COEFFSi the tap values (NULL when Ni is 0).
 * LX0 / LX1 are the start indices passed to inplace_lift()/inplace_liftV(),
 * SCALEX is the factor the samples are multiplied by before decomposition
 * and divided by after composition.
 *
 * Only the first branch whose condition is non-zero is compiled; the others
 * are alternatives that can be selected by editing the conditions. Branches
 * that override LX0, LX1 or SCALEX #undef the defaults first so that
 * selecting them does not redefine a macro with a different value.
 */
#define SCALEX 1
#define LX0 0
#define LX1 1

#if 0 // more accurate 9/7
#define N1 2
#define SHIFT1 14
#define COEFFS1 (int[]){-25987,-25987}
#define N2 2
#define SHIFT2 19
#define COEFFS2 (int[]){-27777,-27777}
#define N3 2
#define SHIFT3 15
#define COEFFS3 (int[]){28931,28931}
#define N4 2
#define SHIFT4 15
#define COEFFS4 (int[]){14533,14533}
#elif 1 // 13/7 CRF
#define N1 4
#define SHIFT1 4
#define COEFFS1 (int[]){1,-9,-9,1}
#define N2 4
#define SHIFT2 4
#define COEFFS2 (int[]){-1,5,5,-1}
#define N3 0
#define SHIFT3 1
#define COEFFS3 NULL
#define N4 0
#define SHIFT4 1
#define COEFFS4 NULL
#elif 1 // 3/5
#undef LX0
#undef LX1
#undef SCALEX
#define LX0 1
#define LX1 0
#define SCALEX 0.5
#define N1 2
#define SHIFT1 1
#define COEFFS1 (int[]){1,1}
#define N2 2
#define SHIFT2 2
#define COEFFS2 (int[]){-1,-1}
#define N3 0
#define SHIFT3 0
#define COEFFS3 NULL
#define N4 0
#define SHIFT4 0
#define COEFFS4 NULL
#elif 1 // 11/5
#define N1 0
#define SHIFT1 1
#define COEFFS1 NULL
#define N2 2
#define SHIFT2 2
#define COEFFS2 (int[]){-1,-1}
#define N3 2
#define SHIFT3 0
#define COEFFS3 (int[]){-1,-1}
#define N4 4
#define SHIFT4 7
#define COEFFS4 (int[]){-5,29,29,-5}
#undef SCALEX
#define SCALEX 4
#elif 1 // 9/7 CDF
#define N1 2
#define SHIFT1 7
#define COEFFS1 (int[]){-203,-203}
#define N2 2
#define SHIFT2 12
#define COEFFS2 (int[]){-217,-217}
#define N3 2
#define SHIFT3 7
#define COEFFS3 (int[]){113,113}
#define N4 2
#define SHIFT4 9
#define COEFFS4 (int[]){227,227}
#undef SCALEX
#define SCALEX 1
#elif 1 // 7/5 CDF
#define N1 0
#define SHIFT1 1
#define COEFFS1 NULL
#define N2 2
#define SHIFT2 2
#define COEFFS2 (int[]){-1,-1}
#define N3 2
#define SHIFT3 0
#define COEFFS3 (int[]){-1,-1}
#define N4 2
#define SHIFT4 4
#define COEFFS4 (int[]){3,3}
#elif 1 // 9/7 MN
#define N1 4
#define SHIFT1 4
#define COEFFS1 (int[]){1,-9,-9,1}
#define N2 2
#define SHIFT2 2
#define COEFFS2 (int[]){1,1}
#define N3 0
#define SHIFT3 1
#define COEFFS3 NULL
#define N4 0
#define SHIFT4 1
#define COEFFS4 NULL
#else // 13/7 CRF
#define N1 4
#define SHIFT1 4
#define COEFFS1 (int[]){1,-9,-9,1}
#define N2 4
#define SHIFT2 4
#define COEFFS2 (int[]){-1,5,5,-1}
#define N3 0
#define SHIFT3 1
#define COEFFS3 NULL
#define N4 0
#define SHIFT4 1
#define COEFFS4 NULL
#endif
941 static void horizontal_decomposeX(DWTELEM *b, int width){
942 DWTELEM temp[width];
943 const int width2= width>>1;
944 const int w2= (width+1)>>1;
945 int x;
946
947 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 0);
948 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 0);
949 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 0);
950 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 0);
951
952 for(x=0; x<width2; x++){
953 temp[x ]= b[2*x ];
954 temp[x+w2]= b[2*x + 1];
955 }
956 if(width&1)
957 temp[x ]= b[2*x ];
958 memcpy(b, temp, width*sizeof(int));
959 }
960
961 static void horizontal_composeX(DWTELEM *b, int width){
962 DWTELEM temp[width];
963 const int width2= width>>1;
964 int x;
965 const int w2= (width+1)>>1;
966
967 memcpy(temp, b, width*sizeof(int));
968 for(x=0; x<width2; x++){
969 b[2*x ]= temp[x ];
970 b[2*x + 1]= temp[x+w2];
971 }
972 if(width&1)
973 b[2*x ]= temp[x ];
974
975 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 1);
976 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 1);
977 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 1);
978 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 1);
979 }
980
981 static void spatial_decomposeX(DWTELEM *buffer, int width, int height, int stride){
982 int x, y;
983
984 for(y=0; y<height; y++){
985 for(x=0; x<width; x++){
986 buffer[y*stride + x] *= SCALEX;
987 }
988 }
989
990 for(y=0; y<height; y++){
991 horizontal_decomposeX(buffer + y*stride, width);
992 }
993
994 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 0);
995 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0);
996 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0);
997 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0);
998 }
999
1000 static void spatial_composeX(DWTELEM *buffer, int width, int height, int stride){
1001 int x, y;
1002
1003 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1);
1004 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1);
1005 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1);
1006 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 1);
1007
1008 for(y=0; y<height; y++){
1009 horizontal_composeX(buffer + y*stride, width);
1010 }
1011
1012 for(y=0; y<height; y++){
1013 for(x=0; x<width; x++){
1014 buffer[y*stride + x] /= SCALEX;
1015 }
1016 }
1017 }
1018
1019 static void horizontal_decompose53i(DWTELEM *b, int width){
1020 DWTELEM temp[width];
1021 const int width2= width>>1;
1022 int x;
1023 const int w2= (width+1)>>1;
1024
1025 for(x=0; x<width2; x++){
1026 temp[x ]= b[2*x ];
1027 temp[x+w2]= b[2*x + 1];
1028 }
1029 if(width&1)
1030 temp[x ]= b[2*x ];
1031 #if 0
1032 {
1033 int A1,A2,A3,A4;
1034 A2= temp[1 ];
1035 A4= temp[0 ];
1036 A1= temp[0+width2];
1037 A1 -= (A2 + A4)>>1;
1038 A4 += (A1 + 1)>>1;
1039 b[0+width2] = A1;
1040 b[0 ] = A4;
1041 for(x=1; x+1<width2; x+=2){
1042 A3= temp[x+width2];
1043 A4= temp[x+1 ];
1044 A3 -= (A2 + A4)>>1;
1045 A2 += (A1 + A3 + 2)>>2;
1046 b[x+width2] = A3;
1047 b[x ] = A2;
1048
1049 A1= temp[x+1+width2];
1050 A2= temp[x+2 ];
1051 A1 -= (A2 + A4)>>1;
1052 A4 += (A1 + A3 + 2)>>2;
1053 b[x+1+width2] = A1;
1054 b[x+1 ] = A4;
1055 }
1056 A3= temp[width-1];
1057 A3 -= A2;
1058 A2 += (A1 + A3 + 2)>>2;
1059 b[width -1] = A3;
1060 b[width2-1] = A2;
1061 }
1062 #else
1063 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
1064 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0);
1065 #endif
1066 }
1067
1068 static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1069 int i;
1070
1071 for(i=0; i<width; i++){
1072 b1[i] -= (b0[i] + b2[i])>>1;
1073 }
1074 }
1075
1076 static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1077 int i;
1078
1079 for(i=0; i<width; i++){
1080 b1[i] += (b0[i] + b2[i] + 2)>>2;
1081 }
1082 }
1083
1084 static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
1085 int y;
1086 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
1087 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride;
1088
1089 for(y=-2; y<height; y+=2){
1090 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1091 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1092
1093 {START_TIMER
1094 if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
1095 if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
1096 STOP_TIMER("horizontal_decompose53i")}
1097
1098 {START_TIMER
1099 if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
1100 if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
1101 STOP_TIMER("vertical_decompose53i*")}
1102
1103 b0=b2;
1104 b1=b3;
1105 }
1106 }
1107
1108 static void horizontal_decompose97i(DWTELEM *b, int width){
1109 DWTELEM temp[width];
1110 const int w2= (width+1)>>1;
1111
1112 lift (temp+w2, b +1, b , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0);
1113 liftS(temp , b , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0);
1114 lift5(b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
1115 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
1116 }
1117
1118
1119 static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1120 int i;
1121
1122 for(i=0; i<width; i++){
1123 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1124 }
1125 }
1126
1127 static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1128 int i;
1129
1130 for(i=0; i<width; i++){
1131 #ifdef lift5
1132 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1133 #else
1134 int r= 3*(b0[i] + b2[i]);
1135 r+= r>>4;
1136 r+= r>>8;
1137 b1[i] += (r+W_CO)>>W_CS;
1138 #endif
1139 }
1140 }
1141
1142 static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1143 int i;
1144
1145 for(i=0; i<width; i++){
1146 #ifdef liftS
1147 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1148 #else
1149 b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + 8*5 + (5<<27)) / (5*16) - (1<<23);
1150 #endif
1151 }
1152 }
1153
1154 static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1155 int i;
1156
1157 for(i=0; i<width; i++){
1158 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1159 }
1160 }
1161
1162 static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
1163 int y;
1164 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
1165 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride;
1166 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
1167 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
1168
1169 for(y=-4; y<height; y+=2){
1170 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1171 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1172
1173 {START_TIMER
1174 if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
1175 if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
1176 if(width>400){
1177 STOP_TIMER("horizontal_decompose97i")
1178 }}
1179
1180 {START_TIMER
1181 if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
1182 if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
1183 if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
1184 if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
1185
1186 if(width>400){
1187 STOP_TIMER("vertical_decompose97i")
1188 }}
1189
1190 b0=b2;
1191 b1=b3;
1192 b2=b4;
1193 b3=b5;
1194 }
1195 }
1196
1197 void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1198 int level;
1199
1200 for(level=0; level<decomposition_count; level++){
1201 switch(type){
1202 case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1203 case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1204 case DWT_X: spatial_decomposeX (buffer, width>>level, height>>level, stride<<level); break;
1205 }
1206 }
1207 }
1208
1209 static void horizontal_compose53i(DWTELEM *b, int width){
1210 DWTELEM temp[width];
1211 const int width2= width>>1;
1212 const int w2= (width+1)>>1;
1213 int x;
1214
1215 #if 0
1216 int A1,A2,A3,A4;
1217 A2= temp[1 ];
1218 A4= temp[0 ];
1219 A1= temp[0+width2];
1220 A1 -= (A2 + A4)>>1;
1221 A4 += (A1 + 1)>>1;
1222 b[0+width2] = A1;
1223 b[0 ] = A4;
1224 for(x=1; x+1<width2; x+=2){
1225 A3= temp[x+width2];
1226 A4= temp[x+1 ];
1227 A3 -= (A2 + A4)>>1;
1228 A2 += (A1 + A3 + 2)>>2;
1229 b[x+width2] = A3;
1230 b[x ] = A2;
1231
1232 A1= temp[x+1+width2];
1233 A2= temp[x+2 ];
1234 A1 -= (A2 + A4)>>1;
1235 A4 += (A1 + A3 + 2)>>2;
1236 b[x+1+width2] = A1;
1237 b[x+1 ] = A4;
1238 }
1239 A3= temp[width-1];
1240 A3 -= A2;
1241 A2 += (A1 + A3 + 2)>>2;
1242 b[width -1] = A3;
1243 b[width2-1] = A2;
1244 #else
1245 lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1);
1246 lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1247 #endif
1248 for(x=0; x<width2; x++){
1249 b[2*x ]= temp[x ];
1250 b[2*x + 1]= temp[x+w2];
1251 }
1252 if(width&1)
1253 b[2*x ]= temp[x ];
1254 }
1255
1256 static void vertical_compose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1257 int i;
1258
1259 for(i=0; i<width; i++){
1260 b1[i] += (b0[i] + b2[i])>>1;
1261 }
1262 }
1263
1264 static void vertical_compose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1265 int i;
1266
1267 for(i=0; i<width; i++){
1268 b1[i] -= (b0[i] + b2[i] + 2)>>2;
1269 }
1270 }
1271
1272 static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1273 cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1274 cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line);
1275 cs->y = -1;
1276 }
1277
1278 static void spatial_compose53i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1279 cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1280 cs->b1 = buffer + mirror(-1 , height-1)*stride;
1281 cs->y = -1;
1282 }
1283
1284 static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1285 int y= cs->y;
1286
1287 DWTELEM *b0= cs->b0;
1288 DWTELEM *b1= cs->b1;
1289 DWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
1290 DWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
1291
1292 {START_TIMER
1293 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1294 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1295 STOP_TIMER("vertical_compose53i*")}
1296
1297 {START_TIMER
1298 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1299 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1300 STOP_TIMER("horizontal_compose53i")}
1301
1302 cs->b0 = b2;
1303 cs->b1 = b3;
1304 cs->y += 2;
1305 }
1306
1307 static void spatial_compose53i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1308 int y= cs->y;
1309 DWTELEM *b0= cs->b0;
1310 DWTELEM *b1= cs->b1;
1311 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1312 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1313
1314 {START_TIMER
1315 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1316 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1317 STOP_TIMER("vertical_compose53i*")}
1318
1319 {START_TIMER
1320 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1321 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1322 STOP_TIMER("horizontal_compose53i")}
1323
1324 cs->b0 = b2;
1325 cs->b1 = b3;
1326 cs->y += 2;
1327 }
1328
1329 static void spatial_compose53i(DWTELEM *buffer, int width, int height, int stride){
1330 dwt_compose_t cs;
1331 spatial_compose53i_init(&cs, buffer, height, stride);
1332 while(cs.y <= height)
1333 spatial_compose53i_dy(&cs, buffer, width, height, stride);
1334 }
1335
1336
1337 void ff_snow_horizontal_compose97i(DWTELEM *b, int width){
1338 DWTELEM temp[width];
1339 const int w2= (width+1)>>1;
1340
1341 lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
1342 lift5(temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
1343 liftS(b , temp , temp+w2, 2, 1, 1, width, -W_BM, W_BO, W_BS, 0, 1);
1344 lift (b+1 , temp+w2, b , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1);
1345 }
1346
1347 static void vertical_compose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1348 int i;
1349
1350 for(i=0; i<width; i++){
1351 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1352 }
1353 }
1354
1355 static void vertical_compose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1356 int i;
1357
1358 for(i=0; i<width; i++){
1359 #ifdef lift5
1360 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1361 #else
1362 int r= 3*(b0[i] + b2[i]);
1363 r+= r>>4;
1364 r+= r>>8;
1365 b1[i] -= (r+W_CO)>>W_CS;
1366 #endif
1367 }
1368 }
1369
1370 static void vertical_compose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1371 int i;
1372
1373 for(i=0; i<width; i++){
1374 #ifdef liftS
1375 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1376 #else
1377 b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
1378 #endif
1379 }
1380 }
1381
1382 static void vertical_compose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1383 int i;
1384
1385 for(i=0; i<width; i++){
1386 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1387 }
1388 }
1389
1390 void ff_snow_vertical_compose97i(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width){
1391 int i;
1392
1393 for(i=0; i<width; i++){
1394 #ifndef lift5
1395 int r;
1396 #endif
1397 b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
1398 #ifdef lift5
1399 b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
1400 #else
1401 r= 3*(b2[i] + b4[i]);
1402 r+= r>>4;
1403 r+= r>>8;
1404 b3[i] -= (r+W_CO)>>W_CS;
1405 #endif
1406 #ifdef liftS
1407 b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
1408 #else
1409 b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
1410 #endif
1411 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1412 }
1413 }
1414
1415 static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1416 cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1417 cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line);
1418 cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1419 cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
1420 cs->y = -3;
1421 }
1422
1423 static void spatial_compose97i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1424 cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1425 cs->b1 = buffer + mirror(-3 , height-1)*stride;
1426 cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1427 cs->b3 = buffer + mirror(-3+2, height-1)*stride;
1428 cs->y = -3;
1429 }
1430
1431 static void spatial_compose97i_dy_buffered(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1432 int y = cs->y;
1433
1434 DWTELEM *b0= cs->b0;
1435 DWTELEM *b1= cs->b1;
1436 DWTELEM *b2= cs->b2;
1437 DWTELEM *b3= cs->b3;
1438 DWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
1439 DWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
1440
1441 {START_TIMER
1442 if(y>0 && y+4<height){
1443 dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
1444 }else{
1445 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1446 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1447 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1448 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1449 }
1450 if(width>400){
1451 STOP_TIMER("vertical_compose97i")}}
1452
1453 {START_TIMER
1454 if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width);
1455 if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width);
1456 if(width>400 && y+0<(unsigned)height){
1457 STOP_TIMER("horizontal_compose97i")}}
1458
1459 cs->b0=b2;
1460 cs->b1=b3;
1461 cs->b2=b4;
1462 cs->b3=b5;
1463 cs->y += 2;
1464 }
1465
1466 static void spatial_compose97i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1467 int y = cs->y;
1468 DWTELEM *b0= cs->b0;
1469 DWTELEM *b1= cs->b1;
1470 DWTELEM *b2= cs->b2;
1471 DWTELEM *b3= cs->b3;
1472 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1473 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1474
1475 {START_TIMER
1476 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1477 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1478 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1479 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1480 if(width>400){
1481 STOP_TIMER("vertical_compose97i")}}
1482
1483 {START_TIMER
1484 if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width);
1485 if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width);
1486 if(width>400 && b0 <= b2){
1487 STOP_TIMER("horizontal_compose97i")}}
1488
1489 cs->b0=b2;
1490 cs->b1=b3;
1491 cs->b2=b4;
1492 cs->b3=b5;
1493 cs->y += 2;
1494 }
1495
1496 static void spatial_compose97i(DWTELEM *buffer, int width, int height, int stride){
1497 dwt_compose_t cs;
1498 spatial_compose97i_init(&cs, buffer, height, stride);
1499 while(cs.y <= height)
1500 spatial_compose97i_dy(&cs, buffer, width, height, stride);
1501 }
1502
1503 static void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
1504 int level;
1505 for(level=decomposition_count-1; level>=0; level--){
1506 switch(type){
1507 case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1508 case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1509 /* not slicified yet */
1510 case DWT_X: /*spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;*/
1511 av_log(NULL, AV_LOG_ERROR, "spatial_composeX neither buffered nor slicified yet.\n"); break;
1512 }
1513 }
1514 }
1515
1516 static void ff_spatial_idwt_init(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1517 int level;
1518 for(level=decomposition_count-1; level>=0; level--){
1519 switch(type){
1520 case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1521 case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
1522 /* not slicified yet */
1523 case DWT_X: spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;
1524 }
1525 }
1526 }
1527
1528 static void ff_spatial_idwt_slice(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
1529 const int support = type==1 ? 3 : 5;
1530 int level;
1531 if(type==2) return;
1532
1533 for(level=decomposition_count-1; level>=0; level--){
1534 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1535 switch(type){
1536 case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1537 break;
1538 case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1539 break;
1540 case DWT_X: break;
1541 }
1542 }
1543 }
1544 }
1545
1546 static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
1547 const int support = type==1 ? 3 : 5;
1548 int level;
1549 if(type==2) return;
1550
1551 for(level=decomposition_count-1; level>=0; level--){
1552 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1553 switch(type){
1554 case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1555 break;
1556 case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1557 break;
1558 case DWT_X: break;
1559 }
1560 }
1561 }
1562 }
1563
1564 static void ff_spatial_idwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1565 if(type==2){
1566 int level;
1567 for(level=decomposition_count-1; level>=0; level--)
1568 spatial_composeX (buffer, width>>level, height>>level, stride<<level);
1569 }else{
1570 dwt_compose_t cs[MAX_DECOMPOSITIONS];
1571 int y;
1572 ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1573 for(y=0; y<height; y+=4)
1574 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
1575 }
1576 }
1577
1578 static int encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1579 const int w= b->width;
1580 const int h= b->height;
1581 int x, y;
1582
1583 if(1){
1584 int run=0;
1585 int runs[w*h];
1586 int run_index=0;
1587 int max_index;
1588
1589 for(y=0; y<h; y++){
1590 for(x=0; x<w; x++){
1591 int v, p=0;
1592 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1593 v= src[x + y*stride];
1594
1595 if(y){
1596 t= src[x + (y-1)*stride];
1597 if(x){
1598 lt= src[x - 1 + (y-1)*stride];
1599 }
1600 if(x + 1 < w){
1601 rt= src[x + 1 + (y-1)*stride];
1602 }
1603 }
1604 if(x){
1605 l= src[x - 1 + y*stride];
1606 /*if(x > 1){
1607 if(orientation==1) ll= src[y + (x-2)*stride];
1608 else ll= src[x - 2 + y*stride];
1609 }*/
1610 }
1611 if(parent){
1612 int px= x>>1;
1613 int py= y>>1;
1614 if(px<b->parent->width && py<b->parent->height)
1615 p= parent[px + py*2*stride];
1616 }
1617 if(!(/*ll|*/l|lt|t|rt|p)){
1618 if(v){
1619 runs[run_index++]= run;
1620 run=0;
1621 }else{
1622 run++;
1623 }
1624 }
1625 }
1626 }
1627 max_index= run_index;
1628 runs[run_index++]= run;
1629 run_index=0;
1630 run= runs[run_index++];
1631
1632 put_symbol2(&s->c, b->state[30], max_index, 0);
1633 if(run_index <= max_index)
1634 put_symbol2(&s->c, b->state[1], run, 3);
1635
1636 for(y=0; y<h; y++){
1637 if(s->c.bytestream_end - s->c.bytestream < w*40){
1638 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1639 return -1;
1640 }
1641 for(x=0; x<w; x++){
1642 int v, p=0;
1643 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1644 v= src[x + y*stride];
1645
1646 if(y){
1647 t= src[x + (y-1)*stride];
1648 if(x){
1649 lt= src[x - 1 + (y-1)*stride];
1650 }
1651 if(x + 1 < w){
1652 rt= src[x + 1 + (y-1)*stride];
1653 }
1654 }
1655 if(x){
1656 l= src[x - 1 + y*stride];
1657 /*if(x > 1){
1658 if(orientation==1) ll= src[y + (x-2)*stride];
1659 else ll= src[x - 2 + y*stride];
1660 }*/
1661 }
1662 if(parent){
1663 int px= x>>1;
1664 int py= y>>1;
1665 if(px<b->parent->width && py<b->parent->height)
1666 p= parent[px + py*2*stride];
1667 }
1668 if(/*ll|*/l|lt|t|rt|p){
1669 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1670
1671 put_rac(&s->c, &b->state[0][context], !!v);
1672 }else{
1673 if(!run){
1674 run= runs[run_index++];
1675
1676 if(run_index <= max_index)
1677 put_symbol2(&s->c, b->state[1], run, 3);
1678 assert(v);
1679 }else{
1680 run--;
1681 assert(!v);
1682 }
1683 }
1684 if(v){
1685 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1686 int l2= 2*FFABS(l) + (l<0);
1687 int t2= 2*FFABS(t) + (t<0);
1688
1689 put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
1690 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
1691 }
1692 }
1693 }
1694 }
1695 return 0;
1696 }
1697
1698 static int encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1699 // encode_subband_qtree(s, b, src, parent, stride, orientation);
1700 // encode_subband_z0run(s, b, src, parent, stride, orientation);
1701 return encode_subband_c0run(s, b, src, parent, stride, orientation);
1702 // encode_subband_dzr(s, b, src, parent, stride, orientation);
1703 }
1704
1705 static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
1706 const int w= b->width;
1707 const int h= b->height;
1708 int x,y;
1709
1710 if(1){
1711 int run, runs;
1712 x_and_coeff *xc= b->x_coeff;
1713 x_and_coeff *prev_xc= NULL;
1714 x_and_coeff *prev2_xc= xc;
1715 x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
1716 x_and_coeff *prev_parent_xc= parent_xc;
1717
1718 runs= get_symbol2(&s->c, b->state[30], 0);
1719 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1720 else run= INT_MAX;
1721
1722 for(y=0; y<h; y++){
1723 int v=0;
1724 int lt=0, t=0, rt=0;
1725
1726 if(y && prev_xc->x == 0){
1727 rt= prev_xc->coeff;
1728 }
1729 for(x=0; x<w; x++){
1730 int p=0;
1731 const int l= v;
1732
1733 lt= t; t= rt;
1734
1735 if(y){
1736 if(prev_xc->x <= x)
1737 prev_xc++;
1738 if(prev_xc->x == x + 1)
1739 rt= prev_xc->coeff;
1740 else
1741 rt=0;
1742 }
1743 if(parent_xc){
1744 if(x>>1 > parent_xc->x){
1745 parent_xc++;
1746 }
1747 if(x>>1 == parent_xc->x){
1748 p= parent_xc->coeff;
1749 }
1750 }
1751 if(/*ll|*/l|lt|t|rt|p){
1752 int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
1753
1754 v=get_rac(&s->c, &b->state[0][context]);
1755 if(v){
1756 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
1757 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
1758
1759 xc->x=x;
1760 (xc++)->coeff= v;
1761 }
1762 }else{
1763 if(!run){
1764 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1765 else run= INT_MAX;
1766 v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
1767 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
1768
1769 xc->x=x;
1770 (xc++)->coeff= v;
1771 }else{
1772 int max_run;
1773 run--;
1774 v=0;
1775
1776 if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
1777 else max_run= FFMIN(run, w-x-1);
1778 if(parent_xc)
1779 max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
1780 x+= max_run;
1781 run-= max_run;
1782 }
1783 }
1784 }
1785 (xc++)->x= w+1; //end marker
1786 prev_xc= prev2_xc;
1787 prev2_xc= xc;
1788
1789 if(parent_xc){
1790 if(y&1){
1791 while(parent_xc->x != parent->width+1)
1792 parent_xc++;
1793 parent_xc++;
1794 prev_parent_xc= parent_xc;
1795 }else{
1796 parent_xc= prev_parent_xc;
1797 }
1798 }
1799 }
1800
1801 (xc++)->x= w+1; //end marker
1802 }
1803 }
1804
1805 static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1806 const int w= b->width;
1807 int y;
1808 const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
1809 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1810 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1811 int new_index = 0;
1812
1813 START_TIMER
1814
1815 if(b->buf == s->spatial_dwt_buffer || s->qlog == LOSSLESS_QLOG){
1816 qadd= 0;
1817 qmul= 1<<QEXPSHIFT;
1818 }
1819
1820 /* If we are on the second or later slice, restore our index. */
1821 if (start_y != 0)
1822 new_index = save_state[0];
1823
1824
1825 for(y=start_y; y<h; y++){
1826 int x = 0;
1827 int v;
1828 DWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1829 memset(line, 0, b->width*sizeof(DWTELEM));
1830 v = b->x_coeff[new_index].coeff;
1831 x = b->x_coeff[new_index++].x;
1832 while(x < w)
1833 {
1834 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1835 register int u= -(v&1);
1836 line[x] = (t^u) - u;
1837
1838 v = b->x_coeff[new_index].coeff;
1839 x = b->x_coeff[new_index++].x;
1840 }
1841 }
1842 if(w > 200 && start_y != 0/*level+1 == s->spatial_decomposition_count*/){
1843 STOP_TIMER("decode_subband")
1844 }
1845
1846 /* Save our variables for the next slice. */
1847 save_state[0] = new_index;
1848
1849 return;
1850 }
1851
1852 static void reset_contexts(SnowContext *s){
1853 int plane_index, level, orientation;
1854
1855 for(plane_index=0; plane_index<3; plane_index++){
1856 for(level=0; level<s->spatial_decomposition_count; level++){
1857 for(orientation=level ? 1:0; orientation<4; orientation++){
1858 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
1859 }
1860 }
1861 }
1862 memset(s->header_state, MID_STATE, sizeof(s->header_state));
1863 memset(s->block_state, MID_STATE, sizeof(s->block_state));
1864 }
1865
1866 static int alloc_blocks(SnowContext *s){
1867 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1868 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1869
1870 s->b_width = w;
1871 s->b_height= h;
1872
1873 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
1874 return 0;
1875 }
1876
1877 static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1878 uint8_t *bytestream= d->bytestream;
1879 uint8_t *bytestream_start= d->bytestream_start;
1880 *d= *s;
1881 d->bytestream= bytestream;
1882 d->bytestream_start= bytestream_start;
1883 }
1884
//near copy & paste from dsputil, FIXME
/**
 * Sums all pixel values of a w x w square.
 *
 * @param pix       top-left pixel of the square
 * @param line_size distance in bytes between the starts of two rows
 * @param w         side length of the square in pixels
 * @return the sum of the w*w pixel values
 */
static int pix_sum(uint8_t * pix, int line_size, int w)
{
    int total = 0;
    int row, col;

    for (row = 0; row < w; row++) {
        const uint8_t *p = pix + row * line_size;

        for (col = 0; col < w; col++)
            total += p[col];
    }
    return total;
}
1900
1901 //near copy & paste from dsputil, FIXME
1902 static int pix_norm1(uint8_t * pix, int line_size, int w)
1903 {
1904 int s, i, j;
1905 uint32_t *sq = ff_squareTbl + 256;
1906
1907 s = 0;
1908 for (i = 0; i < w; i++) {
1909 for (j = 0; j < w; j ++) {
1910 s += sq[pix[0]];
1911 pix ++;
1912 }
1913 pix += line_size - w;
1914 }
1915 return s;
1916 }
1917
1918 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
1919 const int w= s->b_width << s->block_max_depth;
1920 const int rem_depth= s->block_max_depth - level;
1921 const int index= (x + y*w) << rem_depth;
1922 const int block_w= 1<<rem_depth;
1923 BlockNode block;
1924 int i,j;
1925
1926 block.color[0]= l;
1927 block.color[1]= cb;
1928 block.color[2]= cr;
1929 block.mx= mx;
1930 block.my= my;
1931 block.ref= ref;
1932 block.type= type;
1933 block.level= level;
1934
1935 for(j=0; j<block_w; j++){
1936 for(i=0; i<block_w; i++){
1937 s->block[index + i + j*w]= block;
1938 }
1939 }
1940 }
1941
1942 static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
1943 const int offset[3]= {
1944 y*c-> stride + x,
1945 ((y*c->uvstride + x)>>1),
1946 ((y*c->uvstride + x)>>1),
1947 };
1948 int i;
1949 for(i=0; i<3; i++){
1950 c->src[0][i]= src [i];
1951 c->ref[0][i]= ref [i] + offset[i];
1952 }
1953 assert(!ref_index);
1954 }
1955
1956 static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
1957 BlockNode *left, BlockNode *top, BlockNode *tr){
1958 if(s->ref_frames == 1){
1959 *mx = mid_pred(left->mx, top->mx, tr->mx);
1960 *my = mid_pred(left->my, top->my, tr->my);
1961 }else{
1962 const int *scale = scale_mv_ref[ref];
1963 *mx = mid_pred(left->mx * scale[left->ref] + 128 >>8,
1964 top ->mx * scale[top ->ref] + 128 >>8,
1965 tr ->mx * scale[tr ->ref] + 128 >>8);
1966 *my = mid_pred(left->my * scale[left->ref] + 128 >>8,
1967 top ->my * scale[top ->ref] + 128 >>8,
1968 tr ->my * scale[tr ->ref] + 128 >>8);
1969 }
1970 }
1971
1972 //FIXME copy&paste
// Shorthand names for rows of a local predictor table called P
// (encode_q_branch declares it as `int P[10][2]`). There, element [0] of a
// row is assigned an mx value and element [1] a my value.
1973 #define P_LEFT P[1]
1974 #define P_TOP P[2]
1975 #define P_TOPRIGHT P[3]
1976 #define P_MEDIAN P[4]
1977 #define P_MV1 P[9]
1978 #define FLAG_QPEL 1 //must be 1
1979
1980 static int encode_q_branch(SnowContext *s, int level, int x, int y){
1981 uint8_t p_buffer[1024];
1982 uint8_t i_buffer[1024];
1983 uint8_t p_state[sizeof(s->block_state)];
1984 uint8_t i_state[sizeof(s->block_state)];
1985 RangeCoder pc, ic;
1986 uint8_t *pbbak= s->c.bytestream;
1987 uint8_t *pbbak_start= s->c.bytestream_start;
1988 int score, score2, iscore, i_len, p_len, block_s, sum;
1989 const int w= s->b_width << s->block_max_depth;
1990 const int h= s->b_height << s->block_max_depth;
1991 const int rem_depth= s->block_max_depth - level;
1992 const int index= (x + y*w) << rem_depth;
1993 const int block_w= 1<<(LOG2_MB_SIZE - level);
1994 int trx= (x+1)<<rem_depth;
1995 int try= (y+1)<<rem_depth;
1996 BlockNode *left = x ? &s->block[index-1] : &null_block;
1997 BlockNode *top = y ? &s->block[index-w] : &null_block;
1998 BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
1999 BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
2000 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2001 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2002 int pl = left->color[0];
2003 int pcb= left->color[1];
2004 int pcr= left->color[2];
2005 int pmx, pmy;
2006 int mx=0, my=0;
2007 int l,cr,cb;
2008 const int stride= s->current_picture.linesize[0];
2009 const int uvstride= s->current_picture.linesize[1];
2010 uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w,
2011 s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
2012 s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
2013 int P[10][2];
2014 int16_t last_mv[3][2];
2015 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
2016 const int shift= 1+qpel;
2017 MotionEstContext *c= &s->m.me;
2018 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2019 int mx_context= av_log2(2*FFABS(left->mx - top->mx));
2020 int my_context= av_log2(2*FFABS(left->my - top->my));
2021 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2022 int ref, best_ref, ref_score, ref_mx, ref_my;
2023
2024 assert(sizeof(s->block_state) >= 256);
2025 if(s->keyframe){
2026 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
2027 return 0;
2028 }
2029
2030 // clip predictors / edge ?
2031
2032 P_LEFT[0]= left->mx;
2033 P_LEFT[1]= left->my;
2034 P_TOP [0]= top->mx;
2035 P_TOP [1]= top->my;
2036 P_TOPRIGHT[0]= tr->mx;
2037 P_TOPRIGHT[1]= tr->my;
2038
2039 last_mv[0][0]= s->block[index].mx;
2040 last_mv[0][1]= s->block[index].my;
2041 last_mv[1][0]= right->mx;
2042 last_mv[1][1]= right->my;
2043 last_mv[2][0]= bottom->mx;
2044 last_mv[2][1]= bottom->my;
2045
2046 s->m.mb_stride=2;
2047 s->m.mb_x=
2048 s->m.mb_y= 0;
2049 s->m.me.skip= 0;
2050
2051 assert(s->m.me. stride == stride);
2052 assert(s->m.me.uvstride == uvstride);
2053
2054 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
2055 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
2056 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
2057 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
2058
2059 c->xmin = - x*block_w - 16+2;
2060 c->ymin = -