various security fixes and precautionary checks
[libav.git] / libavcodec / snow.c
1 /*
2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18
19 #include "avcodec.h"
20 #include "common.h"
21 #include "dsputil.h"
22
23 #include "rangecoder.h"
24 #define MID_STATE 128
25
26 #include "mpegvideo.h"
27
28 #undef NDEBUG
29 #include <assert.h>
30
31 #define MAX_DECOMPOSITIONS 8
32 #define MAX_PLANES 4
33 #define DWTELEM int
34 #define QROOT 8
35 #define LOSSLESS_QLOG -128
36 #define FRAC_BITS 8
37
38 static const int8_t quant3[256]={
39 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
43 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
44 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
45 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
46 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
47 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
48 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
49 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
50 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
51 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
52 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
53 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
54 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
55 };
56 static const int8_t quant3b[256]={
57 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
60 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
61 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
62 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
63 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
64 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
65 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
66 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
68 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
69 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
70 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
71 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
72 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
73 };
74 static const int8_t quant5[256]={
75 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
76 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
77 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
78 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
79 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
80 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
81 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
82 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
83 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
84 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
85 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
86 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
87 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
88 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
89 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
90 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
91 };
92 static const int8_t quant7[256]={
93 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
94 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
95 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
96 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
97 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
98 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
99 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
100 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
101 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
102 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
103 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
104 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
105 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
106 -3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
107 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
108 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
109 };
110 static const int8_t quant9[256]={
111 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
112 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
113 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
114 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
115 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
116 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
117 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
118 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
119 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
120 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
121 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
122 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
123 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
124 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
125 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
126 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
127 };
128 static const int8_t quant11[256]={
129 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
130 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
131 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
132 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
133 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
134 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
135 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
136 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
137 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
138 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
139 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
140 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
141 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
142 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
143 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
144 -4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
145 };
146 static const int8_t quant13[256]={
147 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
148 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
149 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
150 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
151 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
152 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
153 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
154 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
155 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
156 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
157 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
158 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
159 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
160 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
161 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
162 -4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
163 };
164
165 #define LOG2_OBMC_MAX 6
166 #define OBMC_MAX (1<<(LOG2_OBMC_MAX))
167 #if 0 //64*cubic
168 static const uint8_t obmc32[1024]={
169 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
170 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
171 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
172 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
173 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
174 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
175 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
176 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
177 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
178 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
179 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
180 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
181 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
182 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
183 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
184 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
185 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
186 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
187 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
188 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
189 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
190 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
191 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
192 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
193 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
194 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
195 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
196 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
197 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
198 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
199 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
200 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
201 //error:0.000022
202 };
203 static const uint8_t obmc16[256]={
204 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
205 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
206 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
207 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
208 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
209 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
210 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
211 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
212 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
213 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
214 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
215 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
216 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
217 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
218 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
219 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
220 //error:0.000033
221 };
222 #elif 1 // 64*linear
223 static const uint8_t obmc32[1024]={
224 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
225 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
226 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
227 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
228 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
229 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
230 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
231 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
232 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
233 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
234 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
235 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
236 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
237 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
238 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
239 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
240 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
241 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
242 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
243 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
244 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
245 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
246 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
247 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
248 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
249 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
250 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
251 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
252 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
253 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
254 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
255 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
256 //error:0.000020
257 };
258 static const uint8_t obmc16[256]={
259 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
260 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
261 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
262 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
263 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
264 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
265 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
266 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
267 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
268 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
269 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
270 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
271 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
272 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
273 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
274 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
275 //error:0.000015
276 };
277 #else //64*cos
278 static const uint8_t obmc32[1024]={
279 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
280 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
281 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
282 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
283 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
284 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
285 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
286 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
287 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
288 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
289 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
290 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
291 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
292 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
293 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
294 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
295 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
296 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
297 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
298 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
299 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
300 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
301 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
302 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
303 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
304 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
305 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
306 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
307 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
308 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
309 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
310 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
311 //error:0.000022
312 };
313 static const uint8_t obmc16[256]={
314 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
315 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
316 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
317 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
318 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
319 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
320 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
321 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
322 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
323 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
324 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
325 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
326 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
327 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
328 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
329 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
330 //error:0.000022
331 };
332 #endif
333
334 //linear *64
335 static const uint8_t obmc8[64]={
336 1, 3, 5, 7, 7, 5, 3, 1,
337 3, 9,15,21,21,15, 9, 3,
338 5,15,25,35,35,25,15, 5,
339 7,21,35,49,49,35,21, 7,
340 7,21,35,49,49,35,21, 7,
341 5,15,25,35,35,25,15, 5,
342 3, 9,15,21,21,15, 9, 3,
343 1, 3, 5, 7, 7, 5, 3, 1,
344 //error:0.000000
345 };
346
347 //linear *64
348 static const uint8_t obmc4[16]={
349 4,12,12, 4,
350 12,36,36,12,
351 12,36,36,12,
352 4,12,12, 4,
353 //error:0.000000
354 };
355
356 static const uint8_t *obmc_tab[4]={
357 obmc32, obmc16, obmc8, obmc4
358 };
359
360 typedef struct BlockNode{
361 int16_t mx;
362 int16_t my;
363 uint8_t color[3];
364 uint8_t type;
365 //#define TYPE_SPLIT 1
366 #define BLOCK_INTRA 1
367 //#define TYPE_NOCOLOR 4
368 uint8_t level; //FIXME merge into type?
369 }BlockNode;
370
371 #define LOG2_MB_SIZE 4
372 #define MB_SIZE (1<<LOG2_MB_SIZE)
373
374 typedef struct SubBand{
375 int level;
376 int stride;
377 int width;
378 int height;
379 int qlog; ///< log(qscale)/log[2^(1/6)]
380 DWTELEM *buf;
381 int16_t *x;
382 DWTELEM *coeff;
383 struct SubBand *parent;
384 uint8_t state[/*7*2*/ 7 + 512][32];
385 }SubBand;
386
387 typedef struct Plane{
388 int width;
389 int height;
390 SubBand band[MAX_DECOMPOSITIONS][4];
391 }Plane;
392
393 typedef struct SnowContext{
394 // MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
395
396 AVCodecContext *avctx;
397 RangeCoder c;
398 DSPContext dsp;
399 AVFrame input_picture;
400 AVFrame current_picture;
401 AVFrame last_picture;
402 AVFrame mconly_picture;
403 // uint8_t q_context[16];
404 uint8_t header_state[32];
405 uint8_t block_state[128 + 32*128];
406 int keyframe;
407 int always_reset;
408 int version;
409 int spatial_decomposition_type;
410 int temporal_decomposition_type;
411 int spatial_decomposition_count;
412 int temporal_decomposition_count;
413 DWTELEM *spatial_dwt_buffer;
414 int colorspace_type;
415 int chroma_h_shift;
416 int chroma_v_shift;
417 int spatial_scalability;
418 int qlog;
419 int lambda;
420 int lambda2;
421 int mv_scale;
422 int qbias;
423 #define QBIAS_SHIFT 3
424 int b_width;
425 int b_height;
426 int block_max_depth;
427 Plane plane[MAX_PLANES];
428 BlockNode *block;
429
430 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
431 }SnowContext;
432
433 #ifdef __sgi
434 // Avoid a name clash on SGI IRIX
435 #undef qexp
436 #endif
437 #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
438 static const uint8_t qexp[8]={
439 128, 140, 152, 166, 181, 197, 215, 235
440 // 64, 70, 76, 83, 91, 99, 108, 117
441 // 32, 35, 38, 41, 45, 49, 54, 59
442 // 16, 17, 19, 21, 23, 25, 27, 29
443 // 8, 9, 10, 10, 11, 12, 13, 15
444 };
445
446 static inline int mirror(int v, int m){
447 if (v<0) return -v;
448 else if(v>m) return 2*m-v;
449 else return v;
450 }
451
452 static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
453 int i;
454
455 if(v){
456 const int a= ABS(v);
457 const int e= av_log2(a);
458 #if 1
459 const int el= FFMIN(e, 10);
460 put_rac(c, state+0, 0);
461
462 for(i=0; i<el; i++){
463 put_rac(c, state+1+i, 1); //1..10
464 }
465 for(; i<e; i++){
466 put_rac(c, state+1+9, 1); //1..10
467 }
468 put_rac(c, state+1+FFMIN(i,9), 0);
469
470 for(i=e-1; i>=el; i--){
471 put_rac(c, state+22+9, (a>>i)&1); //22..31
472 }
473 for(; i>=0; i--){
474 put_rac(c, state+22+i, (a>>i)&1); //22..31
475 }
476
477 if(is_signed)
478 put_rac(c, state+11 + el, v < 0); //11..21
479 #else
480
481 put_rac(c, state+0, 0);
482 if(e<=9){
483 for(i=0; i<e; i++){
484 put_rac(c, state+1+i, 1); //1..10
485 }
486 put_rac(c, state+1+i, 0);
487
488 for(i=e-1; i>=0; i--){
489 put_rac(c, state+22+i, (a>>i)&1); //22..31
490 }
491
492 if(is_signed)
493 put_rac(c, state+11 + e, v < 0); //11..21
494 }else{
495 for(i=0; i<e; i++){
496 put_rac(c, state+1+FFMIN(i,9), 1); //1..10
497 }
498 put_rac(c, state+1+FFMIN(i,9), 0);
499
500 for(i=e-1; i>=0; i--){
501 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
502 }
503
504 if(is_signed)
505 put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
506 }
507 #endif
508 }else{
509 put_rac(c, state+0, 1);
510 }
511 }
512
513 static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
514 if(get_rac(c, state+0))
515 return 0;
516 else{
517 int i, e, a;
518 e= 0;
519 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
520 e++;
521 }
522
523 a= 1;
524 for(i=e-1; i>=0; i--){
525 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
526 }
527
528 if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
529 return -a;
530 else
531 return a;
532 }
533 }
534
535 static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
536 int i;
537 int r= log2>=0 ? 1<<log2 : 1;
538
539 assert(v>=0);
540 assert(log2>=-4);
541
542 while(v >= r){
543 put_rac(c, state+4+log2, 1);
544 v -= r;
545 log2++;
546 if(log2>0) r+=r;
547 }
548 put_rac(c, state+4+log2, 0);
549
550 for(i=log2-1; i>=0; i--){
551 put_rac(c, state+31-i, (v>>i)&1);
552 }
553 }
554
555 static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
556 int i;
557 int r= log2>=0 ? 1<<log2 : 1;
558 int v=0;
559
560 assert(log2>=-4);
561
562 while(get_rac(c, state+4+log2)){
563 v+= r;
564 log2++;
565 if(log2>0) r+=r;
566 }
567
568 for(i=log2-1; i>=0; i--){
569 v+= get_rac(c, state+31-i)<<i;
570 }
571
572 return v;
573 }
574
575 static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
576 const int mirror_left= !highpass;
577 const int mirror_right= (width&1) ^ highpass;
578 const int w= (width>>1) - 1 + (highpass & width);
579 int i;
580
581 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
582 if(mirror_left){
583 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
584 dst += dst_step;
585 src += src_step;
586 }
587
588 for(i=0; i<w; i++){
589 dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse);
590 }
591
592 if(mirror_right){
593 dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse);
594 }
595 }
596
597 static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
598 const int mirror_left= !highpass;
599 const int mirror_right= (width&1) ^ highpass;
600 const int w= (width>>1) - 1 + (highpass & width);
601 int i;
602
603 if(mirror_left){
604 int r= 3*2*ref[0];
605 r += r>>4;
606 r += r>>8;
607 dst[0] = LIFT(src[0], ((r+add)>>shift), inverse);
608 dst += dst_step;
609 src += src_step;
610 }
611
612 for(i=0; i<w; i++){
613 int r= 3*(ref[i*ref_step] + ref[(i+1)*ref_step]);
614 r += r>>4;
615 r += r>>8;
616 dst[i*dst_step] = LIFT(src[i*src_step], ((r+add)>>shift), inverse);
617 }
618
619 if(mirror_right){
620 int r= 3*2*ref[w*ref_step];
621 r += r>>4;
622 r += r>>8;
623 dst[w*dst_step] = LIFT(src[w*src_step], ((r+add)>>shift), inverse);
624 }
625 }
626
627
628 static void inplace_lift(DWTELEM *dst, int width, int *coeffs, int n, int shift, int start, int inverse){
629 int x, i;
630
631 for(x=start; x<width; x+=2){
632 int64_t sum=0;
633
634 for(i=0; i<n; i++){
635 int x2= x + 2*i - n + 1;
636 if (x2< 0) x2= -x2;
637 else if(x2>=width) x2= 2*width-x2-2;
638 sum += coeffs[i]*(int64_t)dst[x2];
639 }
640 if(inverse) dst[x] -= (sum + (1<<shift)/2)>>shift;
641 else dst[x] += (sum + (1<<shift)/2)>>shift;
642 }
643 }
644
645 static void inplace_liftV(DWTELEM *dst, int width, int height, int stride, int *coeffs, int n, int shift, int start, int inverse){
646 int x, y, i;
647 for(y=start; y<height; y+=2){
648 for(x=0; x<width; x++){
649 int64_t sum=0;
650
651 for(i=0; i<n; i++){
652 int y2= y + 2*i - n + 1;
653 if (y2< 0) y2= -y2;
654 else if(y2>=height) y2= 2*height-y2-2;
655 sum += coeffs[i]*(int64_t)dst[x + y2*stride];
656 }
657 if(inverse) dst[x + y*stride] -= (sum + (1<<shift)/2)>>shift;
658 else dst[x + y*stride] += (sum + (1<<shift)/2)>>shift;
659 }
660 }
661 }
662
663 #define SCALEX 1
664 #define LX0 0
665 #define LX1 1
666
667 #if 0 // more accurate 9/7
668 #define N1 2
669 #define SHIFT1 14
670 #define COEFFS1 (int[]){-25987,-25987}
671 #define N2 2
672 #define SHIFT2 19
673 #define COEFFS2 (int[]){-27777,-27777}
674 #define N3 2
675 #define SHIFT3 15
676 #define COEFFS3 (int[]){28931,28931}
677 #define N4 2
678 #define SHIFT4 15
679 #define COEFFS4 (int[]){14533,14533}
680 #elif 1 // 13/7 CRF
681 #define N1 4
682 #define SHIFT1 4
683 #define COEFFS1 (int[]){1,-9,-9,1}
684 #define N2 4
685 #define SHIFT2 4
686 #define COEFFS2 (int[]){-1,5,5,-1}
687 #define N3 0
688 #define SHIFT3 1
689 #define COEFFS3 NULL
690 #define N4 0
691 #define SHIFT4 1
692 #define COEFFS4 NULL
693 #elif 1 // 3/5
694 #define LX0 1
695 #define LX1 0
696 #define SCALEX 0.5
697 #define N1 2
698 #define SHIFT1 1
699 #define COEFFS1 (int[]){1,1}
700 #define N2 2
701 #define SHIFT2 2
702 #define COEFFS2 (int[]){-1,-1}
703 #define N3 0
704 #define SHIFT3 0
705 #define COEFFS3 NULL
706 #define N4 0
707 #define SHIFT4 0
708 #define COEFFS4 NULL
709 #elif 1 // 11/5
710 #define N1 0
711 #define SHIFT1 1
712 #define COEFFS1 NULL
713 #define N2 2
714 #define SHIFT2 2
715 #define COEFFS2 (int[]){-1,-1}
716 #define N3 2
717 #define SHIFT3 0
718 #define COEFFS3 (int[]){-1,-1}
719 #define N4 4
720 #define SHIFT4 7
721 #define COEFFS4 (int[]){-5,29,29,-5}
722 #define SCALEX 4
723 #elif 1 // 9/7 CDF
724 #define N1 2
725 #define SHIFT1 7
726 #define COEFFS1 (int[]){-203,-203}
727 #define N2 2
728 #define SHIFT2 12
729 #define COEFFS2 (int[]){-217,-217}
730 #define N3 2
731 #define SHIFT3 7
732 #define COEFFS3 (int[]){113,113}
733 #define N4 2
734 #define SHIFT4 9
735 #define COEFFS4 (int[]){227,227}
736 #define SCALEX 1
737 #elif 1 // 7/5 CDF
738 #define N1 0
739 #define SHIFT1 1
740 #define COEFFS1 NULL
741 #define N2 2
742 #define SHIFT2 2
743 #define COEFFS2 (int[]){-1,-1}
744 #define N3 2
745 #define SHIFT3 0
746 #define COEFFS3 (int[]){-1,-1}
747 #define N4 2
748 #define SHIFT4 4
749 #define COEFFS4 (int[]){3,3}
750 #elif 1 // 9/7 MN
751 #define N1 4
752 #define SHIFT1 4
753 #define COEFFS1 (int[]){1,-9,-9,1}
754 #define N2 2
755 #define SHIFT2 2
756 #define COEFFS2 (int[]){1,1}
757 #define N3 0
758 #define SHIFT3 1
759 #define COEFFS3 NULL
760 #define N4 0
761 #define SHIFT4 1
762 #define COEFFS4 NULL
763 #else // 13/7 CRF
764 #define N1 4
765 #define SHIFT1 4
766 #define COEFFS1 (int[]){1,-9,-9,1}
767 #define N2 4
768 #define SHIFT2 4
769 #define COEFFS2 (int[]){-1,5,5,-1}
770 #define N3 0
771 #define SHIFT3 1
772 #define COEFFS3 NULL
773 #define N4 0
774 #define SHIFT4 1
775 #define COEFFS4 NULL
776 #endif
777 static void horizontal_decomposeX(DWTELEM *b, int width){
778 DWTELEM temp[width];
779 const int width2= width>>1;
780 const int w2= (width+1)>>1;
781 int A1,A2,A3,A4, x;
782
783 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 0);
784 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 0);
785 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 0);
786 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 0);
787
788 for(x=0; x<width2; x++){
789 temp[x ]= b[2*x ];
790 temp[x+w2]= b[2*x + 1];
791 }
792 if(width&1)
793 temp[x ]= b[2*x ];
794 memcpy(b, temp, width*sizeof(int));
795 }
796
797 static void horizontal_composeX(DWTELEM *b, int width){
798 DWTELEM temp[width];
799 const int width2= width>>1;
800 int A1,A2,A3,A4, x;
801 const int w2= (width+1)>>1;
802
803 memcpy(temp, b, width*sizeof(int));
804 for(x=0; x<width2; x++){
805 b[2*x ]= temp[x ];
806 b[2*x + 1]= temp[x+w2];
807 }
808 if(width&1)
809 b[2*x ]= temp[x ];
810
811 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 1);
812 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 1);
813 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 1);
814 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 1);
815 }
816
817 static void spatial_decomposeX(DWTELEM *buffer, int width, int height, int stride){
818 int x, y;
819
820 for(y=0; y<height; y++){
821 for(x=0; x<width; x++){
822 buffer[y*stride + x] *= SCALEX;
823 }
824 }
825
826 for(y=0; y<height; y++){
827 horizontal_decomposeX(buffer + y*stride, width);
828 }
829
830 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 0);
831 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0);
832 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0);
833 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0);
834 }
835
836 static void spatial_composeX(DWTELEM *buffer, int width, int height, int stride){
837 int x, y;
838
839 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1);
840 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1);
841 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1);
842 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 1);
843
844 for(y=0; y<height; y++){
845 horizontal_composeX(buffer + y*stride, width);
846 }
847
848 for(y=0; y<height; y++){
849 for(x=0; x<width; x++){
850 buffer[y*stride + x] /= SCALEX;
851 }
852 }
853 }
854
855 static void horizontal_decompose53i(DWTELEM *b, int width){
856 DWTELEM temp[width];
857 const int width2= width>>1;
858 int A1,A2,A3,A4, x;
859 const int w2= (width+1)>>1;
860
861 for(x=0; x<width2; x++){
862 temp[x ]= b[2*x ];
863 temp[x+w2]= b[2*x + 1];
864 }
865 if(width&1)
866 temp[x ]= b[2*x ];
867 #if 0
868 A2= temp[1 ];
869 A4= temp[0 ];
870 A1= temp[0+width2];
871 A1 -= (A2 + A4)>>1;
872 A4 += (A1 + 1)>>1;
873 b[0+width2] = A1;
874 b[0 ] = A4;
875 for(x=1; x+1<width2; x+=2){
876 A3= temp[x+width2];
877 A4= temp[x+1 ];
878 A3 -= (A2 + A4)>>1;
879 A2 += (A1 + A3 + 2)>>2;
880 b[x+width2] = A3;
881 b[x ] = A2;
882
883 A1= temp[x+1+width2];
884 A2= temp[x+2 ];
885 A1 -= (A2 + A4)>>1;
886 A4 += (A1 + A3 + 2)>>2;
887 b[x+1+width2] = A1;
888 b[x+1 ] = A4;
889 }
890 A3= temp[width-1];
891 A3 -= A2;
892 A2 += (A1 + A3 + 2)>>2;
893 b[width -1] = A3;
894 b[width2-1] = A2;
895 #else
896 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
897 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0);
898 #endif
899 }
900
901 static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
902 int i;
903
904 for(i=0; i<width; i++){
905 b1[i] -= (b0[i] + b2[i])>>1;
906 }
907 }
908
909 static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
910 int i;
911
912 for(i=0; i<width; i++){
913 b1[i] += (b0[i] + b2[i] + 2)>>2;
914 }
915 }
916
917 static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
918 int y;
919 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
920 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride;
921
922 for(y=-2; y<height; y+=2){
923 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
924 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
925
926 {START_TIMER
927 if(b1 <= b3) horizontal_decompose53i(b2, width);
928 if(y+2 < height) horizontal_decompose53i(b3, width);
929 STOP_TIMER("horizontal_decompose53i")}
930
931 {START_TIMER
932 if(b1 <= b3) vertical_decompose53iH0(b1, b2, b3, width);
933 if(b0 <= b2) vertical_decompose53iL0(b0, b1, b2, width);
934 STOP_TIMER("vertical_decompose53i*")}
935
936 b0=b2;
937 b1=b3;
938 }
939 }
940
941 #define lift5 lift
942 #if 1
943 #define W_AM 3
944 #define W_AO 0
945 #define W_AS 1
946
947 #define W_BM 1
948 #define W_BO 8
949 #define W_BS 4
950
951 #undef lift5
952 #define W_CM 9999
953 #define W_CO 2
954 #define W_CS 2
955
956 #define W_DM 15
957 #define W_DO 16
958 #define W_DS 5
959 #elif 0
960 #define W_AM 55
961 #define W_AO 16
962 #define W_AS 5
963
964 #define W_BM 3
965 #define W_BO 32
966 #define W_BS 6
967
968 #define W_CM 127
969 #define W_CO 64
970 #define W_CS 7
971
972 #define W_DM 7
973 #define W_DO 8
974 #define W_DS 4
975 #elif 0
976 #define W_AM 97
977 #define W_AO 32
978 #define W_AS 6
979
980 #define W_BM 63
981 #define W_BO 512
982 #define W_BS 10
983
984 #define W_CM 13
985 #define W_CO 8
986 #define W_CS 4
987
988 #define W_DM 15
989 #define W_DO 16
990 #define W_DS 5
991
992 #else
993
994 #define W_AM 203
995 #define W_AO 64
996 #define W_AS 7
997
998 #define W_BM 217
999 #define W_BO 2048
1000 #define W_BS 12
1001
1002 #define W_CM 113
1003 #define W_CO 64
1004 #define W_CS 7
1005
1006 #define W_DM 227
1007 #define W_DO 128
1008 #define W_DS 9
1009 #endif
1010 static void horizontal_decompose97i(DWTELEM *b, int width){
1011 DWTELEM temp[width];
1012 const int w2= (width+1)>>1;
1013
1014 lift (temp+w2, b +1, b , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0);
1015 lift (temp , b , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0);
1016 lift5(b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
1017 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
1018 }
1019
1020
1021 static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1022 int i;
1023
1024 for(i=0; i<width; i++){
1025 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1026 }
1027 }
1028
1029 static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1030 int i;
1031
1032 for(i=0; i<width; i++){
1033 #ifdef lift5
1034 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1035 #else
1036 int r= 3*(b0[i] + b2[i]);
1037 r+= r>>4;
1038 r+= r>>8;
1039 b1[i] += (r+W_CO)>>W_CS;
1040 #endif
1041 }
1042 }
1043
1044 static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1045 int i;
1046
1047 for(i=0; i<width; i++){
1048 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1049 }
1050 }
1051
1052 static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1053 int i;
1054
1055 for(i=0; i<width; i++){
1056 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1057 }
1058 }
1059
1060 static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
1061 int y;
1062 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
1063 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride;
1064 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
1065 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
1066
1067 for(y=-4; y<height; y+=2){
1068 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1069 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1070
1071 {START_TIMER
1072 if(b3 <= b5) horizontal_decompose97i(b4, width);
1073 if(y+4 < height) horizontal_decompose97i(b5, width);
1074 if(width>400){
1075 STOP_TIMER("horizontal_decompose97i")
1076 }}
1077
1078 {START_TIMER
1079 if(b3 <= b5) vertical_decompose97iH0(b3, b4, b5, width);
1080 if(b2 <= b4) vertical_decompose97iL0(b2, b3, b4, width);
1081 if(b1 <= b3) vertical_decompose97iH1(b1, b2, b3, width);
1082 if(b0 <= b2) vertical_decompose97iL1(b0, b1, b2, width);
1083
1084 if(width>400){
1085 STOP_TIMER("vertical_decompose97i")
1086 }}
1087
1088 b0=b2;
1089 b1=b3;
1090 b2=b4;
1091 b3=b5;
1092 }
1093 }
1094
1095 void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1096 int level;
1097
1098 for(level=0; level<decomposition_count; level++){
1099 switch(type){
1100 case 0: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1101 case 1: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1102 case 2: spatial_decomposeX (buffer, width>>level, height>>level, stride<<level); break;
1103 }
1104 }
1105 }
1106
1107 static void horizontal_compose53i(DWTELEM *b, int width){
1108 DWTELEM temp[width];
1109 const int width2= width>>1;
1110 const int w2= (width+1)>>1;
1111 int A1,A2,A3,A4, x;
1112
1113 #if 0
1114 A2= temp[1 ];
1115 A4= temp[0 ];
1116 A1= temp[0+width2];
1117 A1 -= (A2 + A4)>>1;
1118 A4 += (A1 + 1)>>1;
1119 b[0+width2] = A1;
1120 b[0 ] = A4;
1121 for(x=1; x+1<width2; x+=2){
1122 A3= temp[x+width2];
1123 A4= temp[x+1 ];
1124 A3 -= (A2 + A4)>>1;
1125 A2 += (A1 + A3 + 2)>>2;
1126 b[x+width2] = A3;
1127 b[x ] = A2;
1128
1129 A1= temp[x+1+width2];
1130 A2= temp[x+2 ];
1131 A1 -= (A2 + A4)>>1;
1132 A4 += (A1 + A3 + 2)>>2;
1133 b[x+1+width2] = A1;
1134 b[x+1 ] = A4;
1135 }
1136 A3= temp[width-1];
1137 A3 -= A2;
1138 A2 += (A1 + A3 + 2)>>2;
1139 b[width -1] = A3;
1140 b[width2-1] = A2;
1141 #else
1142 lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1);
1143 lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1144 #endif
1145 for(x=0; x<width2; x++){
1146 b[2*x ]= temp[x ];
1147 b[2*x + 1]= temp[x+w2];
1148 }
1149 if(width&1)
1150 b[2*x ]= temp[x ];
1151 }
1152
1153 static void vertical_compose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1154 int i;
1155
1156 for(i=0; i<width; i++){
1157 b1[i] += (b0[i] + b2[i])>>1;
1158 }
1159 }
1160
1161 static void vertical_compose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1162 int i;
1163
1164 for(i=0; i<width; i++){
1165 b1[i] -= (b0[i] + b2[i] + 2)>>2;
1166 }
1167 }
1168
1169 static void spatial_compose53i(DWTELEM *buffer, int width, int height, int stride){
1170 int y;
1171 DWTELEM *b0= buffer + mirror(-1-1, height-1)*stride;
1172 DWTELEM *b1= buffer + mirror(-1 , height-1)*stride;
1173
1174 for(y=-1; y<=height; y+=2){
1175 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1176 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1177
1178 {START_TIMER
1179 if(b1 <= b3) vertical_compose53iL0(b1, b2, b3, width);
1180 if(b0 <= b2) vertical_compose53iH0(b0, b1, b2, width);
1181 STOP_TIMER("vertical_compose53i*")}
1182
1183 {START_TIMER
1184 if(y-1 >= 0) horizontal_compose53i(b0, width);
1185 if(b0 <= b2) horizontal_compose53i(b1, width);
1186 STOP_TIMER("horizontal_compose53i")}
1187
1188 b0=b2;
1189 b1=b3;
1190 }
1191 }
1192
1193
1194 static void horizontal_compose97i(DWTELEM *b, int width){
1195 DWTELEM temp[width];
1196 const int w2= (width+1)>>1;
1197
1198 lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
1199 lift5(temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
1200 lift (b , temp , temp+w2, 2, 1, 1, width, -W_BM, W_BO, W_BS, 0, 1);
1201 lift (b+1 , temp+w2, b , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1);
1202 }
1203
1204 static void vertical_compose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1205 int i;
1206
1207 for(i=0; i<width; i++){
1208 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1209 }
1210 }
1211
1212 static void vertical_compose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1213 int i;
1214
1215 for(i=0; i<width; i++){
1216 #ifdef lift5
1217 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1218 #else
1219 int r= 3*(b0[i] + b2[i]);
1220 r+= r>>4;
1221 r+= r>>8;
1222 b1[i] -= (r+W_CO)>>W_CS;
1223 #endif
1224 }
1225 }
1226
1227 static void vertical_compose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1228 int i;
1229
1230 for(i=0; i<width; i++){
1231 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1232 }
1233 }
1234
1235 static void vertical_compose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1236 int i;
1237
1238 for(i=0; i<width; i++){
1239 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1240 }
1241 }
1242
1243 static void spatial_compose97i(DWTELEM *buffer, int width, int height, int stride){
1244 int y;
1245 DWTELEM *b0= buffer + mirror(-3-1, height-1)*stride;
1246 DWTELEM *b1= buffer + mirror(-3 , height-1)*stride;
1247 DWTELEM *b2= buffer + mirror(-3+1, height-1)*stride;
1248 DWTELEM *b3= buffer + mirror(-3+2, height-1)*stride;
1249
1250 for(y=-3; y<=height; y+=2){
1251 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1252 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1253
1254 if(stride == width && y+4 < height && 0){
1255 int x;
1256 for(x=0; x<width/2; x++)
1257 b5[x] += 64*2;
1258 for(; x<width; x++)
1259 b5[x] += 169*2;
1260 }
1261
1262 {START_TIMER
1263 if(b3 <= b5) vertical_compose97iL1(b3, b4, b5, width);
1264 if(b2 <= b4) vertical_compose97iH1(b2, b3, b4, width);
1265 if(b1 <= b3) vertical_compose97iL0(b1, b2, b3, width);
1266 if(b0 <= b2) vertical_compose97iH0(b0, b1, b2, width);
1267 if(width>400){
1268 STOP_TIMER("vertical_compose97i")}}
1269
1270 {START_TIMER
1271 if(y-1>= 0) horizontal_compose97i(b0, width);
1272 if(b0 <= b2) horizontal_compose97i(b1, width);
1273 if(width>400 && b0 <= b2){
1274 STOP_TIMER("horizontal_compose97i")}}
1275
1276 b0=b2;
1277 b1=b3;
1278 b2=b4;
1279 b3=b5;
1280 }
1281 }
1282
1283 void ff_spatial_idwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1284 int level;
1285
1286 for(level=decomposition_count-1; level>=0; level--){
1287 switch(type){
1288 case 0: spatial_compose97i(buffer, width>>level, height>>level, stride<<level); break;
1289 case 1: spatial_compose53i(buffer, width>>level, height>>level, stride<<level); break;
1290 case 2: spatial_composeX (buffer, width>>level, height>>level, stride<<level); break;
1291 }
1292 }
1293 }
1294
1295 static int encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1296 const int w= b->width;
1297 const int h= b->height;
1298 int x, y;
1299
1300 if(1){
1301 int run=0;
1302 int runs[w*h];
1303 int run_index=0;
1304
1305 for(y=0; y<h; y++){
1306 for(x=0; x<w; x++){
1307 int v, p=0;
1308 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1309 v= src[x + y*stride];
1310
1311 if(y){
1312 t= src[x + (y-1)*stride];
1313 if(x){
1314 lt= src[x - 1 + (y-1)*stride];
1315 }
1316 if(x + 1 < w){
1317 rt= src[x + 1 + (y-1)*stride];
1318 }
1319 }
1320 if(x){
1321 l= src[x - 1 + y*stride];
1322 /*if(x > 1){
1323 if(orientation==1) ll= src[y + (x-2)*stride];
1324 else ll= src[x - 2 + y*stride];
1325 }*/
1326 }
1327 if(parent){
1328 int px= x>>1;
1329 int py= y>>1;
1330 if(px<b->parent->width && py<b->parent->height)
1331 p= parent[px + py*2*stride];
1332 }
1333 if(!(/*ll|*/l|lt|t|rt|p)){
1334 if(v){
1335 runs[run_index++]= run;
1336 run=0;
1337 }else{
1338 run++;
1339 }
1340 }
1341 }
1342 }
1343 runs[run_index++]= run;
1344 run_index=0;
1345 run= runs[run_index++];
1346
1347 put_symbol2(&s->c, b->state[1], run, 3);
1348
1349 for(y=0; y<h; y++){
1350 if(&s->c.bytestream_end - &s->c.bytestream < w*40){
1351 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1352 return -1;
1353 }
1354 for(x=0; x<w; x++){
1355 int v, p=0;
1356 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1357 v= src[x + y*stride];
1358
1359 if(y){
1360 t= src[x + (y-1)*stride];
1361 if(x){
1362 lt= src[x - 1 + (y-1)*stride];
1363 }
1364 if(x + 1 < w){
1365 rt= src[x + 1 + (y-1)*stride];
1366 }
1367 }
1368 if(x){
1369 l= src[x - 1 + y*stride];
1370 /*if(x > 1){
1371 if(orientation==1) ll= src[y + (x-2)*stride];
1372 else ll= src[x - 2 + y*stride];
1373 }*/
1374 }
1375 if(parent){
1376 int px= x>>1;
1377 int py= y>>1;
1378 if(px<b->parent->width && py<b->parent->height)
1379 p= parent[px + py*2*stride];
1380 }
1381 if(/*ll|*/l|lt|t|rt|p){
1382 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1383
1384 put_rac(&s->c, &b->state[0][context], !!v);
1385 }else{
1386 if(!run){
1387 run= runs[run_index++];
1388
1389 put_symbol2(&s->c, b->state[1], run, 3);
1390 assert(v);
1391 }else{
1392 run--;
1393 assert(!v);
1394 }
1395 }
1396 if(v){
1397 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1398
1399 put_symbol2(&s->c, b->state[context + 2], ABS(v)-1, context-4);
1400 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3b[l&0xFF] + 3*quant3b[t&0xFF]], v<0);
1401 }
1402 }
1403 }
1404 }
1405 return 0;
1406 }
1407
1408 static int encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1409 // encode_subband_qtree(s, b, src, parent, stride, orientation);
1410 // encode_subband_z0run(s, b, src, parent, stride, orientation);
1411 return encode_subband_c0run(s, b, src, parent, stride, orientation);
1412 // encode_subband_dzr(s, b, src, parent, stride, orientation);
1413 }
1414
1415 static inline void decode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1416 const int w= b->width;
1417 const int h= b->height;
1418 int x,y;
1419 const int qlog= clip(s->qlog + b->qlog, 0, 128);
1420 int qmul= qexp[qlog&7]<<(qlog>>3);
1421 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1422
1423 START_TIMER
1424
1425 if(b->buf == s->spatial_dwt_buffer || s->qlog == LOSSLESS_QLOG){
1426 qadd= 0;
1427 qmul= 1<<QEXPSHIFT;
1428 }
1429
1430 if(1){
1431 int run;
1432 int index=0;
1433 int prev_index=-1;
1434 int prev2_index=0;
1435 int parent_index= 0;
1436 int prev_parent_index= 0;
1437
1438 for(y=0; y<b->height; y++)
1439 memset(&src[y*stride], 0, b->width*sizeof(DWTELEM));
1440
1441 run= get_symbol2(&s->c, b->state[1], 3);
1442 for(y=0; y<h; y++){
1443 int v=0;
1444 int lt=0, t=0, rt=0;
1445
1446 if(y && b->x[prev_index] == 0){
1447 rt= b->coeff[prev_index];
1448 }
1449 for(x=0; x<w; x++){
1450 int p=0;
1451 const int l= v;
1452
1453 lt= t; t= rt;
1454
1455 if(y){
1456 if(b->x[prev_index] <= x)
1457 prev_index++;
1458 if(b->x[prev_index] == x + 1)
1459 rt= b->coeff[prev_index];
1460 else
1461 rt=0;
1462 }
1463 if(parent){
1464 if(x>>1 > b->parent->x[parent_index]){
1465 parent_index++;
1466 }
1467 if(x>>1 == b->parent->x[parent_index]){
1468 p= b->parent->coeff[parent_index];
1469 }
1470 }
1471 if(/*ll|*/l|lt|t|rt|p){
1472 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1473
1474 v=get_rac(&s->c, &b->state[0][context]);
1475 }else{
1476 if(!run){
1477 run= get_symbol2(&s->c, b->state[1], 3);
1478 v=1;
1479 }else{
1480 run--;
1481 v=0;
1482
1483 if(y && parent){
1484 int max_run;
1485
1486 max_run= FFMIN(run, b->x[prev_index] - x - 2);
1487 max_run= FFMIN(max_run, 2*b->parent->x[parent_index] - x - 1);
1488 x+= max_run;
1489 run-= max_run;
1490 }
1491 }
1492 }
1493 if(v){
1494 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1495 v= get_symbol2(&s->c, b->state[context + 2], context-4) + 1;
1496 if(get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3b[l&0xFF] + 3*quant3b[t&0xFF]])){
1497 src[x + y*stride]=-(( v*qmul + qadd)>>(QEXPSHIFT));
1498 v= -v;
1499 }else{
1500 src[x + y*stride]= (( v*qmul + qadd)>>(QEXPSHIFT));
1501 }
1502 b->x[index]=x; //FIXME interleave x/coeff
1503 b->coeff[index++]= v;
1504 }
1505 }
1506 b->x[index++]= w+1; //end marker
1507 prev_index= prev2_index;
1508 prev2_index= index;
1509
1510 if(parent){
1511 while(b->parent->x[parent_index] != b->parent->width+1)
1512 parent_index++;
1513 parent_index++;
1514 if(y&1){
1515 prev_parent_index= parent_index;
1516 }else{
1517 parent_index= prev_parent_index;
1518 }
1519 }
1520 }
1521 b->x[index++]= w+1; //end marker
1522 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
1523 STOP_TIMER("decode_subband")
1524 }
1525
1526 return;
1527 }
1528 }
1529
1530 static void reset_contexts(SnowContext *s){
1531 int plane_index, level, orientation;
1532
1533 for(plane_index=0; plane_index<3; plane_index++){
1534 for(level=0; level<s->spatial_decomposition_count; level++){
1535 for(orientation=level ? 1:0; orientation<4; orientation++){
1536 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
1537 }
1538 }
1539 }
1540 memset(s->header_state, MID_STATE, sizeof(s->header_state));
1541 memset(s->block_state, MID_STATE, sizeof(s->block_state));
1542 }
1543
1544 static int alloc_blocks(SnowContext *s){
1545 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1546 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1547
1548 s->b_width = w;
1549 s->b_height= h;
1550
1551 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
1552 return 0;
1553 }
1554
1555 static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1556 uint8_t *bytestream= d->bytestream;
1557 uint8_t *bytestream_start= d->bytestream_start;
1558 *d= *s;
1559 d->bytestream= bytestream;
1560 d->bytestream_start= bytestream_start;
1561 }
1562
1563 //near copy & paste from dsputil, FIXME
1564 static int pix_sum(uint8_t * pix, int line_size, int w)
1565 {
1566 int s, i, j;
1567
1568 s = 0;
1569 for (i = 0; i < w; i++) {
1570 for (j = 0; j < w; j++) {
1571 s += pix[0];
1572 pix ++;
1573 }
1574 pix += line_size - w;
1575 }
1576 return s;
1577 }
1578
1579 //near copy & paste from dsputil, FIXME
1580 static int pix_norm1(uint8_t * pix, int line_size, int w)
1581 {
1582 int s, i, j;
1583 uint32_t *sq = squareTbl + 256;
1584
1585 s = 0;
1586 for (i = 0; i < w; i++) {
1587 for (j = 0; j < w; j ++) {
1588 s += sq[pix[0]];
1589 pix ++;
1590 }
1591 pix += line_size - w;
1592 }
1593 return s;
1594 }
1595
1596 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int type){
1597 const int w= s->b_width << s->block_max_depth;
1598 const int rem_depth= s->block_max_depth - level;
1599 const int index= (x + y*w) << rem_depth;
1600 const int block_w= 1<<rem_depth;
1601 BlockNode block;
1602 int i,j;
1603
1604 block.color[0]= l;
1605 block.color[1]= cb;
1606 block.color[2]= cr;
1607 block.mx= mx;
1608 block.my= my;
1609 block.type= type;
1610 block.level= level;
1611
1612 for(j=0; j<block_w; j++){
1613 for(i=0; i<block_w; i++){
1614 s->block[index + i + j*w]= block;
1615 }
1616 }
1617 }
1618
1619 static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
1620 const int offset[3]= {
1621 y*c-> stride + x,
1622 ((y*c->uvstride + x)>>1),
1623 ((y*c->uvstride + x)>>1),
1624 };
1625 int i;
1626 for(i=0; i<3; i++){
1627 c->src[0][i]= src [i];
1628 c->ref[0][i]= ref [i] + offset[i];
1629 }
1630 assert(!ref_index);
1631 }
1632
1633 //FIXME copy&paste
1634 #define P_LEFT P[1]
1635 #define P_TOP P[2]
1636 #define P_TOPRIGHT P[3]
1637 #define P_MEDIAN P[4]
1638 #define P_MV1 P[9]
1639 #define FLAG_QPEL 1 //must be 1
1640
1641 static int encode_q_branch(SnowContext *s, int level, int x, int y){
1642 uint8_t p_buffer[1024];
1643 uint8_t i_buffer[1024];
1644 uint8_t p_state[sizeof(s->block_state)];
1645 uint8_t i_state[sizeof(s->block_state)];
1646 RangeCoder pc, ic;
1647 uint8_t *pbbak= s->c.bytestream;
1648 uint8_t *pbbak_start= s->c.bytestream_start;
1649 int score, score2, iscore, i_len, p_len, block_s, sum;
1650 const int w= s->b_width << s->block_max_depth;
1651 const int h= s->b_height << s->block_max_depth;
1652 const int rem_depth= s->block_max_depth - level;
1653 const int index= (x + y*w) << rem_depth;
1654 const int block_w= 1<<(LOG2_MB_SIZE - level);
1655 static BlockNode null_block= { //FIXME add border maybe
1656 .color= {128,128,128},
1657 .mx= 0,
1658 .my= 0,
1659 .type= 0,
1660 .level= 0,
1661 };
1662 int trx= (x+1)<<rem_depth;
1663 int try= (y+1)<<rem_depth;
1664 BlockNode *left = x ? &s->block[index-1] : &null_block;
1665 BlockNode *top = y ? &s->block[index-w] : &null_block;
1666 BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
1667 BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
1668 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
1669 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
1670 int pl = left->color[0];
1671 int pcb= left->color[1];
1672 int pcr= left->color[2];
1673 int pmx= mid_pred(left->mx, top->mx, tr->mx);
1674 int pmy= mid_pred(left->my, top->my, tr->my);
1675 int mx=0, my=0;
1676 int l,cr,cb, i;
1677 const int stride= s->current_picture.linesize[0];
1678 const int uvstride= s->current_picture.linesize[1];
1679 const int instride= s->input_picture.linesize[0];
1680 const int uvinstride= s->input_picture.linesize[1];
1681 uint8_t *new_l = s->input_picture.data[0] + (x + y* instride)*block_w;
1682 uint8_t *new_cb= s->input_picture.data[1] + (x + y*uvinstride)*block_w/2;
1683 uint8_t *new_cr= s->input_picture.data[2] + (x + y*uvinstride)*block_w/2;
1684 uint8_t current_mb[3][stride*block_w];
1685 uint8_t *current_data[3]= {&current_mb[0][0], &current_mb[1][0], &current_mb[2][0]};
1686 int P[10][2];
1687 int16_t last_mv[3][2];
1688 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
1689 const int shift= 1+qpel;
1690 MotionEstContext *c= &s->m.me;
1691 int mx_context= av_log2(2*ABS(left->mx - top->mx));
1692 int my_context= av_log2(2*ABS(left->my - top->my));
1693 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1694
1695 assert(sizeof(s->block_state) >= 256);
1696 if(s->keyframe){
1697 set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, BLOCK_INTRA);
1698 return 0;
1699 }
1700
1701 //FIXME optimize
1702 for(i=0; i<block_w; i++)
1703 memcpy(&current_mb[0][0] + stride*i, new_l + instride*i, block_w);
1704 for(i=0; i<block_w>>1; i++)
1705 memcpy(&current_mb[1][0] + uvstride*i, new_cb + uvinstride*i, block_w>>1);
1706 for(i=0; i<block_w>>1; i++)
1707 memcpy(&current_mb[2][0] + uvstride*i, new_cr + uvinstride*i, block_w>>1);
1708
1709 // clip predictors / edge ?
1710
1711 P_LEFT[0]= left->mx;
1712 P_LEFT[1]= left->my;
1713 P_TOP [0]= top->mx;
1714 P_TOP [1]= top->my;
1715 P_TOPRIGHT[0]= tr->mx;
1716 P_TOPRIGHT[1]= tr->my;
1717
1718 last_mv[0][0]= s->block[index].mx;
1719 last_mv[0][1]= s->block[index].my;
1720 last_mv[1][0]= right->mx;
1721 last_mv[1][1]= right->my;
1722 last_mv[2][0]= bottom->mx;
1723 last_mv[2][1]= bottom->my;
1724
1725 s->m.mb_stride=2;
1726 s->m.mb_x=
1727 s->m.mb_y= 0;
1728 s->m.me.skip= 0;
1729
1730 init_ref(c, current_data, s->last_picture.data, NULL, block_w*x, block_w*y, 0);
1731
1732 assert(s->m.me. stride == stride);
1733 assert(s->m.me.uvstride == uvstride);
1734
1735 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
1736 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
1737 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
1738 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
1739
1740 c->xmin = - x*block_w - 16+2;
1741 c->ymin = - y*block_w - 16+2;
1742 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
1743 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
1744
1745 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
1746 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
1747 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
1748 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
1749 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
1750 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
1751 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
1752
1753 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
1754 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
1755
1756 if (!y) {
1757 c->pred_x= P_LEFT[0];
1758 c->pred_y= P_LEFT[1];
1759 } else {
1760 c->pred_x = P_MEDIAN[0];
1761 c->pred_y = P_MEDIAN[1];
1762 }
1763
1764 score= ff_epzs_motion_search(&s->m, &mx, &my, P, 0, /*ref_index*/ 0, last_mv,
1765 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
1766
1767 assert(mx >= c->xmin);
1768 assert(mx <= c->xmax);
1769 assert(my >= c->ymin);
1770 assert(my <= c->ymax);
1771
1772 score= s->m.me.sub_motion_search(&s->m, &mx, &my, score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
1773 score= ff_get_mb_score(&s->m, mx, my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
1774 //FIXME if mb_cmp != SSE then intra cant be compared currently and mb_penalty vs. lambda2
1775
1776 // subpel search
1777 pc= s->c;
1778 pc.bytestream_start=
1779 pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
1780 memcpy(p_state, s->block_state, sizeof(s->block_state));
1781
1782 if(level!=s->block_max_depth)
1783 put_rac(&pc, &p_state[4 + s_context], 1);
1784 put_rac(&pc, &p_state[1 + left->type + top->type], 0);
1785 put_symbol(&pc, &p_state[128 + 32*mx_context], mx - pmx, 1);
1786 put_symbol(&pc, &p_state[128 + 32*my_context], my - pmy, 1);
1787 p_len= pc.bytestream - pc.bytestream_start;
1788 score += (s->lambda2*(p_len*8
1789 + (pc.outstanding_count - s->c.outstanding_count)*8
1790 + (-av_log2(pc.range) + av_log2(s->c.range))
1791 ))>>FF_LAMBDA_SHIFT;
1792
1793 block_s= block_w*block_w;
1794 sum = pix_sum(&current_mb[0][0], stride, block_w);
1795 l= (sum + block_s/2)/block_s;
1796 iscore = pix_norm1(&current_mb[0][0], stride, block_w) - 2*l*sum + l*l*block_s;
1797
1798 block_s= block_w*block_w>>2;
1799 sum = pix_sum(&current_mb[1][0], uvstride, block_w>>1);
1800 cb= (sum + block_s/2)/block_s;
1801 // iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
1802 sum = pix_sum(&current_mb[2][0], uvstride, block_w>>1);
1803 cr= (sum + block_s/2)/block_s;
1804 // iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
1805
1806 ic= s->c;
1807 ic.bytestream_start=
1808 ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
1809 memcpy(i_state, s->block_state, sizeof(s->block_state));
1810 if(level!=s->block_max_depth)
1811 put_rac(&ic, &i_state[4 + s_context], 1);
1812 put_rac(&ic, &i_state[1 + left->type + top->type], 1);
1813 put_symbol(&ic, &i_state[32], l-pl , 1);
1814 put_symbol(&ic, &i_state[64], cb-pcb, 1);
1815 put_symbol(&ic, &i_state[96], cr-pcr, 1);
1816 i_len= ic.bytestream - ic.bytestream_start;
1817 iscore += (s->lambda2*(i_len*8
1818 + (ic.outstanding_count - s->c.outstanding_count)*8
1819 + (-av_log2(ic.range) + av_log2(s->c.range))
1820 ))>>FF_LAMBDA_SHIFT;
1821
1822 // assert(score==256*256*256*64-1);
1823 assert(iscore < 255*255*256 + s->lambda2*10);
1824 assert(iscore >= 0);
1825 assert(l>=0 && l<=255);
1826 assert(pl>=0 && pl<=255);
1827
1828 if(level==0){
1829 int varc= iscore >> 8;
1830 int vard= score >> 8;
1831 if (vard <= 64 || vard < varc)
1832 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
1833 else
1834 c->scene_change_score+= s->m.qscale;
1835 }
1836
1837 if(level!=s->block_max_depth){
1838 put_rac(&s->c, &s->block_state[4 + s_context], 0);
1839 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
1840 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
1841 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
1842 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
1843 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
1844
1845 if(score2 < score && score2 < iscore)
1846 return score2;
1847 }
1848
1849 if(iscore < score){
1850 memcpy(pbbak, i_buffer, i_len);
1851 s->c= ic;
1852 s->c.bytestream_start= pbbak_start;
1853 s->c.bytestream= pbbak + i_len;
1854 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, BLOCK_INTRA);
1855 memcpy(s->block_state, i_state, sizeof(s->block_state));
1856 return iscore;
1857 }else{
1858 memcpy(pbbak, p_buffer, p_len);
1859 s->c= pc;
1860 s->c.bytestream_start= pbbak_start;
1861 s->c.bytestream= pbbak + p_len;
1862 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, 0);
1863 memcpy(s->block_state, p_state, sizeof(s->block_state));
1864 return score;
1865 }
1866 }
1867
1868 static void decode_q_branch(SnowContext *s, int level, int x, int y){
1869 const int w= s->b_width << s->block_max_depth;
1870 const int rem_depth= s->block_max_depth - level;
1871 const int index= (x + y*w) << rem_depth;
1872 static BlockNode null_block= { //FIXME add border maybe
1873 .color= {128,128,128},
1874 .mx= 0,
1875 .my= 0,
1876 .type= 0,
1877 .level= 0,
1878 };
1879 int trx= (x+1)<<rem_depth;
1880 BlockNode *left = x ? &s->block[index-1] : &null_block;
1881 BlockNode *top = y ? &s->block[index-w] : &null_block;
1882 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
1883 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
1884 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1885
1886 if(s->keyframe){
1887 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, BLOCK_INTRA);
1888 return;
1889 }
1890
1891 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
1892 int type;
1893 int l = left->color[0];
1894 int cb= left->color[1];
1895 int cr= left->color[2];
1896 int mx= mid_pred(left->mx, top->mx, tr->mx);
1897 int my= mid_pred(left->my, top->my, tr->my);
1898 int mx_context= av_log2(2*ABS(left->mx - top->mx)) + 0*av_log2(2*ABS(tr->mx - top->mx));
1899 int my_context= av_log2(2*ABS(left->my - top->my)) + 0*av_log2(2*ABS(tr->my - top->my));
1900
1901 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
1902
1903 if(type){
1904 l += get_symbol(&s->c, &s->block_state[32], 1);
1905 cb+= get_symbol(&s->c, &s->block_state[64], 1);
1906 cr+= get_symbol(&s->c, &s->block_state[96], 1);
1907 }else{
1908 mx+= get_symbol(&s->c, &s->block_state[128 + 32*mx_context], 1);
1909 my+= get_symbol(&s->c, &s->block_state[128 + 32*my_context], 1);
1910 }
1911 set_blocks(s, level, x, y, l, cb, cr, mx, my, type);
1912 }else{
1913 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
1914 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
1915 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
1916 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
1917 }
1918 }
1919
1920 static void encode_blocks(SnowContext *s){
1921 int x, y;
1922 int w= s->b_width;
1923 int h= s->b_height;
1924
1925 for(y=0; y<h; y++){
1926 if(&s->c.bytestream_end - &s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
1927 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1928 return;
1929 }
1930 for(x=0; x<w; x++){
1931 encode_q_branch(s, 0, x, y);
1932 }
1933 }
1934 }
1935
1936 static void decode_blocks(SnowContext *s){
1937 int x, y;
1938 int w= s->b_width;
1939 int h= s->b_height;
1940
1941 for(y=0; y<h; y++){
1942 for(x=0; x<w; x++){
1943 decode_q_branch(s, 0, x, y);
1944 }
1945 }
1946 }
1947
1948 static void mc_block(uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
1949 int x, y;
1950 START_TIMER
1951 for(y=0; y < b_h+5; y++){
1952 for(x=0; x < b_w; x++){
1953 int a0= src[x ];
1954 int a1= src[x + 1];
1955 int a2= src[x + 2];
1956 int a3= src[x + 3];
1957 int a4= src[x + 4];
1958 int a5= src[x + 5];
1959 // int am= 9*(a1+a2) - (a0+a3);
1960 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
1961 // int am= 18*(a2+a3) - 2*(a1+a4);
1962 // int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
1963 // int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;
1964
1965 // if(b_w==16) am= 8*(a1+a2);
1966
1967 if(dx<8) tmp[x]= (32*a2*( 8-dx) + am* dx + 128)>>8;
1968 else tmp[x]= ( am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
1969
1970 /* if (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) + aL* dx + 32)>>6;
1971 else if(dx< 8) tmp[x + y*stride]= ( aL*( 8-dx) + am*(dx- 4) + 32)>>6;
1972 else if(dx<12) tmp[x + y*stride]= ( am*(12-dx) + aR*(dx- 8) + 32)>>6;
1973 else tmp[x + y*stride]= ( aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/
1974 }
1975 tmp += stride;
1976 src += stride;
1977 }
1978 tmp -= (b_h+5)*stride;
1979
1980 for(y=0; y < b_h; y++){
1981 for(x=0; x < b_w; x++){
1982 int a0= tmp[x + 0*stride];
1983 int a1= tmp[x + 1*stride];
1984 int a2= tmp[x + 2*stride];
1985 int a3= tmp[x + 3*stride];
1986 int a4= tmp[x + 4*stride];
1987 int a5= tmp[x + 5*stride];
1988 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
1989 // int am= 18*(a2+a3) - 2*(a1+a4);
1990 /* int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
1991 int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/
1992
1993 // if(b_w==16) am= 8*(a1+a2);
1994
1995 if(dy<8) dst[x]= (32*a2*( 8-dy) + am* dy + 128)>>8;
1996 else dst[x]= ( am*(16-dy) + 32*a3*(dy-8) + 128)>>8;
1997
1998 /* if (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) + aL* dy + 32)>>6;
1999 else if(dy< 8) tmp[x + y*stride]= ( aL*( 8-dy) + am*(dy- 4) + 32)>>6;
2000 else if(dy<12) tmp[x + y*stride]= ( am*(12-dy) + aR*(dy- 8) + 32)>>6;
2001 else tmp[x + y*stride]= ( aR*(16-dy) + 16*a2*(dy-12) + 32)>>6;*/
2002 }
2003 dst += stride;
2004 tmp += stride;
2005 }
2006 STOP_TIMER("mc_block")
2007 }
2008
2009 #define mca(dx,dy,b_w)\
2010 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, uint8_t *src, int stride, int h){\
2011 uint8_t tmp[stride*(b_w+5)];\
2012 assert(h==b_w);\
2013 mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\
2014 }
2015
2016 mca( 0, 0,16)
2017 mca( 8, 0,16)
2018 mca( 0, 8,16)
2019 mca( 8, 8,16)
2020 mca( 0, 0,8)
2021 mca( 8, 0,8)
2022 mca( 0, 8,8)
2023 mca( 8, 8,8)
2024
2025 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
2026 if(block->type){
2027 int x, y;
2028 const int color= block->color[plane_index];
2029 for(y=0; y < b_h; y++){
2030 for(x=0; x < b_w; x++){
2031 dst[x + y*stride]= color;
2032 }
2033 }
2034 }else{
2035 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
2036 int mx= block->mx*scale;
2037 int my= block->my*scale;
2038 const int dx= mx&15;
2039 const int dy= my&15;
2040 sx += (mx>>4) - 2;
2041 sy += (my>>4) - 2;
2042 src += sx + sy*stride;
2043 if( (unsigned)sx >= w - b_w - 4
2044 || (unsigned)sy >= h - b_h - 4){
2045 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+5, b_h+5, sx, sy, w, h);
2046 src= tmp + MB_SIZE;
2047 }
2048 if((dx&3) || (dy&3) || b_w!=b_h || (b_w!=4 && b_w!=8 && b_w!=16))
2049 mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy);
2050 else
2051 s->dsp.put_h264_qpel_pixels_tab[2-(b_w>>3)][dy+(dx>>2)](dst,src + 2 + 2*stride,stride);
2052 }
2053 }
2054
2055 static always_inline int same_block(BlockNode *a, BlockNode *b){
2056 return !((a->mx - b->mx) | (a->my - b->my) | a->type | b->type);
2057 }
2058
2059 //FIXME name clenup (b_w, block_w, b_width stuff)
2060 static always_inline void add_yblock(SnowContext *s, DWTELEM *dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){
2061 const int b_width = s->b_width << s->block_max_depth;
2062 const int b_height= s->b_height << s->block_max_depth;
2063 const int b_stride= b_width;
2064 BlockNode *lt= &s->block[b_x + b_y*b_stride];
2065 BlockNode *rt= lt+1;
2066 BlockNode *lb= lt+b_stride;
2067 BlockNode *rb= lb+1;
2068 uint8_t *block[4];
2069 uint8_t tmp[src_stride*(b_h+5)]; //FIXME align
2070 int x,y;
2071
2072 if(b_x<0){
2073 lt= rt;
2074 lb= rb;
2075 }else if(b_x + 1 >= b_width){
2076 rt= lt;
2077 rb= lb;
2078 }
2079 if(b_y<0){
<