cleanup
[libav.git] / libavcodec / snow.c
1 /*
2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18
19 #include "avcodec.h"
20 #include "common.h"
21 #include "dsputil.h"
22 #include "cabac.h"
23
24 #include "mpegvideo.h"
25
26 #undef NDEBUG
27 #include <assert.h>
28
/* Upper bound on wavelet decomposition levels; sizes Plane.band[][4]. */
#define MAX_DECOMPOSITIONS 8
/* Upper bound on picture planes; sizes SnowContext.plane[]. */
#define MAX_PLANES 4
/* Element type of the wavelet coefficient buffers (SubBand.buf, spatial_dwt_buffer, ...). */
#define DWTELEM int
/* NOTE(review): presumably the number of quantizer steps per octave (qexp[] has 8 entries) -- confirm. */
#define QROOT 8
/* NOTE(review): by its name, the qlog value that selects lossless coding; its use is not visible here. */
#define LOSSLESS_QLOG -128
34
35 static const int8_t quant3[256]={
36 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
37 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
43 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
44 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
45 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
46 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
47 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
48 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
49 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
50 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
51 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
52 };
53 static const int8_t quant3b[256]={
54 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
55 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
57 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
60 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
61 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
62 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
63 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
64 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
65 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
66 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
68 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
69 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
70 };
71 static const int8_t quant5[256]={
72 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
73 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
74 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
75 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
76 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
77 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
78 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
79 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
80 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
81 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
82 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
83 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
84 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
85 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
86 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
87 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
88 };
89 static const int8_t quant7[256]={
90 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
91 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
92 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
93 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
94 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
95 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
96 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
97 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
98 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
99 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
100 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
101 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
102 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
103 -3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
104 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
105 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
106 };
107 static const int8_t quant9[256]={
108 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
109 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
110 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
111 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
112 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
113 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
114 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
115 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
116 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
117 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
118 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
119 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
120 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
121 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
122 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
123 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
124 };
125 static const int8_t quant11[256]={
126 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
127 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
128 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
129 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
130 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
131 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
132 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
133 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
134 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
135 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
136 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
137 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
138 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
139 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
140 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
141 -4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
142 };
143 static const int8_t quant13[256]={
144 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
145 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
146 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
147 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
148 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
149 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
150 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
151 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
152 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
153 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
154 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
155 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
156 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
157 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
158 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
159 -4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
160 };
161
162 #define OBMC_MAX 64
163 #if 0 //64*cubic
164 static const uint8_t obmc32[1024]={
165 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
166 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
167 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
168 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
169 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
170 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
171 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
172 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
173 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
174 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
175 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
176 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
177 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
178 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
179 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
180 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
181 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
182 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
183 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
184 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
185 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
186 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
187 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
188 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
189 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
190 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
191 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
192 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
193 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
194 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
195 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
196 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
197 //error:0.000022
198 };
199 static const uint8_t obmc16[256]={
200 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
201 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
202 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
203 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
204 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
205 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
206 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
207 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
208 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
209 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
210 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
211 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
212 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
213 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
214 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
215 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
216 //error:0.000033
217 };
218 #elif 1 // 64*linear
219 static const uint8_t obmc32[1024]={
220 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
221 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
222 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
223 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
224 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
225 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
226 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
227 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
228 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
229 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
230 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
231 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
232 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
233 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
234 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
235 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
236 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
237 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
238 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
239 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
240 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
241 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
242 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
243 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
244 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
245 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
246 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
247 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
248 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
249 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
250 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
251 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
252 //error:0.000020
253 };
254 static const uint8_t obmc16[256]={
255 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
256 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
257 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
258 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
259 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
260 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
261 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
262 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
263 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
264 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
265 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
266 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
267 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
268 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
269 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
270 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
271 //error:0.000015
272 };
273 #else //64*cos
274 static const uint8_t obmc32[1024]={
275 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
276 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
277 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
278 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
279 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
280 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
281 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
282 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
283 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
284 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
285 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
286 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
287 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
288 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
289 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
290 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
291 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
292 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
293 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
294 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
295 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
296 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
297 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
298 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
299 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
300 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
301 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
302 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
303 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
304 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
305 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
306 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
307 //error:0.000022
308 };
309 static const uint8_t obmc16[256]={
310 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
311 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
312 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
313 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
314 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
315 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
316 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
317 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
318 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
319 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
320 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
321 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
322 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
323 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
324 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
325 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
326 //error:0.000022
327 };
328 #endif
329
/*
 * 8x8 window, linear, scaled by 64.
 * Separable: obmc8[8*y + x] == w[y]*w[x] with w = {1,3,5,7,7,5,3,1}.
 */
static const uint8_t obmc8[64]={
    /* w[y]=1 */  1,  3,  5,  7,  7,  5,  3,  1,
    /* w[y]=3 */  3,  9, 15, 21, 21, 15,  9,  3,
    /* w[y]=5 */  5, 15, 25, 35, 35, 25, 15,  5,
    /* w[y]=7 */  7, 21, 35, 49, 49, 35, 21,  7,
    /* w[y]=7 */  7, 21, 35, 49, 49, 35, 21,  7,
    /* w[y]=5 */  5, 15, 25, 35, 35, 25, 15,  5,
    /* w[y]=3 */  3,  9, 15, 21, 21, 15,  9,  3,
    /* w[y]=1 */  1,  3,  5,  7,  7,  5,  3,  1,
    /* error:0.000000 */
};
342
/*
 * 4x4 window, linear, scaled by 64.
 * Separable: obmc4[4*y + x] == w[y]*w[x] with w = {2,6,6,2}.
 */
static const uint8_t obmc4[16]={
    /* w[y]=2 */  4, 12, 12,  4,
    /* w[y]=6 */ 12, 36, 36, 12,
    /* w[y]=6 */ 12, 36, 36, 12,
    /* w[y]=2 */  4, 12, 12,  4,
    /* error:0.000000 */
};
351
352 static const uint8_t *obmc_tab[4]={
353 obmc32, obmc16, obmc8, obmc4
354 };
355
/* Flag value stored in BlockNode.type. */
#define BLOCK_INTRA 1
/* Disabled flag values kept for reference: TYPE_SPLIT 1, TYPE_NOCOLOR 4 */

/**
 * Per-block record.
 * NOTE(review): mx/my are presumably the motion vector components and
 * color[] one value per plane -- confirm against the users of this struct.
 */
typedef struct BlockNode{
    int16_t mx, my;
    uint8_t color[3];
    uint8_t type;       ///< holds BLOCK_INTRA
    uint8_t level;      ///< FIXME merge into type?
}BlockNode;
366
/* log2 of MB_SIZE (MB presumably stands for macroblock). */
#define LOG2_MB_SIZE 4
/* 1<<4 = 16 */
#define MB_SIZE (1<<LOG2_MB_SIZE)
369
370 typedef struct SubBand{
371 int level;
372 int stride;
373 int width;
374 int height;
375 int qlog; ///< log(qscale)/log[2^(1/6)]
376 DWTELEM *buf;
377 int16_t *x;
378 DWTELEM *coeff;
379 struct SubBand *parent;
380 uint8_t state[/*7*2*/ 7 + 512][32];
381 }SubBand;
382
383 typedef struct Plane{
384 int width;
385 int height;
386 SubBand band[MAX_DECOMPOSITIONS][4];
387 }Plane;
388
389 typedef struct SnowContext{
390 // MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
391
392 AVCodecContext *avctx;
393 CABACContext c;
394 DSPContext dsp;
395 AVFrame input_picture;
396 AVFrame current_picture;
397 AVFrame last_picture;
398 AVFrame mconly_picture;
399 // uint8_t q_context[16];
400 uint8_t header_state[32];
401 uint8_t block_state[128 + 32*128];
402 int keyframe;
403 int version;
404 int spatial_decomposition_type;
405 int temporal_decomposition_type;
406 int spatial_decomposition_count;
407 int temporal_decomposition_count;
408 DWTELEM *spatial_dwt_buffer;
409 DWTELEM *pred_buffer;
410 int colorspace_type;
411 int chroma_h_shift;
412 int chroma_v_shift;
413 int spatial_scalability;
414 int qlog;
415 int lambda;
416 int lambda2;
417 int mv_scale;
418 int qbias;
419 #define QBIAS_SHIFT 3
420 int b_width;
421 int b_height;
422 int block_max_depth;
423 Plane plane[MAX_PLANES];
424 BlockNode *block;
425
426 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
427 }SnowContext;
428
/* Fixed-point scale of qexp[]: the entries are multiples of 1/(1<<QEXPSHIFT). */
#define QEXPSHIFT 7 //FIXME try to change this to 0

/*
 * qexp[i] == round(2^(i/8) * (1<<QEXPSHIFT)), one octave in 8 steps.
 * The same curve at smaller shifts, kept for reference:
 *   shift 6:  64,  70,  76,  83,  91,  99, 108, 117
 *   shift 5:  32,  35,  38,  41,  45,  49,  54,  59
 *   shift 4:  16,  17,  19,  21,  23,  25,  27,  29
 *   shift 3:   8,   9,  10,  10,  11,  12,  13,  15
 */
static const uint8_t qexp[8]={
    128,
    140,
    152,
    166,
    181,
    197,
    215,
    235
};
437
/**
 * Reflect an index once at the borders of the closed range [0, m].
 * @param v index, may lie outside the range
 * @param m last valid index
 * @return -v for v<0, 2*m-v for v>m, otherwise v unchanged
 *         (only a single reflection is applied)
 */
static inline int mirror(int v, int m){
    if(v < 0)
        return -v;
    return v > m ? 2*m - v : v;
}
443
444 static inline void put_symbol(CABACContext *c, uint8_t *state, int v, int is_signed){
445 int i;
446
447 if(v){
448 const int a= ABS(v);
449 const int e= av_log2(a);
450 #if 1
451 const int el= FFMIN(e, 10);
452 put_cabac(c, state+0, 0);
453
454 for(i=0; i<el; i++){
455 put_cabac(c, state+1+i, 1); //1..10
456 }
457 for(; i<e; i++){
458 put_cabac(c, state+1+9, 1); //1..10
459 }
460 put_cabac(c, state+1+FFMIN(i,9), 0);
461
462 for(i=e-1; i>=el; i--){
463 put_cabac(c, state+22+9, (a>>i)&1); //22..31
464 }
465 for(; i>=0; i--){
466 put_cabac(c, state+22+i, (a>>i)&1); //22..31
467 }
468
469 if(is_signed)
470 put_cabac(c, state+11 + el, v < 0); //11..21
471 #else
472
473 put_cabac(c, state+0, 0);
474 if(e<=9){
475 for(i=0; i<e; i++){
476 put_cabac(c, state+1+i, 1); //1..10
477 }
478 put_cabac(c, state+1+i, 0);
479
480 for(i=e-1; i>=0; i--){
481 put_cabac(c, state+22+i, (a>>i)&1); //22..31
482 }
483
484 if(is_signed)
485 put_cabac(c, state+11 + e, v < 0); //11..21
486 }else{
487 for(i=0; i<e; i++){
488 put_cabac(c, state+1+FFMIN(i,9), 1); //1..10
489 }
490 put_cabac(c, state+1+FFMIN(i,9), 0);
491
492 for(i=e-1; i>=0; i--){
493 put_cabac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
494 }
495
496 if(is_signed)
497 put_cabac(c, state+11 + FFMIN(e,10), v < 0); //11..21
498 }
499 #endif
500 }else{
501 put_cabac(c, state+0, 1);
502 }
503 }
504
505 static inline int get_symbol(CABACContext *c, uint8_t *state, int is_signed){
506 if(get_cabac(c, state+0))
507 return 0;
508 else{
509 int i, e, a, el;
510 //FIXME try to merge loops with FFMIN() maybe they are equally fast and they are surly cuter
511 for(e=0; e<10; e++){
512 if(get_cabac(c, state + 1 + e)==0) // 1..10
513 break;
514 }
515 el= e;
516
517 if(e==10){
518 while(get_cabac(c, state + 1 + 9)) //10
519 e++;
520 }
521 a= 1;
522 for(i=e-1; i>=el; i--){
523 a += a + get_cabac(c, state+22+9); //31
524 }
525 for(; i>=0; i--){
526 a += a + get_cabac(c, state+22+i); //22..31
527 }
528
529 if(is_signed && get_cabac(c, state+11 + el)) //11..21
530 return -a;
531 else
532 return a;
533 }
534 }
535
536 static inline void put_symbol2(CABACContext *c, uint8_t *state, int v, int log2){
537 int i;
538 int r= log2>=0 ? 1<<log2 : 1;
539
540 assert(v>=0);
541 assert(log2>=-4);
542
543 while(v >= r){
544 put_cabac(c, state+4+log2, 1);
545 v -= r;
546 log2++;
547 if(log2>0) r+=r;
548 }
549 put_cabac(c, state+4+log2, 0);
550
551 for(i=log2-1; i>=0; i--){
552 put_cabac(c, state+31-i, (v>>i)&1);
553 }
554 }
555
556 static inline int get_symbol2(CABACContext *c, uint8_t *state, int log2){
557 int i;
558 int r= log2>=0 ? 1<<log2 : 1;
559 int v=0;
560
561 assert(log2>=-4);
562
563 while(get_cabac(c, state+4+log2)){
564 v+= r;
565 log2++;
566 if(log2>0) r+=r;
567 }
568
569 for(i=log2-1; i>=0; i--){
570 v+= get_cabac(c, state+31-i)<<i;
571 }
572
573 return v;
574 }
575
576 static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
577 const int mirror_left= !highpass;
578 const int mirror_right= (width&1) ^ highpass;
579 const int w= (width>>1) - 1 + (highpass & width);
580 int i;
581
582 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
583 if(mirror_left){
584 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
585 dst += dst_step;
586 src += src_step;
587 }
588
589 for(i=0; i<w; i++){
590 dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse);
591 }
592
593 if(mirror_right){
594 dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse);
595 }
596 }
597
598 static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
599 const int mirror_left= !highpass;
600 const int mirror_right= (width&1) ^ highpass;
601 const int w= (width>>1) - 1 + (highpass & width);
602 int i;
603
604 if(mirror_left){
605 int r= 3*2*ref[0];
606 r += r>>4;
607 r += r>>8;
608 dst[0] = LIFT(src[0], ((r+add)>>shift), inverse);
609 dst += dst_step;
610 src += src_step;
611 }
612
613 for(i=0; i<w; i++){
614 int r= 3*(ref[i*ref_step] + ref[(i+1)*ref_step]);
615 r += r>>4;
616 r += r>>8;
617 dst[i*dst_step] = LIFT(src[i*src_step], ((r+add)>>shift), inverse);
618 }
619
620 if(mirror_right){
621 int r= 3*2*ref[w*ref_step];
622 r += r>>4;
623 r += r>>8;
624 dst[w*dst_step] = LIFT(src[w*src_step], ((r+add)>>shift), inverse);
625 }
626 }
627
628
/**
 * Apply one lifting step in place to every second sample of a row.
 *
 * For x = start, start+2, ... < width:
 *   dst[x] +=/-= (sum_i coeffs[i]*dst[x + 2*i - n + 1] + (1<<shift)/2) >> shift
 * Tap positions outside [0, width) are mirrored at the borders (around
 * sample 0 and sample width-1). The reflection is repeated until the index
 * is valid, so rows narrower than the filter support no longer read outside
 * dst. Reflection keeps the parity of the index, so with an even n the taps
 * never touch the samples being updated and the inverse step undoes the
 * forward step exactly. A single-sample row has no neighbour to mirror
 * onto; taps that fall outside it contribute nothing.
 *
 * @param dst     row to transform in place
 * @param width   number of samples in dst
 * @param coeffs  n tap weights, may be NULL when n is 0
 * @param n       number of taps (0 makes the step a no-op)
 * @param shift   right shift applied to the rounded weighted sum
 * @param start   index of the first updated sample (0 or 1)
 * @param inverse non-zero to subtract the prediction instead of adding it
 */
static void inplace_lift(int *dst, int width, int *coeffs, int n, int shift, int start, int inverse){
    const int round= (1<<shift)/2;
    int x, i;

    for(x=start; x<width; x+=2){
        int64_t sum=0;

        for(i=0; i<n; i++){
            int x2= x + 2*i - n + 1;

            if(x2<0 || x2>=width){
                if(width<2)
                    continue;   /* nothing to reflect onto */
                do{
                    if(x2<0) x2= -x2;
                    else     x2= 2*width-x2-2;
                }while(x2<0 || x2>=width);
            }
            sum += coeffs[i]*(int64_t)dst[x2];
        }
        if(inverse) dst[x] -= (sum + round)>>shift;
        else        dst[x] += (sum + round)>>shift;
    }
}
645
/**
 * Vertical counterpart of the in-place lifting step: update every second
 * row of a width x height image from the rows around it.
 *
 * For y = start, start+2, ... < height and every column x:
 *   dst[x + y*stride] +=/-=
 *       (sum_i coeffs[i]*dst[x + (y + 2*i - n + 1)*stride] + (1<<shift)/2) >> shift
 * Row positions outside [0, height) are mirrored at the borders (around
 * row 0 and row height-1). The reflection is repeated until the row index
 * is valid, so images with fewer rows than the filter support no longer
 * read outside dst. A single-row image has no neighbour to mirror onto;
 * taps that fall outside it contribute nothing.
 *
 * @param dst     image to transform in place
 * @param width   number of columns processed
 * @param height  number of rows
 * @param stride  distance between vertically adjacent samples, in elements
 * @param coeffs  n tap weights, may be NULL when n is 0
 * @param n       number of taps (0 makes the step a no-op)
 * @param shift   right shift applied to the rounded weighted sum
 * @param start   index of the first updated row (0 or 1)
 * @param inverse non-zero to subtract the prediction instead of adding it
 */
static void inplace_liftV(int *dst, int width, int height, int stride, int *coeffs, int n, int shift, int start, int inverse){
    const int round= (1<<shift)/2;
    int x, y, i;

    for(y=start; y<height; y+=2){
        for(x=0; x<width; x++){
            int64_t sum=0;

            for(i=0; i<n; i++){
                int y2= y + 2*i - n + 1;

                if(y2<0 || y2>=height){
                    if(height<2)
                        continue;   /* nothing to reflect onto */
                    do{
                        if(y2<0) y2= -y2;
                        else     y2= 2*height-y2-2;
                    }while(y2<0 || y2>=height);
                }
                sum += coeffs[i]*(int64_t)dst[x + y2*stride];
            }
            if(inverse) dst[x + y*stride] -= (sum + round)>>shift;
            else        dst[x + y*stride] += (sum + round)>>shift;
        }
    }
}
663
/*
 * Compile-time selection of the lifting filter used by the *X() transform
 * functions (horizontal_decomposeX(), horizontal_composeX(),
 * spatial_decomposeX(), spatial_composeX()).
 *
 * Each branch describes up to four lifting steps k = 1..4:
 *   Nk       number of taps (0 turns the step into a no-op)
 *   SHIFTk   right shift applied to the rounded weighted sum
 *   COEFFSk  tap weights as a compound literal, NULL when Nk is 0
 * LX0 / LX1 are the `start` offsets handed to inplace_lift()/inplace_liftV():
 * steps 1 and 3 use LX1, steps 2 and 4 use LX0.
 * SCALEX multiplies every sample before decomposition and divides it again
 * after composition.
 *
 * Only the first branch whose condition is non-zero is compiled. The leading
 * "#if 0" is skipped, so the "13/7 CRF" branch is the active one and all the
 * later "#elif 1" / "#else" branches are never reached.
 *
 * NOTE(review): the "3/5" branch defines LX0, LX1 and SCALEX again, and the
 * "11/5" and "9/7 CDF" branches define SCALEX again, without an #undef of the
 * defaults below; enabling one of them as-is redefines those macros.
 */
#define SCALEX 1
#define LX0 0
#define LX1 1

#if 0 // more accurate 9/7
#define N1 2
#define SHIFT1 14
#define COEFFS1 (int[]){-25987,-25987}
#define N2 2
#define SHIFT2 19
#define COEFFS2 (int[]){-27777,-27777}
#define N3 2
#define SHIFT3 15
#define COEFFS3 (int[]){28931,28931}
#define N4 2
#define SHIFT4 15
#define COEFFS4 (int[]){14533,14533}
#elif 1 // 13/7 CRF
/* active configuration: two 4-tap steps, steps 3 and 4 disabled */
#define N1 4
#define SHIFT1 4
#define COEFFS1 (int[]){1,-9,-9,1}
#define N2 4
#define SHIFT2 4
#define COEFFS2 (int[]){-1,5,5,-1}
#define N3 0
#define SHIFT3 1
#define COEFFS3 NULL
#define N4 0
#define SHIFT4 1
#define COEFFS4 NULL
#elif 1 // 3/5
#define LX0 1
#define LX1 0
#define SCALEX 0.5
#define N1 2
#define SHIFT1 1
#define COEFFS1 (int[]){1,1}
#define N2 2
#define SHIFT2 2
#define COEFFS2 (int[]){-1,-1}
#define N3 0
#define SHIFT3 0
#define COEFFS3 NULL
#define N4 0
#define SHIFT4 0
#define COEFFS4 NULL
#elif 1 // 11/5
#define N1 0
#define SHIFT1 1
#define COEFFS1 NULL
#define N2 2
#define SHIFT2 2
#define COEFFS2 (int[]){-1,-1}
#define N3 2
#define SHIFT3 0
#define COEFFS3 (int[]){-1,-1}
#define N4 4
#define SHIFT4 7
#define COEFFS4 (int[]){-5,29,29,-5}
#define SCALEX 4
#elif 1 // 9/7 CDF
#define N1 2
#define SHIFT1 7
#define COEFFS1 (int[]){-203,-203}
#define N2 2
#define SHIFT2 12
#define COEFFS2 (int[]){-217,-217}
#define N3 2
#define SHIFT3 7
#define COEFFS3 (int[]){113,113}
#define N4 2
#define SHIFT4 9
#define COEFFS4 (int[]){227,227}
#define SCALEX 1
#elif 1 // 7/5 CDF
#define N1 0
#define SHIFT1 1
#define COEFFS1 NULL
#define N2 2
#define SHIFT2 2
#define COEFFS2 (int[]){-1,-1}
#define N3 2
#define SHIFT3 0
#define COEFFS3 (int[]){-1,-1}
#define N4 2
#define SHIFT4 4
#define COEFFS4 (int[]){3,3}
#elif 1 // 9/7 MN
#define N1 4
#define SHIFT1 4
#define COEFFS1 (int[]){1,-9,-9,1}
#define N2 2
#define SHIFT2 2
#define COEFFS2 (int[]){1,1}
#define N3 0
#define SHIFT3 1
#define COEFFS3 NULL
#define N4 0
#define SHIFT4 1
#define COEFFS4 NULL
#else // 13/7 CRF
#define N1 4
#define SHIFT1 4
#define COEFFS1 (int[]){1,-9,-9,1}
#define N2 4
#define SHIFT2 4
#define COEFFS2 (int[]){-1,5,5,-1}
#define N3 0
#define SHIFT3 1
#define COEFFS3 NULL
#define N4 0
#define SHIFT4 1
#define COEFFS4 NULL
#endif
778 static void horizontal_decomposeX(int *b, int width){
779 int temp[width];
780 const int width2= width>>1;
781 const int w2= (width+1)>>1;
782 int A1,A2,A3,A4, x;
783
784 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 0);
785 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 0);
786 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 0);
787 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 0);
788
789 for(x=0; x<width2; x++){
790 temp[x ]= b[2*x ];
791 temp[x+w2]= b[2*x + 1];
792 }
793 if(width&1)
794 temp[x ]= b[2*x ];
795 memcpy(b, temp, width*sizeof(int));
796 }
797
798 static void horizontal_composeX(int *b, int width){
799 int temp[width];
800 const int width2= width>>1;
801 int A1,A2,A3,A4, x;
802 const int w2= (width+1)>>1;
803
804 memcpy(temp, b, width*sizeof(int));
805 for(x=0; x<width2; x++){
806 b[2*x ]= temp[x ];
807 b[2*x + 1]= temp[x+w2];
808 }
809 if(width&1)
810 b[2*x ]= temp[x ];
811
812 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 1);
813 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 1);
814 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 1);
815 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 1);
816 }
817
818 static void spatial_decomposeX(int *buffer, int width, int height, int stride){
819 int x, y;
820
821 for(y=0; y<height; y++){
822 for(x=0; x<width; x++){
823 buffer[y*stride + x] *= SCALEX;
824 }
825 }
826
827 for(y=0; y<height; y++){
828 horizontal_decomposeX(buffer + y*stride, width);
829 }
830
831 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 0);
832 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0);
833 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0);
834 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0);
835 }
836
837 static void spatial_composeX(int *buffer, int width, int height, int stride){
838 int x, y;
839
840 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1);
841 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1);
842 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1);
843 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 1);
844
845 for(y=0; y<height; y++){
846 horizontal_composeX(buffer + y*stride, width);
847 }
848
849 for(y=0; y<height; y++){
850 for(x=0; x<width; x++){
851 buffer[y*stride + x] /= SCALEX;
852 }
853 }
854 }
855
/**
 * Forward 1-D 5/3 integer lifting pass over one row.
 *
 * The row is de-interleaved into a scratch buffer (even-indexed samples in
 * the first (width+1)/2 entries, odd-indexed samples after them) and two
 * lift() calls write the result back into b in that split layout.
 * The (mul, add, shift) triples passed to lift(), (-1, 0, 1) and (1, 2, 2),
 * are the same constants vertical_decompose53iH0 / vertical_decompose53iL0
 * apply between rows.
 *
 * @param b     row of width samples, rewritten in place
 * @param width number of samples in the row
 */
static void horizontal_decompose53i(int *b, int width){
    int temp[width];
    const int width2= width>>1;      /* number of odd-indexed samples  */
    const int w2    = (width+1)>>1;  /* number of even-indexed samples */
    int x;

    for(x=0; x<width2; x++){
        temp[x   ]= b[2*x    ];
        temp[x+w2]= b[2*x + 1];
    }
    /* odd width: one trailing even-indexed sample is left over */
    if(width&1)
        temp[x]= b[2*x];

    lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
    lift(b   , temp   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 0);
}
901
/**
 * Vertical 5/3 decomposition step "H0": subtracts from every sample of the
 * middle row half the sum of the samples above and below it.
 *
 * @param b0    row above (read only)
 * @param b1    row updated in place
 * @param b2    row below (read only)
 * @param width number of samples per row
 */
static void vertical_decompose53iH0(int *b0, int *b1, int *b2, int width){
    int col= 0;

    while(col < width){
        const int neighbours= b0[col] + b2[col];

        b1[col] -= neighbours>>1;
        col++;
    }
}
909
/**
 * Vertical 5/3 decomposition step "L0": adds to every sample of the middle
 * row a rounded quarter of the sum of the samples above and below it.
 *
 * @param b0    row above (read only)
 * @param b1    row updated in place
 * @param b2    row below (read only)
 * @param width number of samples per row
 */
static void vertical_decompose53iL0(int *b0, int *b1, int *b2, int width){
    int col= 0;

    while(col < width){
        const int neighbours= b0[col] + b2[col];

        b1[col] += (neighbours + 2)>>2;
        col++;
    }
}
917
/**
 * One decomposition level of the 5/3 integer wavelet over a 2-D buffer.
 *
 * Rows are visited two at a time through a sliding window of row pointers
 * (b0..b3). Each iteration horizontally transforms the rows that enter the
 * window and then applies the two vertical lifting steps: H0 updates b2
 * from b1/b3, L0 updates b1 from b0/b2.
 *
 * @param buffer samples, modified in place
 * @param width  samples processed per row
 * @param height number of rows
 * @param stride distance between the starts of consecutive rows, in elements
 */
918 static void spatial_decompose53i(int *buffer, int width, int height, int stride){
919 int y;
/* The window starts above the picture; mirror() turns the out-of-range row
 * numbers into rows inside the buffer.
 * NOTE(review): presumably a reflection into [0, height-1] - confirm. */
920 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
921 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride;
922
923 for(y=-2; y<height; y+=2){
924 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
925 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
926
/* NOTE(review): the pointer-order tests (b1 <= b3, b0 <= b2) look like a way
 * to skip steps whose rows were produced by mirroring - confirm. */
927 {START_TIMER
928 if(b1 <= b3) horizontal_decompose53i(b2, width);
929 if(y+2 < height) horizontal_decompose53i(b3, width);
930 STOP_TIMER("horizontal_decompose53i")}
931
/* vertical steps trail the horizontal ones: H0 writes b2, L0 writes b1 */
932 {START_TIMER
933 if(b1 <= b3) vertical_decompose53iH0(b1, b2, b3, width);
934 if(b0 <= b2) vertical_decompose53iL0(b0, b1, b2, width);
935 STOP_TIMER("vertical_decompose53i*")}
936
/* slide the window down by two rows */
937 b0=b2;
938 b1=b3;
939 }
940 }
941
/*
 * Constants for the four lifting steps (A, B, C, D) of the "97i" transform.
 * For each step X the vertical_*97i* helpers compute
 *     (W_XM * (b0 + b2) + W_XO) >> W_XS
 * i.e. W_XM is the multiplier, W_XO the offset added before shifting and
 * W_XS the shift. Exactly one of the coefficient sets below is compiled in.
 *
 * lift5 starts out as an alias of lift. The first (active) set undefines it
 * again, so the "#ifdef lift5" tests in vertical_decompose97iH1 and
 * vertical_compose97iH1 select their shift-and-add branch, and the lift5()
 * calls in the horizontal 97i functions then refer to a separate lift5
 * definition that is not part of this section.
 */
942 #define lift5 lift
943 #if 1
944 #define W_AM 3
945 #define W_AO 0
946 #define W_AS 1
947
948 #define W_BM 1
949 #define W_BO 8
950 #define W_BS 4
951
952 #undef lift5
/* NOTE(review): with lift5 undefined the vertical C step ignores W_CM (it
 * builds its multiplier from shifts); 9999 looks like a placeholder, but it
 * is still passed to lift5() by the horizontal functions - confirm. */
953 #define W_CM 9999
954 #define W_CO 2
955 #define W_CS 2
956
957 #define W_DM 15
958 #define W_DO 16
959 #define W_DS 5
/* disabled alternative coefficient set */
960 #elif 0
961 #define W_AM 55
962 #define W_AO 16
963 #define W_AS 5
964
965 #define W_BM 3
966 #define W_BO 32
967 #define W_BS 6
968
969 #define W_CM 127
970 #define W_CO 64
971 #define W_CS 7
972
973 #define W_DM 7
974 #define W_DO 8
975 #define W_DS 4
/* disabled alternative coefficient set */
976 #elif 0
977 #define W_AM 97
978 #define W_AO 32
979 #define W_AS 6
980
981 #define W_BM 63
982 #define W_BO 512
983 #define W_BS 10
984
985 #define W_CM 13
986 #define W_CO 8
987 #define W_CS 4
988
989 #define W_DM 15
990 #define W_DO 16
991 #define W_DS 5
992
/* fallback coefficient set, only reached if every condition above is 0 */
993 #else
994
995 #define W_AM 203
996 #define W_AO 64
997 #define W_AS 7
998
999 #define W_BM 217
1000 #define W_BO 2048
1001 #define W_BS 12
1002
1003 #define W_CM 113
1004 #define W_CO 64
1005 #define W_CS 7
1006
1007 #define W_DM 227
1008 #define W_DO 128
1009 #define W_DS 9
1010 #endif
1011 static void horizontal_decompose97i(int *b, int width){
1012 int temp[width];
1013 const int w2= (width+1)>>1;
1014
1015 lift (temp+w2, b +1, b , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0);
1016 lift (temp , b , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0);
1017 lift5(b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
1018 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
1019 }
1020
1021
1022 static void vertical_decompose97iH0(int *b0, int *b1, int *b2, int width){
1023 int i;
1024
1025 for(i=0; i<width; i++){
1026 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1027 }
1028 }
1029
1030 static void vertical_decompose97iH1(int *b0, int *b1, int *b2, int width){
1031 int i;
1032
1033 for(i=0; i<width; i++){
1034 #ifdef lift5
1035 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1036 #else
1037 int r= 3*(b0[i] + b2[i]);
1038 r+= r>>4;
1039 r+= r>>8;
1040 b1[i] += (r+W_CO)>>W_CS;
1041 #endif
1042 }
1043 }
1044
1045 static void vertical_decompose97iL0(int *b0, int *b1, int *b2, int width){
1046 int i;
1047
1048 for(i=0; i<width; i++){
1049 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1050 }
1051 }
1052
1053 static void vertical_decompose97iL1(int *b0, int *b1, int *b2, int width){
1054 int i;
1055
1056 for(i=0; i<width; i++){
1057 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1058 }
1059 }
1060
/**
 * One decomposition level of the "97i" wavelet over a 2-D buffer.
 *
 * Rows are visited two at a time through a sliding window of six row
 * pointers (b0..b5). Each iteration horizontally transforms the rows that
 * enter the window, then applies the four vertical lifting steps, each one
 * lagging one row behind the previous (H0 writes b4, L0 writes b3,
 * H1 writes b2, L1 writes b1).
 *
 * @param buffer samples, modified in place
 * @param width  samples processed per row
 * @param height number of rows
 * @param stride distance between the starts of consecutive rows, in elements
 */
1061 static void spatial_decompose97i(int *buffer, int width, int height, int stride){
1062 int y;
/* The window starts above the picture; mirror() turns the out-of-range row
 * numbers into rows inside the buffer.
 * NOTE(review): presumably a reflection into [0, height-1] - confirm. */
1063 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
1064 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride;
1065 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
1066 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
1067
1068 for(y=-4; y<height; y+=2){
1069 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1070 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1071
/* NOTE(review): the pointer-order tests look like a way to skip steps whose
 * rows were produced by mirroring - confirm. */
1072 {START_TIMER
1073 if(b3 <= b5) horizontal_decompose97i(b4, width);
1074 if(y+4 < height) horizontal_decompose97i(b5, width);
/* timing is only reported for wide rows */
1075 if(width>400){
1076 STOP_TIMER("horizontal_decompose97i")
1077 }}
1078
1079 {START_TIMER
1080 if(b3 <= b5) vertical_decompose97iH0(b3, b4, b5, width);
1081 if(b2 <= b4) vertical_decompose97iL0(b2, b3, b4, width);
1082 if(b1 <= b3) vertical_decompose97iH1(b1, b2, b3, width);
1083 if(b0 <= b2) vertical_decompose97iL1(b0, b1, b2, width);
1084
1085 if(width>400){
1086 STOP_TIMER("vertical_decompose97i")
1087 }}
1088
/* slide the window down by two rows */
1089 b0=b2;
1090 b1=b3;
1091 b2=b4;
1092 b3=b5;
1093 }
1094 }
1095
/**
 * Multi-level forward wavelet transform.
 *
 * For each level, starting at 0, the selected single-level decomposition is
 * run on the same buffer with width and height shifted right by the level
 * and the stride shifted left by the level.
 *
 * @param buffer              samples, transformed in place
 * @param width               width at level 0
 * @param height              height at level 0
 * @param stride              row stride at level 0, in elements
 * @param type                0: 97i, 1: 53i, 2: generic X transform;
 *                            any other value leaves the buffer untouched
 * @param decomposition_count number of levels to apply
 */
void ff_spatial_dwt(int *buffer, int width, int height, int stride, int type, int decomposition_count){
    int level= 0;

    while(level < decomposition_count){
        if(type == 0)
            spatial_decompose97i(buffer, width>>level, height>>level, stride<<level);
        else if(type == 1)
            spatial_decompose53i(buffer, width>>level, height>>level, stride<<level);
        else if(type == 2)
            spatial_decomposeX  (buffer, width>>level, height>>level, stride<<level);
        level++;
    }
}
1107
/**
 * Inverse 1-D 5/3 integer lifting pass over one row.
 *
 * Two lift() calls read the split row (two halves meeting at offset
 * (width+1)/2) and write into a scratch buffer; the scratch halves are then
 * interleaved back into b (first half to even positions, second half to odd
 * positions). The (mul, add, shift) triples, (1, 2, 2) and (-1, 0, 1), are
 * those of horizontal_decompose53i taken in the opposite order, with the
 * final lift() argument set to 1.
 *
 * @param b     row of width samples, rewritten in place
 * @param width number of samples in the row
 */
static void horizontal_compose53i(int *b, int width){
    int temp[width];
    const int width2= width>>1;      /* number of odd-indexed samples  */
    const int w2    = (width+1)>>1;  /* number of even-indexed samples */
    int x;

    lift(temp   , b   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 1);
    lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);

    for(x=0; x<width2; x++){
        b[2*x    ]= temp[x   ];
        b[2*x + 1]= temp[x+w2];
    }
    /* odd width: one trailing even-indexed sample is left over */
    if(width&1)
        b[2*x]= temp[x];
}
1153
/**
 * Vertical 5/3 composition step "H0": adds to every sample of the middle
 * row half the sum of the samples above and below it (the opposite sign of
 * vertical_decompose53iH0).
 *
 * @param b0    row above (read only)
 * @param b1    row updated in place
 * @param b2    row below (read only)
 * @param width number of samples per row
 */
static void vertical_compose53iH0(int *b0, int *b1, int *b2, int width){
    int col= 0;

    while(col < width){
        const int neighbours= b0[col] + b2[col];

        b1[col] += neighbours>>1;
        col++;
    }
}
1161
/**
 * Vertical 5/3 composition step "L0": subtracts from every sample of the
 * middle row a rounded quarter of the sum of the samples above and below it
 * (the opposite sign of vertical_decompose53iL0).
 *
 * @param b0    row above (read only)
 * @param b1    row updated in place
 * @param b2    row below (read only)
 * @param width number of samples per row
 */
static void vertical_compose53iL0(int *b0, int *b1, int *b2, int width){
    int col= 0;

    while(col < width){
        const int neighbours= b0[col] + b2[col];

        b1[col] -= (neighbours + 2)>>2;
        col++;
    }
}
1169
/**
 * One reconstruction level of the 5/3 integer wavelet over a 2-D buffer.
 *
 * Rows are visited two at a time through a sliding window of row pointers
 * (b0..b3). Each iteration first undoes the vertical lifting (L0 writes b2,
 * then H0 writes b1) and afterwards horizontally recomposes the rows that
 * are about to leave the window (b0 and b1).
 *
 * @param buffer samples, modified in place
 * @param width  samples processed per row
 * @param height number of rows
 * @param stride distance between the starts of consecutive rows, in elements
 */
1170 static void spatial_compose53i(int *buffer, int width, int height, int stride){
1171 int y;
/* The window starts above the picture; mirror() turns the out-of-range row
 * numbers into rows inside the buffer.
 * NOTE(review): presumably a reflection into [0, height-1] - confirm. */
1172 DWTELEM *b0= buffer + mirror(-1-1, height-1)*stride;
1173 DWTELEM *b1= buffer + mirror(-1 , height-1)*stride;
1174
/* note the inclusive bound: the loop runs one step past the last row */
1175 for(y=-1; y<=height; y+=2){
1176 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1177 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1178
/* NOTE(review): the pointer-order tests look like a way to skip steps whose
 * rows were produced by mirroring - confirm. */
1179 {START_TIMER
1180 if(b1 <= b3) vertical_compose53iL0(b1, b2, b3, width);
1181 if(b0 <= b2) vertical_compose53iH0(b0, b1, b2, width);
1182 STOP_TIMER("vertical_compose53i*")}
1183
/* rows b0/b1 are vertically complete now and can be recomposed horizontally */
1184 {START_TIMER
1185 if(y-1 >= 0) horizontal_compose53i(b0, width);
1186 if(b0 <= b2) horizontal_compose53i(b1, width);
1187 STOP_TIMER("horizontal_compose53i")}
1188
/* slide the window down by two rows */
1189 b0=b2;
1190 b1=b3;
1191 }
1192 }
1193
1194
1195 static void horizontal_compose97i(int *b, int width){
1196 int temp[width];
1197 const int w2= (width+1)>>1;
1198
1199 lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
1200 lift5(temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
1201 lift (b , temp , temp+w2, 2, 1, 1, width, -W_BM, W_BO, W_BS, 0, 1);
1202 lift (b+1 , temp+w2, b , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1);
1203 }
1204
1205 static void vertical_compose97iH0(int *b0, int *b1, int *b2, int width){
1206 int i;
1207
1208 for(i=0; i<width; i++){
1209 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1210 }
1211 }
1212
1213 static void vertical_compose97iH1(int *b0, int *b1, int *b2, int width){
1214 int i;
1215
1216 for(i=0; i<width; i++){
1217 #ifdef lift5
1218 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1219 #else
1220 int r= 3*(b0[i] + b2[i]);
1221 r+= r>>4;
1222 r+= r>>8;
1223 b1[i] -= (r+W_CO)>>W_CS;
1224 #endif
1225 }
1226 }
1227
1228 static void vertical_compose97iL0(int *b0, int *b1, int *b2, int width){
1229 int i;
1230
1231 for(i=0; i<width; i++){
1232 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1233 }
1234 }
1235
1236 static void vertical_compose97iL1(int *b0, int *b1, int *b2, int width){
1237 int i;
1238
1239 for(i=0; i<width; i++){
1240 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1241 }
1242 }
1243
/**
 * One reconstruction level of the "97i" wavelet over a 2-D buffer.
 *
 * Rows are visited two at a time through a sliding window of six row
 * pointers (b0..b5). Each iteration undoes the four vertical lifting steps
 * in reverse order (L1 writes b4, H1 writes b3, L0 writes b2, H0 writes b1)
 * and afterwards horizontally recomposes the rows that are about to leave
 * the window (b0 and b1).
 *
 * @param buffer samples, modified in place
 * @param width  samples processed per row
 * @param height number of rows
 * @param stride distance between the starts of consecutive rows, in elements
 */
1244 static void spatial_compose97i(int *buffer, int width, int height, int stride){
1245 int y;
/* The window starts above the picture; mirror() turns the out-of-range row
 * numbers into rows inside the buffer.
 * NOTE(review): presumably a reflection into [0, height-1] - confirm. */
1246 DWTELEM *b0= buffer + mirror(-3-1, height-1)*stride;
1247 DWTELEM *b1= buffer + mirror(-3 , height-1)*stride;
1248 DWTELEM *b2= buffer + mirror(-3+1, height-1)*stride;
1249 DWTELEM *b3= buffer + mirror(-3+2, height-1)*stride;
1250
/* note the inclusive bound: the loop runs one step past the last row */
1251 for(y=-3; y<=height; y+=2){
1252 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1253 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1254
/* disabled experiment: the trailing "&& 0" makes this condition always false */
1255 if(stride == width && y+4 < height && 0){
1256 int x;
1257 for(x=0; x<width/2; x++)
1258 b5[x] += 64*2;
1259 for(; x<width; x++)
1260 b5[x] += 169*2;
1261 }
1262
/* NOTE(review): the pointer-order tests look like a way to skip steps whose
 * rows were produced by mirroring - confirm. */
1263 {START_TIMER
1264 if(b3 <= b5) vertical_compose97iL1(b3, b4, b5, width);
1265 if(b2 <= b4) vertical_compose97iH1(b2, b3, b4, width);
1266 if(b1 <= b3) vertical_compose97iL0(b1, b2, b3, width);
1267 if(b0 <= b2) vertical_compose97iH0(b0, b1, b2, width);
/* timing is only reported for wide rows */
1268 if(width>400){
1269 STOP_TIMER("vertical_compose97i")}}
1270
/* rows b0/b1 are vertically complete now and can be recomposed horizontally */
1271 {START_TIMER
1272 if(y-1>= 0) horizontal_compose97i(b0, width);
1273 if(b0 <= b2) horizontal_compose97i(b1, width);
1274 if(width>400 && b0 <= b2){
1275 STOP_TIMER("horizontal_compose97i")}}
1276
/* slide the window down by two rows */
1277 b0=b2;
1278 b1=b3;
1279 b2=b4;
1280 b3=b5;
1281 }
1282 }
1283
/**
 * Multi-level inverse wavelet transform.
 *
 * Levels are processed from decomposition_count-1 down to 0; for each level
 * the selected single-level composition is run on the same buffer with width
 * and height shifted right by the level and the stride shifted left by it.
 *
 * @param buffer              coefficients, reconstructed in place
 * @param width               width at level 0
 * @param height              height at level 0
 * @param stride              row stride at level 0, in elements
 * @param type                0: 97i, 1: 53i, 2: generic X transform;
 *                            any other value leaves the buffer untouched
 * @param decomposition_count number of levels to undo
 */
void ff_spatial_idwt(int *buffer, int width, int height, int stride, int type, int decomposition_count){
    int level= decomposition_count - 1;

    while(level >= 0){
        if(type == 0)
            spatial_compose97i(buffer, width>>level, height>>level, stride<<level);
        else if(type == 1)
            spatial_compose53i(buffer, width>>level, height>>level, stride<<level);
        else if(type == 2)
            spatial_composeX  (buffer, width>>level, height>>level, stride<<level);
        level--;
    }
}
1295
/**
 * Encode the coefficients of one subband with context modelling plus run
 * lengths for positions whose whole neighbourhood is zero.
 *
 * Works in two passes over the band:
 *  1. collect, for all positions whose context (left, top-left, top,
 *     top-right and parent coefficient) is entirely zero, the lengths of the
 *     runs of zero coefficients between non-zero ones;
 *  2. write the symbols: a significance bit where the context is non-zero, a
 *     run length where it is zero, and magnitude and sign for every non-zero
 *     coefficient.
 *
 * @param s           codec context; symbols are written through s->c
 * @param b           subband descriptor (dimensions, parent, coder states)
 * @param src         coefficients of this band
 * @param parent      coefficients of the parent band, or NULL
 * @param stride      row stride of src in elements (the parent is read with
 *                    twice this stride)
 * @param orientation not used by the active code (only mentioned in
 *                    commented-out lines)
 */
1296 static void encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1297 const int w= b->width;
1298 const int h= b->height;
1299 int x, y;
1300
1301 if(1){
1302 int run=0;
/* NOTE(review): one int per coefficient on the stack; large bands make this
 * variable-length array correspondingly large. */
1303 int runs[w*h];
1304 int run_index=0;
1305
/* pass 1: gather the run lengths */
1306 for(y=0; y<h; y++){
1307 for(x=0; x<w; x++){
1308 int v, p=0;
1309 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1310 v= src[x + y*stride];
1311
/* neighbours outside the band keep their initial value 0 */
1312 if(y){
1313 t= src[x + (y-1)*stride];
1314 if(x){
1315 lt= src[x - 1 + (y-1)*stride];
1316 }
1317 if(x + 1 < w){
1318 rt= src[x + 1 + (y-1)*stride];
1319 }
1320 }
1321 if(x){
1322 l= src[x - 1 + y*stride];
1323 /*if(x > 1){
1324 if(orientation==1) ll= src[y + (x-2)*stride];
1325 else ll= src[x - 2 + y*stride];
1326 }*/
1327 }
/* co-located parent coefficient at half the coordinates */
1328 if(parent){
1329 int px= x>>1;
1330 int py= y>>1;
1331 if(px<b->parent->width && py<b->parent->height)
1332 p= parent[px + py*2*stride];
1333 }
/* only positions with an all-zero context take part in the run coding */
1334 if(!(/*ll|*/l|lt|t|rt|p)){
1335 if(v){
1336 runs[run_index++]= run;
1337 run=0;
1338 }else{
1339 run++;
1340 }
1341 }
1342 }
1343 }
/* the trailing run is stored too; then the list is replayed from its start */
1344 runs[run_index++]= run;
1345 run_index=0;
1346 run= runs[run_index++];
1347
1348 put_symbol2(&s->c, b->state[1], run, 3);
1349
/* pass 2: emit the symbols, recomputing the same context per position */
1350 for(y=0; y<h; y++){
1351 for(x=0; x<w; x++){
1352 int v, p=0;
1353 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1354 v= src[x + y*stride];
1355
1356 if(y){
1357 t= src[x + (y-1)*stride];
1358 if(x){
1359 lt= src[x - 1 + (y-1)*stride];
1360 }
1361 if(x + 1 < w){
1362 rt= src[x + 1 + (y-1)*stride];
1363 }
1364 }
1365 if(x){
1366 l= src[x - 1 + y*stride];
1367 /*if(x > 1){
1368 if(orientation==1) ll= src[y + (x-2)*stride];
1369 else ll= src[x - 2 + y*stride];
1370 }*/
1371 }
1372 if(parent){
1373 int px= x>>1;
1374 int py= y>>1;
1375 if(px<b->parent->width && py<b->parent->height)
1376 p= parent[px + py*2*stride];
1377 }
/* non-zero context: code an explicit zero/non-zero flag */
1378 if(/*ll|*/l|lt|t|rt|p){
1379 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1380
1381 put_cabac(&s->c, &b->state[0][context], !!v);
1382 }else{
/* zero context: an exhausted run means this coefficient is non-zero and the
 * next run length follows; otherwise it is one more zero of the current run */
1383 if(!run){
1384 run= runs[run_index++];
1385
1386 put_symbol2(&s->c, b->state[1], run, 3);
1387 assert(v);
1388 }else{
1389 run--;
1390 assert(!v);
1391 }
1392 }
/* magnitude minus one, then the sign with a context taken from l and t */
1393 if(v){
1394 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1395
1396 put_symbol2(&s->c, b->state[context + 2], ABS(v)-1, context-4);
1397 put_cabac(&s->c, &b->state[0][16 + 1 + 3 + quant3b[l&0xFF] + 3*quant3b[t&0xFF]], v<0);
1398 }
1399 }
1400 }
1401 }
1402 }
1403
1404 static void encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1405 // encode_subband_qtree(s, b, src, parent, stride, orientation);
1406 // encode_subband_z0run(s, b, src, parent, stride, orientation);
1407 encode_subband_c0run(s, b, src, parent, stride, orientation);
1408 // encode_subband_dzr(s, b, src, parent, stride, orientation);
1409 }
1410
/**
 * Decode the coefficients of one subband (counterpart of the c0run encoder).
 *
 * Dequantised values are written to src. In addition the non-zero
 * (unscaled) coefficients of every row are appended to the lists
 * b->x[] / b->coeff[], each row closed by an end marker with x == w+1; those
 * lists supply the top neighbours of the next row and, through b->parent,
 * the parent context of finer bands.
 *
 * @param s           codec context; symbols are read through s->c
 * @param b           subband descriptor (dimensions, parent, states, lists)
 * @param src         destination coefficients of this band
 * @param parent      non-NULL if the band has a parent (only tested against
 *                    NULL; parent data is read from b->parent)
 * @param stride      row stride of src, in elements
 * @param orientation not used by this function
 */
1411 static inline void decode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1412 const int w= b->width;
1413 const int h= b->height;
1414 int x,y;
/* quantiser: frame qlog plus band qlog, clipped to [0,128]; qmul is taken from
 * the qexp table, qadd is derived from qbias */
1415 const int qlog= clip(s->qlog + b->qlog, 0, 128);
1416 int qmul= qexp[qlog&7]<<(qlog>>3);
1417 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1418
1419 START_TIMER
1420
/* band stored directly in the DWT buffer, or lossless mode: with these values
 * (v*qmul + qadd)>>QEXPSHIFT equals v, i.e. no scaling */
1421 if(b->buf == s->spatial_dwt_buffer || s->qlog == LOSSLESS_QLOG){
1422 qadd= 0;
1423 qmul= 1<<QEXPSHIFT;
1424 }
1425
1426 if(1){
1427 int run;
/* index: write position in b->x / b->coeff
 * prev_index: read position inside the previous row's entries
 * prev2_index: list position where the row after the current one will start
 * parent_index / prev_parent_index: read position in the parent's list */
1428 int index=0;
1429 int prev_index=-1;
1430 int prev2_index=0;
1431 int parent_index= 0;
1432 int prev_parent_index= 0;
1433
/* start from an all-zero band; only non-zero coefficients are stored below */
1434 for(y=0; y<b->height; y++)
1435 memset(&src[y*stride], 0, b->width*sizeof(DWTELEM));
1436
1437 run= get_symbol2(&s->c, b->state[1], 3);
1438 for(y=0; y<h; y++){
1439 int v=0;
1440 int lt=0, t=0, rt=0;
1441
/* preload rt with the previous row's coefficient at x == 0, if it has one */
1442 if(y && b->x[prev_index] == 0){
1443 rt= b->coeff[prev_index];
1444 }
1445 for(x=0; x<w; x++){
1446 int p=0;
/* the value decoded at the previous position is the left neighbour */
1447 const int l= v;
1448
/* slide the top neighbours one position to the right */
1449 lt= t; t= rt;
1450
1451 if(y){
1452 if(b->x[prev_index] <= x)
1453 prev_index++;
1454 if(b->x[prev_index] == x + 1)
1455 rt= b->coeff[prev_index];
1456 else
1457 rt=0;
1458 }
/* parent coefficient at x>>1, looked up in the parent's list */
1459 if(parent){
1460 if(x>>1 > b->parent->x[parent_index]){
1461 parent_index++;
1462 }
1463 if(x>>1 == b->parent->x[parent_index]){
1464 p= b->parent->coeff[parent_index];
1465 }
1466 }
/* non-zero context: read an explicit zero/non-zero flag */
1467 if(/*ll|*/l|lt|t|rt|p){
1468 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1469
1470 v=get_cabac(&s->c, &b->state[0][context]);
1471 }else{
/* zero context: an exhausted run means a non-zero coefficient here and the
 * next run length follows */
1472 if(!run){
1473 run= get_symbol2(&s->c, b->state[1], 3);
1474 v=1;
1475 }else{
1476 run--;
1477 v=0;
1478
/* NOTE(review): this appears to skip ahead over positions that are still
 * inside the run and before the next non-zero entry of the previous row and
 * of the parent - confirm the bounds. */
1479 if(y && parent){
1480 int max_run;
1481
1482 max_run= FFMIN(run, b->x[prev_index] - x - 2);
1483 max_run= FFMIN(max_run, 2*b->parent->x[parent_index] - x - 1);
1484 x+= max_run;
1485 run-= max_run;
1486 }
1487 }
1488 }
/* non-zero coefficient: magnitude (stored minus one), sign, dequantise and
 * append the unscaled value to the band's list */
1489 if(v){
1490 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1491 v= get_symbol2(&s->c, b->state[context + 2], context-4) + 1;
1492 if(get_cabac(&s->c, &b->state[0][16 + 1 + 3 + quant3b[l&0xFF] + 3*quant3b[t&0xFF]])){
1493 src[x + y*stride]=-(( v*qmul + qadd)>>(QEXPSHIFT));
1494 v= -v;
1495 }else{
1496 src[x + y*stride]= (( v*qmul + qadd)>>(QEXPSHIFT));
1497 }
1498 b->x[index]=x; //FIXME interleave x/coeff
1499 b->coeff[index++]= v;
1500 }
1501 }
1502 b->x[index++]= w+1; //end marker
/* the row just written becomes the "previous row" of the next iteration */
1503 prev_index= prev2_index;
1504 prev2_index= index;
1505
/* skip to the end of the current parent row; after an even row rewind to the
 * start of that parent row, after an odd row keep the advanced position */
1506 if(parent){
1507 while(b->parent->x[parent_index] != b->parent->width+1)
1508 parent_index++;
1509 parent_index++;
1510 if(y&1){
1511 prev_parent_index= parent_index;
1512 }else{
1513 parent_index= prev_parent_index;
1514 }
1515 }
1516 }
1517 b->x[index++]= w+1; //end marker
/* timing is only reported for wide bands */
1518 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
1519 STOP_TIMER("decode_subband")
1520 }
1521
1522 return;
1523 }
1524 }
1525
1526 static void reset_contexts(SnowContext *s){
1527 int plane_index, level, orientation;
1528
1529 for(plane_index=0; plane_index<2; plane_index++){
1530 for(level=0; level<s->spatial_decomposition_count; level++){
1531 for(orientation=level ? 1:0; orientation<4; orientation++){
1532 memset(s->plane[plane_index].band[level][orientation].state, 0, sizeof(s->plane[plane_index].band[level][orientation].state));
1533 }
1534 }
1535 }
1536 memset(s->header_state, 0, sizeof(s->header_state));
1537 memset(s->block_state, 0, sizeof(s->block_state));
1538 }
1539
1540 static int alloc_blocks(SnowContext *s){
1541 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1542 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1543
1544 s->b_width = w;
1545 s->b_height= h;
1546
1547 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
1548 return 0;
1549 }
1550
1551 static inline void copy_cabac_state(CABACContext *d, CABACContext *s){
1552 PutBitContext bak= d->pb;
1553 *d= *s;
1554 d->pb= bak;
1555 }
1556
//near copy & paste from dsputil, FIXME
/**
 * Sum of all pixels of a w x w square.
 *
 * @param pix       top-left pixel of the square
 * @param line_size distance between the starts of consecutive rows, in bytes
 * @param w         side length of the square in pixels
 * @return the sum of the w*w pixel values
 */
static int pix_sum(uint8_t * pix, int line_size, int w)
{
    int total = 0;
    int row, col;

    for (row = 0; row < w; row++) {
        const uint8_t *line = pix + row * line_size;

        for (col = 0; col < w; col++)
            total += line[col];
    }
    return total;
}
1572
1573 //near copy & paste from dsputil, FIXME
1574 static int pix_norm1(uint8_t * pix, int line_size, int w)
1575 {
1576 int s, i, j;
1577 uint32_t *sq = squareTbl + 256;
1578
1579 s = 0;
1580 for (i = 0; i < w; i++) {
1581 for (j = 0; j < w; j ++) {
1582 s += sq[pix[0]];
1583 pix ++;
1584 }
1585 pix += line_size - w;
1586 }
1587 return s;
1588 }
1589
1590 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int type){
1591 const int w= s->b_width << s->block_max_depth;
1592 const int rem_depth= s->block_max_depth - level;
1593 const int index= (x + y*w) << rem_depth;
1594 const int block_w= 1<<rem_depth;
1595 BlockNode block;
1596 int i,j;
1597
1598 block.color[0]= l;
1599 block.color[1]= cb;
1600 block.color[2]= cr;
1601 block.mx= mx;
1602 block.my= my;
1603 block.type= type;
1604 block.level= level;
1605
1606 for(j=0; j<block_w; j++){
1607 for(i=0; i<block_w; i++){
1608 s->block[index + i + j*w]= block;
1609 }
1610 }
1611 }
1612
1613 static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
1614 const int offset[3]= {
1615 y*c-> stride + x,
1616 ((y*c->uvstride + x)>>1),
1617 ((y*c->uvstride + x)>>1),
1618 };
1619 int i;
1620 for(i=0; i<3; i++){
1621 c->src[0][i]= src [i];
1622 c->ref[0][i]= ref [i] + offset[i];
1623 }
1624 assert(!ref_index);
1625 }
1626
1627 //FIXME copy&paste
/* Named slots of the predictor array P[][2] (x in [0], y in [1]) that
 * encode_q_branch fills before calling the motion search. */
1628 #define P_LEFT P[1]
1629 #define P_TOP P[2]
1630 #define P_TOPRIGHT P[3]
1631 #define P_MEDIAN P[4]
/* NOTE(review): P_MV1 and FLAG_QPEL are not referenced in the code visible
 * here; presumably they are expected by the shared motion estimation code. */
1632 #define P_MV1 P[9]
1633 #define FLAG_QPEL 1 //must be 1
1634
/**
 * Choose and encode the representation of one quadtree node.
 *
 * Three candidates are costed: an inter block (motion search against the
 * last picture), an intra block (one mean colour per plane) and, unless the
 * node is already at the maximum depth, a split into four child nodes
 * (recursive call). The inter and intra candidates are written into scratch
 * coders first; the cheapest candidate ends up in s->c and in the block
 * array.
 *
 * On keyframes nothing is coded: the node is stored as an intra block with
 * the predicted values and 0 is returned.
 *
 * @param s     codec context
 * @param level quadtree depth of the node (0 is the coarsest)
 * @param x     node column at that depth
 * @param y     node row at that depth
 * @return score of the chosen candidate (distortion plus lambda-weighted bits)
 */
1635 static int encode_q_branch(SnowContext *s, int level, int x, int y){
/* scratch bitstreams and state copies for the inter (p_) and intra (i_) trials */
1636 uint8_t p_buffer[1024];
1637 uint8_t i_buffer[1024];
1638 uint8_t p_state[sizeof(s->block_state)];
1639 uint8_t i_state[sizeof(s->block_state)];
1640 CABACContext pc, ic;
/* bit writer as it was on entry; the chosen trial is appended to this copy */
1641 PutBitContext pbbak= s->c.pb;
1642 int score, score2, iscore, i_len, p_len, block_s, sum;
1643 const int w= s->b_width << s->block_max_depth;
1644 const int h= s->b_height << s->block_max_depth;
1645 const int rem_depth= s->block_max_depth - level;
1646 const int index= (x + y*w) << rem_depth;
/* side length of the node in luma pixels */
1647 const int block_w= 1<<(LOG2_MB_SIZE - level);
/* stand-in for neighbours that lie outside the picture */
1648 static BlockNode null_block= { //FIXME add border maybe
1649 .color= {128,128,128},
1650 .mx= 0,
1651 .my= 0,
1652 .type= 0,
1653 .level= 0,
1654 };
1655 int trx= (x+1)<<rem_depth;
1656 int try= (y+1)<<rem_depth;
/* neighbouring blocks used for prediction and context selection; tl falls
 * back to left and tr falls back to tl when they are not available */
1657 BlockNode *left = x ? &s->block[index-1] : &null_block;
1658 BlockNode *top = y ? &s->block[index-w] : &null_block;
1659 BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
1660 BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
1661 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
1662 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
/* predictions: colour from the left block, motion vector as the median of
 * left, top and top-right */
1663 int pl = left->color[0];
1664 int pcb= left->color[1];
1665 int pcr= left->color[2];
1666 int pmx= mid_pred(left->mx, top->mx, tr->mx);
1667 int pmy= mid_pred(left->my, top->my, tr->my);
1668 int mx=0, my=0;
1669 int l,cr,cb, i;
1670 const int stride= s->current_picture.linesize[0];
1671 const int uvstride= s->current_picture.linesize[1];
1672 const int instride= s->input_picture.linesize[0];
1673 const int uvinstride= s->input_picture.linesize[1];
1674 uint8_t *new_l = s->input_picture.data[0] + (x + y* instride)*block_w;
1675 uint8_t *new_cb= s->input_picture.data[1] + (x + y*uvinstride)*block_w/2;
1676 uint8_t *new_cr= s->input_picture.data[2] + (x + y*uvinstride)*block_w/2;
/* local copy of the input block, laid out with the current picture's strides */
1677 uint8_t current_mb[3][stride*block_w];
1678 uint8_t *current_data[3]= {&current_mb[0][0], &current_mb[1][0], &current_mb[2][0]};
1679 int P[10][2];
1680 int16_t last_mv[3][2];
1681 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
1682 const int shift= 1+qpel;
1683 MotionEstContext *c= &s->m.me;
/* coder contexts derived from the neighbours */
1684 int mx_context= av_log2(2*ABS(left->mx - top->mx));
1685 int my_context= av_log2(2*ABS(left->my - top->my));
1686 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1687
1688 assert(sizeof(s->block_state) >= 256);
/* keyframe: no block data is coded at all */
1689 if(s->keyframe){
1690 set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, BLOCK_INTRA);
1691 return 0;
1692 }
1693
1694 //FIXME optimize
1695 for(i=0; i<block_w; i++)
1696 memcpy(&current_mb[0][0] + stride*i, new_l + instride*i, block_w);
1697 for(i=0; i<block_w>>1; i++)
1698 memcpy(&current_mb[1][0] + uvstride*i, new_cb + uvinstride*i, block_w>>1);
1699 for(i=0; i<block_w>>1; i++)
1700 memcpy(&current_mb[2][0] + uvstride*i, new_cr + uvinstride*i, block_w>>1);
1701
1702 // clip predictors / edge ?
1703
/* candidate predictors handed to the motion search */
1704 P_LEFT[0]= left->mx;
1705 P_LEFT[1]= left->my;
1706 P_TOP [0]= top->mx;
1707 P_TOP [1]= top->my;
1708 P_TOPRIGHT[0]= tr->mx;
1709 P_TOPRIGHT[1]= tr->my;
1710
/* vectors currently stored for this position and its right/bottom neighbours */
1711 last_mv[0][0]= s->block[index].mx;
1712 last_mv[0][1]= s->block[index].my;
1713 last_mv[1][0]= right->mx;
1714 last_mv[1][1]= right->my;
1715 last_mv[2][0]= bottom->mx;
1716 last_mv[2][1]= bottom->my;
1717
1718 s->m.mb_stride=2;
1719 s->m.mb_x=
1720 s->m.mb_y= 0;
1721 s->m.me.skip= 0;
1722
1723 init_ref(c, current_data, s->last_picture.data, NULL, block_w*x, block_w*y, 0);
1724
1725 assert(s->m.me. stride == stride);
1726 assert(s->m.me.uvstride == uvstride);
1727
1728 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
1729 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
1730 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
1731 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
1732
/* search window relative to the block: the block grid area plus 16 pixels
 * on every side */
1733 c->xmin = - x*block_w - 16;
1734 c->ymin = - y*block_w - 16;
1735 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16;
1736 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16;
1737
/* clip the predictors to the search window */
1738 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
1739 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
1740 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
1741 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
1742 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
1743 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
1744 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
1745
1746 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
1747 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
1748
/* first row: predict from the left block only, otherwise from the median */
1749 if (!y) {
1750 c->pred_x= P_LEFT[0];
1751 c->pred_y= P_LEFT[1];
1752 } else {
1753 c->pred_x = P_MEDIAN[0];
1754 c->pred_y = P_MEDIAN[1];
1755 }
1756
1757 score= ff_epzs_motion_search(&s->m, &mx, &my, P, 0, /*ref_index*/ 0, last_mv,
1758 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
1759
1760 assert(mx >= c->xmin);
1761 assert(mx <= c->xmax);
1762 assert(my >= c->ymin);
1763 assert(my <= c->ymax);
1764
/* refine the vector, then take the final inter score from ff_get_mb_score */
1765 score= s->m.me.sub_motion_search(&s->m, &mx, &my, score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
1766 score= ff_get_mb_score(&s->m, mx, my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
1767 //FIXME if mb_cmp != SSE then intra cant be compared currently and mb_penalty vs. lambda2
1768
1769 // subpel search
/* inter trial: "no split" flag (unless at maximum depth), inter type flag and
 * the motion vector difference, coded into a private copy of the coder */
1770 pc= s->c;
1771 init_put_bits(&pc.pb, p_buffer, sizeof(p_buffer));
1772 memcpy(p_state, s->block_state, sizeof(s->block_state));
1773
1774 if(level!=s->block_max_depth)
1775 put_cabac(&pc, &p_state[4 + s_context], 1);
1776 put_cabac(&pc, &p_state[1 + left->type + top->type], 0);
1777 put_symbol(&pc, &p_state[128 + 32*mx_context], mx - pmx, 1);
1778 put_symbol(&pc, &p_state[128 + 32*my_context], my - pmy, 1);
1779 p_len= put_bits_count(&pc.pb);
1780 score += (s->lambda2*(p_len + pc.outstanding_count - s->c.outstanding_count))>>FF_LAMBDA_SHIFT;
1781
/* intra candidate: rounded mean per plane; the luma distortion is the sum of
 * squared differences from that mean (the chroma terms are commented out) */
1782 block_s= block_w*block_w;
1783 sum = pix_sum(&current_mb[0][0], stride, block_w);
1784 l= (sum + block_s/2)/block_s;
1785 iscore = pix_norm1(&current_mb[0][0], stride, block_w) - 2*l*sum + l*l*block_s;
1786
1787 block_s= block_w*block_w>>2;
1788 sum = pix_sum(&current_mb[1][0], uvstride, block_w>>1);
1789 cb= (sum + block_s/2)/block_s;
1790 // iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
1791 sum = pix_sum(&current_mb[2][0], uvstride, block_w>>1);
1792 cr= (sum + block_s/2)/block_s;
1793 // iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
1794
/* intra trial: "no split" flag, intra type flag and the colour differences */
1795 ic= s->c;
1796 init_put_bits(&ic.pb, i_buffer, sizeof(i_buffer));
1797 memcpy(i_state, s->block_state, sizeof(s->block_state));
1798 if(level!=s->block_max_depth)
1799 put_cabac(&ic, &i_state[4 + s_context], 1);
1800 put_cabac(&ic, &i_state[1 + left->type + top->type], 1);
1801 put_symbol(&ic, &i_state[32], l-pl , 1);
1802 put_symbol(&ic, &i_state[64], cb-pcb, 1);
1803 put_symbol(&ic, &i_state[96], cr-pcr, 1);
1804 i_len= put_bits_count(&ic.pb);
1805 iscore += (s->lambda2*(i_len + ic.outstanding_count - s->c.outstanding_count))>>FF_LAMBDA_SHIFT;
1806
1807 // assert(score==256*256*256*64-1);
1808 assert(iscore < 255*255*256 + s->lambda2*10);
1809 assert(iscore >= 0);
1810 assert(l>=0 && l<=255);
1811 assert(pl>=0 && pl<=255);
1812
/* top-level nodes also feed the scene change score */
1813 if(level==0){
1814 int varc= iscore >> 8;
1815 int vard= score >> 8;
1816 if (vard <= 64 || vard < varc)
1817 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
1818 else
1819 c->scene_change_score+= s->m.qscale;
1820 }
1821
/* split trial: coded straight into s->c; if it wins it is simply kept,
 * otherwise the branches below overwrite s->c with the chosen trial */
1822 if(level!=s->block_max_depth){
1823 put_cabac(&s->c, &s->block_state[4 + s_context], 0);
1824 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
1825 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
1826 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
1827 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
1828 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
1829
1830 if(score2 < score && score2 < iscore)
1831 return score2;
1832 }
1833
/* commit the cheaper of intra and inter: append its bits to the saved bit
 * writer, adopt its coder, store the block data and its state copy */
1834 if(iscore < score){
1835 flush_put_bits(&ic.pb);
1836 ff_copy_bits(&pbbak, i_buffer, i_len);
1837 s->c= ic;
1838 s->c.pb= pbbak;
1839 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, BLOCK_INTRA);
1840 memcpy(s->block_state, i_state, sizeof(s->block_state));
1841 return iscore;
1842 }else{
1843 flush_put_bits(&pc.pb);
1844 ff_copy_bits(&pbbak, p_buffer, p_len);
1845 s->c= pc;
1846 s->c.pb= pbbak;
1847 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, 0);
1848 memcpy(s->block_state, p_state, sizeof(s->block_state));
1849 return score;
1850 }
1851 }
1852
1853 static void decode_q_branch(SnowContext *s, int level, int x, int y){
1854 const int w= s->b_width << s->block_max_depth;
1855 const int rem_depth= s->block_max_depth - level;
1856 const int index= (x + y*w) << rem_depth;
1857 static BlockNode null_block= { //FIXME add border maybe
1858 .color= {128,128,128},
1859 .mx= 0,
1860 .my= 0,
1861 .type= 0,
1862 .level= 0,
1863 };
1864 int trx= (x+1)<<rem_depth;
1865 BlockNode *left = x ? &s->block[index-1] : &null_block;
1866 BlockNode *top = y ? &s->block[index-w] : &null_block;
1867 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
1868 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
1869 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1870
1871 if(s->keyframe){
1872 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, BLOCK_INTRA);
1873 return;
1874 }
1875
1876 if(level==s->block_max_depth || get_cabac(&s->c, &s->block_state[4 + s_context])){
1877 int type;
1878 int l = left->color[0];
1879 int cb= left->color[1];
1880 int cr= left->color[2];
1881 int mx= mid_pred(left->mx, top->mx, tr->mx);
1882 int my= mid_pred(left->my, top->my, tr->my);
1883 int mx_context= av_log2(2*ABS(left->mx - top->mx)) + 0*av_log2(2*ABS(tr->mx - top->mx));
1884 int my_context= av_log2(2*ABS(left->my - top->my)) + 0*av_log2(2*ABS(tr->my - top->my));
1885
1886 type= get_cabac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
1887
1888 if(type){
1889 l += get_symbol(&s->c, &s->block_state[32], 1);
1890 cb+= get_symbol(&s->c, &s->block_state[64], 1);
1891 cr+= get_symbol(&s->c, &s->block_state[96], 1);
1892 }else{
1893 mx+= get_symbol(&s->c, &s->block_state[128 + 32*mx_context], 1);
1894 my+= get_symbol(&s->c, &s->block_state[128 + 32*my_context], 1);
1895 }
1896 set_blocks(s, level, x, y, l, cb, cr, mx, my, type);
1897 }else{
1898 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
1899 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
1900 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
1901 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
1902 }
1903 }
1904
1905 static void encode_blocks(SnowContext *s){
1906 int x, y;
1907 int w= s->b_width;
1908 int h= s->b_height;
1909
1910 for(y=0; y<h; y++){
1911 for(x=0; x<w; x++){
1912 encode_q_branch(s, 0, x, y);
1913 }
1914 }
1915 }
1916
1917 static void decode_blocks(SnowContext *s){
1918 int x, y;
1919 int w= s->b_width;
1920 int h= s->b_height;
1921
1922 for(y=0; y<h; y++){
1923 for(x=0; x<w; x++){
1924 decode_q_branch(s, 0, x, y);
1925 }
1926 }
1927 }
1928
/**
 * Separable sub-pel interpolation of a b_w x b_h block.
 *
 * Each pass evaluates the 6-tap kernel (1,-5,20,20,-5,1), whose taps sum to
 * 32, to obtain the half-sample value "am" between taps a2 and a3, and then
 * blends linearly: for a fractional offset below 8 between 32*a2 and am, from
 * 8 upwards between am and 32*a3. The blend weights total 8, so the sum is
 * normalised with (+128)>>8.
 *
 * The filter over- and undershoots on sharp edges, so the result of each pass
 * is saturated to 0..255 before it is stored; storing it unclamped into a
 * uint8_t would wrap around (e.g. 319 would become 63).
 *
 * @param dst    output block, b_h rows of b_w samples, row pitch stride
 * @param src    input; output sample (x,y) is centred on src[(x+2) + (y+2)*stride],
 *               and b_w+5 columns by b_h+5 rows are read starting at src
 * @param tmp    scratch for the horizontal pass, b_h+5 rows of b_w samples,
 *               row pitch stride
 * @param stride row pitch shared by dst, src and tmp
 * @param b_w    block width in samples
 * @param b_h    block height in samples
 * @param dx     horizontal fractional offset; the weights (8-dx), (16-dx) and
 *               (dx-8) are only non-negative for 0..16
 * @param dy     vertical fractional offset, same range as dx
 */
static void mc_block(uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
    int x, y;

    /* horizontal pass; 5 extra rows feed the 6 taps of the vertical pass */
    for(y=0; y < b_h+5; y++){
        for(x=0; x < b_w; x++){
            int a0= src[x     + y*stride];
            int a1= src[x + 1 + y*stride];
            int a2= src[x + 2 + y*stride];
            int a3= src[x + 3 + y*stride];
            int a4= src[x + 4 + y*stride];
            int a5= src[x + 5 + y*stride];
            int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
            int v;

            if(dx<8) v= (32*a2*( 8-dx) +    am* dx    + 128)>>8;
            else     v= (   am*(16-dx) + 32*a3*(dx-8) + 128)>>8;

            /* saturate instead of letting the uint8_t store wrap */
            if(v<0)        v= 0;
            else if(v>255) v= 255;

            tmp[x + y*stride]= v;
        }
    }

    /* vertical pass over the horizontally filtered rows */
    for(y=0; y < b_h; y++){
        for(x=0; x < b_w; x++){
            int a0= tmp[x +  y     *stride];
            int a1= tmp[x + (y + 1)*stride];
            int a2= tmp[x + (y + 2)*stride];
            int a3= tmp[x + (y + 3)*stride];
            int a4= tmp[x + (y + 4)*stride];
            int a5= tmp[x + (y + 5)*stride];
            int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
            int v;

            if(dy<8) v= (32*a2*( 8-dy) +    am* dy    + 128)>>8;
            else     v= (   am*(16-dy) + 32*a3*(dy-8) + 128)>>8;

            /* saturate instead of letting the uint8_t store wrap */
            if(v<0)        v= 0;
            else if(v>255) v= 255;

            dst[x + y*stride]= v;
        }
    }
}
1982
/*
 * mcb(dx,dy,b_w) defines mc_block<dx><dy>(dst, src, stride): a b_w x b_w
 * wrapper around mc_block() with the fractional offsets fixed to dx and dy.
 * src is moved back by 2 columns and 2 rows before the call, and the scratch
 * buffer handed to mc_block() is a stack array of stride*(b_w+5) bytes.
 */
#define mcb(frac_x,frac_y,size)\
static void mc_block ## frac_x ## frac_y(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t scratch[stride*(size+5)];\
    mc_block(dst, src-2-2*stride, scratch, stride, size, size, frac_x, frac_y);\
}

/* 16x16 variants; one row per dy, dx growing left to right */
mcb( 0, 0,16) mcb( 4, 0,16) mcb( 8, 0,16) mcb(12, 0,16)
mcb( 0, 4,16) mcb( 4, 4,16) mcb( 8, 4,16) mcb(12, 4,16)
mcb( 0, 8,16) mcb( 4, 8,16) mcb( 8, 8,16) mcb(12, 8,16)
mcb( 0,12,16) mcb( 4,12,16) mcb( 8,12,16) mcb(12,12,16)
2005
/*
 * mca(dx,dy,b_w) defines mc_block_hpel<dx><dy>(dst, src, stride, h): same as
 * the mcb() wrappers but with an additional height argument, which is
 * asserted to equal b_w because only square blocks are produced here.
 */
#define mca(frac_x,frac_y,size)\
static void mc_block_hpel ## frac_x ## frac_y(uint8_t *dst, uint8_t *src, int stride, int h){\
    uint8_t scratch[stride*(size+5)];\
    assert(h==size);\
    mc_block(dst, src-2-2*stride, scratch, stride, size, size, frac_x, frac_y);\
}

/* 16x16 variants for offsets 0 and 8 in each direction */
mca( 0, 0,16) mca( 8, 0,16)
mca( 0, 8,16) mca( 8, 8,16)
2017
2018 static always_inline void add_xblock(SnowContext *s, DWTELEM *dst, uint8_t *src, uint8_t *obmc, int s_x, int s_y, int b_w, int b_h, int mv_x, int mv_y, int w, int h, int dst_stride, int src_stride, int obmc_stride, int mb_type, int add, int color){
2019 uint8_t tmp[src_stride*(b_h+5)]; //FIXME move to context to gurantee alignment
2020 int x,y;
2021
2022 if(s_x<0){
2023 obmc -= s_x;
2024 b_w += s_x;
2025 s_x=0;
2026 }else if(s_x + b_w > w){
2027 b_w = w - s_x;
2028 }
2029 if(s_y<0){
2030 obmc -= s_y*obmc_stride;
2031 b_h += s_y;
2032 s_y=0;
2033 }else if(s_y + b_h> h){
2034 b_h = h - s_y;
2035 }
2036
2037 if(b_w<=0 || b_h<=0) return;
2038
2039 dst += s_x + s_y*dst_stride;
2040
2041 if(mb_type==BLOCK_INTRA){
2042 for(y=0; y < b_h; y++){
2043 for(x=0; x < b_w; x++){
2044 if(add) dst[x + y*dst_stride] += obmc[x + y*obmc_stride] * color * (256/OBMC_MAX);
2045 else dst[x + y*dst_stride] -= obmc[x + y*obmc_stride] * color * (256/OBMC_MAX);
2046 }
2047 }
2048 }else{
2049 int dx= mv_x&15;
2050 int dy= mv_y&15;
2051 // int dxy= (mv_x&1) + 2*(mv_y&1);
2052
2053 s_x += (mv_x>>4) - 2;
2054 s_y += (mv_y>>4) - 2;
2055 src += s_x + s_y*src_stride;
2056 //use dsputil
2057
2058 if( (unsigned)s_x >= w - b_w - 4
2059 || (unsigned)s_y >= h - b_h - 4){
2060 ff_emulated_edge_mc(tmp + 32, src, src_stride, b_w+5, b_h+5, s_x, s_y, w, h);
2061 src= tmp + 32;
2062 }
2063
2064 assert(mb_type==0);
2065 mc_block(tmp, src, tmp + 64+8, src_stride, b_w, b_h, dx, dy);
2066 for(y=0; y < b_h; y++){
2067 for(x=0; x < b_w; x++){
2068 if(add) dst[x + y*dst_stride] += obmc[x + y*obmc_stride] * tmp[x + y*src_stride] * (256/OBMC_MAX);
2069 else dst[x + y*dst_stride] -= obmc[x + y*obmc_stride] * tmp[x + y*src_stride] * (256/OBMC_MAX);
2070 }
2071 }
2072 }
2073 }
2074
2075 static void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){