4758698813398b9ef721396c938c833100fab17b
[libav.git] / libavcodec / snow.c
1 /*
2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18
19 #include "avcodec.h"
20 #include "common.h"
21 #include "dsputil.h"
22 #include "cabac.h"
23
24 #include "mpegvideo.h"
25
26 #undef NDEBUG
27 #include <assert.h>
28
29 #define MAX_DECOMPOSITIONS 8
30 #define MAX_PLANES 4
31 #define DWTELEM int
32 #define QROOT 8
33 #define LOSSLESS_QLOG -128
34
35 static const int8_t quant3[256]={
36 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
37 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
43 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
44 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
45 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
46 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
47 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
48 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
49 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
50 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
51 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
52 };
53 static const int8_t quant3b[256]={
54 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
55 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
57 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
60 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
61 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
62 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
63 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
64 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
65 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
66 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
68 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
69 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
70 };
71 static const int8_t quant5[256]={
72 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
73 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
74 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
75 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
76 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
77 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
78 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
79 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
80 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
81 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
82 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
83 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
84 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
85 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
86 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
87 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
88 };
89 static const int8_t quant7[256]={
90 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
91 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
92 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
93 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
94 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
95 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
96 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
97 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
98 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
99 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
100 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
101 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
102 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
103 -3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
104 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
105 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
106 };
107 static const int8_t quant9[256]={
108 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
109 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
110 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
111 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
112 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
113 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
114 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
115 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
116 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
117 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
118 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
119 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
120 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
121 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
122 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
123 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
124 };
125 static const int8_t quant11[256]={
126 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
127 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
128 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
129 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
130 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
131 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
132 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
133 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
134 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
135 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
136 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
137 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
138 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
139 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
140 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
141 -4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
142 };
143 static const int8_t quant13[256]={
144 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
145 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
146 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
147 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
148 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
149 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
150 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
151 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
152 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
153 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
154 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
155 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
156 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
157 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
158 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
159 -4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
160 };
161
162 #define OBMC_MAX 64
163 #if 0 //64*cubic
164 static const uint8_t obmc32[1024]={
165 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
166 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
167 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
168 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
169 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
170 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
171 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
172 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
173 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
174 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
175 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
176 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
177 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
178 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
179 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
180 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
181 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
182 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
183 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
184 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
185 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
186 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
187 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
188 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
189 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
190 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
191 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
192 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
193 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
194 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
195 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
196 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
197 //error:0.000022
198 };
199 static const uint8_t obmc16[256]={
200 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
201 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
202 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
203 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
204 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
205 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
206 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
207 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
208 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
209 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
210 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
211 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
212 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
213 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
214 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
215 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
216 //error:0.000033
217 };
218 #elif 1 // 64*linear
219 static const uint8_t obmc32[1024]={
220 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
221 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
222 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
223 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
224 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
225 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
226 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
227 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
228 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
229 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
230 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
231 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
232 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
233 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
234 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
235 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
236 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
237 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
238 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
239 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
240 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
241 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
242 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
243 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
244 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
245 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
246 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
247 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
248 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
249 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
250 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
251 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
252 //error:0.000020
253 };
254 static const uint8_t obmc16[256]={
255 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
256 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
257 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
258 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
259 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
260 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
261 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
262 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
263 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
264 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
265 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
266 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
267 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
268 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
269 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
270 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
271 //error:0.000015
272 };
273 #else //64*cos
274 static const uint8_t obmc32[1024]={
275 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
276 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
277 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
278 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
279 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
280 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
281 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
282 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
283 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
284 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
285 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
286 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
287 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
288 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
289 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
290 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
291 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
292 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
293 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
294 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
295 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
296 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
297 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
298 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
299 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
300 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
301 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
302 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
303 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
304 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
305 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
306 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
307 //error:0.000022
308 };
309 static const uint8_t obmc16[256]={
310 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
311 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
312 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
313 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
314 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
315 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
316 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
317 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
318 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
319 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
320 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
321 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
322 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
323 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
324 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
325 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
326 //error:0.000022
327 };
328 #endif
329
/*
 * 8x8 overlapped-block weighting window ("linear *64").
 * Entry (y,x) equals w[y]*w[x] with w = {1,3,5,7,7,5,3,1}; the four entries
 * that lie half a window (4 samples) apart in each direction sum to 64.
 */
static const uint8_t obmc8[64]={
     1,  3,  5,  7,  7,  5,  3,  1,
     3,  9, 15, 21, 21, 15,  9,  3,
     5, 15, 25, 35, 35, 25, 15,  5,
     7, 21, 35, 49, 49, 35, 21,  7,
     7, 21, 35, 49, 49, 35, 21,  7,
     5, 15, 25, 35, 35, 25, 15,  5,
     3,  9, 15, 21, 21, 15,  9,  3,
     1,  3,  5,  7,  7,  5,  3,  1,
//error:0.000000
};
342
/*
 * 4x4 overlapped-block weighting window ("linear *64").
 * Entry (y,x) equals w[y]*w[x] with w = {2,6,6,2}; the four entries that lie
 * half a window (2 samples) apart in each direction sum to 64.
 */
static const uint8_t obmc4[16]={
     4, 12, 12,  4,
    12, 36, 36, 12,
    12, 36, 36, 12,
     4, 12, 12,  4,
//error:0.000000
};
351
352 static const uint8_t *obmc_tab[4]={
353 obmc32, obmc16, obmc8, obmc4
354 };
355
/*
 * One node of the block description.
 * NOTE(review): the field meanings given below are read off the names only;
 * confirm them against the code that fills the structure in.
 */
typedef struct BlockNode{
    int16_t mx;       /* presumably the horizontal motion vector component */
    int16_t my;       /* presumably the vertical motion vector component */
    uint8_t color[3]; /* three 8 bit values, presumably one per colour plane */
    uint8_t type;     /* block type; BLOCK_INTRA is the only value defined here */
//#define TYPE_SPLIT 1
#define BLOCK_INTRA 1
//#define TYPE_NOCOLOR 4
    uint8_t level;    //FIXME merge into type?
}BlockNode;
366
367 #define LOG2_MB_SIZE 4
368 #define MB_SIZE (1<<LOG2_MB_SIZE)
369
370 typedef struct SubBand{
371 int level;
372 int stride;
373 int width;
374 int height;
375 int qlog; ///< log(qscale)/log[2^(1/6)]
376 DWTELEM *buf;
377 struct SubBand *parent;
378 uint8_t state[/*7*2*/ 7 + 512][32];
379 }SubBand;
380
381 typedef struct Plane{
382 int width;
383 int height;
384 SubBand band[MAX_DECOMPOSITIONS][4];
385 }Plane;
386
387 typedef struct SnowContext{
388 // MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
389
390 AVCodecContext *avctx;
391 CABACContext c;
392 DSPContext dsp;
393 AVFrame input_picture;
394 AVFrame current_picture;
395 AVFrame last_picture;
396 AVFrame mconly_picture;
397 // uint8_t q_context[16];
398 uint8_t header_state[32];
399 uint8_t block_state[128 + 32*128];
400 int keyframe;
401 int version;
402 int spatial_decomposition_type;
403 int temporal_decomposition_type;
404 int spatial_decomposition_count;
405 int temporal_decomposition_count;
406 DWTELEM *spatial_dwt_buffer;
407 DWTELEM *pred_buffer;
408 int colorspace_type;
409 int chroma_h_shift;
410 int chroma_v_shift;
411 int spatial_scalability;
412 int qlog;
413 int lambda;
414 int lambda2;
415 int mv_scale;
416 int qbias;
417 #define QBIAS_SHIFT 3
418 int b_width;
419 int b_height;
420 int block_max_depth;
421 Plane plane[MAX_PLANES];
422 BlockNode *block;
423
424 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
425 }SnowContext;
426
#define QEXPSHIFT 7 //FIXME try to change this to 0
/*
 * Fixed point table with QEXPSHIFT fractional bits:
 * qexp[i] == round(2^(i/8) * (1<<QEXPSHIFT)) for i = 0..7.
 * The commented rows list the same curve for smaller shifts.
 */
static const uint8_t qexp[8]={
    128, 140, 152, 166,
    181, 197, 215, 235
//   64,  70,  76,  83,  91,  99, 108, 117
//   32,  35,  38,  41,  45,  49,  54,  59
//   16,  17,  19,  21,  23,  25,  27,  29
//    8,   9,  10,  10,  11,  12,  13,  15
};
435
/**
 * Reflect an index back towards the interval [0, m].
 * Values below 0 are reflected around 0, values above m are reflected
 * around m, everything else is returned unchanged. Only a single
 * reflection is performed.
 */
static inline int mirror(int v, int m){
    if(v < 0)
        return -v;
    if(v > m)
        return 2*m - v;
    return v;
}
441
442 static inline void put_symbol(CABACContext *c, uint8_t *state, int v, int is_signed){
443 int i;
444
445 if(v){
446 const int a= ABS(v);
447 const int e= av_log2(a);
448 #if 1
449 const int el= FFMIN(e, 10);
450 put_cabac(c, state+0, 0);
451
452 for(i=0; i<el; i++){
453 put_cabac(c, state+1+i, 1); //1..10
454 }
455 for(; i<e; i++){
456 put_cabac(c, state+1+9, 1); //1..10
457 }
458 put_cabac(c, state+1+FFMIN(i,9), 0);
459
460 for(i=e-1; i>=el; i--){
461 put_cabac(c, state+22+9, (a>>i)&1); //22..31
462 }
463 for(; i>=0; i--){
464 put_cabac(c, state+22+i, (a>>i)&1); //22..31
465 }
466
467 if(is_signed)
468 put_cabac(c, state+11 + el, v < 0); //11..21
469 #else
470
471 put_cabac(c, state+0, 0);
472 if(e<=9){
473 for(i=0; i<e; i++){
474 put_cabac(c, state+1+i, 1); //1..10
475 }
476 put_cabac(c, state+1+i, 0);
477
478 for(i=e-1; i>=0; i--){
479 put_cabac(c, state+22+i, (a>>i)&1); //22..31
480 }
481
482 if(is_signed)
483 put_cabac(c, state+11 + e, v < 0); //11..21
484 }else{
485 for(i=0; i<e; i++){
486 put_cabac(c, state+1+FFMIN(i,9), 1); //1..10
487 }
488 put_cabac(c, state+1+FFMIN(i,9), 0);
489
490 for(i=e-1; i>=0; i--){
491 put_cabac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
492 }
493
494 if(is_signed)
495 put_cabac(c, state+11 + FFMIN(e,10), v < 0); //11..21
496 }
497 #endif
498 }else{
499 put_cabac(c, state+0, 1);
500 }
501 }
502
503 static inline int get_symbol(CABACContext *c, uint8_t *state, int is_signed){
504 if(get_cabac(c, state+0))
505 return 0;
506 else{
507 int i, e, a, el;
508 //FIXME try to merge loops with FFMIN() maybe they are equally fast and they are surly cuter
509 for(e=0; e<10; e++){
510 if(get_cabac(c, state + 1 + e)==0) // 1..10
511 break;
512 }
513 el= e;
514
515 if(e==10){
516 while(get_cabac(c, state + 1 + 9)) //10
517 e++;
518 }
519 a= 1;
520 for(i=e-1; i>=el; i--){
521 a += a + get_cabac(c, state+22+9); //31
522 }
523 for(; i>=0; i--){
524 a += a + get_cabac(c, state+22+i); //22..31
525 }
526
527 if(is_signed && get_cabac(c, state+11 + el)) //11..21
528 return -a;
529 else
530 return a;
531 }
532 }
533
534 static inline void put_symbol2(CABACContext *c, uint8_t *state, int v, int log2){
535 int i;
536 int r= log2>=0 ? 1<<log2 : 1;
537
538 assert(v>=0);
539 assert(log2>=-4);
540
541 while(v >= r){
542 put_cabac(c, state+4+log2, 1);
543 v -= r;
544 log2++;
545 if(log2>0) r+=r;
546 }
547 put_cabac(c, state+4+log2, 0);
548
549 for(i=log2-1; i>=0; i--){
550 put_cabac(c, state+31-i, (v>>i)&1);
551 }
552 }
553
554 static inline int get_symbol2(CABACContext *c, uint8_t *state, int log2){
555 int i;
556 int r= log2>=0 ? 1<<log2 : 1;
557 int v=0;
558
559 assert(log2>=-4);
560
561 while(get_cabac(c, state+4+log2)){
562 v+= r;
563 log2++;
564 if(log2>0) r+=r;
565 }
566
567 for(i=log2-1; i>=0; i--){
568 v+= get_cabac(c, state+31-i)<<i;
569 }
570
571 return v;
572 }
573
574 static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
575 const int mirror_left= !highpass;
576 const int mirror_right= (width&1) ^ highpass;
577 const int w= (width>>1) - 1 + (highpass & width);
578 int i;
579
580 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
581 if(mirror_left){
582 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
583 dst += dst_step;
584 src += src_step;
585 }
586
587 for(i=0; i<w; i++){
588 dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse);
589 }
590
591 if(mirror_right){
592 dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse);
593 }
594 }
595
596 static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
597 const int mirror_left= !highpass;
598 const int mirror_right= (width&1) ^ highpass;
599 const int w= (width>>1) - 1 + (highpass & width);
600 int i;
601
602 if(mirror_left){
603 int r= 3*2*ref[0];
604 r += r>>4;
605 r += r>>8;
606 dst[0] = LIFT(src[0], ((r+add)>>shift), inverse);
607 dst += dst_step;
608 src += src_step;
609 }
610
611 for(i=0; i<w; i++){
612 int r= 3*(ref[i*ref_step] + ref[(i+1)*ref_step]);
613 r += r>>4;
614 r += r>>8;
615 dst[i*dst_step] = LIFT(src[i*src_step], ((r+add)>>shift), inverse);
616 }
617
618 if(mirror_right){
619 int r= 3*2*ref[w*ref_step];
620 r += r>>4;
621 r += r>>8;
622 dst[w*dst_step] = LIFT(src[w*src_step], ((r+add)>>shift), inverse);
623 }
624 }
625
626
/**
 * Apply one lifting step in place to every second sample of a line.
 *
 * For x = start, start+2, ... the n samples at x - n + 1 + 2*i (i = 0..n-1)
 * are weighted with coeffs[i], summed, rounded, shifted and added to (or,
 * for the inverse, subtracted from) dst[x].
 *
 * Samples outside [0, width) are taken by whole-sample symmetric reflection.
 * The reflection is periodic, so it also stays inside the line when the
 * filter reaches further than the line is long (for example 4 taps on a
 * line of 2 samples); a single reflection would index outside dst there.
 *
 * @param dst     line to transform, width elements
 * @param width   number of samples in the line
 * @param coeffs  n filter taps; may be NULL when n is 0
 * @param n       number of taps
 * @param shift   right shift applied to the rounded sum
 * @param start   first sample to update (0 or 1 selects the phase)
 * @param inverse non zero undoes the step
 */
static void inplace_lift(int *dst, int width, int *coeffs, int n, int shift, int start, int inverse){
    const int period= 2*(width - 1);   /* period of the mirrored signal */
    const int half= (1<<shift)/2;      /* rounding offset */
    int x, i;

    for(x=start; x<width; x+=2){
        int64_t sum=0;

        for(i=0; i<n; i++){
            int x2= x + 2*i - n + 1;

            if(x2 < 0 || x2 >= width){
                if(period <= 0){
                    x2= 0;              /* one sample only: everything mirrors onto it */
                }else{
                    x2 %= period;
                    if(x2 < 0)      x2 += period;
                    if(x2 >= width) x2= period - x2;
                }
            }
            sum += coeffs[i]*(int64_t)dst[x2];
        }
        if(inverse) dst[x] -= (sum + half)>>shift;
        else        dst[x] += (sum + half)>>shift;
    }
}
643
/**
 * Apply one lifting step in place along the columns of an image.
 *
 * For y = start, start+2, ... and every column x, the n samples in rows
 * y - n + 1 + 2*i (i = 0..n-1) are weighted with coeffs[i], summed, rounded,
 * shifted and added to (or, for the inverse, subtracted from) the sample at
 * (x, y).
 *
 * Rows outside [0, height) are taken by whole-sample symmetric reflection.
 * The reflection is periodic, so it also stays inside the image when the
 * filter reaches further than the image is high; a single reflection would
 * index outside dst there.
 *
 * @param dst     image to transform
 * @param width   number of columns to process
 * @param height  number of rows
 * @param stride  distance between rows, in elements
 * @param coeffs  n filter taps; may be NULL when n is 0
 * @param n       number of taps
 * @param shift   right shift applied to the rounded sum
 * @param start   first row to update (0 or 1 selects the phase)
 * @param inverse non zero undoes the step
 */
static void inplace_liftV(int *dst, int width, int height, int stride, int *coeffs, int n, int shift, int start, int inverse){
    const int period= 2*(height - 1);  /* period of the mirrored signal */
    const int half= (1<<shift)/2;      /* rounding offset */
    int x, y, i;

    for(y=start; y<height; y+=2){
        for(x=0; x<width; x++){
            int64_t sum=0;

            for(i=0; i<n; i++){
                int y2= y + 2*i - n + 1;

                if(y2 < 0 || y2 >= height){
                    if(period <= 0){
                        y2= 0;          /* one row only: everything mirrors onto it */
                    }else{
                        y2 %= period;
                        if(y2 < 0)       y2 += period;
                        if(y2 >= height) y2= period - y2;
                    }
                }
                sum += coeffs[i]*(int64_t)dst[x + y2*stride];
            }
            if(inverse) dst[x + y*stride] -= (sum + half)>>shift;
            else        dst[x + y*stride] += (sum + half)>>shift;
        }
    }
}
661
662 #define SCALEX 1
663 #define LX0 0
664 #define LX1 1
665
666 #if 0 // more accurate 9/7
667 #define N1 2
668 #define SHIFT1 14
669 #define COEFFS1 (int[]){-25987,-25987}
670 #define N2 2
671 #define SHIFT2 19
672 #define COEFFS2 (int[]){-27777,-27777}
673 #define N3 2
674 #define SHIFT3 15
675 #define COEFFS3 (int[]){28931,28931}
676 #define N4 2
677 #define SHIFT4 15
678 #define COEFFS4 (int[]){14533,14533}
679 #elif 1 // 13/7 CRF
680 #define N1 4
681 #define SHIFT1 4
682 #define COEFFS1 (int[]){1,-9,-9,1}
683 #define N2 4
684 #define SHIFT2 4
685 #define COEFFS2 (int[]){-1,5,5,-1}
686 #define N3 0
687 #define SHIFT3 1
688 #define COEFFS3 NULL
689 #define N4 0
690 #define SHIFT4 1
691 #define COEFFS4 NULL
692 #elif 1 // 3/5
693 #define LX0 1
694 #define LX1 0
695 #define SCALEX 0.5
696 #define N1 2
697 #define SHIFT1 1
698 #define COEFFS1 (int[]){1,1}
699 #define N2 2
700 #define SHIFT2 2
701 #define COEFFS2 (int[]){-1,-1}
702 #define N3 0
703 #define SHIFT3 0
704 #define COEFFS3 NULL
705 #define N4 0
706 #define SHIFT4 0
707 #define COEFFS4 NULL
708 #elif 1 // 11/5
709 #define N1 0
710 #define SHIFT1 1
711 #define COEFFS1 NULL
712 #define N2 2
713 #define SHIFT2 2
714 #define COEFFS2 (int[]){-1,-1}
715 #define N3 2
716 #define SHIFT3 0
717 #define COEFFS3 (int[]){-1,-1}
718 #define N4 4
719 #define SHIFT4 7
720 #define COEFFS4 (int[]){-5,29,29,-5}
721 #define SCALEX 4
722 #elif 1 // 9/7 CDF
723 #define N1 2
724 #define SHIFT1 7
725 #define COEFFS1 (int[]){-203,-203}
726 #define N2 2
727 #define SHIFT2 12
728 #define COEFFS2 (int[]){-217,-217}
729 #define N3 2
730 #define SHIFT3 7
731 #define COEFFS3 (int[]){113,113}
732 #define N4 2
733 #define SHIFT4 9
734 #define COEFFS4 (int[]){227,227}
735 #define SCALEX 1
736 #elif 1 // 7/5 CDF
737 #define N1 0
738 #define SHIFT1 1
739 #define COEFFS1 NULL
740 #define N2 2
741 #define SHIFT2 2
742 #define COEFFS2 (int[]){-1,-1}
743 #define N3 2
744 #define SHIFT3 0
745 #define COEFFS3 (int[]){-1,-1}
746 #define N4 2
747 #define SHIFT4 4
748 #define COEFFS4 (int[]){3,3}
749 #elif 1 // 9/7 MN
750 #define N1 4
751 #define SHIFT1 4
752 #define COEFFS1 (int[]){1,-9,-9,1}
753 #define N2 2
754 #define SHIFT2 2
755 #define COEFFS2 (int[]){1,1}
756 #define N3 0
757 #define SHIFT3 1
758 #define COEFFS3 NULL
759 #define N4 0
760 #define SHIFT4 1
761 #define COEFFS4 NULL
762 #else // 13/7 CRF
763 #define N1 4
764 #define SHIFT1 4
765 #define COEFFS1 (int[]){1,-9,-9,1}
766 #define N2 4
767 #define SHIFT2 4
768 #define COEFFS2 (int[]){-1,5,5,-1}
769 #define N3 0
770 #define SHIFT3 1
771 #define COEFFS3 NULL
772 #define N4 0
773 #define SHIFT4 1
774 #define COEFFS4 NULL
775 #endif
776 static void horizontal_decomposeX(int *b, int width){
777 int temp[width];
778 const int width2= width>>1;
779 const int w2= (width+1)>>1;
780 int A1,A2,A3,A4, x;
781
782 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 0);
783 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 0);
784 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 0);
785 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 0);
786
787 for(x=0; x<width2; x++){
788 temp[x ]= b[2*x ];
789 temp[x+w2]= b[2*x + 1];
790 }
791 if(width&1)
792 temp[x ]= b[2*x ];
793 memcpy(b, temp, width*sizeof(int));
794 }
795
796 static void horizontal_composeX(int *b, int width){
797 int temp[width];
798 const int width2= width>>1;
799 int A1,A2,A3,A4, x;
800 const int w2= (width+1)>>1;
801
802 memcpy(temp, b, width*sizeof(int));
803 for(x=0; x<width2; x++){
804 b[2*x ]= temp[x ];
805 b[2*x + 1]= temp[x+w2];
806 }
807 if(width&1)
808 b[2*x ]= temp[x ];
809
810 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 1);
811 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 1);
812 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 1);
813 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 1);
814 }
815
816 static void spatial_decomposeX(int *buffer, int width, int height, int stride){
817 int x, y;
818
819 for(y=0; y<height; y++){
820 for(x=0; x<width; x++){
821 buffer[y*stride + x] *= SCALEX;
822 }
823 }
824
825 for(y=0; y<height; y++){
826 horizontal_decomposeX(buffer + y*stride, width);
827 }
828
829 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 0);
830 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0);
831 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0);
832 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0);
833 }
834
835 static void spatial_composeX(int *buffer, int width, int height, int stride){
836 int x, y;
837
838 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1);
839 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1);
840 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1);
841 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 1);
842
843 for(y=0; y<height; y++){
844 horizontal_composeX(buffer + y*stride, width);
845 }
846
847 for(y=0; y<height; y++){
848 for(x=0; x<width; x++){
849 buffer[y*stride + x] /= SCALEX;
850 }
851 }
852 }
853
/**
 * Forward 1-D pass of the 5/3 integer transform over one row.
 *
 * The row is first deinterleaved into temp (even samples at temp[0 .. w2-1],
 * odd samples at temp[w2 .. width-1], w2 = (width+1)/2). Two lift() calls
 * then write the result back into b: the first produces b[w2 ..] from the
 * odd samples using the even ones as reference, the second produces b[0 ..]
 * from the even samples using the freshly written b[w2 ..] as reference.
 *
 * The hand-unrolled variant that used to sit here under "#if 0" was removed:
 * it was never compiled and indexed the second half with width>>1 where the
 * live code uses (width+1)>>1.
 *
 * @param b     row of width samples, rewritten in place
 * @param width number of samples in the row
 */
static void horizontal_decompose53i(int *b, int width){
    int temp[width];
    const int width2= width>>1;
    const int w2= (width+1)>>1;
    int x;

    for(x=0; x<width2; x++){
        temp[x   ]= b[2*x    ];
        temp[x+w2]= b[2*x + 1];
    }
    /* odd width: one more even sample without an odd partner */
    if(width&1)
        temp[x   ]= b[2*x    ];

    lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
    lift(b   , temp   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 0);
}
899
/**
 * Vertical 5/3 analysis step for one row:
 * b1[i] -= (b0[i] + b2[i]) >> 1 for every column i < width.
 *
 * @param b0    row above, read only
 * @param b1    row being updated in place
 * @param b2    row below, read only
 * @param width number of columns
 */
static void vertical_decompose53iH0(int *b0, int *b1, int *b2, int width){
    int col= 0;

    while(col < width){
        const int neighbours= b0[col] + b2[col];

        b1[col] -= neighbours>>1;
        col++;
    }
}
907
/**
 * Vertical 5/3 analysis step for one row:
 * b1[i] += (b0[i] + b2[i] + 2) >> 2 for every column i < width.
 *
 * @param b0    row above, read only
 * @param b1    row being updated in place
 * @param b2    row below, read only
 * @param width number of columns
 */
static void vertical_decompose53iL0(int *b0, int *b1, int *b2, int width){
    int col= 0;

    while(col < width){
        const int neighbours= b0[col] + b2[col];

        b1[col] += (neighbours + 2)>>2;
        col++;
    }
}
915
916 static void spatial_decompose53i(int *buffer, int width, int height, int stride){
917 int x, y;
918 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
919 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride;
920
921 for(y=-2; y<height; y+=2){
922 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
923 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
924
925 {START_TIMER
926 if(b1 <= b3) horizontal_decompose53i(b2, width);
927 if(y+2 < height) horizontal_decompose53i(b3, width);
928 STOP_TIMER("horizontal_decompose53i")}
929
930 {START_TIMER
931 if(b1 <= b3) vertical_decompose53iH0(b1, b2, b3, width);
932 if(b0 <= b2) vertical_decompose53iL0(b0, b1, b2, width);
933 STOP_TIMER("vertical_decompose53i*")}
934
935 b0=b2;
936 b1=b3;
937 }
938 }
939
/*
 * Integer coefficients for the four lifting steps (A, B, C, D) used by the
 * 9/7 functions below. For a step X the vertical helpers evaluate
 * (W_XM*(b0[i] + b2[i]) + W_XO) >> W_XS, so W_XM is the multiplier, W_XO the
 * offset added before the shift and W_XS the right shift.
 *
 * lift5 starts out as an alias for lift. The coefficient set selected by
 * "#if 1" removes that alias again; with that set the lift5(...) calls refer
 * to a separate symbol (not defined in this part of the file) and the
 * "#ifdef lift5" tests in the vertical H1 helpers pick their #else branch,
 * which does not read W_CM.
 */
#define lift5 lift
#if 1
#define W_AM 3
#define W_AO 0
#define W_AS 1

#define W_BM 1
#define W_BO 8
#define W_BS 4

#undef lift5
/* NOTE(review): 9999 looks like a placeholder rather than a real multiplier;
 * confirm how lift5 treats its multiplier argument. */
#define W_CM 9999
#define W_CO 2
#define W_CS 2

#define W_DM 15
#define W_DO 16
#define W_DS 5
#elif 0
#define W_AM 55
#define W_AO 16
#define W_AS 5

#define W_BM 3
#define W_BO 32
#define W_BS 6

#define W_CM 127
#define W_CO 64
#define W_CS 7

#define W_DM 7
#define W_DO 8
#define W_DS 4
#elif 0
#define W_AM 97
#define W_AO 32
#define W_AS 6

#define W_BM 63
#define W_BO 512
#define W_BS 10

#define W_CM 13
#define W_CO 8
#define W_CS 4

#define W_DM 15
#define W_DO 16
#define W_DS 5

#else

#define W_AM 203
#define W_AO 64
#define W_AS 7

#define W_BM 217
#define W_BO 2048
#define W_BS 12

#define W_CM 113
#define W_CO 64
#define W_CS 7

#define W_DM 227
#define W_DO 128
#define W_DS 9
#endif
1009 static void horizontal_decompose97i(int *b, int width){
1010 int temp[width];
1011 const int w2= (width+1)>>1;
1012
1013 lift (temp+w2, b +1, b , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0);
1014 lift (temp , b , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0);
1015 lift5(b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
1016 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
1017 }
1018
1019
1020 static void vertical_decompose97iH0(int *b0, int *b1, int *b2, int width){
1021 int i;
1022
1023 for(i=0; i<width; i++){
1024 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1025 }
1026 }
1027
1028 static void vertical_decompose97iH1(int *b0, int *b1, int *b2, int width){
1029 int i;
1030
1031 for(i=0; i<width; i++){
1032 #ifdef lift5
1033 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1034 #else
1035 int r= 3*(b0[i] + b2[i]);
1036 r+= r>>4;
1037 r+= r>>8;
1038 b1[i] += (r+W_CO)>>W_CS;
1039 #endif
1040 }
1041 }
1042
1043 static void vertical_decompose97iL0(int *b0, int *b1, int *b2, int width){
1044 int i;
1045
1046 for(i=0; i<width; i++){
1047 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1048 }
1049 }
1050
1051 static void vertical_decompose97iL1(int *b0, int *b1, int *b2, int width){
1052 int i;
1053
1054 for(i=0; i<width; i++){
1055 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1056 }
1057 }
1058
1059 static void spatial_decompose97i(int *buffer, int width, int height, int stride){
1060 int x, y;
1061 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
1062 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride;
1063 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
1064 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
1065
1066 for(y=-4; y<height; y+=2){
1067 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1068 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1069
1070 {START_TIMER
1071 if(b3 <= b5) horizontal_decompose97i(b4, width);
1072 if(y+4 < height) horizontal_decompose97i(b5, width);
1073 if(width>400){
1074 STOP_TIMER("horizontal_decompose97i")
1075 }}
1076
1077 {START_TIMER
1078 if(b3 <= b5) vertical_decompose97iH0(b3, b4, b5, width);
1079 if(b2 <= b4) vertical_decompose97iL0(b2, b3, b4, width);
1080 if(b1 <= b3) vertical_decompose97iH1(b1, b2, b3, width);
1081 if(b0 <= b2) vertical_decompose97iL1(b0, b1, b2, width);
1082
1083 if(width>400){
1084 STOP_TIMER("vertical_decompose97i")
1085 }}
1086
1087 b0=b2;
1088 b1=b3;
1089 b2=b4;
1090 b3=b5;
1091 }
1092 }
1093
/**
 * Forward multi-level 2-D wavelet transform, in place.
 *
 * For level 0 .. decomposition_count-1 the selected single-level transform
 * is run on an area of (width>>level) x (height>>level) samples with a row
 * stride of stride<<level.
 *
 * @param type 0 selects spatial_decompose97i, 1 spatial_decompose53i,
 *             2 spatial_decomposeX; any other value does nothing
 */
void ff_spatial_dwt(int *buffer, int width, int height, int stride, int type, int decomposition_count){
    int level= 0;

    while(level < decomposition_count){
        if(type == 0)
            spatial_decompose97i(buffer, width>>level, height>>level, stride<<level);
        else if(type == 1)
            spatial_decompose53i(buffer, width>>level, height>>level, stride<<level);
        else if(type == 2)
            spatial_decomposeX  (buffer, width>>level, height>>level, stride<<level);
        level++;
    }
}
1105
/**
 * Inverse 1-D pass of the 5/3 integer transform over one row.
 *
 * Two lift() calls (last argument 1) rebuild the two halves of the row in
 * temp: first temp[0 ..] from b[0 ..] with b[w2 ..] as reference, then
 * temp[w2 ..] from b[w2 ..] with the new temp[0 ..] as reference
 * (w2 = (width+1)/2). The halves are then interleaved back into b, first
 * half to the even positions and second half to the odd positions.
 *
 * The block that used to sit here under "#if 0" was removed: it was a copy
 * of the forward transform's unrolled code and would have read temp before
 * anything was stored in it.
 *
 * @param b     row of width samples, rewritten in place
 * @param width number of samples in the row
 */
static void horizontal_compose53i(int *b, int width){
    int temp[width];
    const int width2= width>>1;
    const int w2= (width+1)>>1;
    int x;

    lift(temp   , b   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 1);
    lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);

    for(x=0; x<width2; x++){
        b[2*x    ]= temp[x   ];
        b[2*x + 1]= temp[x+w2];
    }
    /* odd width: the last even sample has no odd partner */
    if(width&1)
        b[2*x    ]= temp[x   ];
}
1151
/**
 * Vertical 5/3 synthesis step for one row (undoes vertical_decompose53iH0):
 * b1[i] += (b0[i] + b2[i]) >> 1 for every column i < width.
 *
 * @param b0    row above, read only
 * @param b1    row being updated in place
 * @param b2    row below, read only
 * @param width number of columns
 */
static void vertical_compose53iH0(int *b0, int *b1, int *b2, int width){
    int col= 0;

    while(col < width){
        const int neighbours= b0[col] + b2[col];

        b1[col] += neighbours>>1;
        col++;
    }
}
1159
/**
 * Vertical 5/3 synthesis step for one row (undoes vertical_decompose53iL0):
 * b1[i] -= (b0[i] + b2[i] + 2) >> 2 for every column i < width.
 *
 * @param b0    row above, read only
 * @param b1    row being updated in place
 * @param b2    row below, read only
 * @param width number of columns
 */
static void vertical_compose53iL0(int *b0, int *b1, int *b2, int width){
    int col= 0;

    while(col < width){
        const int neighbours= b0[col] + b2[col];

        b1[col] -= (neighbours + 2)>>2;
        col++;
    }
}
1167
1168 static void spatial_compose53i(int *buffer, int width, int height, int stride){
1169 int x, y;
1170 DWTELEM *b0= buffer + mirror(-1-1, height-1)*stride;
1171 DWTELEM *b1= buffer + mirror(-1 , height-1)*stride;
1172
1173 for(y=-1; y<=height; y+=2){
1174 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1175 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1176
1177 {START_TIMER
1178 if(b1 <= b3) vertical_compose53iL0(b1, b2, b3, width);
1179 if(b0 <= b2) vertical_compose53iH0(b0, b1, b2, width);
1180 STOP_TIMER("vertical_compose53i*")}
1181
1182 {START_TIMER
1183 if(y-1 >= 0) horizontal_compose53i(b0, width);
1184 if(b0 <= b2) horizontal_compose53i(b1, width);
1185 STOP_TIMER("horizontal_compose53i")}
1186
1187 b0=b2;
1188 b1=b3;
1189 }
1190 }
1191
1192
1193 static void horizontal_compose97i(int *b, int width){
1194 int temp[width];
1195 const int w2= (width+1)>>1;
1196
1197 lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
1198 lift5(temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
1199 lift (b , temp , temp+w2, 2, 1, 1, width, -W_BM, W_BO, W_BS, 0, 1);
1200 lift (b+1 , temp+w2, b , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1);
1201 }
1202
1203 static void vertical_compose97iH0(int *b0, int *b1, int *b2, int width){
1204 int i;
1205
1206 for(i=0; i<width; i++){
1207 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1208 }
1209 }
1210
1211 static void vertical_compose97iH1(int *b0, int *b1, int *b2, int width){
1212 int i;
1213
1214 for(i=0; i<width; i++){
1215 #ifdef lift5
1216 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1217 #else
1218 int r= 3*(b0[i] + b2[i]);
1219 r+= r>>4;
1220 r+= r>>8;
1221 b1[i] -= (r+W_CO)>>W_CS;
1222 #endif
1223 }
1224 }
1225
1226 static void vertical_compose97iL0(int *b0, int *b1, int *b2, int width){
1227 int i;
1228
1229 for(i=0; i<width; i++){
1230 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1231 }
1232 }
1233
1234 static void vertical_compose97iL1(int *b0, int *b1, int *b2, int width){
1235 int i;
1236
1237 for(i=0; i<width; i++){
1238 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1239 }
1240 }
1241
1242 static void spatial_compose97i(int *buffer, int width, int height, int stride){
1243 int x, y;
1244 DWTELEM *b0= buffer + mirror(-3-1, height-1)*stride;
1245 DWTELEM *b1= buffer + mirror(-3 , height-1)*stride;
1246 DWTELEM *b2= buffer + mirror(-3+1, height-1)*stride;
1247 DWTELEM *b3= buffer + mirror(-3+2, height-1)*stride;
1248
1249 for(y=-3; y<=height; y+=2){
1250 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1251 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1252
1253 if(stride == width && y+4 < height && 0){
1254 int x;
1255 for(x=0; x<width/2; x++)
1256 b5[x] += 64*2;
1257 for(; x<width; x++)
1258 b5[x] += 169*2;
1259 }
1260
1261 {START_TIMER
1262 if(b3 <= b5) vertical_compose97iL1(b3, b4, b5, width);
1263 if(b2 <= b4) vertical_compose97iH1(b2, b3, b4, width);
1264 if(b1 <= b3) vertical_compose97iL0(b1, b2, b3, width);
1265 if(b0 <= b2) vertical_compose97iH0(b0, b1, b2, width);
1266 if(width>400){
1267 STOP_TIMER("vertical_compose97i")}}
1268
1269 {START_TIMER
1270 if(y-1>= 0) horizontal_compose97i(b0, width);
1271 if(b0 <= b2) horizontal_compose97i(b1, width);
1272 if(width>400 && b0 <= b2){
1273 STOP_TIMER("horizontal_compose97i")}}
1274
1275 b0=b2;
1276 b1=b3;
1277 b2=b4;
1278 b3=b5;
1279 }
1280 }
1281
/**
 * Inverse multi-level 2-D wavelet transform, in place.
 *
 * For level decomposition_count-1 down to 0 the selected single-level
 * reconstruction is run on an area of (width>>level) x (height>>level)
 * samples with a row stride of stride<<level.
 *
 * @param type 0 selects spatial_compose97i, 1 spatial_compose53i,
 *             2 spatial_composeX; any other value does nothing
 */
void ff_spatial_idwt(int *buffer, int width, int height, int stride, int type, int decomposition_count){
    int level= decomposition_count;

    while(--level >= 0){
        if(type == 0)
            spatial_compose97i(buffer, width>>level, height>>level, stride<<level);
        else if(type == 1)
            spatial_compose53i(buffer, width>>level, height>>level, stride<<level);
        else if(type == 2)
            spatial_composeX  (buffer, width>>level, height>>level, stride<<level);
    }
}
1293
/*
 * Sixteen coordinate pairs that visit every cell of a 4x4 grid exactly once;
 * consecutive entries differ by one in exactly one coordinate.
 * NOTE(review): presumably one 4x4 tile of a Hilbert-curve scan (compare the
 * sketches in the disabled block that follows); which column is x and which
 * is y cannot be told from the table itself -- confirm against its users.
 */
static const int hilbert[16][2]={
    {0,0}, {1,0}, {1,1}, {0,1},
    {0,2}, {0,3}, {1,3}, {1,2},
    {2,2}, {2,3}, {3,3}, {3,2},
    {3,1}, {2,1}, {2,0}, {3,0},
};
1300 #if 0
1301 -o o-
1302 | |
1303 o-o
1304
1305 -o-o o-o-
1306 | |
1307 o-o o-o
1308 | |
1309 o o-o o
1310 | | | |
1311 o-o o-o
1312
1313 0112122312232334122323342334
1314 0123456789ABCDEF0123456789AB
1315 RLLRMRRLLRRMRLLMLRRLMLLRRLLM
1316
1317 4 B F 14 1B
1318 4 11 15 20 27
1319
1320 -o o-o-o o-o-o o-
1321 | | | | | |
1322 o-o o-o o-o o-o
1323 | |
1324 o-o o-o o-o o-o
1325 | | | | | |
1326 o o-o-o o-o-o o
1327 | |
1328 o-o o-o-o-o o-o
1329 | | | |
1330 o-o o-o o-o o-o
1331 | | | |
1332 o o-o o o o-o o
1333 | | | | | | | |
1334 o-o o-o o-o o-o
1335
1336 #endif
1337
/**
 * Append the pair (x, y) to a list: stores x in a[i][0] and y in a[i][1],
 * then increments i.
 * The macro expands to a braced block rather than an expression; i is
 * evaluated several times and modified, so it must be a plain lvalue.
 */
#define SVI(a, i, x, y) \
{\
    a[i][0]= x;\
    a[i][1]= y;\
    i++;\
}
1344
/**
 * Comparison callback for arrays of int16_t pairs.
 *
 * Orders primarily by element [1] of each pair and, when those are equal,
 * by element [0].
 *
 * @return negative, zero or positive like any qsort() comparator; the value
 *         is the difference of the first elements that differ
 */
static int sig_cmp(const void *a, const void *b){
    const int16_t *lhs= a;
    const int16_t *rhs= b;
    const int primary= lhs[1] - rhs[1];

    return primary ? primary : lhs[0] - rhs[0];
}
1352
/**
 * Gather the even-numbered bits of a into a compact value.
 *
 * Bits 0, 2, 4, ... of a are kept and packed next to each other in four
 * doubling stages (pairs, nibbles, bytes, 16 bits); bit 2k of the input ends
 * up as bit k of the result. Odd-numbered input bits are ignored.
 *
 * Each stage moves the selected groups up by `shift` bits, which is the same
 * as adding (2^shift - 1) times the selected bits; the packed value ends up
 * 15 bits above bit 0, hence the final shift.
 */
static int deint(unsigned int a){
    static const struct {
        unsigned int mask;
        int shift;
    } stage[4]= {
        {0x11111111, 1},
        {0x0F0F0F0F, 2},
        {0x00FF00FF, 4},
        {0x0000FFFF, 8},
    };
    int i;

    a &= 0x55555555;
    for(i=0; i<4; i++){
        const unsigned int group= a & stage[i].mask;

        a= (a - group) + (group << stage[i].shift);
    }
    return a>>15;
}
1361
/**
 * Subband coder variant that visits the coefficients in the order produced
 * by x = deint(pos), y = deint(pos>>1) instead of row by row.
 *
 * Pass 1 gathers run lengths: for every coefficient whose left, top-left,
 * top and parent values are all zero, a zero extends the current run and a
 * non-zero value stores the run in runs[] and restarts it.
 * Pass 2 writes the data: the first run, then per coefficient either a
 * context-coded significance bit (some neighbour non-zero) or nothing / the
 * next run (all neighbours zero), followed for non-zero coefficients by
 * ABS(v)-1 and a sign bit.
 *
 * @param src         first coefficient of this subband
 * @param parent      first coefficient of the parent subband, or NULL
 * @param stride      distance in samples between two rows of src; the
 *                    parent is addressed with twice this stride
 * @param orientation not used by the active code
 */
static void encode_subband_z0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
    const int level= b->level;
    const int w= b->width;
    const int h= b->height;
    int x, y, pos;

    if(1){
        int run=0;
        int runs[w*h];
        int run_index=0;
        int count=0;

        /* pass 1: collect the zero-run lengths */
        for(pos=0; ; pos++){
            int x= deint(pos   );
            int y= deint(pos>>1);
            int v, p=0, pr=0, pd=0;
            int /*ll=0, */l=0, lt=0, t=0/*, rt=0*/;

            /* positions outside the band are skipped; the scan ends at the
               first position that is outside in both directions */
            if(x>=w || y>=h){
                if(x>=w && y>=h)
                    break;
                continue;
            }
            count++;

            v= src[x + y*stride];

            if(y){
                t= src[x + (y-1)*stride];
                if(x){
                    lt= src[x - 1 + (y-1)*stride];
                }
                if(x + 1 < w){
                    /*rt= src[x + 1 + (y-1)*stride]*/;
                }
            }
            if(x){
                l= src[x - 1 + y*stride];
                /*if(x > 1){
                    if(orientation==1) ll= src[y + (x-2)*stride];
                    else               ll= src[x - 2 + y*stride];
                }*/
            }
            if(parent){
                int px= x>>1;
                int py= y>>1;
                if(px<b->parent->width && py<b->parent->height){
                    p= parent[px + py*2*stride];
                    /*if(px+1<b->parent->width)
                        pr= parent[px + 1 + py*2*stride];
                    if(py+1<b->parent->height)
                        pd= parent[px + (py+1)*2*stride];*/
                }
            }
            if(!(/*ll|*/l|lt|t|/*rt|*/p)){
                if(v){
                    runs[run_index++]= run;
                    run=0;
                }else{
                    run++;
                }
            }
        }
        /* every coefficient of the band must have been visited exactly once */
        assert(count==w*h);
        runs[run_index++]= run;
        run_index=0;
        run= runs[run_index++];

        put_symbol(&s->c, b->state[1], run, 0);

        /* pass 2: same scan, now writing the symbols */
        for(pos=0; ; pos++){
            int x= deint(pos   );
            int y= deint(pos>>1);
            int v, p=0, pr=0, pd=0;
            int /*ll=0, */l=0, lt=0, t=0/*, rt=0*/;

            if(x>=w || y>=h){
                if(x>=w && y>=h)
                    break;
                continue;
            }
            v= src[x + y*stride];

            if(y){
                t= src[x + (y-1)*stride];
                if(x){
                    lt= src[x - 1 + (y-1)*stride];
                }
                if(x + 1 < w){
//                    rt= src[x + 1 + (y-1)*stride];
                }
            }
            if(x){
                l= src[x - 1 + y*stride];
                /*if(x > 1){
                    if(orientation==1) ll= src[y + (x-2)*stride];
                    else               ll= src[x - 2 + y*stride];
                }*/
            }

            if(parent){
                int px= x>>1;
                int py= y>>1;
                if(px<b->parent->width && py<b->parent->height){
                    p= parent[px + py*2*stride];
/*                    if(px+1<b->parent->width)
                        pr= parent[px + 1 + py*2*stride];
                    if(py+1<b->parent->height)
                        pd= parent[px + (py+1)*2*stride];*/
                }
            }
            if(/*ll|*/l|lt|t|/*rt|*/p){
                /* some neighbour is non-zero: code significance with a context */
                int context= av_log2(/*ABS(ll) + */2*(3*ABS(l) + ABS(lt) + 2*ABS(t) + /*ABS(rt) +*/ ABS(p)));

                put_cabac(&s->c, &b->state[0][context], !!v);
            }else{
                /* all neighbours zero: significance is implied by the runs */
                if(!run){
                    run= runs[run_index++];
                    put_symbol(&s->c, b->state[1], run, 0);
                    assert(v);
                }else{
                    run--;
                    assert(!v);
                }
            }
            if(v){
                /* magnitude minus one, then the sign */
                int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + /*ABS(rt) +*/ ABS(p));

                put_symbol(&s->c, b->state[context + 2], ABS(v)-1, 0);
                put_cabac(&s->c, &b->state[0][16 + 1 + 3 + quant3b[l&0xFF] + 3*quant3b[t&0xFF]], v<0);
            }
        }
    }
}
1496
/**
 * Bit-plane subband coder experiment.
 *
 * The whole implementation is inside "#if 0", so as compiled this function
 * only declares a few locals and returns without writing anything.
 *
 * The disabled code loops over planes 24..0; for each plane it masks the
 * current value and its neighbours with HIDE(), collects zero runs in a
 * first raster pass and writes runs, significance/refinement bits and signs
 * in a second raster pass.
 */
static void encode_subband_bp(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
    const int level= b->level;
    const int w= b->width;
    const int h= b->height;
    int x, y;

#if 0
    int plane;
    for(plane=24; plane>=0; plane--){
        int run=0;
        int runs[w*h];
        int run_index=0;

        for(y=0; y<h; y++){
            for(x=0; x<w; x++){
                int v, lv, p=0;
                int d=0, r=0, rd=0, ld=0;
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
                v= src[x + y*stride];

                if(y){
                    t= src[x + (y-1)*stride];
                    if(x){
                        lt= src[x - 1 + (y-1)*stride];
                    }
                    if(x + 1 < w){
                        rt= src[x + 1 + (y-1)*stride];
                    }
                }
                if(x){
                    l= src[x - 1 + y*stride];
                    /*if(x > 1){
                        if(orientation==1) ll= src[y + (x-2)*stride];
                        else               ll= src[x - 2 + y*stride];
                    }*/
                }
                if(y+1<h){
                    d= src[x + (y+1)*stride];
                    if(x)         ld= src[x - 1 + (y+1)*stride];
                    if(x + 1 < w) rd= src[x + 1 + (y+1)*stride];
                }
                if(x + 1 < w)
                    r= src[x + 1 + y*stride];
                if(parent){
                    int px= x>>1;
                    int py= y>>1;
                    if(px<b->parent->width && py<b->parent->height)
                        p= parent[px + py*2*stride];
                }
/* clear the bits of c below `plane`, keeping the sign */
#define HIDE(c, plane) c= c>=0 ? c&((-1)<<(plane)) : -((-c)&((-1)<<(plane)));
                lv=v;
                HIDE( v, plane)
                HIDE(lv, plane+1)
                HIDE( p, plane)
                HIDE( l, plane)
                HIDE(lt, plane)
                HIDE( t, plane)
                HIDE(rt, plane)
                HIDE( r, plane+1)
                HIDE(rd, plane+1)
                HIDE( d, plane+1)
                HIDE(ld, plane+1)
                if(!(/*ll|*/l|lt|t|rt|r|rd|ld|d|p|lv)){
                    if(v){
                        runs[run_index++]= run;
                        run=0;
                    }else{
                        run++;
                    }
                }
            }
        }
        runs[run_index++]= run;
        run_index=0;
        run= runs[run_index++];

        put_symbol(&s->c, b->state[1], run, 0);

        for(y=0; y<h; y++){
            for(x=0; x<w; x++){
                int v, p=0, lv;
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
                int d=0, r=0, rd=0, ld=0;
                v= src[x + y*stride];

                if(y){
                    t= src[x + (y-1)*stride];
                    if(x){
                        lt= src[x - 1 + (y-1)*stride];
                    }
                    if(x + 1 < w){
                        rt= src[x + 1 + (y-1)*stride];
                    }
                }
                if(x){
                    l= src[x - 1 + y*stride];
                    /*if(x > 1){
                        if(orientation==1) ll= src[y + (x-2)*stride];
                        else               ll= src[x - 2 + y*stride];
                    }*/
                }
                if(y+1<h){
                    d= src[x + (y+1)*stride];
                    if(x)         ld= src[x - 1 + (y+1)*stride];
                    if(x + 1 < w) rd= src[x + 1 + (y+1)*stride];
                }
                if(x + 1 < w)
                    r= src[x + 1 + y*stride];

                if(parent){
                    int px= x>>1;
                    int py= y>>1;
                    if(px<b->parent->width && py<b->parent->height)
                        p= parent[px + py*2*stride];
                }
                lv=v;
                HIDE( v, plane)
                HIDE(lv, plane+1)
                HIDE( p, plane)
                HIDE( l, plane)
                HIDE(lt, plane)
                HIDE( t, plane)
                HIDE(rt, plane)
                HIDE( r, plane+1)
                HIDE(rd, plane+1)
                HIDE( d, plane+1)
                HIDE(ld, plane+1)
                if(/*ll|*/l|lt|t|rt|r|rd|ld|d|p|lv){
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p)
                                                      +3*ABS(r) + ABS(rd) + 2*ABS(d) + ABS(ld));

                    if(lv) put_cabac(&s->c, &b->state[99][context + 8*(av_log2(ABS(lv))-plane)], !!(v-lv));
                    else   put_cabac(&s->c, &b->state[ 0][context], !!v);
                }else{
                    assert(!lv);
                    if(!run){
                        run= runs[run_index++];
                        put_symbol(&s->c, b->state[1], run, 0);
                        assert(v);
                    }else{
                        run--;
                        assert(!v);
                    }
                }
                if(v && !lv){
                    int context=    clip(quant3b[l&0xFF] + quant3b[r&0xFF], -1,1)
                                + 3*clip(quant3b[t&0xFF] + quant3b[d&0xFF], -1,1);
                    put_cabac(&s->c, &b->state[0][16 + 1 + 3 + context], v<0);
                }
            }
        }
    }
    return;
#endif
}
1652
/**
 * Region-growing subband coder experiment.
 *
 * The whole implementation is inside "#if 0" (and additionally guarded by a
 * condition ending in "&& 0"), so as compiled this function only declares a
 * few locals and returns without writing anything.
 *
 * The disabled code keeps three work lists (candidates seeded from non-zero
 * parent coefficients, "boarder" neighbours of coefficients already found
 * significant, and a run-length fallback scan), codes one significance bit
 * per visited coefficient, and finally writes the values of all significant
 * coefficients after sorting them with sig_cmp().
 */
static void encode_subband_X(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
    const int level= b->level;
    const int w= b->width;
    const int h= b->height;
    int x, y;

#if 0
    if(orientation==3 && parent && 0){
        int16_t candidate[w*h][2];
        uint8_t state[w*h];
        int16_t boarder[3][w*h*4][2];
        int16_t significant[w*h][2];
        int candidate_count=0;
        int boarder_count[3]={0,0,0};
        int significant_count=0;
        int rle_pos=0;
        int v, last_v;
        int primary= orientation==1;

        memset(candidate, 0, sizeof(candidate));
        memset(state, 0, sizeof(state));
        memset(boarder, 0, sizeof(boarder));

        for(y=0; y<h; y++){
            for(x=0; x<w; x++){
                if(parent[(x>>1) + (y>>1)*2*stride])
                    SVI(candidate, candidate_count, x, y)
            }
        }

        for(;;){
            while(candidate_count && !boarder_count[0] && !boarder_count[1] && !boarder_count[2]){
                candidate_count--;
                x= candidate[ candidate_count][0];
                y= candidate[ candidate_count][1];
                if(state[x + y*w])
                    continue;
                state[x + y*w]= 1;
                v= !!src[x + y*stride];
                put_cabac(&s->c, &b->state[0][0], v);
                if(v){
                    SVI(significant, significant_count, x,y)
                    if(x     && !state[x - 1 +  y   *w]) SVI(boarder[0],boarder_count[0],x-1,y  )
                    if(y     && !state[x     + (y-1)*w]) SVI(boarder[1],boarder_count[1],x  ,y-1)
                    if(x+1<w && !state[x + 1 +  y   *w]) SVI(boarder[0],boarder_count[0],x+1,y  )
                    if(y+1<h && !state[x     + (y+1)*w]) SVI(boarder[1],boarder_count[1],x  ,y+1)
                    if(x     && y     && !state[x - 1 + (y-1)*w]) SVI(boarder[2],boarder_count[2],x-1,y-1)
                    if(x     && y+1<h && !state[x - 1 + (y+1)*w]) SVI(boarder[2],boarder_count[2],x-1,y+1)
                    if(x+1<w && y+1<h && !state[x + 1 + (y+1)*w]) SVI(boarder[2],boarder_count[2],x+1,y+1)
                    if(x+1<w && y     && !state[x + 1 + (y-1)*w]) SVI(boarder[2],boarder_count[2],x+1,y-1)
                }
            }
            while(!boarder_count[0] && !boarder_count[1] && !boarder_count[2] && rle_pos < w*h){
                int run=0;
                for(; rle_pos < w*h;){
                    x= rle_pos % w; //FIXME speed
                    y= rle_pos / w;
                    rle_pos++;
                    if(state[x + y*w])
                        continue;
                    state[x + y*w]= 1;
                    v= !!src[x + y*stride];
                    if(v){
                        put_symbol(&s->c, b->state[1], run, 0);
                        SVI(significant, significant_count, x,y)
                        if(x     && !state[x - 1 +  y   *w]) SVI(boarder[0],boarder_count[0],x-1,y  )
                        if(y     && !state[x     + (y-1)*w]) SVI(boarder[1],boarder_count[1],x  ,y-1)
                        if(x+1<w && !state[x + 1 +  y   *w]) SVI(boarder[0],boarder_count[0],x+1,y  )
                        if(y+1<h && !state[x     + (y+1)*w]) SVI(boarder[1],boarder_count[1],x  ,y+1)
                        if(x     && y     && !state[x - 1 + (y-1)*w]) SVI(boarder[2],boarder_count[2],x-1,y-1)
                        if(x     && y+1<h && !state[x - 1 + (y+1)*w]) SVI(boarder[2],boarder_count[2],x-1,y+1)
                        if(x+1<w && y+1<h && !state[x + 1 + (y+1)*w]) SVI(boarder[2],boarder_count[2],x+1,y+1)
                        if(x+1<w && y     && !state[x + 1 + (y-1)*w]) SVI(boarder[2],boarder_count[2],x+1,y-1)
                        break;
//FIXME note only right & down can be borders
                    }
                    run++;
                }
            }
            if(!boarder_count[0] && !boarder_count[1] && !boarder_count[2])
                break;

            while(boarder_count[0] || boarder_count[1] || boarder_count[2]){
                int index;

                if     (boarder_count[  primary]) index=  primary;
                else if(boarder_count[1-primary]) index=1-primary;
                else                              index=2;

                boarder_count[index]--;
                x= boarder[index][ boarder_count[index] ][0];
                y= boarder[index][ boarder_count[index] ][1];
                if(state[x + y*w]) //FIXME maybe check earlier
                    continue;
                state[x + y*w]= 1;
                v= !!src[x + y*stride];
                put_cabac(&s->c, &b->state[0][index+1], v);
                if(v){
                    SVI(significant, significant_count, x,y)
                    if(x     && !state[x - 1 +  y   *w]) SVI(boarder[0],boarder_count[0],x-1,y  )
                    if(y     && !state[x     + (y-1)*w]) SVI(boarder[1],boarder_count[1],x  ,y-1)
                    if(x+1<w && !state[x + 1 +  y   *w]) SVI(boarder[0],boarder_count[0],x+1,y  )
                    if(y+1<h && !state[x     + (y+1)*w]) SVI(boarder[1],boarder_count[1],x  ,y+1)
                    if(x     && y     && !state[x - 1 + (y-1)*w]) SVI(boarder[2],boarder_count[2],x-1,y-1)
                    if(x     && y+1<h && !state[x - 1 + (y+1)*w]) SVI(boarder[2],boarder_count[2],x-1,y+1)
                    if(x+1<w && y+1<h && !state[x + 1 + (y+1)*w]) SVI(boarder[2],boarder_count[2],x+1,y+1)
                    if(x+1<w && y     && !state[x + 1 + (y-1)*w]) SVI(boarder[2],boarder_count[2],x+1,y-1)
                }
            }
        }
        //FIXME sort significant coeffs maybe
        if(1){
            qsort(significant, significant_count, sizeof(int16_t[2]), sig_cmp);
        }

        last_v=1;
        while(significant_count){
            int context= 3 + quant7[last_v&0xFF]; //use significance of surroundings
            significant_count--;
            x= significant[significant_count][0];//FIXME try opposite direction
            y= significant[significant_count][1];
            v= src[x + y*stride];
            put_symbol(&s->c, b->state[context + 2], v, 1); //FIXME try to avoid first bit, try this with the old code too!!
            last_v= v;
        }
    }
#endif
}
1781
/**
 * Subband coder with context-coded significance and zero runs, raster order.
 *
 * For each coefficient the neighbourhood consists of the left (l), top-left
 * (lt), top (t) and top-right (rt) coefficients of this band plus the
 * co-located parent coefficient (p); neighbours outside the band count as 0.
 *
 * Pass 1 gathers run lengths: for coefficients whose whole neighbourhood is
 * zero, a zero extends the current run and a non-zero value stores the run
 * in runs[] and restarts it.
 * Pass 2 writes the data: the first run, then per coefficient either a
 * significance bit coded with a neighbourhood-derived context (some
 * neighbour non-zero) or, when the neighbourhood is zero, nothing while the
 * run lasts and the next run when it ends. Non-zero coefficients are
 * followed by ABS(v)-1 and a sign bit whose context depends on l and t.
 *
 * decode_subband() reads the symbols back in the same order.
 *
 * @param src         first coefficient of this subband
 * @param parent      first coefficient of the parent subband, or NULL
 * @param stride      distance in samples between two rows of src; the
 *                    parent is addressed with twice this stride
 * @param orientation not used by the active code
 */
static void encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
    const int level= b->level;
    const int w= b->width;
    const int h= b->height;
    int x, y;

    if(1){
        int run=0;
        int runs[w*h];
        int run_index=0;

        /* pass 1: collect the zero-run lengths */
        for(y=0; y<h; y++){
            for(x=0; x<w; x++){
                int v, p=0;
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
                v= src[x + y*stride];

                if(y){
                    t= src[x + (y-1)*stride];
                    if(x){
                        lt= src[x - 1 + (y-1)*stride];
                    }
                    if(x + 1 < w){
                        rt= src[x + 1 + (y-1)*stride];
                    }
                }
                if(x){
                    l= src[x - 1 + y*stride];
                    /*if(x > 1){
                        if(orientation==1) ll= src[y + (x-2)*stride];
                        else               ll= src[x - 2 + y*stride];
                    }*/
                }
                if(parent){
                    int px= x>>1;
                    int py= y>>1;
                    if(px<b->parent->width && py<b->parent->height)
                        p= parent[px + py*2*stride];
                }
                if(!(/*ll|*/l|lt|t|rt|p)){
                    if(v){
                        runs[run_index++]= run;
                        run=0;
                    }else{
                        run++;
                    }
                }
            }
        }
        /* the trailing run is stored too, then the list is replayed from the start */
        runs[run_index++]= run;
        run_index=0;
        run= runs[run_index++];

        put_symbol2(&s->c, b->state[1], run, 3);

        /* pass 2: same scan, now writing the symbols */
        for(y=0; y<h; y++){
            for(x=0; x<w; x++){
                int v, p=0;
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
                v= src[x + y*stride];

                if(y){
                    t= src[x + (y-1)*stride];
                    if(x){
                        lt= src[x - 1 + (y-1)*stride];
                    }
                    if(x + 1 < w){
                        rt= src[x + 1 + (y-1)*stride];
                    }
                }
                if(x){
                    l= src[x - 1 + y*stride];
                    /*if(x > 1){
                        if(orientation==1) ll= src[y + (x-2)*stride];
                        else               ll= src[x - 2 + y*stride];
                    }*/
                }
                if(parent){
                    int px= x>>1;
                    int py= y>>1;
                    if(px<b->parent->width && py<b->parent->height)
                        p= parent[px + py*2*stride];
                }
                if(/*ll|*/l|lt|t|rt|p){
                    /* some neighbour is non-zero: code significance with a context */
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));

                    put_cabac(&s->c, &b->state[0][context], !!v);
                }else{
                    /* all neighbours zero: significance is implied by the runs */
                    if(!run){
                        run= runs[run_index++];

                        put_symbol2(&s->c, b->state[1], run, 3);
                        assert(v);
                    }else{
                        run--;
                        assert(!v);
                    }
                }
                if(v){
                    /* magnitude minus one, then the sign */
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));

                    put_symbol2(&s->c, b->state[context + 2], ABS(v)-1, context-4);
                    put_cabac(&s->c, &b->state[0][16 + 1 + 3 + quant3b[l&0xFF] + 3*quant3b[t&0xFF]], v<0);
                }
            }
        }
    }
}
1890
1891 static void encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1892 // encode_subband_qtree(s, b, src, parent, stride, orientation);
1893 // encode_subband_z0run(s, b, src, parent, stride, orientation);
1894 encode_subband_c0run(s, b, src, parent, stride, orientation);
1895 // encode_subband_dzr(s, b, src, parent, stride, orientation);
1896 }
1897
/**
 * Decode one subband; reads the symbols written by encode_subband_c0run().
 *
 * The band is cleared first. Coefficients are then reconstructed in raster
 * order; for each one the neighbourhood (left, top-left, top, top-right in
 * this band and the co-located parent coefficient) is taken from values
 * that have already been decoded:
 *  - if any neighbour is non-zero, a significance bit is read with a
 *    neighbourhood-derived context;
 *  - otherwise the coefficient is zero while the current run lasts, and
 *    significant when the run has ended (the next run is read at that point).
 * Significant coefficients then get their magnitude (symbol + 1) and sign.
 *
 * The "#if 0" section is a disabled bit-plane decoder and is not compiled.
 * Timing is only reported (STOP_TIMER) for the band whose level+1 equals
 * s->spatial_decomposition_count.
 *
 * @param src         first coefficient of this subband, written in place
 * @param parent      first coefficient of the parent subband, or NULL
 * @param stride      distance in samples between two rows of src; the
 *                    parent is addressed with twice this stride
 * @param orientation not used by the active code
 */
static inline void decode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
    const int level= b->level;
    const int w= b->width;
    const int h= b->height;
    int x,y;

    START_TIMER
#if 0
    for(y=0; y<b->height; y++)
        memset(&src[y*stride], 0, b->width*sizeof(DWTELEM));

    int plane;
    for(plane=24; plane>=0; plane--){
        int run;

        run= get_symbol(&s->c, b->state[1], 0);

#define HIDE(c, plane) c= c>=0 ? c&((-1)<<(plane)) : -((-c)&((-1)<<(plane)));

        for(y=0; y<h; y++){
            for(x=0; x<w; x++){
                int v, p=0, lv;
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
                int d=0, r=0, rd=0, ld=0;
                lv= src[x + y*stride];

                if(y){
                    t= src[x + (y-1)*stride];
                    if(x){
                        lt= src[x - 1 + (y-1)*stride];
                    }
                    if(x + 1 < w){
                        rt= src[x + 1 + (y-1)*stride];
                    }
                }
                if(x){
                    l= src[x - 1 + y*stride];
                    /*if(x > 1){
                        if(orientation==1) ll= src[y + (x-2)*stride];
                        else               ll= src[x - 2 + y*stride];
                    }*/
                }
                if(y+1<h){
                    d= src[x + (y+1)*stride];
                    if(x)         ld= src[x - 1 + (y+1)*stride];
                    if(x + 1 < w) rd= src[x + 1 + (y+1)*stride];
                }
                if(x + 1 < w)
                    r= src[x + 1 + y*stride];

                if(parent){
                    int px= x>>1;
                    int py= y>>1;
                    if(px<b->parent->width && py<b->parent->height)
                        p= parent[px + py*2*stride];
                }
                HIDE( p, plane)
                if(/*ll|*/l|lt|t|rt|r|rd|ld|d|p|lv){
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p)
                                                      +3*ABS(r) + ABS(rd) + 2*ABS(d) + ABS(ld));

                    if(lv){
                        assert(context + 8*av_log2(ABS(lv)) < 512 - 100);
                        if(get_cabac(&s->c, &b->state[99][context + 8*(av_log2(ABS(lv))-plane)])){
                            if(lv<0) v= lv - (1<<plane);
                            else     v= lv + (1<<plane);
                        }else
                            v=lv;
                    }else{
                        v= get_cabac(&s->c, &b->state[ 0][context]) << plane;
                    }
                }else{
                    assert(!lv);
                    if(!run){
                        run= get_symbol(&s->c, b->state[1], 0);
                        v= 1<<plane;
                    }else{
                        run--;
                        v=0;
                    }
                }
                if(v && !lv){
                    int context=    clip(quant3b[l&0xFF] + quant3b[r&0xFF], -1,1)
                                + 3*clip(quant3b[t&0xFF] + quant3b[d&0xFF], -1,1);
                    if(get_cabac(&s->c, &b->state[0][16 + 1 + 3 + context]))
                        v= -v;
                }
                src[x + y*stride]= v;
            }
        }
    }
    return;
#endif
    if(1){
        int run;

        /* start from an all-zero band; only non-zero values are stored below */
        for(y=0; y<b->height; y++)
            memset(&src[y*stride], 0, b->width*sizeof(DWTELEM));

        run= get_symbol2(&s->c, b->state[1], 3);
        for(y=0; y<h; y++){
            for(x=0; x<w; x++){
                int v, p=0;
                int /*ll=0, */l=0, lt=0, t=0, rt=0;

                if(y){
                    t= src[x + (y-1)*stride];
                    if(x){
                        lt= src[x - 1 + (y-1)*stride];
                    }
                    if(x + 1 < w){
                        rt= src[x + 1 + (y-1)*stride];
                    }
                }
                if(x){
                    l= src[x - 1 + y*stride];
                    /*if(x > 1){
                        if(orientation==1) ll= src[y + (x-2)*stride];
                        else               ll= src[x - 2 + y*stride];
                    }*/
                }
                if(parent){
                    int px= x>>1;
                    int py= y>>1;
                    if(px<b->parent->width && py<b->parent->height)
                        p= parent[px + py*2*stride];
                }
                if(/*ll|*/l|lt|t|rt|p){
                    /* some neighbour is non-zero: significance bit with a context */
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));

                    v=get_cabac(&s->c, &b->state[0][context]);
                }else{
                    /* all neighbours zero: significance is implied by the runs */
                    if(!run){
                        run= get_symbol2(&s->c, b->state[1], 3);
                        //FIXME optimize this here
                        //FIXME try to store a more naive run
                        v=1;
                    }else{
                        run--;
                        v=0;
                    }
                }
                if(v){
                    /* magnitude (stored minus one), then the sign */
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
                    v= get_symbol2(&s->c, b->state[context + 2], context-4) + 1;
                    if(get_cabac(&s->c, &b->state[0][16 + 1 + 3 + quant3b[l&0xFF] + 3*quant3b[t&0xFF]]))
                        v= -v;
                    src[x + y*stride]= v;
                }
            }
        }
        if(level+1 == s->spatial_decomposition_count){
            STOP_TIMER("decode_subband")
        }

        return;
    }
}
2056
2057 static void reset_contexts(SnowContext *s){
2058 int plane_index, level, orientation;
2059
2060 for(plane_index=0; plane_index<2; plane_index++){
2061 for(level=0; level<s->spatial_decomposition_count; level++){
2062 for(orientation=level ? 1:0; orientation<4; orientation++){
2063 memset(s->plane[plane_index].band[level][orientation].state, 0, sizeof(s->plane[plane_index].band[level][orientation].state));
2064 }
2065 }
2066 }
2067 memset(s->header_state, 0, sizeof(s->header_state));
2068 memset(s->block_state, 0, sizeof(s->block_state));
2069 }
2070
2071 static int alloc_blocks(SnowContext *s){
2072 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
2073 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
2074
2075 s->b_width = w;
2076 s->b_height= h;
2077
2078 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
2079 return 0;
2080 }
2081
2082 static inline void copy_cabac_state(CABACContext *d, CABACContext *s){
2083 PutBitContext bak= d->pb;
2084 *d= *s;
2085 d->pb= bak;
2086 }
2087
/**
 * Sums the samples of a w x w square.
 * Near copy & paste from dsputil, FIXME.
 *
 * @param pix       top-left sample of the square
 * @param line_size distance, in samples, between the starts of two rows
 * @param w         width and height of the square
 * @return sum of all w*w samples (0 when w <= 0)
 */
static int pix_sum(uint8_t * pix, int line_size, int w)
{
    int total = 0;
    int row, col;

    for (row = 0; row < w; row++) {
        const uint8_t *line = pix + row * line_size;

        for (col = 0; col < w; col++) {
            total += line[col];
        }
    }
    return total;
}
2103
2104 //near copy & paste from dsputil, FIXME
2105 static int pix_norm1(uint8_t * pix, int line_size, int w)
2106 {
2107 int s, i, j;
2108 uint32_t *sq = squareTbl + 256;
2109
2110 s = 0;
2111 for (i = 0; i < w; i++) {
2112 for (j = 0; j < w; j ++) {
2113 s += sq[pix[0]];
2114 pix ++;
2115 }
2116 pix += line_size - w;
2117 }
2118 return s;
2119 }
2120
2121 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int type){
2122 const int w= s->b_width << s->block_max_depth;
2123 const int rem_depth= s->block_max_depth - level;
2124 const int index= (x + y*w) << rem_depth;
2125 const int block_w= 1<<rem_depth;
2126 BlockNode block;
2127 int i,j;
2128
2129 block.color[0]= l;
2130 block.color[1]= cb;
2131 block.color[2]= cr;
2132 block.mx= mx;
2133 block.my= my;
2134 block.type= type;
2135 block.level= level;
2136
2137 for(j=0; j<block_w; j++){
2138 for(i=0; i<block_w; i++){
2139 s->block[index + i + j*w]= block;
2140 }
2141 }
2142 }
2143
2144 static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
2145 const int offset[3]= {
2146 y*c-> stride + x,
2147 ((y*c->uvstride + x)>>1),
2148 ((y*c->uvstride + x)>>1),
2149 };
2150 int i;
2151 for(i=0; i<3; i++){
2152 c->src[0][i]= src [i];
2153 c->ref[0][i]= ref [i] + offset[i];
2154 }
2155 assert(!ref_index);
2156 }
2157
/* FIXME copy&paste
 * Named rows of the local predictor table "int P[10][2]"; each row is used
 * as an { x, y } pair, e.g. P_LEFT[0] / P_LEFT[1]. */
#define P_LEFT     (P[1])
#define P_TOP      (P[2])
#define P_TOPRIGHT (P[3])
#define P_MEDIAN   (P[4])
#define P_MV1      (P[9])

#define FLAG_QPEL  1 /* must be 1 */
2165
2166 static int encode_q_branch(SnowContext *s, int level, int x, int y){
2167 uint8_t p_buffer[1024];
2168 uint8_t i_buffer[1024];
2169 uint8_t p_state[sizeof(s->block_state)];
2170 uint8_t i_state[sizeof(s->block_state)];
2171 CABACContext pc, ic;
2172 PutBitContext pbbak= s->c.pb;
2173 int score, score2, iscore, i_len, p_len, block_s, sum;
2174 const int w= s->b_width << s->block_max_depth;
2175 const int h= s->b_height << s->block_max_depth;
2176 const int rem_depth= s->block_max_depth - level;
2177 const int index= (x + y*w) << rem_depth;
2178 const int block_w= 1<<(LOG2_MB_SIZE - level);
2179 static BlockNode null_block= { //FIXME add border maybe
2180 .color= {128,128,128},
2181 .mx= 0,
2182 .my= 0,
2183 .type= 0,
2184 .level= 0,
2185 };
2186 int trx= (x+1)<<rem_depth;
2187 int try= (y+1)<<rem_depth;
2188 BlockNode *left = x ? &s->block[index-1] : &null_block;
2189 BlockNode *top = y ? &s->block[index-w] : &null_block;
2190 BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
2191 BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
2192 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2193 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2194 int pl = left->color[0];
2195 int pcb= left->color[1];
2196 int pcr= left->color[2];
2197 int pmx= mid_pred(left->mx, top->mx, tr->mx);
2198 int pmy= mid_pred(left->my, top->my, tr->my);
2199 int mx=0, my=0;
2200 int l,cr,cb, i;
2201 const int stride= s->current_picture.linesize[0];
2202 const int uvstride= s->current_picture.linesize[1];
2203 const int instride= s->input_picture.linesize[0];
2204 const int uvinstride= s->input_picture.linesize[1];
2205 uint8_t *new_l = s->input_picture.data[0] + (x + y* instride)*block_w;
2206 uint8_t *new_cb= s->input_picture.data[1] + (x + y*uvinstride)*block_w/2;
2207 uint8_t *new_cr= s->input_picture.data[2] + (x + y*uvinstride)*block_w/2;
2208 uint8_t current_mb[3][stride*block_w];
2209 uint8_t *current_data[3]= {&current_mb[0][0], &current_mb[1][0], &current_mb[2][0]};
2210 int P[10][2];
2211 int16_t last_mv[3][2];
2212 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
2213 const int shift= 1+qpel;
2214 MotionEstContext *c= &s->m.me;
2215 int mx_context= av_log2(2*ABS(left->mx - top->mx));
2216 int my_context= av_log2(2*ABS(left->my - top->my));
2217 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2218
2219 assert(sizeof(s->block_state) >= 256);
2220 if(s->keyframe){
2221 set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, BLOCK_INTRA);
2222 return 0;
2223 }
2224
2225 //FIXME optimize
2226 for(i=0; i<block_w; i++)
2227 memcpy(&current_mb[0][0] + stride*i, new_l + instride*i, block_w);
2228 for(i=0; i<block_w>>1; i++)
2229 memcpy(&current_mb[1][0] + uvstride*i, new_cb + uvinstride*i, block_w>>1);
2230 for(i=0; i<block_w>>1; i++)
2231 memcpy(&current_mb[2][0] + uvstride*i, new_cr + uvinstride*i, block_w>>1);
2232
2233 // clip predictors / edge ?
2234
2235 P_LEFT[0]= left->mx;
2236 P_LEFT[1]= left->my;
2237 P_TOP [0]= top->mx;
2238 P_TOP [1]= top->my;
2239 P_TOPRIGHT[0]= tr->mx;
2240 P_TOPRIGHT[1]= tr->my;
2241
2242 last_mv[0][0]= s->block[index].mx;
2243 last_mv[0][1]= s->block[index].my;
2244 last_mv[1][0]= right->mx;
2245 last_mv[1][1]= right->my;
2246 last_mv[2][0]= bottom->mx;
2247 last_mv[2][1]= bottom->my;
2248
2249 s->m.mb_stride=2;
2250 s->m.mb_x=
2251 s->m.mb_y= 0;
2252 s->m.me.skip= 0;
2253
2254 init_ref(c, current_data, s->last_picture.data, NULL, block_w*x, block_w*y, 0);
2255
2256 assert(s->m.me. stride == stride);
2257 assert(s->m.me.uvstride == uvstride);
2258
2259 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
2260 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
2261 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
2262 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
2263
2264 c->xmin = - x*block_w - 16;
2265 c->ymin = - y*block_w - 16;
2266 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16;
2267 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16;
2268
2269 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
2270 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
2271 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
2272 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
2273 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
2274 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
2275 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
2276
2277 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
2278 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
2279
2280 if (!y) {
2281 c->pred_x= P_LEFT[0];
2282 c->pred_y= P_LEFT[1];
2283 } else {
2284 c->pred_x = P_MEDIAN[0];
2285 c->pred_y = P_MEDIAN[1];
2286 }
2287
2288 score= ff_epzs_motion_search(&s->m, &mx, &my, P, 0, /*ref_index*/ 0, last_mv,
2289 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
2290
2291 assert(mx >= c->xmin);
2292 assert(mx <= c->xmax);
2293 assert(my >= c->ymin);
2294 assert(my <= c->ymax);
2295
2296 score= s->m.me.sub_motion_search(&s->m, &mx, &my, score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
2297 score= ff_get_mb_score(&s->m, mx, my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
2298 //FIXME if mb_cmp != SSE then intra cant be compared currently and mb_penalty vs. lambda2
2299
2300 // subpel search
2301 pc= s->c;
2302 init_put_bits(&pc.pb, p_buffer, sizeof(p_buffer));
2303 memcpy(p_state, s->block_state, sizeof(s->block_state));
2304
2305 if(level!=s->block_max_depth)
2306 put_cabac(&pc, &p_state[4 + s_context], 1);
2307 put_cabac(&pc, &p_state[1 + left->type + top->type], 0);
2308 put_symbol(&pc, &p_state[128 + 32*mx_context], mx - pmx, 1);
2309 put_symbol(&pc, &p_state[128 + 32*my_context], my - pmy, 1);
2310 p_len= put_bits_count(&pc.pb);
2311 score += (s->lambda2*(p_len + pc.outstanding_count - s->c.outstanding_count))>>FF_LAMBDA_SHIFT;
2312
2313 block_s= block_w*block_w;
2314 sum = pix_sum(&current_mb[0][0], stride, block_w);
2315 l= (sum + block_s/2)/block_s;
2316 iscore = pix_norm1(&current_mb[0][0], stride, block_w) - 2*l*sum + l*l*block_s;
2317
2318 block_s= block_w*block_w>>2;
2319 sum = pix_sum(&current_mb[1][0], uvstride, block_w>>1);
2320 cb= (sum + block_s/2)/block_s;
2321 // iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
2322 sum = pix_sum(&current_mb[2][0], uvstride, block_w>>1);
2323 cr= (sum + block_s/2)/block_s;
2324 // iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
2325
2326 ic= s->c;
2327 init_put_bits(&ic.pb, i_buffer, sizeof(i_buffer));
2328 memcpy(i_state, s->block_state, sizeof(s->block_state));
2329 if(level!=s->block_max_depth)
2330 put_cabac(&ic, &i_state[4 + s_context], 1);
2331 put_cabac(&ic, &i_state[1 + left->type + top->type], 1);
2332 put_symbol(&ic, &i_state[32], l-pl , 1);
2333 put_symbol(&ic, &i_state[64], cb-pcb, 1);
2334 put_symbol(&ic, &i_state[96], cr-pcr, 1);
2335 i_len= put_bits_count(&ic.pb);
2336 iscore += (s->lambda2*(i_len + ic.outstanding_count - s->c.outstanding_count))>>FF_LAMBDA_SHIFT;
2337
2338 // assert(score==256*256*256*64-1);