slice-based idwt (10% faster decoding)
[libav.git] / libavcodec / snow.c
1 /*
2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18
19 #include "avcodec.h"
20 #include "common.h"
21 #include "dsputil.h"
22
23 #include "rangecoder.h"
24 #define MID_STATE 128
25
26 #include "mpegvideo.h"
27
28 #undef NDEBUG
29 #include <assert.h>
30
31 #define MAX_DECOMPOSITIONS 8
32 #define MAX_PLANES 4
33 #define DWTELEM int
34 #define QROOT 8
35 #define LOSSLESS_QLOG -128
36 #define FRAC_BITS 8
37
38 static const int8_t quant3[256]={
39 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
43 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
44 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
45 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
46 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
47 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
48 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
49 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
50 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
51 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
52 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
53 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
54 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
55 };
56 static const int8_t quant3b[256]={
57 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
60 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
61 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
62 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
63 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
64 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
65 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
66 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
68 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
69 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
70 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
71 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
72 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
73 };
74 static const int8_t quant5[256]={
75 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
76 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
77 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
78 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
79 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
80 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
81 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
82 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
83 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
84 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
85 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
86 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
87 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
88 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
89 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
90 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
91 };
92 static const int8_t quant7[256]={
93 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
94 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
95 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
96 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
97 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
98 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
99 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
100 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
101 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
102 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
103 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
104 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
105 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
106 -3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
107 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
108 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
109 };
110 static const int8_t quant9[256]={
111 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
112 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
113 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
114 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
115 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
116 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
117 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
118 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
119 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
120 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
121 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
122 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
123 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
124 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
125 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
126 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
127 };
128 static const int8_t quant11[256]={
129 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
130 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
131 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
132 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
133 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
134 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
135 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
136 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
137 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
138 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
139 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
140 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
141 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
142 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
143 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
144 -4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
145 };
146 static const int8_t quant13[256]={
147 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
148 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
149 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
150 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
151 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
152 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
153 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
154 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
155 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
156 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
157 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
158 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
159 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
160 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
161 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
162 -4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
163 };
164
165 #define LOG2_OBMC_MAX 6
166 #define OBMC_MAX (1<<(LOG2_OBMC_MAX))
167 #if 0 //64*cubic
168 static const uint8_t obmc32[1024]={
169 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
170 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
171 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
172 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
173 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
174 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
175 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
176 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
177 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
178 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
179 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
180 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
181 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
182 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
183 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
184 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
185 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
186 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
187 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
188 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
189 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
190 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
191 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
192 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
193 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
194 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
195 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
196 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
197 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
198 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
199 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
200 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
201 //error:0.000022
202 };
203 static const uint8_t obmc16[256]={
204 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
205 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
206 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
207 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
208 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
209 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
210 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
211 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
212 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
213 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
214 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
215 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
216 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
217 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
218 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
219 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
220 //error:0.000033
221 };
222 #elif 1 // 64*linear
223 static const uint8_t obmc32[1024]={
224 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
225 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
226 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
227 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
228 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
229 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
230 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
231 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
232 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
233 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
234 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
235 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
236 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
237 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
238 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
239 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
240 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
241 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
242 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
243 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
244 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
245 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
246 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
247 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
248 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
249 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
250 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
251 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
252 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
253 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
254 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
255 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
256 //error:0.000020
257 };
258 static const uint8_t obmc16[256]={
259 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
260 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
261 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
262 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
263 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
264 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
265 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
266 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
267 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
268 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
269 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
270 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
271 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
272 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
273 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
274 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
275 //error:0.000015
276 };
277 #else //64*cos
278 static const uint8_t obmc32[1024]={
279 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
280 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
281 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
282 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
283 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
284 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
285 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
286 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
287 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
288 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
289 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
290 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
291 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
292 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
293 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
294 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
295 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
296 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
297 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
298 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
299 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
300 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
301 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
302 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
303 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
304 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
305 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
306 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
307 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
308 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
309 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
310 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
311 //error:0.000022
312 };
313 static const uint8_t obmc16[256]={
314 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
315 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
316 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
317 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
318 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
319 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
320 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
321 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
322 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
323 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
324 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
325 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
326 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
327 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
328 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
329 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
330 //error:0.000022
331 };
332 #endif
333
/*
 * 8x8 window ("linear *64").
 * The table is separable: obmc8[y*8 + x] == w[y]*w[x] with
 * w = {1,3,5,7,7,5,3,1}; all 64 weights add up to 1024.
 * Generator-reported error: 0.000000
 */
static const uint8_t obmc8[64]={
     1,  3,  5,  7,  7,  5,  3,  1,
     3,  9, 15, 21, 21, 15,  9,  3,
     5, 15, 25, 35, 35, 25, 15,  5,
     7, 21, 35, 49, 49, 35, 21,  7,
     7, 21, 35, 49, 49, 35, 21,  7,
     5, 15, 25, 35, 35, 25, 15,  5,
     3,  9, 15, 21, 21, 15,  9,  3,
     1,  3,  5,  7,  7,  5,  3,  1,
};
346
/*
 * 4x4 window ("linear *64").
 * Separable as well: obmc4[y*4 + x] == w[y]*w[x] with w = {2,6,6,2};
 * the 16 weights add up to 256.
 * Generator-reported error: 0.000000
 */
static const uint8_t obmc4[16]={
     4, 12, 12,  4,
    12, 36, 36, 12,
    12, 36, 36, 12,
     4, 12, 12,  4,
};
355
356 static const uint8_t *obmc_tab[4]={
357 obmc32, obmc16, obmc8, obmc4
358 };
359
/*
 * Constant for BlockNode.type.
 * Disabled siblings from the original source, kept for reference:
 *   TYPE_SPLIT   1
 *   TYPE_NOCOLOR 4
 */
#define BLOCK_INTRA 1

/**
 * Per-block record.
 * NOTE(review): mx/my look like motion vector components and color[] like
 * one value per plane -- confirm against the code that fills this struct.
 */
typedef struct BlockNode{
    int16_t mx, my;
    uint8_t color[3];
    uint8_t type;       ///< see BLOCK_INTRA
    uint8_t level;      //FIXME merge into type?
}BlockNode;
370
/* MB_SIZE is kept a power of two: 1 << 4 == 16. */
#define LOG2_MB_SIZE 4
#define MB_SIZE (1<<LOG2_MB_SIZE)
373
374 typedef struct SubBand{
375 int level;
376 int stride;
377 int width;
378 int height;
379 int qlog; ///< log(qscale)/log[2^(1/6)]
380 DWTELEM *buf;
381 int16_t *x;
382 DWTELEM *coeff;
383 struct SubBand *parent;
384 uint8_t state[/*7*2*/ 7 + 512][32];
385 }SubBand;
386
387 typedef struct Plane{
388 int width;
389 int height;
390 SubBand band[MAX_DECOMPOSITIONS][4];
391 }Plane;
392
393 typedef struct SnowContext{
394 // MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
395
396 AVCodecContext *avctx;
397 RangeCoder c;
398 DSPContext dsp;
399 AVFrame input_picture;
400 AVFrame current_picture;
401 AVFrame last_picture;
402 AVFrame mconly_picture;
403 // uint8_t q_context[16];
404 uint8_t header_state[32];
405 uint8_t block_state[128 + 32*128];
406 int keyframe;
407 int always_reset;
408 int version;
409 int spatial_decomposition_type;
410 int temporal_decomposition_type;
411 int spatial_decomposition_count;
412 int temporal_decomposition_count;
413 DWTELEM *spatial_dwt_buffer;
414 int colorspace_type;
415 int chroma_h_shift;
416 int chroma_v_shift;
417 int spatial_scalability;
418 int qlog;
419 int lambda;
420 int lambda2;
421 int mv_scale;
422 int qbias;
423 #define QBIAS_SHIFT 3
424 int b_width;
425 int b_height;
426 int block_max_depth;
427 Plane plane[MAX_PLANES];
428 BlockNode *block;
429
430 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
431 }SnowContext;
432
433 typedef struct {
434 DWTELEM *b0;
435 DWTELEM *b1;
436 DWTELEM *b2;
437 DWTELEM *b3;
438 int y;
439 } dwt_compose_t;
440
#ifdef __sgi
/* SGI IRIX already defines qexp; drop it so the table below can use the name. */
#undef qexp
#endif

#define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0

/*
 * Eight steps per octave; the entries are close to 128*2^(i/8).
 * Coarser variants of the same curve that were tried:
 *    64, 70, 76, 83, 91, 99, 108, 117
 *    32, 35, 38, 41, 45, 49,  54,  59
 *    16, 17, 19, 21, 23, 25,  27,  29
 *     8,  9, 10, 10, 11, 12,  13,  15
 */
static const uint8_t qexp[8]={
    128, 140, 152, 166,
    181, 197, 215, 235
};
453
/**
 * Reflect v back towards the interval [0, m].
 * Values below 0 are reflected about 0 and values above m about m. Only one
 * reflection is applied, so the result lies in [0, m] only for v in [-m, 2*m].
 */
static inline int mirror(int v, int m){
    if(v >= 0 && v <= m)
        return v;
    return v < 0 ? -v : 2*m - v;
}
459
460 static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
461 int i;
462
463 if(v){
464 const int a= ABS(v);
465 const int e= av_log2(a);
466 #if 1
467 const int el= FFMIN(e, 10);
468 put_rac(c, state+0, 0);
469
470 for(i=0; i<el; i++){
471 put_rac(c, state+1+i, 1); //1..10
472 }
473 for(; i<e; i++){
474 put_rac(c, state+1+9, 1); //1..10
475 }
476 put_rac(c, state+1+FFMIN(i,9), 0);
477
478 for(i=e-1; i>=el; i--){
479 put_rac(c, state+22+9, (a>>i)&1); //22..31
480 }
481 for(; i>=0; i--){
482 put_rac(c, state+22+i, (a>>i)&1); //22..31
483 }
484
485 if(is_signed)
486 put_rac(c, state+11 + el, v < 0); //11..21
487 #else
488
489 put_rac(c, state+0, 0);
490 if(e<=9){
491 for(i=0; i<e; i++){
492 put_rac(c, state+1+i, 1); //1..10
493 }
494 put_rac(c, state+1+i, 0);
495
496 for(i=e-1; i>=0; i--){
497 put_rac(c, state+22+i, (a>>i)&1); //22..31
498 }
499
500 if(is_signed)
501 put_rac(c, state+11 + e, v < 0); //11..21
502 }else{
503 for(i=0; i<e; i++){
504 put_rac(c, state+1+FFMIN(i,9), 1); //1..10
505 }
506 put_rac(c, state+1+FFMIN(i,9), 0);
507
508 for(i=e-1; i>=0; i--){
509 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
510 }
511
512 if(is_signed)
513 put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
514 }
515 #endif
516 }else{
517 put_rac(c, state+0, 1);
518 }
519 }
520
521 static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
522 if(get_rac(c, state+0))
523 return 0;
524 else{
525 int i, e, a;
526 e= 0;
527 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
528 e++;
529 }
530
531 a= 1;
532 for(i=e-1; i>=0; i--){
533 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
534 }
535
536 if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
537 return -a;
538 else
539 return a;
540 }
541 }
542
543 static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
544 int i;
545 int r= log2>=0 ? 1<<log2 : 1;
546
547 assert(v>=0);
548 assert(log2>=-4);
549
550 while(v >= r){
551 put_rac(c, state+4+log2, 1);
552 v -= r;
553 log2++;
554 if(log2>0) r+=r;
555 }
556 put_rac(c, state+4+log2, 0);
557
558 for(i=log2-1; i>=0; i--){
559 put_rac(c, state+31-i, (v>>i)&1);
560 }
561 }
562
563 static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
564 int i;
565 int r= log2>=0 ? 1<<log2 : 1;
566 int v=0;
567
568 assert(log2>=-4);
569
570 while(get_rac(c, state+4+log2)){
571 v+= r;
572 log2++;
573 if(log2>0) r+=r;
574 }
575
576 for(i=log2-1; i>=0; i--){
577 v+= get_rac(c, state+31-i)<<i;
578 }
579
580 return v;
581 }
582
583 static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
584 const int mirror_left= !highpass;
585 const int mirror_right= (width&1) ^ highpass;
586 const int w= (width>>1) - 1 + (highpass & width);
587 int i;
588
589 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
590 if(mirror_left){
591 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
592 dst += dst_step;
593 src += src_step;
594 }
595
596 for(i=0; i<w; i++){
597 dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse);
598 }
599
600 if(mirror_right){
601 dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse);
602 }
603 }
604
605 static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
606 const int mirror_left= !highpass;
607 const int mirror_right= (width&1) ^ highpass;
608 const int w= (width>>1) - 1 + (highpass & width);
609 int i;
610
611 if(mirror_left){
612 int r= 3*2*ref[0];
613 r += r>>4;
614 r += r>>8;
615 dst[0] = LIFT(src[0], ((r+add)>>shift), inverse);
616 dst += dst_step;
617 src += src_step;
618 }
619
620 for(i=0; i<w; i++){
621 int r= 3*(ref[i*ref_step] + ref[(i+1)*ref_step]);
622 r += r>>4;
623 r += r>>8;
624 dst[i*dst_step] = LIFT(src[i*src_step], ((r+add)>>shift), inverse);
625 }
626
627 if(mirror_right){
628 int r= 3*2*ref[w*ref_step];
629 r += r>>4;
630 r += r>>8;
631 dst[w*dst_step] = LIFT(src[w*src_step], ((r+add)>>shift), inverse);
632 }
633 }
634
635
636 static void inplace_lift(DWTELEM *dst, int width, int *coeffs, int n, int shift, int start, int inverse){
637 int x, i;
638
639 for(x=start; x<width; x+=2){
640 int64_t sum=0;
641
642 for(i=0; i<n; i++){
643 int x2= x + 2*i - n + 1;
644 if (x2< 0) x2= -x2;
645 else if(x2>=width) x2= 2*width-x2-2;
646 sum += coeffs[i]*(int64_t)dst[x2];
647 }
648 if(inverse) dst[x] -= (sum + (1<<shift)/2)>>shift;
649 else dst[x] += (sum + (1<<shift)/2)>>shift;
650 }
651 }
652
653 static void inplace_liftV(DWTELEM *dst, int width, int height, int stride, int *coeffs, int n, int shift, int start, int inverse){
654 int x, y, i;
655 for(y=start; y<height; y+=2){
656 for(x=0; x<width; x++){
657 int64_t sum=0;
658
659 for(i=0; i<n; i++){
660 int y2= y + 2*i - n + 1;
661 if (y2< 0) y2= -y2;
662 else if(y2>=height) y2= 2*height-y2-2;
663 sum += coeffs[i]*(int64_t)dst[x + y2*stride];
664 }
665 if(inverse) dst[x + y*stride] -= (sum + (1<<shift)/2)>>shift;
666 else dst[x + y*stride] += (sum + (1<<shift)/2)>>shift;
667 }
668 }
669 }
670
671 #define SCALEX 1
672 #define LX0 0
673 #define LX1 1
674
675 #if 0 // more accurate 9/7
676 #define N1 2
677 #define SHIFT1 14
678 #define COEFFS1 (int[]){-25987,-25987}
679 #define N2 2
680 #define SHIFT2 19
681 #define COEFFS2 (int[]){-27777,-27777}
682 #define N3 2
683 #define SHIFT3 15
684 #define COEFFS3 (int[]){28931,28931}
685 #define N4 2
686 #define SHIFT4 15
687 #define COEFFS4 (int[]){14533,14533}
688 #elif 1 // 13/7 CRF
689 #define N1 4
690 #define SHIFT1 4
691 #define COEFFS1 (int[]){1,-9,-9,1}
692 #define N2 4
693 #define SHIFT2 4
694 #define COEFFS2 (int[]){-1,5,5,-1}
695 #define N3 0
696 #define SHIFT3 1
697 #define COEFFS3 NULL
698 #define N4 0
699 #define SHIFT4 1
700 #define COEFFS4 NULL
701 #elif 1 // 3/5
702 #define LX0 1
703 #define LX1 0
704 #define SCALEX 0.5
705 #define N1 2
706 #define SHIFT1 1
707 #define COEFFS1 (int[]){1,1}
708 #define N2 2
709 #define SHIFT2 2
710 #define COEFFS2 (int[]){-1,-1}
711 #define N3 0
712 #define SHIFT3 0
713 #define COEFFS3 NULL
714 #define N4 0
715 #define SHIFT4 0
716 #define COEFFS4 NULL
717 #elif 1 // 11/5
718 #define N1 0
719 #define SHIFT1 1
720 #define COEFFS1 NULL
721 #define N2 2
722 #define SHIFT2 2
723 #define COEFFS2 (int[]){-1,-1}
724 #define N3 2
725 #define SHIFT3 0
726 #define COEFFS3 (int[]){-1,-1}
727 #define N4 4
728 #define SHIFT4 7
729 #define COEFFS4 (int[]){-5,29,29,-5}
730 #define SCALEX 4
731 #elif 1 // 9/7 CDF
732 #define N1 2
733 #define SHIFT1 7
734 #define COEFFS1 (int[]){-203,-203}
735 #define N2 2
736 #define SHIFT2 12
737 #define COEFFS2 (int[]){-217,-217}
738 #define N3 2
739 #define SHIFT3 7
740 #define COEFFS3 (int[]){113,113}
741 #define N4 2
742 #define SHIFT4 9
743 #define COEFFS4 (int[]){227,227}
744 #define SCALEX 1
745 #elif 1 // 7/5 CDF
746 #define N1 0
747 #define SHIFT1 1
748 #define COEFFS1 NULL
749 #define N2 2
750 #define SHIFT2 2
751 #define COEFFS2 (int[]){-1,-1}
752 #define N3 2
753 #define SHIFT3 0
754 #define COEFFS3 (int[]){-1,-1}
755 #define N4 2
756 #define SHIFT4 4
757 #define COEFFS4 (int[]){3,3}
758 #elif 1 // 9/7 MN
759 #define N1 4
760 #define SHIFT1 4
761 #define COEFFS1 (int[]){1,-9,-9,1}
762 #define N2 2
763 #define SHIFT2 2
764 #define COEFFS2 (int[]){1,1}
765 #define N3 0
766 #define SHIFT3 1
767 #define COEFFS3 NULL
768 #define N4 0
769 #define SHIFT4 1
770 #define COEFFS4 NULL
771 #else // 13/7 CRF
772 #define N1 4
773 #define SHIFT1 4
774 #define COEFFS1 (int[]){1,-9,-9,1}
775 #define N2 4
776 #define SHIFT2 4
777 #define COEFFS2 (int[]){-1,5,5,-1}
778 #define N3 0
779 #define SHIFT3 1
780 #define COEFFS3 NULL
781 #define N4 0
782 #define SHIFT4 1
783 #define COEFFS4 NULL
784 #endif
785 static void horizontal_decomposeX(DWTELEM *b, int width){
786 DWTELEM temp[width];
787 const int width2= width>>1;
788 const int w2= (width+1)>>1;
789 int A1,A2,A3,A4, x;
790
791 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 0);
792 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 0);
793 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 0);
794 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 0);
795
796 for(x=0; x<width2; x++){
797 temp[x ]= b[2*x ];
798 temp[x+w2]= b[2*x + 1];
799 }
800 if(width&1)
801 temp[x ]= b[2*x ];
802 memcpy(b, temp, width*sizeof(int));
803 }
804
805 static void horizontal_composeX(DWTELEM *b, int width){
806 DWTELEM temp[width];
807 const int width2= width>>1;
808 int A1,A2,A3,A4, x;
809 const int w2= (width+1)>>1;
810
811 memcpy(temp, b, width*sizeof(int));
812 for(x=0; x<width2; x++){
813 b[2*x ]= temp[x ];
814 b[2*x + 1]= temp[x+w2];
815 }
816 if(width&1)
817 b[2*x ]= temp[x ];
818
819 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 1);
820 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 1);
821 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 1);
822 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 1);
823 }
824
825 static void spatial_decomposeX(DWTELEM *buffer, int width, int height, int stride){
826 int x, y;
827
828 for(y=0; y<height; y++){
829 for(x=0; x<width; x++){
830 buffer[y*stride + x] *= SCALEX;
831 }
832 }
833
834 for(y=0; y<height; y++){
835 horizontal_decomposeX(buffer + y*stride, width);
836 }
837
838 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 0);
839 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0);
840 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0);
841 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0);
842 }
843
844 static void spatial_composeX(DWTELEM *buffer, int width, int height, int stride){
845 int x, y;
846
847 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1);
848 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1);
849 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1);
850 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 1);
851
852 for(y=0; y<height; y++){
853 horizontal_composeX(buffer + y*stride, width);
854 }
855
856 for(y=0; y<height; y++){
857 for(x=0; x<width; x++){
858 buffer[y*stride + x] /= SCALEX;
859 }
860 }
861 }
862
863 static void horizontal_decompose53i(DWTELEM *b, int width){
864 DWTELEM temp[width];
865 const int width2= width>>1;
866 int A1,A2,A3,A4, x;
867 const int w2= (width+1)>>1;
868
869 for(x=0; x<width2; x++){
870 temp[x ]= b[2*x ];
871 temp[x+w2]= b[2*x + 1];
872 }
873 if(width&1)
874 temp[x ]= b[2*x ];
875 #if 0
876 A2= temp[1 ];
877 A4= temp[0 ];
878 A1= temp[0+width2];
879 A1 -= (A2 + A4)>>1;
880 A4 += (A1 + 1)>>1;
881 b[0+width2] = A1;
882 b[0 ] = A4;
883 for(x=1; x+1<width2; x+=2){
884 A3= temp[x+width2];
885 A4= temp[x+1 ];
886 A3 -= (A2 + A4)>>1;
887 A2 += (A1 + A3 + 2)>>2;
888 b[x+width2] = A3;
889 b[x ] = A2;
890
891 A1= temp[x+1+width2];
892 A2= temp[x+2 ];
893 A1 -= (A2 + A4)>>1;
894 A4 += (A1 + A3 + 2)>>2;
895 b[x+1+width2] = A1;
896 b[x+1 ] = A4;
897 }
898 A3= temp[width-1];
899 A3 -= A2;
900 A2 += (A1 + A3 + 2)>>2;
901 b[width -1] = A3;
902 b[width2-1] = A2;
903 #else
904 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
905 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0);
906 #endif
907 }
908
909 static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
910 int i;
911
912 for(i=0; i<width; i++){
913 b1[i] -= (b0[i] + b2[i])>>1;
914 }
915 }
916
917 static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
918 int i;
919
920 for(i=0; i<width; i++){
921 b1[i] += (b0[i] + b2[i] + 2)>>2;
922 }
923 }
924
/**
 * One level of the forward 2D 5/3 integer transform.
 *
 * The plane is walked two rows at a time with a rolling window of four
 * row pointers b0..b3. Row indices outside [0, height-1] are passed
 * through mirror(); the pointer comparisons (b1 <= b3, b0 <= b2) and the
 * y+2 < height test decide which steps run for the current window.
 * NOTE(review): these guards presumably skip steps whose rows were folded
 * back by mirror() at the top/bottom border - confirm against mirror().
 *
 * @param buffer top-left sample of the plane
 * @param width  number of columns to process
 * @param height number of rows to process
 * @param stride distance between vertically adjacent samples, in elements
 */
static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
    int y;
    DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
    DWTELEM *b1= buffer + mirror(-2  , height-1)*stride;

    for(y=-2; y<height; y+=2){
        /* the two rows entering the window in this iteration */
        DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
        DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;

        /* horizontal transform of the newly entered rows (timed section) */
{START_TIMER
        if(b1 <= b3)     horizontal_decompose53i(b2, width);
        if(y+2 < height) horizontal_decompose53i(b3, width);
STOP_TIMER("horizontal_decompose53i")}

        /* vertical steps: H0 on b2 first, then L0 on b1 which reads the updated b2 */
{START_TIMER
        if(b1 <= b3) vertical_decompose53iH0(b1, b2, b3, width);
        if(b0 <= b2) vertical_decompose53iL0(b0, b1, b2, width);
STOP_TIMER("vertical_decompose53i*")}

        /* slide the window down by two rows */
        b0=b2;
        b1=b3;
    }
}
948
/*
 * Coefficients of the four 9/7 lifting steps A..D used by the *97i
 * functions below. For each step X the functions compute
 *     (W_XM * (left + right) + W_XO) >> W_XS
 * i.e. W_XM is the multiplier, W_XO the rounding offset and W_XS the shift.
 *
 * lift5 starts out as an alias of lift. The active (#if 1) set undefines it
 * again, so the "#ifdef lift5" tests in vertical_*97iH1() take their #else
 * branch, which does not use W_CM, and horizontal calls to lift5() resolve
 * to a symbol of that name instead of lift().
 */
#define lift5 lift
#if 1
#define W_AM 3
#define W_AO 0
#define W_AS 1

#define W_BM 1
#define W_BO 8
#define W_BS 4

#undef lift5
#define W_CM 9999
#define W_CO 2
#define W_CS 2

#define W_DM 15
#define W_DO 16
#define W_DS 5
#elif 0
/* alternative coefficient set, disabled */
#define W_AM 55
#define W_AO 16
#define W_AS 5

#define W_BM 3
#define W_BO 32
#define W_BS 6

#define W_CM 127
#define W_CO 64
#define W_CS 7

#define W_DM 7
#define W_DO 8
#define W_DS 4
#elif 0
/* alternative coefficient set, disabled */
#define W_AM 97
#define W_AO 32
#define W_AS 6

#define W_BM 63
#define W_BO 512
#define W_BS 10

#define W_CM 13
#define W_CO 8
#define W_CS 4

#define W_DM 15
#define W_DO 16
#define W_DS 5

#else

/* alternative coefficient set, unreachable while the first branch is #if 1 */
#define W_AM 203
#define W_AO 64
#define W_AS 7

#define W_BM 217
#define W_BO 2048
#define W_BS 12

#define W_CM 113
#define W_CO 64
#define W_CS 7

#define W_DM 227
#define W_DO 128
#define W_DS 9
#endif
1018 static void horizontal_decompose97i(DWTELEM *b, int width){
1019 DWTELEM temp[width];
1020 const int w2= (width+1)>>1;
1021
1022 lift (temp+w2, b +1, b , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0);
1023 lift (temp , b , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0);
1024 lift5(b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
1025 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
1026 }
1027
1028
1029 static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1030 int i;
1031
1032 for(i=0; i<width; i++){
1033 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1034 }
1035 }
1036
1037 static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1038 int i;
1039
1040 for(i=0; i<width; i++){
1041 #ifdef lift5
1042 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1043 #else
1044 int r= 3*(b0[i] + b2[i]);
1045 r+= r>>4;
1046 r+= r>>8;
1047 b1[i] += (r+W_CO)>>W_CS;
1048 #endif
1049 }
1050 }
1051
1052 static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1053 int i;
1054
1055 for(i=0; i<width; i++){
1056 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1057 }
1058 }
1059
1060 static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1061 int i;
1062
1063 for(i=0; i<width; i++){
1064 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1065 }
1066 }
1067
/**
 * One level of the forward 2D 9/7 integer transform.
 *
 * The plane is walked two rows at a time with a rolling window of six row
 * pointers b0..b5. Row indices outside [0, height-1] are passed through
 * mirror(); the pointer comparisons and the y+4 < height test decide which
 * steps run for the current window.
 * NOTE(review): these guards presumably skip steps whose rows were folded
 * back by mirror() at the top/bottom border - confirm against mirror().
 *
 * @param buffer top-left sample of the plane
 * @param width  number of columns to process
 * @param height number of rows to process
 * @param stride distance between vertically adjacent samples, in elements
 */
static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
    int y;
    DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
    DWTELEM *b1= buffer + mirror(-4  , height-1)*stride;
    DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
    DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;

    for(y=-4; y<height; y+=2){
        /* the two rows entering the window in this iteration */
        DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
        DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;

        /* horizontal transform of the newly entered rows; timing is only
           recorded for width > 400 */
{START_TIMER
        if(b3 <= b5)     horizontal_decompose97i(b4, width);
        if(y+4 < height) horizontal_decompose97i(b5, width);
if(width>400){
STOP_TIMER("horizontal_decompose97i")
}}

        /* vertical steps, newest rows first: each later step reads rows
           already updated by the previous ones */
{START_TIMER
        if(b3 <= b5) vertical_decompose97iH0(b3, b4, b5, width);
        if(b2 <= b4) vertical_decompose97iL0(b2, b3, b4, width);
        if(b1 <= b3) vertical_decompose97iH1(b1, b2, b3, width);
        if(b0 <= b2) vertical_decompose97iL1(b0, b1, b2, width);

if(width>400){
STOP_TIMER("vertical_decompose97i")
}}

        /* slide the window down by two rows */
        b0=b2;
        b1=b3;
        b2=b4;
        b3=b5;
    }
}
1102
1103 void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1104 int level;
1105
1106 for(level=0; level<decomposition_count; level++){
1107 switch(type){
1108 case 0: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1109 case 1: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1110 case 2: spatial_decomposeX (buffer, width>>level, height>>level, stride<<level); break;
1111 }
1112 }
1113 }
1114
1115 static void horizontal_compose53i(DWTELEM *b, int width){
1116 DWTELEM temp[width];
1117 const int width2= width>>1;
1118 const int w2= (width+1)>>1;
1119 int A1,A2,A3,A4, x;
1120
1121 #if 0
1122 A2= temp[1 ];
1123 A4= temp[0 ];
1124 A1= temp[0+width2];
1125 A1 -= (A2 + A4)>>1;
1126 A4 += (A1 + 1)>>1;
1127 b[0+width2] = A1;
1128 b[0 ] = A4;
1129 for(x=1; x+1<width2; x+=2){
1130 A3= temp[x+width2];
1131 A4= temp[x+1 ];
1132 A3 -= (A2 + A4)>>1;
1133 A2 += (A1 + A3 + 2)>>2;
1134 b[x+width2] = A3;
1135 b[x ] = A2;
1136
1137 A1= temp[x+1+width2];
1138 A2= temp[x+2 ];
1139 A1 -= (A2 + A4)>>1;
1140 A4 += (A1 + A3 + 2)>>2;
1141 b[x+1+width2] = A1;
1142 b[x+1 ] = A4;
1143 }
1144 A3= temp[width-1];
1145 A3 -= A2;
1146 A2 += (A1 + A3 + 2)>>2;
1147 b[width -1] = A3;
1148 b[width2-1] = A2;
1149 #else
1150 lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1);
1151 lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1152 #endif
1153 for(x=0; x<width2; x++){
1154 b[2*x ]= temp[x ];
1155 b[2*x + 1]= temp[x+w2];
1156 }
1157 if(width&1)
1158 b[2*x ]= temp[x ];
1159 }
1160
1161 static void vertical_compose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1162 int i;
1163
1164 for(i=0; i<width; i++){
1165 b1[i] += (b0[i] + b2[i])>>1;
1166 }
1167 }
1168
1169 static void vertical_compose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1170 int i;
1171
1172 for(i=0; i<width; i++){
1173 b1[i] -= (b0[i] + b2[i] + 2)>>2;
1174 }
1175 }
1176
1177 static void spatial_compose53i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1178 cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1179 cs->b1 = buffer + mirror(-1 , height-1)*stride;
1180 cs->y = -1;
1181 }
1182
/**
 * Advance an incremental 5/3 inverse transform by two rows.
 *
 * Takes the window (b0, b1, y) stored in cs, fetches the next two
 * (mirrored) rows b2/b3, runs the guarded vertical steps followed by the
 * guarded horizontal steps on b0/b1, then stores b2/b3 as the new window
 * and increments cs->y by 2.
 *
 * @param cs     state set up by spatial_compose53i_init()
 * @param buffer top-left sample of the plane
 * @param width  number of columns to process
 * @param height number of rows in the plane
 * @param stride distance between vertically adjacent samples, in elements
 */
static void spatial_compose53i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
    int y= cs->y;
    DWTELEM *b0= cs->b0;
    DWTELEM *b1= cs->b1;
    DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
    DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;

    /* vertical steps: L0 on b2 first, then H0 on b1 which reads the updated b2 */
{START_TIMER
        if(b1 <= b3) vertical_compose53iL0(b1, b2, b3, width);
        if(b0 <= b2) vertical_compose53iH0(b0, b1, b2, width);
STOP_TIMER("vertical_compose53i*")}

    /* horizontal pass over the two rows leaving the window */
{START_TIMER
        if(y-1 >= 0) horizontal_compose53i(b0, width);
        if(b0 <= b2) horizontal_compose53i(b1, width);
STOP_TIMER("horizontal_compose53i")}

    /* slide the window down by two rows */
    cs->b0 = b2;
    cs->b1 = b3;
    cs->y += 2;
}
1204
1205 static void spatial_compose53i(DWTELEM *buffer, int width, int height, int stride){
1206 dwt_compose_t cs;
1207 spatial_compose53i_init(&cs, buffer, height, stride);
1208 while(cs.y <= height)
1209 spatial_compose53i_dy(&cs, buffer, width, height, stride);
1210 }
1211
1212
1213 static void horizontal_compose97i(DWTELEM *b, int width){
1214 DWTELEM temp[width];
1215 const int w2= (width+1)>>1;
1216
1217 lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
1218 lift5(temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
1219 lift (b , temp , temp+w2, 2, 1, 1, width, -W_BM, W_BO, W_BS, 0, 1);
1220 lift (b+1 , temp+w2, b , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1);
1221 }
1222
1223 static void vertical_compose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1224 int i;
1225
1226 for(i=0; i<width; i++){
1227 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1228 }
1229 }
1230
1231 static void vertical_compose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1232 int i;
1233
1234 for(i=0; i<width; i++){
1235 #ifdef lift5
1236 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1237 #else
1238 int r= 3*(b0[i] + b2[i]);
1239 r+= r>>4;
1240 r+= r>>8;
1241 b1[i] -= (r+W_CO)>>W_CS;
1242 #endif
1243 }
1244 }
1245
1246 static void vertical_compose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1247 int i;
1248
1249 for(i=0; i<width; i++){
1250 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1251 }
1252 }
1253
1254 static void vertical_compose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1255 int i;
1256
1257 for(i=0; i<width; i++){
1258 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1259 }
1260 }
1261
1262 static void spatial_compose97i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1263 cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1264 cs->b1 = buffer + mirror(-3 , height-1)*stride;
1265 cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1266 cs->b3 = buffer + mirror(-3+2, height-1)*stride;
1267 cs->y = -3;
1268 }
1269
/**
 * Advance an incremental 9/7 inverse transform by two rows.
 *
 * Takes the window (b0..b3, y) stored in cs, fetches the next two
 * (mirrored) rows b4/b5, runs the guarded vertical steps followed by the
 * guarded horizontal steps on b0/b1, then stores b2..b5 as the new window
 * and increments cs->y by 2.
 *
 * @param cs     state set up by spatial_compose97i_init()
 * @param buffer top-left sample of the plane
 * @param width  number of columns to process
 * @param height number of rows in the plane
 * @param stride distance between vertically adjacent samples, in elements
 */
static void spatial_compose97i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
    int y = cs->y;
    DWTELEM *b0= cs->b0;
    DWTELEM *b1= cs->b1;
    DWTELEM *b2= cs->b2;
    DWTELEM *b3= cs->b3;
    DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
    DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;

    /* disabled experiment: the trailing "&& 0" means this block never runs */
    if(stride == width && y+4 < height && 0){
        int x;
        for(x=0; x<width/2; x++)
            b5[x] += 64*2;
        for(; x<width; x++)
            b5[x] += 169*2;
    }

    /* vertical steps, newest rows first; timing is only recorded for
       width > 400 */
{START_TIMER
        if(b3 <= b5) vertical_compose97iL1(b3, b4, b5, width);
        if(b2 <= b4) vertical_compose97iH1(b2, b3, b4, width);
        if(b1 <= b3) vertical_compose97iL0(b1, b2, b3, width);
        if(b0 <= b2) vertical_compose97iH0(b0, b1, b2, width);
if(width>400){
STOP_TIMER("vertical_compose97i")}}

    /* horizontal pass over the two rows leaving the window */
{START_TIMER
        if(y-1>=  0) horizontal_compose97i(b0, width);
        if(b0 <= b2) horizontal_compose97i(b1, width);
if(width>400 && b0 <= b2){
STOP_TIMER("horizontal_compose97i")}}

    /* slide the window down by two rows */
    cs->b0=b2;
    cs->b1=b3;
    cs->b2=b4;
    cs->b3=b5;
    cs->y += 2;
}
1307
1308 static void spatial_compose97i(DWTELEM *buffer, int width, int height, int stride){
1309 dwt_compose_t cs;
1310 spatial_compose97i_init(&cs, buffer, height, stride);
1311 while(cs.y <= height)
1312 spatial_compose97i_dy(&cs, buffer, width, height, stride);
1313 }
1314
1315 void ff_spatial_idwt_init(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1316 int level;
1317 for(level=decomposition_count-1; level>=0; level--){
1318 switch(type){
1319 case 0: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1320 case 1: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
1321 /* not slicified yet */
1322 case 2: spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;
1323 }
1324 }
1325 }
1326
1327 void ff_spatial_idwt_slice(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
1328 const int support = type==1 ? 3 : 5;
1329 int level;
1330 if(type==2) return;
1331
1332 for(level=decomposition_count-1; level>=0; level--){
1333 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1334 switch(type){
1335 case 0: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1336 break;
1337 case 1: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1338 break;
1339 case 2: break;
1340 }
1341 }
1342 }
1343 }
1344
1345 void ff_spatial_idwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1346 if(type==2){
1347 int level;
1348 for(level=decomposition_count-1; level>=0; level--)
1349 spatial_composeX (buffer, width>>level, height>>level, stride<<level);
1350 }else{
1351 dwt_compose_t cs[MAX_DECOMPOSITIONS];
1352 int y;
1353 ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1354 for(y=0; y<height; y+=4)
1355 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
1356 }
1357 }
1358
/**
 * Encode one subband with context modelling plus zero-run coding.
 *
 * Two passes over the coefficients are made, both gathering the same
 * neighbourhood for each position: left (l), top-left (lt), top (t),
 * top-right (rt) and, if parent is non-NULL, the co-located parent
 * coefficient (p).
 *
 *  - Pass 1 only looks at positions whose whole neighbourhood is zero and
 *    records, in runs[], the number of zero coefficients preceding each
 *    non-zero one at such positions (plus one trailing run).
 *  - Pass 2 writes the bitstream: positions with a non-zero neighbourhood
 *    get an explicit significance bit, the others are covered by the run
 *    lengths from pass 1. Every non-zero coefficient is followed by its
 *    magnitude minus one and its sign.
 *
 * @param s           encoder context; s->c is the range coder written to
 * @param b           subband descriptor (dimensions, parent, coder states)
 * @param src         coefficients of this subband
 * @param parent      coefficients of the parent subband, or NULL
 * @param stride      distance between rows of src, in elements
 * @param orientation only referenced by commented-out code
 * @return 0 on success, -1 if fewer than w*40 bytes remain in the output
 *         buffer at the start of a row
 */
static int encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
    const int w= b->width;
    const int h= b->height;
    int x, y;

    if(1){
        int run=0;
        int runs[w*h];
        int run_index=0;

        /* pass 1: collect the zero-run lengths */
        for(y=0; y<h; y++){
            for(x=0; x<w; x++){
                int v, p=0;
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
                v= src[x + y*stride];

                if(y){
                    t= src[x + (y-1)*stride];
                    if(x){
                        lt= src[x - 1 + (y-1)*stride];
                    }
                    if(x + 1 < w){
                        rt= src[x + 1 + (y-1)*stride];
                    }
                }
                if(x){
                    l= src[x - 1 + y*stride];
                    /*if(x > 1){
                        if(orientation==1) ll= src[y + (x-2)*stride];
                        else               ll= src[x - 2 + y*stride];
                    }*/
                }
                if(parent){
                    int px= x>>1;
                    int py= y>>1;
                    if(px<b->parent->width && py<b->parent->height)
                        p= parent[px + py*2*stride];
                }
                /* only positions with an all-zero neighbourhood take part in run coding */
                if(!(/*ll|*/l|lt|t|rt|p)){
                    if(v){
                        runs[run_index++]= run;
                        run=0;
                    }else{
                        run++;
                    }
                }
            }
        }
        /* trailing run, then rewind and emit the first run length */
        runs[run_index++]= run;
        run_index=0;
        run= runs[run_index++];

        put_symbol2(&s->c, b->state[1], run, 3);

        /* pass 2: write the coefficients */
        for(y=0; y<h; y++){
            /* bail out if the output buffer is about to run out */
            if(s->c.bytestream_end - s->c.bytestream < w*40){
                av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
                return -1;
            }
            for(x=0; x<w; x++){
                int v, p=0;
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
                v= src[x + y*stride];

                if(y){
                    t= src[x + (y-1)*stride];
                    if(x){
                        lt= src[x - 1 + (y-1)*stride];
                    }
                    if(x + 1 < w){
                        rt= src[x + 1 + (y-1)*stride];
                    }
                }
                if(x){
                    l= src[x - 1 + y*stride];
                    /*if(x > 1){
                        if(orientation==1) ll= src[y + (x-2)*stride];
                        else               ll= src[x - 2 + y*stride];
                    }*/
                }
                if(parent){
                    int px= x>>1;
                    int py= y>>1;
                    if(px<b->parent->width && py<b->parent->height)
                        p= parent[px + py*2*stride];
                }
                if(/*ll|*/l|lt|t|rt|p){
                    /* non-zero neighbourhood: explicit significance bit */
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));

                    put_rac(&s->c, &b->state[0][context], !!v);
                }else{
                    /* all-zero neighbourhood: significance is implied by the runs */
                    if(!run){
                        run= runs[run_index++];

                        put_symbol2(&s->c, b->state[1], run, 3);
                        assert(v);
                    }else{
                        run--;
                        assert(!v);
                    }
                }
                if(v){
                    /* magnitude minus one, then the sign */
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));

                    put_symbol2(&s->c, b->state[context + 2], ABS(v)-1, context-4);
                    put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3b[l&0xFF] + 3*quant3b[t&0xFF]], v<0);
                }
            }
        }
    }
    return 0;
}
1471
1472 static int encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1473 // encode_subband_qtree(s, b, src, parent, stride, orientation);
1474 // encode_subband_z0run(s, b, src, parent, stride, orientation);
1475 return encode_subband_c0run(s, b, src, parent, stride, orientation);
1476 // encode_subband_dzr(s, b, src, parent, stride, orientation);
1477 }
1478
/**
 * Decode one subband (counterpart of encode_subband_c0run()).
 *
 * The destination area of src is cleared first. While decoding, the
 * non-zero coefficients are also recorded as a sparse list in
 * b->x[] (column) / b->coeff[] (signed, not dequantized value); every row
 * is terminated by an end marker x = w+1. The neighbourhood needed for
 * context modelling (l, lt, t, rt and the parent value p) is taken from
 * these lists - of this band for the previous row and of b->parent for
 * the parent row - instead of from src.
 *
 * Dequantized values, ((v*qmul + qadd) >> QEXPSHIFT) with the decoded
 * sign, are written to src. For the band whose buf is
 * s->spatial_dwt_buffer, or when s->qlog is LOSSLESS_QLOG, qadd is 0 and
 * qmul is 1<<QEXPSHIFT.
 *
 * @param s           decoder context; s->c is the range coder read from
 * @param b           subband descriptor, also receives the sparse list
 * @param src         output coefficients
 * @param parent      only tested against NULL here; parent data is read
 *                    through b->parent
 * @param stride      distance between rows of src, in elements
 * @param orientation not used in this function
 */
static inline void decode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
    const int w= b->width;
    const int h= b->height;
    int x,y;
    const int qlog= clip(s->qlog + b->qlog, 0, 128);
    int qmul= qexp[qlog&7]<<(qlog>>3);
    int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;

    START_TIMER

    if(b->buf == s->spatial_dwt_buffer || s->qlog == LOSSLESS_QLOG){
        qadd= 0;
        qmul= 1<<QEXPSHIFT;
    }

    if(1){
        int run;
        int index=0;            /* next free slot in b->x / b->coeff */
        int prev_index=-1;      /* read position in the previous row's list */
        int prev2_index=0;      /* start of the current row's list */
        int parent_index= 0;    /* read position in the parent row's list */
        int prev_parent_index= 0;

        for(y=0; y<b->height; y++)
            memset(&src[y*stride], 0, b->width*sizeof(DWTELEM));

        run= get_symbol2(&s->c, b->state[1], 3);
        for(y=0; y<h; y++){
            int v=0;
            int lt=0, t=0, rt=0;

            /* prime rt with the coefficient at column 0 of the previous row */
            if(y && b->x[prev_index] == 0){
                rt= b->coeff[prev_index];
            }
            for(x=0; x<w; x++){
                int p=0;
                const int l= v; /* v still holds the value decoded at x-1 */

                lt= t; t= rt;

                if(y){
                    if(b->x[prev_index] <= x)
                        prev_index++;
                    if(b->x[prev_index] == x + 1)
                        rt= b->coeff[prev_index];
                    else
                        rt=0;
                }
                if(parent){
                    if(x>>1 > b->parent->x[parent_index]){
                        parent_index++;
                    }
                    if(x>>1 == b->parent->x[parent_index]){
                        p= b->parent->coeff[parent_index];
                    }
                }
                if(/*ll|*/l|lt|t|rt|p){
                    /* non-zero neighbourhood: explicit significance bit */
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));

                    v=get_rac(&s->c, &b->state[0][context]);
                }else{
                    /* all-zero neighbourhood: significance comes from the run length */
                    if(!run){
                        run= get_symbol2(&s->c, b->state[1], 3);
                        v=1;
                    }else{
                        run--;
                        v=0;

                        /* jump over further zeros of the run, limited by the
                           next entry of the previous row and of the parent row */
                        if(y && parent){
                            int max_run;

                            max_run= FFMIN(run, b->x[prev_index] - x - 2);
                            max_run= FFMIN(max_run, 2*b->parent->x[parent_index] - x - 1);
                            x+= max_run;
                            run-= max_run;
                        }
                    }
                }
                if(v){
                    /* magnitude minus one, then the sign */
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
                    v= get_symbol2(&s->c, b->state[context + 2], context-4) + 1;
                    if(get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3b[l&0xFF] + 3*quant3b[t&0xFF]])){
                        src[x + y*stride]=-(( v*qmul + qadd)>>(QEXPSHIFT));
                        v= -v;
                    }else{
                        src[x + y*stride]= (( v*qmul + qadd)>>(QEXPSHIFT));
                    }
                    b->x[index]=x; //FIXME interleave x/coeff
                    b->coeff[index++]= v;
                }
            }
            b->x[index++]= w+1; //end marker
            prev_index= prev2_index;
            prev2_index= index;

            if(parent){
                /* skip to the end marker of the parent row */
                while(b->parent->x[parent_index] != b->parent->width+1)
                    parent_index++;
                parent_index++;
                /* two rows of this band share one parent row: move on after
                   odd rows, rewind after even rows */
                if(y&1){
                    prev_parent_index= parent_index;
                }else{
                    parent_index= prev_parent_index;
                }
            }
        }
        b->x[index++]= w+1; //end marker
        if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
            STOP_TIMER("decode_subband")
        }

        return;
    }
}
1593
1594 static void reset_contexts(SnowContext *s){
1595 int plane_index, level, orientation;
1596
1597 for(plane_index=0; plane_index<3; plane_index++){
1598 for(level=0; level<s->spatial_decomposition_count; level++){
1599 for(orientation=level ? 1:0; orientation<4; orientation++){
1600 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
1601 }
1602 }
1603 }
1604 memset(s->header_state, MID_STATE, sizeof(s->header_state));
1605 memset(s->block_state, MID_STATE, sizeof(s->block_state));
1606 }
1607
1608 static int alloc_blocks(SnowContext *s){
1609 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1610 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1611
1612 s->b_width = w;
1613 s->b_height= h;
1614
1615 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
1616 return 0;
1617 }
1618
1619 static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1620 uint8_t *bytestream= d->bytestream;
1621 uint8_t *bytestream_start= d->bytestream_start;
1622 *d= *s;
1623 d->bytestream= bytestream;
1624 d->bytestream_start= bytestream_start;
1625 }
1626
1627 //near copy & paste from dsputil, FIXME
/**
 * Sum of all pixels in a w x w square.
 *
 * @param pix       top-left pixel of the square
 * @param line_size distance between the starts of two rows, in bytes
 * @param w         side length of the square
 * @return the sum of the w*w pixel values
 */
static int pix_sum(uint8_t * pix, int line_size, int w)
{
    int total = 0;
    int row, col;

    for (row = 0; row < w; row++) {
        const uint8_t *line = pix + row * line_size;

        for (col = 0; col < w; col++)
            total += line[col];
    }
    return total;
}
1642
1643 //near copy & paste from dsputil, FIXME
1644 static int pix_norm1(uint8_t * pix, int line_size, int w)
1645 {
1646 int s, i, j;
1647 uint32_t *sq = squareTbl + 256;
1648
1649 s = 0;
1650 for (i = 0; i < w; i++) {
1651 for (j = 0; j < w; j ++) {
1652 s += sq[pix[0]];
1653 pix ++;
1654 }
1655 pix += line_size - w;
1656 }
1657 return s;
1658 }
1659
1660 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int type){
1661 const int w= s->b_width << s->block_max_depth;
1662 const int rem_depth= s->block_max_depth - level;
1663 const int index= (x + y*w) << rem_depth;
1664 const int block_w= 1<<rem_depth;
1665 BlockNode block;
1666 int i,j;
1667
1668 block.color[0]= l;
1669 block.color[1]= cb;
1670 block.color[2]= cr;
1671 block.mx= mx;
1672 block.my= my;
1673 block.type= type;
1674 block.level= level;
1675
1676 for(j=0; j<block_w; j++){
1677 for(i=0; i<block_w; i++){
1678 s->block[index + i + j*w]= block;
1679 }
1680 }
1681 }
1682
1683 static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
1684 const int offset[3]= {
1685 y*c-> stride + x,
1686 ((y*c->uvstride + x)>>1),
1687 ((y*c->uvstride + x)>>1),
1688 };
1689 int i;
1690 for(i=0; i<3; i++){
1691 c->src[0][i]= src [i];
1692 c->ref[0][i]= ref [i] + offset[i];
1693 }
1694 assert(!ref_index);
1695 }
1696
//FIXME copy&paste
/* Named slots of the predictor array P[10][2] used by encode_q_branch();
   element [0] of a slot is the x component, [1] the y component. */
#define P_LEFT P[1]
#define P_TOP P[2]
#define P_TOPRIGHT P[3]
#define P_MEDIAN P[4]
#define P_MV1 P[9]
#define FLAG_QPEL 1 //must be 1
1704
/**
 * Choose and encode the coding of block (x, y) at quadtree level "level".
 *
 * For non-keyframes three candidates are evaluated:
 *  - inter: motion search against s->last_picture, motion vector coded
 *    relative to the median predictor (pmx, pmy);
 *  - intra: one mean colour per plane, coded relative to the left
 *    neighbour's colour;
 *  - split: if level != s->block_max_depth, the four children at level+1
 *    are encoded recursively.
 * The inter and intra candidates are written to scratch buffers with
 * copies of the range coder and of s->block_state. If the split is not
 * chosen, the cheaper of the two is copied over whatever was written
 * since entry and the coder/state are replaced by that candidate's copies.
 *
 * For keyframes the block is simply marked BLOCK_INTRA with the predicted
 * values and nothing is written.
 *
 * @return the score of the chosen candidate (0 for keyframes)
 */
static int encode_q_branch(SnowContext *s, int level, int x, int y){
    uint8_t p_buffer[1024];
    uint8_t i_buffer[1024];
    uint8_t p_state[sizeof(s->block_state)];
    uint8_t i_state[sizeof(s->block_state)];
    RangeCoder pc, ic;
    /* output position at entry, restored when a non-split candidate wins */
    uint8_t *pbbak= s->c.bytestream;
    uint8_t *pbbak_start= s->c.bytestream_start;
    int score, score2, iscore, i_len, p_len, block_s, sum;
    const int w= s->b_width << s->block_max_depth;
    const int h= s->b_height << s->block_max_depth;
    const int rem_depth= s->block_max_depth - level;
    const int index= (x + y*w) << rem_depth;
    const int block_w= 1<<(LOG2_MB_SIZE - level);
    static BlockNode null_block= { //FIXME add border maybe
        .color= {128,128,128},
        .mx= 0,
        .my= 0,
        .type= 0,
        .level= 0,
    };
    int trx= (x+1)<<rem_depth;
    int try= (y+1)<<rem_depth;
    /* neighbours; null_block stands in for blocks outside the grid */
    BlockNode *left  = x ? &s->block[index-1] : &null_block;
    BlockNode *top   = y ? &s->block[index-w] : &null_block;
    BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
    BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
    BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
    BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
    /* predictions: colour from the left block, motion vector as median */
    int pl = left->color[0];
    int pcb= left->color[1];
    int pcr= left->color[2];
    int pmx= mid_pred(left->mx, top->mx, tr->mx);
    int pmy= mid_pred(left->my, top->my, tr->my);
    int mx=0, my=0;
    int l,cr,cb, i;
    const int stride= s->current_picture.linesize[0];
    const int uvstride= s->current_picture.linesize[1];
    const int instride= s->input_picture.linesize[0];
    const int uvinstride= s->input_picture.linesize[1];
    uint8_t *new_l = s->input_picture.data[0] + (x + y*  instride)*block_w;
    uint8_t *new_cb= s->input_picture.data[1] + (x + y*uvinstride)*block_w/2;
    uint8_t *new_cr= s->input_picture.data[2] + (x + y*uvinstride)*block_w/2;
    uint8_t current_mb[3][stride*block_w];
    uint8_t *current_data[3]= {&current_mb[0][0], &current_mb[1][0], &current_mb[2][0]};
    int P[10][2];
    int16_t last_mv[3][2];
    int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
    const int shift= 1+qpel;
    MotionEstContext *c= &s->m.me;
    int mx_context= av_log2(2*ABS(left->mx - top->mx));
    int my_context= av_log2(2*ABS(left->my - top->my));
    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;

    assert(sizeof(s->block_state) >= 256);
    if(s->keyframe){
        set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, BLOCK_INTRA);
        return 0;
    }

    //FIXME optimize
    /* copy the input block into a local buffer laid out with the
       current picture's strides */
    for(i=0; i<block_w; i++)
        memcpy(&current_mb[0][0] + stride*i, new_l + instride*i, block_w);
    for(i=0; i<block_w>>1; i++)
        memcpy(&current_mb[1][0] + uvstride*i, new_cb + uvinstride*i, block_w>>1);
    for(i=0; i<block_w>>1; i++)
        memcpy(&current_mb[2][0] + uvstride*i, new_cr + uvinstride*i, block_w>>1);

//    clip predictors / edge ?

    P_LEFT[0]= left->mx;
    P_LEFT[1]= left->my;
    P_TOP [0]= top->mx;
    P_TOP [1]= top->my;
    P_TOPRIGHT[0]= tr->mx;
    P_TOPRIGHT[1]= tr->my;

    last_mv[0][0]= s->block[index].mx;
    last_mv[0][1]= s->block[index].my;
    last_mv[1][0]= right->mx;
    last_mv[1][1]= right->my;
    last_mv[2][0]= bottom->mx;
    last_mv[2][1]= bottom->my;

    s->m.mb_stride=2;
    s->m.mb_x=
    s->m.mb_y= 0;
    s->m.me.skip= 0;

    init_ref(c, current_data, s->last_picture.data, NULL, block_w*x, block_w*y, 0);

    assert(s->m.me.  stride ==   stride);
    assert(s->m.me.uvstride == uvstride);

    c->penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
    c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
    c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
    c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;

    /* search range relative to the block position */
    c->xmin = - x*block_w - 16+2;
    c->ymin = - y*block_w - 16+2;
    c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
    c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;

    /* clip the predictors to the search range */
    if(P_LEFT[0]     > (c->xmax<<shift)) P_LEFT[0]    = (c->xmax<<shift);
    if(P_LEFT[1]     > (c->ymax<<shift)) P_LEFT[1]    = (c->ymax<<shift);
    if(P_TOP[0]      > (c->xmax<<shift)) P_TOP[0]     = (c->xmax<<shift);
    if(P_TOP[1]      > (c->ymax<<shift)) P_TOP[1]     = (c->ymax<<shift);
    if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
    if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
    if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);

    P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
    P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);

    if (!y) {
        c->pred_x= P_LEFT[0];
        c->pred_y= P_LEFT[1];
    } else {
        c->pred_x = P_MEDIAN[0];
        c->pred_y = P_MEDIAN[1];
    }

    score= ff_epzs_motion_search(&s->m, &mx, &my, P, 0, /*ref_index*/ 0, last_mv,
                             (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);

    assert(mx >= c->xmin);
    assert(mx <= c->xmax);
    assert(my >= c->ymin);
    assert(my <= c->ymax);

    score= s->m.me.sub_motion_search(&s->m, &mx, &my, score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
    score= ff_get_mb_score(&s->m, mx, my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
    //FIXME if mb_cmp != SSE then intra cant be compared currently and mb_penalty vs. lambda2

  //  subpel search
    /* inter candidate: trial-encode into p_buffer with copies of the coder and states */
    pc= s->c;
    pc.bytestream_start=
    pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
    memcpy(p_state, s->block_state, sizeof(s->block_state));

    if(level!=s->block_max_depth)
        put_rac(&pc, &p_state[4 + s_context], 1);
    put_rac(&pc, &p_state[1 + left->type + top->type], 0);
    put_symbol(&pc, &p_state[128 + 32*mx_context], mx - pmx, 1);
    put_symbol(&pc, &p_state[128 + 32*my_context], my - pmy, 1);
    p_len= pc.bytestream - pc.bytestream_start;
    /* add the rate term: bytes written, outstanding bytes and the change of log2(range) */
    score += (s->lambda2*(p_len*8
                    + (pc.outstanding_count - s->c.outstanding_count)*8
                    + (-av_log2(pc.range)    + av_log2(s->c.range))
             ))>>FF_LAMBDA_SHIFT;

    /* intra candidate: rounded mean per plane; the distortion term uses luma only */
    block_s= block_w*block_w;
    sum = pix_sum(&current_mb[0][0], stride, block_w);
    l= (sum + block_s/2)/block_s;
    iscore = pix_norm1(&current_mb[0][0], stride, block_w) - 2*l*sum + l*l*block_s;

    block_s= block_w*block_w>>2;
    sum = pix_sum(&current_mb[1][0], uvstride, block_w>>1);
    cb= (sum + block_s/2)/block_s;
//    iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
    sum = pix_sum(&current_mb[2][0], uvstride, block_w>>1);
    cr= (sum + block_s/2)/block_s;
//    iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;

    ic= s->c;
    ic.bytestream_start=
    ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
    memcpy(i_state, s->block_state, sizeof(s->block_state));
    if(level!=s->block_max_depth)
        put_rac(&ic, &i_state[4 + s_context], 1);
    put_rac(&ic, &i_state[1 + left->type + top->type], 1);
    put_symbol(&ic, &i_state[32],  l-pl , 1);
    put_symbol(&ic, &i_state[64], cb-pcb, 1);
    put_symbol(&ic, &i_state[96], cr-pcr, 1);
    i_len= ic.bytestream - ic.bytestream_start;
    iscore += (s->lambda2*(i_len*8
                    + (ic.outstanding_count - s->c.outstanding_count)*8
                    + (-av_log2(ic.range)    + av_log2(s->c.range))
             ))>>FF_LAMBDA_SHIFT;

//    assert(score==256*256*256*64-1);
    assert(iscore < 255*255*256 + s->lambda2*10);
    assert(iscore >= 0);
    assert(l>=0 && l<=255);
    assert(pl>=0 && pl<=255);

    /* top-level blocks also feed the scene change score */
    if(level==0){
        int varc= iscore >> 8;
        int vard= score >> 8;
        if (vard <= 64 || vard < varc)
            c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
        else
            c->scene_change_score+= s->m.qscale;
    }

    /* split candidate: encoded directly into the real stream */
    if(level!=s->block_max_depth){
        put_rac(&s->c, &s->block_state[4 + s_context], 0);
        score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
        score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
        score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead

        if(score2 < score && score2 < iscore)
            return score2;
    }

    /* not split: overwrite the stream from the entry position with the
       winning candidate and adopt its coder and block states */
    if(iscore < score){
        memcpy(pbbak, i_buffer, i_len);
        s->c= ic;
        s->c.bytestream_start= pbbak_start;
        s->c.bytestream= pbbak + i_len;
        set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, BLOCK_INTRA);
        memcpy(s->block_state, i_state, sizeof(s->block_state));
        return iscore;
    }else{
        memcpy(pbbak, p_buffer, p_len);
        s->c= pc;
        s->c.bytestream_start= pbbak_start;
        s->c.bytestream= pbbak + p_len;
        set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, 0);
        memcpy(s->block_state, p_state, sizeof(s->block_state));
        return score;
    }
}
1931
1932 static void decode_q_branch(SnowContext *s, int level, int x, int y){
1933 const int w= s->b_width << s->block_max_depth;
1934 const int rem_depth= s->block_max_depth - level;
1935 const int index= (x + y*w) << rem_depth;
1936 static BlockNode null_block= { //FIXME add border maybe
1937 .color= {128,128,128},
1938 .mx= 0,
1939 .my= 0,
1940 .type= 0,
1941 .level= 0,
1942 };
1943 int trx= (x+1)<<rem_depth;
1944 BlockNode *left = x ? &s->block[index-1] : &null_block;
1945 BlockNode *top = y ? &s->block[index-w] : &null_block;
1946 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
1947 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
1948 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1949
1950 if(s->keyframe){
1951 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, BLOCK_INTRA);
1952 return;
1953 }
1954
1955 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
1956 int type;
1957 int l = left->color[0];
1958 int cb= left->color[1];
1959 int cr= left->color[2];
1960 int mx= mid_pred(left->mx, top->mx, tr->mx);
1961 int my= mid_pred(left->my, top->my, tr->my);
1962 int mx_context= av_log2(2*ABS(left->mx - top->mx)) + 0*av_log2(2*ABS(tr->mx - top->mx));
1963 int my_context= av_log2(2*ABS(left->my - top->my)) + 0*av_log2(2*ABS(tr->my - top->my));
1964
1965 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
1966
1967 if(type){
1968 l += get_symbol(&s->c, &s->block_state[32], 1);
1969 cb+= get_symbol(&s->c, &s->block_state[64], 1);
1970 cr+= get_symbol(&s->c, &s->block_state[96], 1);
1971 }else{
1972 mx+= get_symbol(&s->c, &s->block_state[128 + 32*mx_context], 1);
1973 my+= get_symbol(&s->c, &s->block_state[128 + 32*my_context], 1);
1974 }
1975 set_blocks(s, level, x, y, l, cb, cr, mx, my, type);
1976 }else{
1977 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
1978 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
1979 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
1980 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
1981 }
1982 }
1983
1984 static void encode_blocks(SnowContext *s){
1985 int x, y;
1986 int w= s->b_width;
1987 int h= s->b_height;
1988
1989 for(y=0; y<h; y++){
1990 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
1991 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1992 return;
1993 }
1994 for(x=0; x<w; x++){
1995 encode_q_branch(s, 0, x, y);
1996 }
1997 }
1998 }
1999
2000 static void decode_blocks(SnowContext *s){
2001 int x, y;
2002 int w= s->b_width;
2003 int h= s->b_height;
2004
2005 for(y=0; y<h; y++){
2006 for(x=0; x<w; x++){
2007 decode_q_branch(s, 0, x, y);
2008 }
2009 }
2010 }
2011
/**
 * Turn a rounded filter sum that is scaled by 256 into a pixel value.
 *
 * The 6-tap filter used by mc_block() has negative coefficients, so the sum
 * can be negative or exceed 255*256. Such values are saturated to 0 and 255;
 * storing them unclamped into an uint8_t would wrap around modulo 256.
 * The sign is tested before shifting so no negative value is right shifted.
 */
static inline uint8_t mc_scale_clip(int sum){
    if(sum < 0)
        return 0;
    sum >>= 8;
    return sum > 255 ? 255 : sum;
}

/**
 * Interpolate a b_w x b_h block at a fractional position, in two passes.
 *
 * The first pass filters horizontally: for b_h+5 rows it reads src[x..x+5]
 * and writes one intermediate sample per column into tmp. The second pass
 * filters vertically over 6 consecutive rows of tmp and writes dst.
 * In each pass the 6-tap value 20*(a2+a3) - 5*(a1+a4) + (a0+a5) is blended
 * with the nearest of the two centre samples: an offset of 0 reproduces a2,
 * 8 yields the plain 6-tap value and 16 reproduces a3. With dx==dy==0 the
 * output sample (x,y) therefore equals src[(x+2) + (y+2)*stride].
 * The result of each pass is saturated to 0..255.
 *
 * @param dst    output, b_h rows of b_w samples, rows are stride bytes apart
 * @param src    input, b_h+5 rows of b_w+5 samples are read
 * @param tmp    scratch buffer, b_h+5 rows of b_w samples are written
 * @param stride distance in bytes between rows, shared by dst, src and tmp
 * @param b_w    block width
 * @param b_h    block height
 * @param dx     horizontal offset, the weights used assume 0..16
 * @param dy     vertical offset, the weights used assume 0..16
 */
static void mc_block(uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
    int x, y;
    START_TIMER
    /* horizontal pass; 5 extra rows are needed by the vertical filter */
    for(y=0; y < b_h+5; y++){
        for(x=0; x < b_w; x++){
            int a0= src[x    ];
            int a1= src[x + 1];
            int a2= src[x + 2];
            int a3= src[x + 3];
            int a4= src[x + 4];
            int a5= src[x + 5];
            int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);

            if(dx<8) tmp[x]= mc_scale_clip(32*a2*( 8-dx) +    am* dx    + 128);
            else     tmp[x]= mc_scale_clip(   am*(16-dx) + 32*a3*(dx-8) + 128);
        }
        tmp += stride;
        src += stride;
    }
    tmp -= (b_h+5)*stride; /* rewind to the first intermediate row */

    /* vertical pass */
    for(y=0; y < b_h; y++){
        for(x=0; x < b_w; x++){
            int a0= tmp[x + 0*stride];
            int a1= tmp[x + 1*stride];
            int a2= tmp[x + 2*stride];
            int a3= tmp[x + 3*stride];
            int a4= tmp[x + 4*stride];
            int a5= tmp[x + 5*stride];
            int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);

            if(dy<8) dst[x]= mc_scale_clip(32*a2*( 8-dy) +    am* dy    + 128);
            else     dst[x]= mc_scale_clip(   am*(16-dy) + 32*a3*(dy-8) + 128);
        }
        dst += stride;
        tmp += stride;
    }
    STOP_TIMER("mc_block")
}
2072
/*
 * mca(fx, fy, bsize) defines the static function
 * mc_block_hpel<fx><fy><bsize>(dst, src, stride, h), which runs mc_block() on
 * a bsize x bsize block with the fixed offsets fx and fy.
 * h must equal bsize (asserted). src is moved back by 2 samples and 2 rows
 * before it is handed to mc_block(). The scratch buffer is a variable length
 * array of stride*(bsize+5) bytes on the stack.
 */
#define mca(fx,fy,bsize)\
static void mc_block_hpel ## fx ## fy ## bsize(uint8_t *dst, uint8_t *src, int stride, int h){\
    uint8_t scratch[stride*(bsize+5)];\
    assert(h==bsize);\
    mc_block(dst, src - (2*stride + 2), scratch, stride, bsize, bsize, fx, fy);\
}
2079
2080 mca( 0, 0,16)
2081 mca( 8, 0,16)
2082 mca( 0, 8,16)
2083 mca( 8, 8,16)
2084 mca( 0, 0,8)
2085 mca( 8, 0<