c9d47a8aa14dba13aced04d36b7b737469669e5c
[libav.git] / libavcodec / snow.c
1 /*
2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18
19 #include "avcodec.h"
20 #include "common.h"
21 #include "dsputil.h"
22
23 #include "rangecoder.h"
24 #define MID_STATE 128
25
26 #include "mpegvideo.h"
27
28 #undef NDEBUG
29 #include <assert.h>
30
31 #define MAX_DECOMPOSITIONS 8
32 #define MAX_PLANES 4
33 #define DWTELEM int
34 #define QROOT 8
35 #define LOSSLESS_QLOG -128
36 #define FRAC_BITS 8
37
38 static const int8_t quant3[256]={
39 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
43 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
44 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
45 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
46 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
47 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
48 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
49 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
50 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
51 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
52 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
53 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
54 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
55 };
56 static const int8_t quant3b[256]={
57 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
60 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
61 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
62 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
63 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
64 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
65 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
66 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
68 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
69 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
70 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
71 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
72 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
73 };
74 static const int8_t quant5[256]={
75 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
76 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
77 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
78 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
79 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
80 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
81 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
82 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
83 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
84 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
85 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
86 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
87 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
88 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
89 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
90 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
91 };
92 static const int8_t quant7[256]={
93 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
94 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
95 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
96 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
97 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
98 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
99 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
100 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
101 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
102 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
103 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
104 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
105 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
106 -3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
107 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
108 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
109 };
110 static const int8_t quant9[256]={
111 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
112 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
113 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
114 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
115 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
116 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
117 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
118 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
119 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
120 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
121 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
122 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
123 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
124 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
125 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
126 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
127 };
128 static const int8_t quant11[256]={
129 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
130 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
131 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
132 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
133 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
134 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
135 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
136 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
137 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
138 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
139 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
140 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
141 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
142 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
143 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
144 -4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
145 };
146 static const int8_t quant13[256]={
147 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
148 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
149 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
150 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
151 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
152 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
153 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
154 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
155 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
156 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
157 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
158 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
159 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
160 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
161 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
162 -4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
163 };
164
165 #define LOG2_OBMC_MAX 6
166 #define OBMC_MAX (1<<(LOG2_OBMC_MAX))
167 #if 0 //64*cubic
168 static const uint8_t obmc32[1024]={
169 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
170 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
171 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
172 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
173 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
174 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
175 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
176 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
177 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
178 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
179 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
180 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
181 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
182 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
183 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
184 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
185 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
186 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
187 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
188 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
189 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
190 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
191 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
192 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
193 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
194 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
195 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
196 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
197 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
198 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
199 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
200 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
201 //error:0.000022
202 };
203 static const uint8_t obmc16[256]={
204 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
205 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
206 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
207 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
208 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
209 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
210 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
211 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
212 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
213 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
214 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
215 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
216 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
217 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
218 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
219 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
220 //error:0.000033
221 };
222 #elif 1 // 64*linear
223 static const uint8_t obmc32[1024]={
224 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
225 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
226 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
227 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
228 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
229 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
230 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
231 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
232 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
233 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
234 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
235 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
236 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
237 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
238 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
239 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
240 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
241 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
242 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
243 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
244 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
245 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
246 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
247 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
248 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
249 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
250 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
251 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
252 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
253 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
254 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
255 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
256 //error:0.000020
257 };
258 static const uint8_t obmc16[256]={
259 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
260 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
261 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
262 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
263 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
264 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
265 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
266 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
267 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
268 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
269 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
270 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
271 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
272 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
273 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
274 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
275 //error:0.000015
276 };
277 #else //64*cos
278 static const uint8_t obmc32[1024]={
279 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
280 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
281 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
282 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
283 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
284 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
285 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
286 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
287 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
288 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
289 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
290 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
291 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
292 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
293 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
294 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
295 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
296 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
297 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
298 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
299 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
300 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
301 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
302 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
303 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
304 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
305 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
306 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
307 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
308 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
309 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
310 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
311 //error:0.000022
312 };
313 static const uint8_t obmc16[256]={
314 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
315 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
316 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
317 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
318 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
319 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
320 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
321 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
322 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
323 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
324 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
325 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
326 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
327 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
328 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
329 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
330 //error:0.000022
331 };
332 #endif
333
334 //linear *64
335 static const uint8_t obmc8[64]={
336 1, 3, 5, 7, 7, 5, 3, 1,
337 3, 9,15,21,21,15, 9, 3,
338 5,15,25,35,35,25,15, 5,
339 7,21,35,49,49,35,21, 7,
340 7,21,35,49,49,35,21, 7,
341 5,15,25,35,35,25,15, 5,
342 3, 9,15,21,21,15, 9, 3,
343 1, 3, 5, 7, 7, 5, 3, 1,
344 //error:0.000000
345 };
346
347 //linear *64
348 static const uint8_t obmc4[16]={
349 4,12,12, 4,
350 12,36,36,12,
351 12,36,36,12,
352 4,12,12, 4,
353 //error:0.000000
354 };
355
356 static const uint8_t *obmc_tab[4]={
357 obmc32, obmc16, obmc8, obmc4
358 };
359
360 typedef struct BlockNode{
361 int16_t mx;
362 int16_t my;
363 uint8_t color[3];
364 uint8_t type;
365 //#define TYPE_SPLIT 1
366 #define BLOCK_INTRA 1
367 //#define TYPE_NOCOLOR 4
368 uint8_t level; //FIXME merge into type?
369 }BlockNode;
370
371 #define LOG2_MB_SIZE 4
372 #define MB_SIZE (1<<LOG2_MB_SIZE)
373
374 typedef struct SubBand{
375 int level;
376 int stride;
377 int width;
378 int height;
379 int qlog; ///< log(qscale)/log[2^(1/6)]
380 DWTELEM *buf;
381 int16_t *x;
382 DWTELEM *coeff;
383 struct SubBand *parent;
384 uint8_t state[/*7*2*/ 7 + 512][32];
385 }SubBand;
386
387 typedef struct Plane{
388 int width;
389 int height;
390 SubBand band[MAX_DECOMPOSITIONS][4];
391 }Plane;
392
393 typedef struct SnowContext{
394 // MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
395
396 AVCodecContext *avctx;
397 RangeCoder c;
398 DSPContext dsp;
399 AVFrame input_picture;
400 AVFrame current_picture;
401 AVFrame last_picture;
402 AVFrame mconly_picture;
403 // uint8_t q_context[16];
404 uint8_t header_state[32];
405 uint8_t block_state[128 + 32*128];
406 int keyframe;
407 int always_reset;
408 int version;
409 int spatial_decomposition_type;
410 int temporal_decomposition_type;
411 int spatial_decomposition_count;
412 int temporal_decomposition_count;
413 DWTELEM *spatial_dwt_buffer;
414 int colorspace_type;
415 int chroma_h_shift;
416 int chroma_v_shift;
417 int spatial_scalability;
418 int qlog;
419 int lambda;
420 int lambda2;
421 int mv_scale;
422 int qbias;
423 #define QBIAS_SHIFT 3
424 int b_width;
425 int b_height;
426 int block_max_depth;
427 Plane plane[MAX_PLANES];
428 BlockNode *block;
429
430 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
431 }SnowContext;
432
433 #ifdef __sgi
434 // Avoid a name clash on SGI IRIX
435 #undef qexp
436 #endif
437 #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
438 static const uint8_t qexp[8]={
439 128, 140, 152, 166, 181, 197, 215, 235
440 // 64, 70, 76, 83, 91, 99, 108, 117
441 // 32, 35, 38, 41, 45, 49, 54, 59
442 // 16, 17, 19, 21, 23, 25, 27, 29
443 // 8, 9, 10, 10, 11, 12, 13, 15
444 };
445
446 static inline int mirror(int v, int m){
447 if (v<0) return -v;
448 else if(v>m) return 2*m-v;
449 else return v;
450 }
451
452 static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
453 int i;
454
455 if(v){
456 const int a= ABS(v);
457 const int e= av_log2(a);
458 #if 1
459 const int el= FFMIN(e, 10);
460 put_rac(c, state+0, 0);
461
462 for(i=0; i<el; i++){
463 put_rac(c, state+1+i, 1); //1..10
464 }
465 for(; i<e; i++){
466 put_rac(c, state+1+9, 1); //1..10
467 }
468 put_rac(c, state+1+FFMIN(i,9), 0);
469
470 for(i=e-1; i>=el; i--){
471 put_rac(c, state+22+9, (a>>i)&1); //22..31
472 }
473 for(; i>=0; i--){
474 put_rac(c, state+22+i, (a>>i)&1); //22..31
475 }
476
477 if(is_signed)
478 put_rac(c, state+11 + el, v < 0); //11..21
479 #else
480
481 put_rac(c, state+0, 0);
482 if(e<=9){
483 for(i=0; i<e; i++){
484 put_rac(c, state+1+i, 1); //1..10
485 }
486 put_rac(c, state+1+i, 0);
487
488 for(i=e-1; i>=0; i--){
489 put_rac(c, state+22+i, (a>>i)&1); //22..31
490 }
491
492 if(is_signed)
493 put_rac(c, state+11 + e, v < 0); //11..21
494 }else{
495 for(i=0; i<e; i++){
496 put_rac(c, state+1+FFMIN(i,9), 1); //1..10
497 }
498 put_rac(c, state+1+FFMIN(i,9), 0);
499
500 for(i=e-1; i>=0; i--){
501 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
502 }
503
504 if(is_signed)
505 put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
506 }
507 #endif
508 }else{
509 put_rac(c, state+0, 1);
510 }
511 }
512
513 static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
514 if(get_rac(c, state+0))
515 return 0;
516 else{
517 int i, e, a;
518 e= 0;
519 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
520 e++;
521 }
522
523 a= 1;
524 for(i=e-1; i>=0; i--){
525 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
526 }
527
528 if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
529 return -a;
530 else
531 return a;
532 }
533 }
534
535 static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
536 int i;
537 int r= log2>=0 ? 1<<log2 : 1;
538
539 assert(v>=0);
540 assert(log2>=-4);
541
542 while(v >= r){
543 put_rac(c, state+4+log2, 1);
544 v -= r;
545 log2++;
546 if(log2>0) r+=r;
547 }
548 put_rac(c, state+4+log2, 0);
549
550 for(i=log2-1; i>=0; i--){
551 put_rac(c, state+31-i, (v>>i)&1);
552 }
553 }
554
555 static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
556 int i;
557 int r= log2>=0 ? 1<<log2 : 1;
558 int v=0;
559
560 assert(log2>=-4);
561
562 while(get_rac(c, state+4+log2)){
563 v+= r;
564 log2++;
565 if(log2>0) r+=r;
566 }
567
568 for(i=log2-1; i>=0; i--){
569 v+= get_rac(c, state+31-i)<<i;
570 }
571
572 return v;
573 }
574
575 static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
576 const int mirror_left= !highpass;
577 const int mirror_right= (width&1) ^ highpass;
578 const int w= (width>>1) - 1 + (highpass & width);
579 int i;
580
581 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
582 if(mirror_left){
583 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
584 dst += dst_step;
585 src += src_step;
586 }
587
588 for(i=0; i<w; i++){
589 dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse);
590 }
591
592 if(mirror_right){
593 dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse);
594 }
595 }
596
597 static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
598 const int mirror_left= !highpass;
599 const int mirror_right= (width&1) ^ highpass;
600 const int w= (width>>1) - 1 + (highpass & width);
601 int i;
602
603 if(mirror_left){
604 int r= 3*2*ref[0];
605 r += r>>4;
606 r += r>>8;
607 dst[0] = LIFT(src[0], ((r+add)>>shift), inverse);
608 dst += dst_step;
609 src += src_step;
610 }
611
612 for(i=0; i<w; i++){
613 int r= 3*(ref[i*ref_step] + ref[(i+1)*ref_step]);
614 r += r>>4;
615 r += r>>8;
616 dst[i*dst_step] = LIFT(src[i*src_step], ((r+add)>>shift), inverse);
617 }
618
619 if(mirror_right){
620 int r= 3*2*ref[w*ref_step];
621 r += r>>4;
622 r += r>>8;
623 dst[w*dst_step] = LIFT(src[w*src_step], ((r+add)>>shift), inverse);
624 }
625 }
626
627
628 static void inplace_lift(DWTELEM *dst, int width, int *coeffs, int n, int shift, int start, int inverse){
629 int x, i;
630
631 for(x=start; x<width; x+=2){
632 int64_t sum=0;
633
634 for(i=0; i<n; i++){
635 int x2= x + 2*i - n + 1;
636 if (x2< 0) x2= -x2;
637 else if(x2>=width) x2= 2*width-x2-2;
638 sum += coeffs[i]*(int64_t)dst[x2];
639 }
640 if(inverse) dst[x] -= (sum + (1<<shift)/2)>>shift;
641 else dst[x] += (sum + (1<<shift)/2)>>shift;
642 }
643 }
644
645 static void inplace_liftV(DWTELEM *dst, int width, int height, int stride, int *coeffs, int n, int shift, int start, int inverse){
646 int x, y, i;
647 for(y=start; y<height; y+=2){
648 for(x=0; x<width; x++){
649 int64_t sum=0;
650
651 for(i=0; i<n; i++){
652 int y2= y + 2*i - n + 1;
653 if (y2< 0) y2= -y2;
654 else if(y2>=height) y2= 2*height-y2-2;
655 sum += coeffs[i]*(int64_t)dst[x + y2*stride];
656 }
657 if(inverse) dst[x + y*stride] -= (sum + (1<<shift)/2)>>shift;
658 else dst[x + y*stride] += (sum + (1<<shift)/2)>>shift;
659 }
660 }
661 }
662
663 #define SCALEX 1
664 #define LX0 0
665 #define LX1 1
666
667 #if 0 // more accurate 9/7
668 #define N1 2
669 #define SHIFT1 14
670 #define COEFFS1 (int[]){-25987,-25987}
671 #define N2 2
672 #define SHIFT2 19
673 #define COEFFS2 (int[]){-27777,-27777}
674 #define N3 2
675 #define SHIFT3 15
676 #define COEFFS3 (int[]){28931,28931}
677 #define N4 2
678 #define SHIFT4 15
679 #define COEFFS4 (int[]){14533,14533}
680 #elif 1 // 13/7 CRF
681 #define N1 4
682 #define SHIFT1 4
683 #define COEFFS1 (int[]){1,-9,-9,1}
684 #define N2 4
685 #define SHIFT2 4
686 #define COEFFS2 (int[]){-1,5,5,-1}
687 #define N3 0
688 #define SHIFT3 1
689 #define COEFFS3 NULL
690 #define N4 0
691 #define SHIFT4 1
692 #define COEFFS4 NULL
693 #elif 1 // 3/5
694 #define LX0 1
695 #define LX1 0
696 #define SCALEX 0.5
697 #define N1 2
698 #define SHIFT1 1
699 #define COEFFS1 (int[]){1,1}
700 #define N2 2
701 #define SHIFT2 2
702 #define COEFFS2 (int[]){-1,-1}
703 #define N3 0
704 #define SHIFT3 0
705 #define COEFFS3 NULL
706 #define N4 0
707 #define SHIFT4 0
708 #define COEFFS4 NULL
709 #elif 1 // 11/5
710 #define N1 0
711 #define SHIFT1 1
712 #define COEFFS1 NULL
713 #define N2 2
714 #define SHIFT2 2
715 #define COEFFS2 (int[]){-1,-1}
716 #define N3 2
717 #define SHIFT3 0
718 #define COEFFS3 (int[]){-1,-1}
719 #define N4 4
720 #define SHIFT4 7
721 #define COEFFS4 (int[]){-5,29,29,-5}
722 #define SCALEX 4
723 #elif 1 // 9/7 CDF
724 #define N1 2
725 #define SHIFT1 7
726 #define COEFFS1 (int[]){-203,-203}
727 #define N2 2
728 #define SHIFT2 12
729 #define COEFFS2 (int[]){-217,-217}
730 #define N3 2
731 #define SHIFT3 7
732 #define COEFFS3 (int[]){113,113}
733 #define N4 2
734 #define SHIFT4 9
735 #define COEFFS4 (int[]){227,227}
736 #define SCALEX 1
737 #elif 1 // 7/5 CDF
738 #define N1 0
739 #define SHIFT1 1
740 #define COEFFS1 NULL
741 #define N2 2
742 #define SHIFT2 2
743 #define COEFFS2 (int[]){-1,-1}
744 #define N3 2
745 #define SHIFT3 0
746 #define COEFFS3 (int[]){-1,-1}
747 #define N4 2
748 #define SHIFT4 4
749 #define COEFFS4 (int[]){3,3}
750 #elif 1 // 9/7 MN
751 #define N1 4
752 #define SHIFT1 4
753 #define COEFFS1 (int[]){1,-9,-9,1}
754 #define N2 2
755 #define SHIFT2 2
756 #define COEFFS2 (int[]){1,1}
757 #define N3 0
758 #define SHIFT3 1
759 #define COEFFS3 NULL
760 #define N4 0
761 #define SHIFT4 1
762 #define COEFFS4 NULL
763 #else // 13/7 CRF
764 #define N1 4
765 #define SHIFT1 4
766 #define COEFFS1 (int[]){1,-9,-9,1}
767 #define N2 4
768 #define SHIFT2 4
769 #define COEFFS2 (int[]){-1,5,5,-1}
770 #define N3 0
771 #define SHIFT3 1
772 #define COEFFS3 NULL
773 #define N4 0
774 #define SHIFT4 1
775 #define COEFFS4 NULL
776 #endif
777 static void horizontal_decomposeX(DWTELEM *b, int width){
778 DWTELEM temp[width];
779 const int width2= width>>1;
780 const int w2= (width+1)>>1;
781 int A1,A2,A3,A4, x;
782
783 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 0);
784 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 0);
785 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 0);
786 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 0);
787
788 for(x=0; x<width2; x++){
789 temp[x ]= b[2*x ];
790 temp[x+w2]= b[2*x + 1];
791 }
792 if(width&1)
793 temp[x ]= b[2*x ];
794 memcpy(b, temp, width*sizeof(int));
795 }
796
797 static void horizontal_composeX(DWTELEM *b, int width){
798 DWTELEM temp[width];
799 const int width2= width>>1;
800 int A1,A2,A3,A4, x;
801 const int w2= (width+1)>>1;
802
803 memcpy(temp, b, width*sizeof(int));
804 for(x=0; x<width2; x++){
805 b[2*x ]= temp[x ];
806 b[2*x + 1]= temp[x+w2];
807 }
808 if(width&1)
809 b[2*x ]= temp[x ];
810
811 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 1);
812 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 1);
813 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 1);
814 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 1);
815 }
816
817 static void spatial_decomposeX(DWTELEM *buffer, int width, int height, int stride){
818 int x, y;
819
820 for(y=0; y<height; y++){
821 for(x=0; x<width; x++){
822 buffer[y*stride + x] *= SCALEX;
823 }
824 }
825
826 for(y=0; y<height; y++){
827 horizontal_decomposeX(buffer + y*stride, width);
828 }
829
830 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 0);
831 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0);
832 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0);
833 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0);
834 }
835
836 static void spatial_composeX(DWTELEM *buffer, int width, int height, int stride){
837 int x, y;
838
839 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1);
840 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1);
841 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1);
842 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 1);
843
844 for(y=0; y<height; y++){
845 horizontal_composeX(buffer + y*stride, width);
846 }
847
848 for(y=0; y<height; y++){
849 for(x=0; x<width; x++){
850 buffer[y*stride + x] /= SCALEX;
851 }
852 }
853 }
854
855 static void horizontal_decompose53i(DWTELEM *b, int width){
856 DWTELEM temp[width];
857 const int width2= width>>1;
858 int A1,A2,A3,A4, x;
859 const int w2= (width+1)>>1;
860
861 for(x=0; x<width2; x++){
862 temp[x ]= b[2*x ];
863 temp[x+w2]= b[2*x + 1];
864 }
865 if(width&1)
866 temp[x ]= b[2*x ];
867 #if 0
868 A2= temp[1 ];
869 A4= temp[0 ];
870 A1= temp[0+width2];
871 A1 -= (A2 + A4)>>1;
872 A4 += (A1 + 1)>>1;
873 b[0+width2] = A1;
874 b[0 ] = A4;
875 for(x=1; x+1<width2; x+=2){
876 A3= temp[x+width2];
877 A4= temp[x+1 ];
878 A3 -= (A2 + A4)>>1;
879 A2 += (A1 + A3 + 2)>>2;
880 b[x+width2] = A3;
881 b[x ] = A2;
882
883 A1= temp[x+1+width2];
884 A2= temp[x+2 ];
885 A1 -= (A2 + A4)>>1;
886 A4 += (A1 + A3 + 2)>>2;
887 b[x+1+width2] = A1;
888 b[x+1 ] = A4;
889 }
890 A3= temp[width-1];
891 A3 -= A2;
892 A2 += (A1 + A3 + 2)>>2;
893 b[width -1] = A3;
894 b[width2-1] = A2;
895 #else
896 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
897 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0);
898 #endif
899 }
900
901 static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
902 int i;
903
904 for(i=0; i<width; i++){
905 b1[i] -= (b0[i] + b2[i])>>1;
906 }
907 }
908
909 static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
910 int i;
911
912 for(i=0; i<width; i++){
913 b1[i] += (b0[i] + b2[i] + 2)>>2;
914 }
915 }
916
917 static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
918 int y;
919 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
920 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride;
921
922 for(y=-2; y<height; y+=2){
923 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
924 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
925
926 {START_TIMER
927 if(b1 <= b3) horizontal_decompose53i(b2, width);
928 if(y+2 < height) horizontal_decompose53i(b3, width);
929 STOP_TIMER("horizontal_decompose53i")}
930
931 {START_TIMER
932 if(b1 <= b3) vertical_decompose53iH0(b1, b2, b3, width);
933 if(b0 <= b2) vertical_decompose53iL0(b0, b1, b2, width);
934 STOP_TIMER("vertical_decompose53i*")}
935
936 b0=b2;
937 b1=b3;
938 }
939 }
940
941 #define lift5 lift
942 #if 1
943 #define W_AM 3
944 #define W_AO 0
945 #define W_AS 1
946
947 #define W_BM 1
948 #define W_BO 8
949 #define W_BS 4
950
951 #undef lift5
952 #define W_CM 9999
953 #define W_CO 2
954 #define W_CS 2
955
956 #define W_DM 15
957 #define W_DO 16
958 #define W_DS 5
959 #elif 0
960 #define W_AM 55
961 #define W_AO 16
962 #define W_AS 5
963
964 #define W_BM 3
965 #define W_BO 32
966 #define W_BS 6
967
968 #define W_CM 127
969 #define W_CO 64
970 #define W_CS 7
971
972 #define W_DM 7
973 #define W_DO 8
974 #define W_DS 4
975 #elif 0
976 #define W_AM 97
977 #define W_AO 32
978 #define W_AS 6
979
980 #define W_BM 63
981 #define W_BO 512
982 #define W_BS 10
983
984 #define W_CM 13
985 #define W_CO 8
986 #define W_CS 4
987
988 #define W_DM 15
989 #define W_DO 16
990 #define W_DS 5
991
992 #else
993
994 #define W_AM 203
995 #define W_AO 64
996 #define W_AS 7
997
998 #define W_BM 217
999 #define W_BO 2048
1000 #define W_BS 12
1001
1002 #define W_CM 113
1003 #define W_CO 64
1004 #define W_CS 7
1005
1006 #define W_DM 227
1007 #define W_DO 128
1008 #define W_DS 9
1009 #endif
1010 static void horizontal_decompose97i(DWTELEM *b, int width){
1011 DWTELEM temp[width];
1012 const int w2= (width+1)>>1;
1013
1014 lift (temp+w2, b +1, b , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0);
1015 lift (temp , b , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0);
1016 lift5(b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
1017 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
1018 }
1019
1020
1021 static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1022 int i;
1023
1024 for(i=0; i<width; i++){
1025 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1026 }
1027 }
1028
1029 static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1030 int i;
1031
1032 for(i=0; i<width; i++){
1033 #ifdef lift5
1034 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1035 #else
1036 int r= 3*(b0[i] + b2[i]);
1037 r+= r>>4;
1038 r+= r>>8;
1039 b1[i] += (r+W_CO)>>W_CS;
1040 #endif
1041 }
1042 }
1043
1044 static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1045 int i;
1046
1047 for(i=0; i<width; i++){
1048 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1049 }
1050 }
1051
1052 static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1053 int i;
1054
1055 for(i=0; i<width; i++){
1056 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1057 }
1058 }
1059
1060 static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
1061 int y;
1062 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
1063 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride;
1064 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
1065 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
1066
1067 for(y=-4; y<height; y+=2){
1068 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1069 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1070
1071 {START_TIMER
1072 if(b3 <= b5) horizontal_decompose97i(b4, width);
1073 if(y+4 < height) horizontal_decompose97i(b5, width);
1074 if(width>400){
1075 STOP_TIMER("horizontal_decompose97i")
1076 }}
1077
1078 {START_TIMER
1079 if(b3 <= b5) vertical_decompose97iH0(b3, b4, b5, width);
1080 if(b2 <= b4) vertical_decompose97iL0(b2, b3, b4, width);
1081 if(b1 <= b3) vertical_decompose97iH1(b1, b2, b3, width);
1082 if(b0 <= b2) vertical_decompose97iL1(b0, b1, b2, width);
1083
1084 if(width>400){
1085 STOP_TIMER("vertical_decompose97i")
1086 }}
1087
1088 b0=b2;
1089 b1=b3;
1090 b2=b4;
1091 b3=b5;
1092 }
1093 }
1094
1095 void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1096 int level;
1097
1098 for(level=0; level<decomposition_count; level++){
1099 switch(type){
1100 case 0: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1101 case 1: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1102 case 2: spatial_decomposeX (buffer, width>>level, height>>level, stride<<level); break;
1103 }
1104 }
1105 }
1106
1107 static void horizontal_compose53i(DWTELEM *b, int width){
1108 DWTELEM temp[width];
1109 const int width2= width>>1;
1110 const int w2= (width+1)>>1;
1111 int A1,A2,A3,A4, x;
1112
1113 #if 0
1114 A2= temp[1 ];
1115 A4= temp[0 ];
1116 A1= temp[0+width2];
1117 A1 -= (A2 + A4)>>1;
1118 A4 += (A1 + 1)>>1;
1119 b[0+width2] = A1;
1120 b[0 ] = A4;
1121 for(x=1; x+1<width2; x+=2){
1122 A3= temp[x+width2];
1123 A4= temp[x+1 ];
1124 A3 -= (A2 + A4)>>1;
1125 A2 += (A1 + A3 + 2)>>2;
1126 b[x+width2] = A3;
1127 b[x ] = A2;
1128
1129 A1= temp[x+1+width2];
1130 A2= temp[x+2 ];
1131 A1 -= (A2 + A4)>>1;
1132 A4 += (A1 + A3 + 2)>>2;
1133 b[x+1+width2] = A1;
1134 b[x+1 ] = A4;
1135 }
1136 A3= temp[width-1];
1137 A3 -= A2;
1138 A2 += (A1 + A3 + 2)>>2;
1139 b[width -1] = A3;
1140 b[width2-1] = A2;
1141 #else
1142 lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1);
1143 lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1144 #endif
1145 for(x=0; x<width2; x++){
1146 b[2*x ]= temp[x ];
1147 b[2*x + 1]= temp[x+w2];
1148 }
1149 if(width&1)
1150 b[2*x ]= temp[x ];
1151 }
1152
1153 static void vertical_compose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1154 int i;
1155
1156 for(i=0; i<width; i++){
1157 b1[i] += (b0[i] + b2[i])>>1;
1158 }
1159 }
1160
1161 static void vertical_compose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1162 int i;
1163
1164 for(i=0; i<width; i++){
1165 b1[i] -= (b0[i] + b2[i] + 2)>>2;
1166 }
1167 }
1168
1169 static void spatial_compose53i(DWTELEM *buffer, int width, int height, int stride){
1170 int y;
1171 DWTELEM *b0= buffer + mirror(-1-1, height-1)*stride;
1172 DWTELEM *b1= buffer + mirror(-1 , height-1)*stride;
1173
1174 for(y=-1; y<=height; y+=2){
1175 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1176 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1177
1178 {START_TIMER
1179 if(b1 <= b3) vertical_compose53iL0(b1, b2, b3, width);
1180 if(b0 <= b2) vertical_compose53iH0(b0, b1, b2, width);
1181 STOP_TIMER("vertical_compose53i*")}
1182
1183 {START_TIMER
1184 if(y-1 >= 0) horizontal_compose53i(b0, width);
1185 if(b0 <= b2) horizontal_compose53i(b1, width);
1186 STOP_TIMER("horizontal_compose53i")}
1187
1188 b0=b2;
1189 b1=b3;
1190 }
1191 }
1192
1193
1194 static void horizontal_compose97i(DWTELEM *b, int width){
1195 DWTELEM temp[width];
1196 const int w2= (width+1)>>1;
1197
1198 lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
1199 lift5(temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
1200 lift (b , temp , temp+w2, 2, 1, 1, width, -W_BM, W_BO, W_BS, 0, 1);
1201 lift (b+1 , temp+w2, b , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1);
1202 }
1203
1204 static void vertical_compose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1205 int i;
1206
1207 for(i=0; i<width; i++){
1208 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1209 }
1210 }
1211
1212 static void vertical_compose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1213 int i;
1214
1215 for(i=0; i<width; i++){
1216 #ifdef lift5
1217 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1218 #else
1219 int r= 3*(b0[i] + b2[i]);
1220 r+= r>>4;
1221 r+= r>>8;
1222 b1[i] -= (r+W_CO)>>W_CS;
1223 #endif
1224 }
1225 }
1226
1227 static void vertical_compose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1228 int i;
1229
1230 for(i=0; i<width; i++){
1231 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1232 }
1233 }
1234
1235 static void vertical_compose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1236 int i;
1237
1238 for(i=0; i<width; i++){
1239 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1240 }
1241 }
1242
1243 static void spatial_compose97i(DWTELEM *buffer, int width, int height, int stride){
1244 int y;
1245 DWTELEM *b0= buffer + mirror(-3-1, height-1)*stride;
1246 DWTELEM *b1= buffer + mirror(-3 , height-1)*stride;
1247 DWTELEM *b2= buffer + mirror(-3+1, height-1)*stride;
1248 DWTELEM *b3= buffer + mirror(-3+2, height-1)*stride;
1249
1250 for(y=-3; y<=height; y+=2){
1251 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1252 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1253
1254 if(stride == width && y+4 < height && 0){
1255 int x;
1256 for(x=0; x<width/2; x++)
1257 b5[x] += 64*2;
1258 for(; x<width; x++)
1259 b5[x] += 169*2;
1260 }
1261
1262 {START_TIMER
1263 if(b3 <= b5) vertical_compose97iL1(b3, b4, b5, width);
1264 if(b2 <= b4) vertical_compose97iH1(b2, b3, b4, width);
1265 if(b1 <= b3) vertical_compose97iL0(b1, b2, b3, width);
1266 if(b0 <= b2) vertical_compose97iH0(b0, b1, b2, width);
1267 if(width>400){
1268 STOP_TIMER("vertical_compose97i")}}
1269
1270 {START_TIMER
1271 if(y-1>= 0) horizontal_compose97i(b0, width);
1272 if(b0 <= b2) horizontal_compose97i(b1, width);
1273 if(width>400 && b0 <= b2){
1274 STOP_TIMER("horizontal_compose97i")}}
1275
1276 b0=b2;
1277 b1=b3;
1278 b2=b4;
1279 b3=b5;
1280 }
1281 }
1282
1283 void ff_spatial_idwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1284 int level;
1285
1286 for(level=decomposition_count-1; level>=0; level--){
1287 switch(type){
1288 case 0: spatial_compose97i(buffer, width>>level, height>>level, stride<<level); break;
1289 case 1: spatial_compose53i(buffer, width>>level, height>>level, stride<<level); break;
1290 case 2: spatial_composeX (buffer, width>>level, height>>level, stride<<level); break;
1291 }
1292 }
1293 }
1294
1295 static void encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1296 const int w= b->width;
1297 const int h= b->height;
1298 int x, y;
1299
1300 if(1){
1301 int run=0;
1302 int runs[w*h];
1303 int run_index=0;
1304
1305 for(y=0; y<h; y++){
1306 for(x=0; x<w; x++){
1307 int v, p=0;
1308 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1309 v= src[x + y*stride];
1310
1311 if(y){
1312 t= src[x + (y-1)*stride];
1313 if(x){
1314 lt= src[x - 1 + (y-1)*stride];
1315 }
1316 if(x + 1 < w){
1317 rt= src[x + 1 + (y-1)*stride];
1318 }
1319 }
1320 if(x){
1321 l= src[x - 1 + y*stride];
1322 /*if(x > 1){
1323 if(orientation==1) ll= src[y + (x-2)*stride];
1324 else ll= src[x - 2 + y*stride];
1325 }*/
1326 }
1327 if(parent){
1328 int px= x>>1;
1329 int py= y>>1;
1330 if(px<b->parent->width && py<b->parent->height)
1331 p= parent[px + py*2*stride];
1332 }
1333 if(!(/*ll|*/l|lt|t|rt|p)){
1334 if(v){
1335 runs[run_index++]= run;
1336 run=0;
1337 }else{
1338 run++;
1339 }
1340 }
1341 }
1342 }
1343 runs[run_index++]= run;
1344 run_index=0;
1345 run= runs[run_index++];
1346
1347 put_symbol2(&s->c, b->state[1], run, 3);
1348
1349 for(y=0; y<h; y++){
1350 for(x=0; x<w; x++){
1351 int v, p=0;
1352 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1353 v= src[x + y*stride];
1354
1355 if(y){
1356 t= src[x + (y-1)*stride];
1357 if(x){
1358 lt= src[x - 1 + (y-1)*stride];
1359 }
1360 if(x + 1 < w){
1361 rt= src[x + 1 + (y-1)*stride];
1362 }
1363 }
1364 if(x){
1365 l= src[x - 1 + y*stride];
1366 /*if(x > 1){
1367 if(orientation==1) ll= src[y + (x-2)*stride];
1368 else ll= src[x - 2 + y*stride];
1369 }*/
1370 }
1371 if(parent){
1372 int px= x>>1;
1373 int py= y>>1;
1374 if(px<b->parent->width && py<b->parent->height)
1375 p= parent[px + py*2*stride];
1376 }
1377 if(/*ll|*/l|lt|t|rt|p){
1378 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1379
1380 put_rac(&s->c, &b->state[0][context], !!v);
1381 }else{
1382 if(!run){
1383 run= runs[run_index++];
1384
1385 put_symbol2(&s->c, b->state[1], run, 3);
1386 assert(v);
1387 }else{
1388 run--;
1389 assert(!v);
1390 }
1391 }
1392 if(v){
1393 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1394
1395 put_symbol2(&s->c, b->state[context + 2], ABS(v)-1, context-4);
1396 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3b[l&0xFF] + 3*quant3b[t&0xFF]], v<0);
1397 }
1398 }
1399 }
1400 }
1401 }
1402
1403 static void encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1404 // encode_subband_qtree(s, b, src, parent, stride, orientation);
1405 // encode_subband_z0run(s, b, src, parent, stride, orientation);
1406 encode_subband_c0run(s, b, src, parent, stride, orientation);
1407 // encode_subband_dzr(s, b, src, parent, stride, orientation);
1408 }
1409
1410 static inline void decode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1411 const int w= b->width;
1412 const int h= b->height;
1413 int x,y;
1414 const int qlog= clip(s->qlog + b->qlog, 0, 128);
1415 int qmul= qexp[qlog&7]<<(qlog>>3);
1416 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1417
1418 START_TIMER
1419
1420 if(b->buf == s->spatial_dwt_buffer || s->qlog == LOSSLESS_QLOG){
1421 qadd= 0;
1422 qmul= 1<<QEXPSHIFT;
1423 }
1424
1425 if(1){
1426 int run;
1427 int index=0;
1428 int prev_index=-1;
1429 int prev2_index=0;
1430 int parent_index= 0;
1431 int prev_parent_index= 0;
1432
1433 for(y=0; y<b->height; y++)
1434 memset(&src[y*stride], 0, b->width*sizeof(DWTELEM));
1435
1436 run= get_symbol2(&s->c, b->state[1], 3);
1437 for(y=0; y<h; y++){
1438 int v=0;
1439 int lt=0, t=0, rt=0;
1440
1441 if(y && b->x[prev_index] == 0){
1442 rt= b->coeff[prev_index];
1443 }
1444 for(x=0; x<w; x++){
1445 int p=0;
1446 const int l= v;
1447
1448 lt= t; t= rt;
1449
1450 if(y){
1451 if(b->x[prev_index] <= x)
1452 prev_index++;
1453 if(b->x[prev_index] == x + 1)
1454 rt= b->coeff[prev_index];
1455 else
1456 rt=0;
1457 }
1458 if(parent){
1459 if(x>>1 > b->parent->x[parent_index]){
1460 parent_index++;
1461 }
1462 if(x>>1 == b->parent->x[parent_index]){
1463 p= b->parent->coeff[parent_index];
1464 }
1465 }
1466 if(/*ll|*/l|lt|t|rt|p){
1467 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1468
1469 v=get_rac(&s->c, &b->state[0][context]);
1470 }else{
1471 if(!run){
1472 run= get_symbol2(&s->c, b->state[1], 3);
1473 v=1;
1474 }else{
1475 run--;
1476 v=0;
1477
1478 if(y && parent){
1479 int max_run;
1480
1481 max_run= FFMIN(run, b->x[prev_index] - x - 2);
1482 max_run= FFMIN(max_run, 2*b->parent->x[parent_index] - x - 1);
1483 x+= max_run;
1484 run-= max_run;
1485 }
1486 }
1487 }
1488 if(v){
1489 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1490 v= get_symbol2(&s->c, b->state[context + 2], context-4) + 1;
1491 if(get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3b[l&0xFF] + 3*quant3b[t&0xFF]])){
1492 src[x + y*stride]=-(( v*qmul + qadd)>>(QEXPSHIFT));
1493 v= -v;
1494 }else{
1495 src[x + y*stride]= (( v*qmul + qadd)>>(QEXPSHIFT));
1496 }
1497 b->x[index]=x; //FIXME interleave x/coeff
1498 b->coeff[index++]= v;
1499 }
1500 }
1501 b->x[index++]= w+1; //end marker
1502 prev_index= prev2_index;
1503 prev2_index= index;
1504
1505 if(parent){
1506 while(b->parent->x[parent_index] != b->parent->width+1)
1507 parent_index++;
1508 parent_index++;
1509 if(y&1){
1510 prev_parent_index= parent_index;
1511 }else{
1512 parent_index= prev_parent_index;
1513 }
1514 }
1515 }
1516 b->x[index++]= w+1; //end marker
1517 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
1518 STOP_TIMER("decode_subband")
1519 }
1520
1521 return;
1522 }
1523 }
1524
1525 static void reset_contexts(SnowContext *s){
1526 int plane_index, level, orientation;
1527
1528 for(plane_index=0; plane_index<3; plane_index++){
1529 for(level=0; level<s->spatial_decomposition_count; level++){
1530 for(orientation=level ? 1:0; orientation<4; orientation++){
1531 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
1532 }
1533 }
1534 }
1535 memset(s->header_state, MID_STATE, sizeof(s->header_state));
1536 memset(s->block_state, MID_STATE, sizeof(s->block_state));
1537 }
1538
1539 static int alloc_blocks(SnowContext *s){
1540 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1541 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1542
1543 s->b_width = w;
1544 s->b_height= h;
1545
1546 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
1547 return 0;
1548 }
1549
1550 static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1551 uint8_t *bytestream= d->bytestream;
1552 uint8_t *bytestream_start= d->bytestream_start;
1553 *d= *s;
1554 d->bytestream= bytestream;
1555 d->bytestream_start= bytestream_start;
1556 }
1557
1558 //near copy & paste from dsputil, FIXME
1559 static int pix_sum(uint8_t * pix, int line_size, int w)
1560 {
1561 int s, i, j;
1562
1563 s = 0;
1564 for (i = 0; i < w; i++) {
1565 for (j = 0; j < w; j++) {
1566 s += pix[0];
1567 pix ++;
1568 }
1569 pix += line_size - w;
1570 }
1571 return s;
1572 }
1573
1574 //near copy & paste from dsputil, FIXME
1575 static int pix_norm1(uint8_t * pix, int line_size, int w)
1576 {
1577 int s, i, j;
1578 uint32_t *sq = squareTbl + 256;
1579
1580 s = 0;
1581 for (i = 0; i < w; i++) {
1582 for (j = 0; j < w; j ++) {
1583 s += sq[pix[0]];
1584 pix ++;
1585 }
1586 pix += line_size - w;
1587 }
1588 return s;
1589 }
1590
1591 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int type){
1592 const int w= s->b_width << s->block_max_depth;
1593 const int rem_depth= s->block_max_depth - level;
1594 const int index= (x + y*w) << rem_depth;
1595 const int block_w= 1<<rem_depth;
1596 BlockNode block;
1597 int i,j;
1598
1599 block.color[0]= l;
1600 block.color[1]= cb;
1601 block.color[2]= cr;
1602 block.mx= mx;
1603 block.my= my;
1604 block.type= type;
1605 block.level= level;
1606
1607 for(j=0; j<block_w; j++){
1608 for(i=0; i<block_w; i++){
1609 s->block[index + i + j*w]= block;
1610 }
1611 }
1612 }
1613
1614 static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
1615 const int offset[3]= {
1616 y*c-> stride + x,
1617 ((y*c->uvstride + x)>>1),
1618 ((y*c->uvstride + x)>>1),
1619 };
1620 int i;
1621 for(i=0; i<3; i++){
1622 c->src[0][i]= src [i];
1623 c->ref[0][i]= ref [i] + offset[i];
1624 }
1625 assert(!ref_index);
1626 }
1627
1628 //FIXME copy&paste
1629 #define P_LEFT P[1]
1630 #define P_TOP P[2]
1631 #define P_TOPRIGHT P[3]
1632 #define P_MEDIAN P[4]
1633 #define P_MV1 P[9]
1634 #define FLAG_QPEL 1 //must be 1
1635
1636 static int encode_q_branch(SnowContext *s, int level, int x, int y){
1637 uint8_t p_buffer[1024];
1638 uint8_t i_buffer[1024];
1639 uint8_t p_state[sizeof(s->block_state)];
1640 uint8_t i_state[sizeof(s->block_state)];
1641 RangeCoder pc, ic;
1642 uint8_t *pbbak= s->c.bytestream;
1643 uint8_t *pbbak_start= s->c.bytestream_start;
1644 int score, score2, iscore, i_len, p_len, block_s, sum;
1645 const int w= s->b_width << s->block_max_depth;
1646 const int h= s->b_height << s->block_max_depth;
1647 const int rem_depth= s->block_max_depth - level;
1648 const int index= (x + y*w) << rem_depth;
1649 const int block_w= 1<<(LOG2_MB_SIZE - level);
1650 static BlockNode null_block= { //FIXME add border maybe
1651 .color= {128,128,128},
1652 .mx= 0,
1653 .my= 0,
1654 .type= 0,
1655 .level= 0,
1656 };
1657 int trx= (x+1)<<rem_depth;
1658 int try= (y+1)<<rem_depth;
1659 BlockNode *left = x ? &s->block[index-1] : &null_block;
1660 BlockNode *top = y ? &s->block[index-w] : &null_block;
1661 BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
1662 BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
1663 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
1664 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
1665 int pl = left->color[0];
1666 int pcb= left->color[1];
1667 int pcr= left->color[2];
1668 int pmx= mid_pred(left->mx, top->mx, tr->mx);
1669 int pmy= mid_pred(left->my, top->my, tr->my);
1670 int mx=0, my=0;
1671 int l,cr,cb, i;
1672 const int stride= s->current_picture.linesize[0];
1673 const int uvstride= s->current_picture.linesize[1];
1674 const int instride= s->input_picture.linesize[0];
1675 const int uvinstride= s->input_picture.linesize[1];
1676 uint8_t *new_l = s->input_picture.data[0] + (x + y* instride)*block_w;
1677 uint8_t *new_cb= s->input_picture.data[1] + (x + y*uvinstride)*block_w/2;
1678 uint8_t *new_cr= s->input_picture.data[2] + (x + y*uvinstride)*block_w/2;
1679 uint8_t current_mb[3][stride*block_w];
1680 uint8_t *current_data[3]= {&current_mb[0][0], &current_mb[1][0], &current_mb[2][0]};
1681 int P[10][2];
1682 int16_t last_mv[3][2];
1683 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
1684 const int shift= 1+qpel;
1685 MotionEstContext *c= &s->m.me;
1686 int mx_context= av_log2(2*ABS(left->mx - top->mx));
1687 int my_context= av_log2(2*ABS(left->my - top->my));
1688 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1689
1690 assert(sizeof(s->block_state) >= 256);
1691 if(s->keyframe){
1692 set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, BLOCK_INTRA);
1693 return 0;
1694 }
1695
1696 //FIXME optimize
1697 for(i=0; i<block_w; i++)
1698 memcpy(&current_mb[0][0] + stride*i, new_l + instride*i, block_w);
1699 for(i=0; i<block_w>>1; i++)
1700 memcpy(&current_mb[1][0] + uvstride*i, new_cb + uvinstride*i, block_w>>1);
1701 for(i=0; i<block_w>>1; i++)
1702 memcpy(&current_mb[2][0] + uvstride*i, new_cr + uvinstride*i, block_w>>1);
1703
1704 // clip predictors / edge ?
1705
1706 P_LEFT[0]= left->mx;
1707 P_LEFT[1]= left->my;
1708 P_TOP [0]= top->mx;
1709 P_TOP [1]= top->my;
1710 P_TOPRIGHT[0]= tr->mx;
1711 P_TOPRIGHT[1]= tr->my;
1712
1713 last_mv[0][0]= s->block[index].mx;
1714 last_mv[0][1]= s->block[index].my;
1715 last_mv[1][0]= right->mx;
1716 last_mv[1][1]= right->my;
1717 last_mv[2][0]= bottom->mx;
1718 last_mv[2][1]= bottom->my;
1719
1720 s->m.mb_stride=2;
1721 s->m.mb_x=
1722 s->m.mb_y= 0;
1723 s->m.me.skip= 0;
1724
1725 init_ref(c, current_data, s->last_picture.data, NULL, block_w*x, block_w*y, 0);
1726
1727 assert(s->m.me. stride == stride);
1728 assert(s->m.me.uvstride == uvstride);
1729
1730 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
1731 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
1732 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
1733 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
1734
1735 c->xmin = - x*block_w - 16+2;
1736 c->ymin = - y*block_w - 16+2;
1737 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
1738 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
1739
1740 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
1741 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
1742 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
1743 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
1744 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
1745 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
1746 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
1747
1748 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
1749 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
1750
1751 if (!y) {
1752 c->pred_x= P_LEFT[0];
1753 c->pred_y= P_LEFT[1];
1754 } else {
1755 c->pred_x = P_MEDIAN[0];
1756 c->pred_y = P_MEDIAN[1];
1757 }
1758
1759 score= ff_epzs_motion_search(&s->m, &mx, &my, P, 0, /*ref_index*/ 0, last_mv,
1760 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
1761
1762 assert(mx >= c->xmin);
1763 assert(mx <= c->xmax);
1764 assert(my >= c->ymin);
1765 assert(my <= c->ymax);
1766
1767 score= s->m.me.sub_motion_search(&s->m, &mx, &my, score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
1768 score= ff_get_mb_score(&s->m, mx, my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
1769 //FIXME if mb_cmp != SSE then intra cant be compared currently and mb_penalty vs. lambda2
1770
1771 // subpel search
1772 pc= s->c;
1773 pc.bytestream_start=
1774 pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
1775 memcpy(p_state, s->block_state, sizeof(s->block_state));
1776
1777 if(level!=s->block_max_depth)
1778 put_rac(&pc, &p_state[4 + s_context], 1);
1779 put_rac(&pc, &p_state[1 + left->type + top->type], 0);
1780 put_symbol(&pc, &p_state[128 + 32*mx_context], mx - pmx, 1);
1781 put_symbol(&pc, &p_state[128 + 32*my_context], my - pmy, 1);
1782 p_len= pc.bytestream - pc.bytestream_start;
1783 score += (s->lambda2*(p_len*8
1784 + (pc.outstanding_count - s->c.outstanding_count)*8
1785 + (-av_log2(pc.range) + av_log2(s->c.range))
1786 ))>>FF_LAMBDA_SHIFT;
1787
1788 block_s= block_w*block_w;
1789 sum = pix_sum(&current_mb[0][0], stride, block_w);
1790 l= (sum + block_s/2)/block_s;
1791 iscore = pix_norm1(&current_mb[0][0], stride, block_w) - 2*l*sum + l*l*block_s;
1792
1793 block_s= block_w*block_w>>2;
1794 sum = pix_sum(&current_mb[1][0], uvstride, block_w>>1);
1795 cb= (sum + block_s/2)/block_s;
1796 // iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
1797 sum = pix_sum(&current_mb[2][0], uvstride, block_w>>1);
1798 cr= (sum + block_s/2)/block_s;
1799 // iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
1800
1801 ic= s->c;
1802 ic.bytestream_start=
1803 ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
1804 memcpy(i_state, s->block_state, sizeof(s->block_state));
1805 if(level!=s->block_max_depth)
1806 put_rac(&ic, &i_state[4 + s_context], 1);
1807 put_rac(&ic, &i_state[1 + left->type + top->type], 1);
1808 put_symbol(&ic, &i_state[32], l-pl , 1);
1809 put_symbol(&ic, &i_state[64], cb-pcb, 1);
1810 put_symbol(&ic, &i_state[96], cr-pcr, 1);
1811 i_len= ic.bytestream - ic.bytestream_start;
1812 iscore += (s->lambda2*(i_len*8
1813 + (ic.outstanding_count - s->c.outstanding_count)*8
1814 + (-av_log2(ic.range) + av_log2(s->c.range))
1815 ))>>FF_LAMBDA_SHIFT;
1816
1817 // assert(score==256*256*256*64-1);
1818 assert(iscore < 255*255*256 + s->lambda2*10);
1819 assert(iscore >= 0);
1820 assert(l>=0 && l<=255);
1821 assert(pl>=0 && pl<=255);
1822
1823 if(level==0){
1824 int varc= iscore >> 8;
1825 int vard= score >> 8;
1826 if (vard <= 64 || vard < varc)
1827 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
1828 else
1829 c->scene_change_score+= s->m.qscale;
1830 }
1831
1832 if(level!=s->block_max_depth){
1833 put_rac(&s->c, &s->block_state[4 + s_context], 0);
1834 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
1835 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
1836 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
1837 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
1838 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
1839
1840 if(score2 < score && score2 < iscore)
1841 return score2;
1842 }
1843
1844 if(iscore < score){
1845 memcpy(pbbak, i_buffer, i_len);
1846 s->c= ic;
1847 s->c.bytestream_start= pbbak_start;
1848 s->c.bytestream= pbbak + i_len;
1849 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, BLOCK_INTRA);
1850 memcpy(s->block_state, i_state, sizeof(s->block_state));
1851 return iscore;
1852 }else{
1853 memcpy(pbbak, p_buffer, p_len);
1854 s->c= pc;
1855 s->c.bytestream_start= pbbak_start;
1856 s->c.bytestream= pbbak + p_len;
1857 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, 0);
1858 memcpy(s->block_state, p_state, sizeof(s->block_state));
1859 return score;
1860 }
1861 }
1862
1863 static void decode_q_branch(SnowContext *s, int level, int x, int y){
1864 const int w= s->b_width << s->block_max_depth;
1865 const int rem_depth= s->block_max_depth - level;
1866 const int index= (x + y*w) << rem_depth;
1867 static BlockNode null_block= { //FIXME add border maybe
1868 .color= {128,128,128},
1869 .mx= 0,
1870 .my= 0,
1871 .type= 0,
1872 .level= 0,
1873 };
1874 int trx= (x+1)<<rem_depth;
1875 BlockNode *left = x ? &s->block[index-1] : &null_block;
1876 BlockNode *top = y ? &s->block[index-w] : &null_block;
1877 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
1878 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
1879 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1880
1881 if(s->keyframe){
1882 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, BLOCK_INTRA);
1883 return;
1884 }
1885
1886 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
1887 int type;
1888 int l = left->color[0];
1889 int cb= left->color[1];
1890 int cr= left->color[2];
1891 int mx= mid_pred(left->mx, top->mx, tr->mx);
1892 int my= mid_pred(left->my, top->my, tr->my);
1893 int mx_context= av_log2(2*ABS(left->mx - top->mx)) + 0*av_log2(2*ABS(tr->mx - top->mx));
1894 int my_context= av_log2(2*ABS(left->my - top->my)) + 0*av_log2(2*ABS(tr->my - top->my));
1895
1896 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
1897
1898 if(type){
1899 l += get_symbol(&s->c, &s->block_state[32], 1);
1900 cb+= get_symbol(&s->c, &s->block_state[64], 1);
1901 cr+= get_symbol(&s->c, &s->block_state[96], 1);
1902 }else{
1903 mx+= get_symbol(&s->c, &s->block_state[128 + 32*mx_context], 1);
1904 my+= get_symbol(&s->c, &s->block_state[128 + 32*my_context], 1);
1905 }
1906 set_blocks(s, level, x, y, l, cb, cr, mx, my, type);
1907 }else{
1908 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
1909 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
1910 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
1911 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
1912 }
1913 }
1914
1915 static void encode_blocks(SnowContext *s){
1916 int x, y;
1917 int w= s->b_width;
1918 int h= s->b_height;
1919
1920 for(y=0; y<h; y++){
1921 for(x=0; x<w; x++){
1922 encode_q_branch(s, 0, x, y);
1923 }
1924 }
1925 }
1926
1927 static void decode_blocks(SnowContext *s){
1928 int x, y;
1929 int w= s->b_width;
1930 int h= s->b_height;
1931
1932 for(y=0; y<h; y++){
1933 for(x=0; x<w; x++){
1934 decode_q_branch(s, 0, x, y);
1935 }
1936 }
1937 }
1938
1939 static void mc_block(uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
1940 int x, y;
1941 START_TIMER
1942 for(y=0; y < b_h+5; y++){
1943 for(x=0; x < b_w; x++){
1944 int a0= src[x ];
1945 int a1= src[x + 1];
1946 int a2= src[x + 2];
1947 int a3= src[x + 3];
1948 int a4= src[x + 4];
1949 int a5= src[x + 5];
1950 // int am= 9*(a1+a2) - (a0+a3);
1951 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
1952 // int am= 18*(a2+a3) - 2*(a1+a4);
1953 // int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
1954 // int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;
1955
1956 // if(b_w==16) am= 8*(a1+a2);
1957
1958 if(dx<8) tmp[x]= (32*a2*( 8-dx) + am* dx + 128)>>8;
1959 else tmp[x]= ( am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
1960
1961 /* if (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) + aL* dx + 32)>>6;
1962 else if(dx< 8) tmp[x + y*stride]= ( aL*( 8-dx) + am*(dx- 4) + 32)>>6;
1963 else if(dx<12) tmp[x + y*stride]= ( am*(12-dx) + aR*(dx- 8) + 32)>>6;
1964 else tmp[x + y*stride]= ( aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/
1965 }
1966 tmp += stride;
1967 src += stride;
1968 }
1969 tmp -= (b_h+5)*stride;
1970
1971 for(y=0; y < b_h; y++){
1972 for(x=0; x < b_w; x++){
1973 int a0= tmp[x + 0*stride];
1974 int a1= tmp[x + 1*stride];
1975 int a2= tmp[x + 2*stride];
1976 int a3= tmp[x + 3*stride];
1977 int a4= tmp[x + 4*stride];
1978 int a5= tmp[x + 5*stride];
1979 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
1980 // int am= 18*(a2+a3) - 2*(a1+a4);
1981 /* int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
1982 int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/
1983
1984 // if(b_w==16) am= 8*(a1+a2);
1985
1986 if(dy<8) dst[x]= (32*a2*( 8-dy) + am* dy + 128)>>8;
1987 else dst[x]= ( am*(16-dy) + 32*a3*(dy-8) + 128)>>8;
1988
1989 /* if (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) + aL* dy + 32)>>6;
1990 else if(dy< 8) tmp[x + y*stride]= ( aL*( 8-dy) + am*(dy- 4) + 32)>>6;
1991 else if(dy<12) tmp[x + y*stride]= ( am*(12-dy) + aR*(dy- 8) + 32)>>6;
1992 else tmp[x + y*stride]= ( aR*(16-dy) + 16*a2*(dy-12) + 32)>>6;*/
1993 }
1994 dst += stride;
1995 tmp += stride;
1996 }
1997 STOP_TIMER("mc_block")
1998 }
1999
2000 #define mca(dx,dy,b_w)\
2001 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, uint8_t *src, int stride, int h){\
2002 uint8_t tmp[stride*(b_w+5)];\
2003 assert(h==b_w);\
2004 mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\
2005 }
2006
2007 mca( 0, 0,16)
2008 mca( 8, 0,16)
2009 mca( 0, 8,16)
2010 mca( 8, 8,16)
2011 mca( 0, 0,8)
2012 mca( 8, 0,8)
2013 mca( 0, 8,8)
2014 mca( 8, 8,8)
2015
2016 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
2017 if(block->type){
2018 int x, y;
2019 const int color= block->color[plane_index];
2020 for(y=0; y < b_h; y++){
2021 for(x=0; x < b_w; x++){
2022 dst[x + y*stride]= color;
2023 }
2024 }
2025 }else{
2026 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
2027 int mx= block->mx*scale;
2028 int my= block->my*scale;
2029 const int dx= mx&15;
2030 const int dy= my&15;
2031 sx += (mx>>4) - 2;
2032 sy += (my>>4) - 2;
2033 src += sx + sy*stride;
2034 if( (unsigned)sx >= w - b_w - 4
2035 || (unsigned)sy >= h - b_h - 4){
2036 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+5, b_h+5, sx, sy, w, h);
2037 src= tmp + MB_SIZE;
2038 }
2039 if((dx&3) || (dy&3) || b_w!=b_h || (b_w!=4 && b_w!=8 && b_w!=16))
2040 mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy);
2041 else
2042 s->dsp.put_h264_qpel_pixels_tab[2-(b_w>>3)][dy+(dx>>2)](dst,src + 2 + 2*stride,stride);
2043 }
2044 }
2045
2046 static always_inline int same_block(BlockNode *a, BlockNode *b){
2047 return !((a->mx - b->mx) | (a->my - b->my) | a->type | b->type);
2048 }
2049
2050 //FIXME name clenup (b_w, block_w, b_width stuff)
2051 static always_inline void add_yblock(SnowContext *s, DWTELEM *dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){
2052 const int b_width = s->b_width << s->block_max_depth;
2053 const int b_height= s->b_height << s->block_max_depth;
2054 const int b_stride= b_width;
2055 BlockNode *lt= &s->block[b_x + b_y*b_stride];
2056 BlockNode *rt= lt+1;
2057 BlockNode *lb= lt+b_stride;
2058 BlockNode *rb= lb+1;
2059 uint8_t *block[4];
2060 uint8_t tmp[src_stride*(b_h+5)]; //FIXME align
2061 int x,y;
2062
2063 if(b_x<0){
2064 lt= rt;
2065 lb= rb;
2066 }else if(b_x + 1 >= b_width){
2067 rt= lt;
2068 rb= lb;
2069 }
2070 if(b_y<0){
2071 lt= lb;
2072 rt= rb;
2073 }else if(b_y + 1 >= b_height){
2074 lb= lt;
2075 rb= rt;
2076 }
2077
2078 if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
2079 obmc -= src_x;
2080 b_w += src_x;
2081 src_x=0;
2082 }else if(src_x + b_w > w){
2083 b_w = w - src_x;
2084 }
2085 if(src_y<0){
2086 obmc -= src_y*obmc_stride;
2087 b_h += src_y;
2088 src_y=0;
2089 }else if(src_y + b_h> h){
2090 b_h = h - src_y;
2091 }
2092
2093 if(b_w<=0 || b_h<=0) return;
2094
2095 assert(src_stride > 7*MB_SIZE);
2096 dst += src_x + src_y*dst_stride;
2097 dst8+= src_x + src_y*src_stride;
2098 // src += src_x + src_y*src_stride;
2099
2100 block[0]= tmp+3*MB_SIZE;
2101 pred_block(s, block[0], src, tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2102
2103 if(same_block(lt, rt)){
2104 block[1]= block[0];
2105 }else{
2106 block[1]= tmp + 4*MB_SIZE;
2107 pred_block(s, block[1], src, tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2108 }
2109
2110 if(same_block(lt, lb)){
2111 block[2]= block[0];
2112 }else if(same_block(rt, lb)){
2113 block[2]= block[1];
2114 }else{
2115 block[2]= tmp+5*MB_SIZE;
2116 pred_block(s, block[2], src, tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2117 }
2118
2119 if(same_block(lt, rb) ){
2120 block[3]= block[0];
2121 }else if(same_block(rt, rb)){
2122 block[3]= block[1];
2123 }else if(same_block(lb, rb)){
2124 block[3]= block[2];
2125 }else{
2126 block[3]= tmp+6*MB_SIZE;
2127 pred_block(s, block[3], src, tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2128 }
2129 #if 0
2130 for(y=0; y<b_h; y++){
2131 for(x=0; x<b_w; x++){
2132 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2133 if(add) dst[x + y*dst_stride] += v;
2134 else dst[x + y*dst_stride] -= v;
2135 }
2136 }
2137 for(y=0; y<b_h; y++){
2138 uint8_t *obmc2= obmc + (obmc_stride>>1);
2139 for(x=0; x<b_w; x++){
2140 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2141 if(add) dst[x + y*dst_stride] += v;
2142 else dst[x + y*dst_stride] -= v;
2143 }
2144 }
2145 for(y=0; y<b_h; y++){
2146 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2147 for(x=0; x<b_w; x++){
2148 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2149 if(add) dst[x + y*dst_stride] += v;
2150 else dst[x + y*dst_stride] -= v;
2151 }
2152 }
2153 for(y=0; y<b_h; y++){
2154 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2155 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2156 for(x=0; x<b_w; x++){
2157 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2158 if(add) dst[x + y*dst_stride] += v;
2159 else dst[x + y*dst_stride] -= v;
2160 }
2161 }
2162 #else
2163 for(y=0; y<b_h; y++){
2164 //FIXME ugly missue of obmc_stride
2165 uint8_t *obmc1= obmc + y*obmc_stride;
2166 uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2167 uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2168 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2169 for(x=0; x<b_w; x++){
2170 int v= obmc1[x] * block[3][x + y*src_stride]
2171 +obmc2[x] * block[2][x + y*src_stride]
2172 +obmc3[x] * block[1][x + y*src_stride]
2173 +obmc4[x] * block[0][x + y*src_stride];
2174
2175 v <<= 8 - LOG2_OBMC_MAX;
2176 if(FRAC_BITS != 8){
2177 v += 1<<(7 - FRAC_BITS);
2178 v >>= 8 - FRAC_BITS;
2179 }
2180 if(add){
2181 v += dst[x + y*dst_stride];
2182 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2183 if(v&(~255)) v= ~(v>>31);
2184 dst8[x + y*src_stride] = v;
2185 }else{
2186 dst[x + y*dst_stride] -= v;
2187 }
2188 }
2189 }
2190 #endif
2191 }
2192
2193 static always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){
2194 Plane *p= &s->plane[plane_index];
2195 const int mb_w= s->b_width << s->block_max_depth;
2196 const int mb_h= s->b_height << s->block_max_depth;
2197 int x, y, mb_x, mb_y;
2198 int block_size = MB_SIZE >> s->block_max_depth;
2199 int block_w = plane_index ? block_size/2 : block_size;
2200 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2201 int obmc_stride= plane_index ? block_size : 2*block_size;
2202 int ref_stride= s->current_picture.linesize[plane_index];
2203 uint8_t *ref = s->last_picture.data[plane_index];
2204 uint8_t *dst8= s->current_picture.data[plane_index];
2205 int w= p->width;
2206 int h= p->height;
2207 START_TIMER
2208
2209 if(s->keyframe || (s->avctx->debug&512)){
2210 if(add){
2211 for(y=0; y<h; y++){
2212 for(x=0; x<w; x++){
2213 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2214 v >>= FRAC_BITS;
2215 if(v&(~255)) v= ~(v>>31);
2216 dst8[x + y*ref_stride]= v;
2217 }
2218 }
2219 }else{
2220 for(y=0; y<h; y++){
2221 for(x=0; x<w; x++){
2222 buf[x + y*w]-= 128<<FRAC_BITS;
2223 }
2224 }
2225 }
2226
2227 return;
2228 }
2229
2230 for(mb_y=0; mb_y<=mb_h; mb_y++){
2231 for(mb_x=0; mb_x<=mb_w; mb_x++){
2232 START_TIMER
2233
2234 add_yblock(s, buf, dst8, ref, obmc,
2235 block_w*mb_x - block_w/2,
2236 block_w*mb_y - block_w/2,
2237 block_w, block_w,
2238 w, h,
2239 w, ref_stride, obmc_stride,
2240 mb_x - 1, mb_y - 1,
2241 add, plane_index);
2242
2243 STOP_TIMER("add_yblock")
2244 }
2245 }
2246
2247 STOP_TIMER("predict_plane")
2248 }
2249
2250 static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int bias){
2251 const int level= b->level;
2252 const int w= b->width;
2253 const int h= b->height;
2254 const int qlog= clip(s->qlog + b->qlog, 0, 128);
2255 const int qmul= qexp[qlog&7]<<(qlog>>3);
2256 int x,y, thres1, thres2;
2257 START_TIMER
2258
2259 assert(QROOT==8);
2260
2261 if(s->qlog == LOSSLESS_QLOG) return;
2262
2263 bias= bias ? 0 : (3*qmul)>>3;
2264 thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
2265 thres2= 2*thres1;
2266
2267 if(!bias){
2268 for(y=0; y<h; y++){
2269 for(x=0; x<w; x++){
2270 int i= src[x + y*stride];
2271
2272 if((unsigned)(i+thres1) > thres2){
2273 if(i>=0){
2274 i<<= QEXPSHIFT;
2275 i/= qmul; //FIXME optimize
2276 src[x + y*stride]= i;
2277 }else{
2278 i= -i;
2279 i<<= QEXPSHIFT;
2280 i/= qmul; //FIXME optimize
2281 src[x + y*stride]= -i;
2282 }
2283 }else
2284 src[x + y*stride]= 0;
2285 }
2286 }
2287 }else{
2288 for(y=0; y<h; y++){
2289 for(x=0; x<w; x++){
2290 int i= src[x + y*stride];
2291
2292 if((unsigned)(i+thres1) > thres2){
2293 if(i>=0){
2294 i<<= QEXPSHIFT;
2295 i= (i + bias) / qmul; //FIXME optimize
2296 src[x + y*stride]= i;
2297 }else{
2298 i= -i;
2299 i<<= QEXPSHIFT;
2300 i= (i + bias) / qmul; //FIXME optimize
2301 src[x + y*stride]= -i;
2302 }
2303 }else
2304 src[x + y*stride]= 0;
2305 }
2306 }
2307 }
2308 if(level+1 == s->spatial_decomposition_count){
2309 // STOP_TIMER("quantize")
2310 }
2311 }
2312
2313 static void dequantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride){
2314 const int w= b->width;
2315 const int h= b->height;
2316 const int qlog= clip(s->qlog + b->qlog, 0, 128);
2317 const int qmul= qexp[qlog&7]<<(qlog>>3);
2318 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
2319 int x,y;
2320 START_TIMER
2321
2322 if(s->qlog == LOSSLESS_QLOG) return;
2323
2324 assert(QROOT==8);
2325
2326 for(y=0; y<h; y++){
2327 for(x=0; x<w; x++){
2328 int i= src[x + y*stride];
2329 if(i<0){
2330 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
2331 }else if(i>0){
2332 src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT));
2333 }
2334 }
2335 }
2336 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
2337 STOP_TIMER("dquant")
2338 }
2339 }
2340
2341 static void decorrelate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
2342 const int w= b->width;
2343 const int h= b->height;
2344 int x,y;
2345
2346 for(y=h-1; y>=0; y--){
2347 for(x=w-1; x>=0; x--){
2348 int i= x + y*stride;
2349
2350 if(x){
2351 if(use_median){
2352 if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
2353 else src[i] -= src[i - 1];
2354 }else{
2355 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
2356 else src[i] -= src[i - 1];
2357 }
2358 }else{
2359 if(y) src[i] -= src[i - stride];
2360 }
2361 }
2362 }
2363 }
2364
2365 static void correlate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
2366 const int w= b->width;