b35592055330ace13ba541a507be33baf72315bf
[libav.git] / libavcodec / snow.c
1 /*
2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18
19 #include "avcodec.h"
20 #include "common.h"
21 #include "dsputil.h"
22
23 #include "rangecoder.h"
24 #define MID_STATE 128
25
26 #include "mpegvideo.h"
27
28 #undef NDEBUG
29 #include <assert.h>
30
/* Compile-time limits and fixed-point parameters used throughout this file. */
#define MAX_DECOMPOSITIONS 8   ///< first dimension of Plane.band
#define MAX_PLANES 4           ///< number of entries in SnowContext.plane
#define DWTELEM int            ///< element type of the wavelet coefficient buffers
#define QSHIFT 3
#define QROOT (1<<QSHIFT)      ///< 8; also the number of entries in qexp[]
#define LOSSLESS_QLOG (-128)   ///< parenthesized so the expansion stays one operand in any expression
#define FRAC_BITS 8
38
/**
 * 256-entry lookup producing one of three levels (-1, 0, +1).
 * Indices 0 and 1 map to 0, 2..127 to +1, 128..254 to -1 and 255 to 0.
 * NOTE(review): the split at index 128 suggests the index is a signed 8-bit
 * value taken modulo 256 -- confirm against the use sites (not in this chunk).
 */
39 static const int8_t quant3[256]={
40 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
43 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
44 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
45 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
46 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
47 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
48 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
49 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
50 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
51 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
52 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
53 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
54 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
55 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
56 };
/**
 * Variant of quant3 with a narrower zero bin: only index 0 maps to 0,
 * indices 1..127 map to +1 and 128..255 map to -1.
 */
57 static const int8_t quant3b[256]={
58 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
60 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
61 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
62 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
63 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
64 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
65 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
66 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
68 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
69 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
70 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
71 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
72 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
73 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
74 };
/**
 * Three-level table keyed on index parity: indices 0 and 1 map to 0, every
 * other even index maps to +1 and every other odd index maps to -1.
 * NOTE(review): this looks like a lookup on a value whose sign is stored in
 * the low bit -- confirm against the use sites (not in this chunk).
 */
75 static const int8_t quant3bA[256]={
76 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
77 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
78 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
79 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
80 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
81 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
82 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
83 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
84 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
85 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
86 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
87 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
88 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
89 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
90 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
91 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
92 };
/**
 * 256-entry lookup producing one of five levels (-2..+2).
 * Indices 0..127 hold the non-negative levels, indices 128..255 the negative
 * ones, with the smallest magnitudes next to index 0 and index 255.
 */
93 static const int8_t quant5[256]={
94 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
95 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
96 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
97 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
98 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
99 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
100 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
101 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
102 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
103 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
104 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
105 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
106 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
107 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
108 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
109 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
110 };
/**
 * 256-entry lookup producing one of seven levels (-3..+3).
 * Indices 0..127 hold the non-negative levels, indices 128..255 the negative
 * ones, with the smallest magnitudes next to index 0 and index 255.
 */
111 static const int8_t quant7[256]={
112 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
113 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
114 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
115 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
116 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
117 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
118 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
119 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
120 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
121 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
122 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
123 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
124 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
125 -3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
126 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
127 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
128 };
/**
 * 256-entry lookup producing one of nine levels (-4..+4).
 * Indices 0..127 hold the non-negative levels, indices 128..255 the negative
 * ones, with the smallest magnitudes next to index 0 and index 255.
 */
129 static const int8_t quant9[256]={
130 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
131 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
132 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
133 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
134 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
135 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
136 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
137 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
138 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
139 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
140 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
141 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
142 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
143 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
144 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
145 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
146 };
/**
 * 256-entry lookup producing one of eleven levels (-5..+5).
 * Indices 0..127 hold the non-negative levels, indices 128..255 the negative
 * ones, with the smallest magnitudes next to index 0 and index 255.
 */
147 static const int8_t quant11[256]={
148 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
149 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
150 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
151 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
152 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
153 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
154 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
155 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
156 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
157 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
158 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
159 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
160 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
161 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
162 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
163 -4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
164 };
/**
 * 256-entry lookup producing one of thirteen levels (-6..+6).
 * Indices 0..127 hold the non-negative levels, indices 128..255 the negative
 * ones, with the smallest magnitudes next to index 0 and index 255.
 */
165 static const int8_t quant13[256]={
166 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
167 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
168 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
169 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
170 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
171 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
172 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
173 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
174 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
175 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
176 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
177 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
178 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
179 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
180 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
181 -4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
182 };
183
/* Scale of the obmc* weight tables below: OBMC_MAX is 1<<6 = 64, the value the
 * table comments refer to as "64*..." / "*64". */
184 #define LOG2_OBMC_MAX 6
185 #define OBMC_MAX (1<<(LOG2_OBMC_MAX))
186 #if 0 //64*cubic
/**
 * 32x32 weight window, "64*cubic" variant. This definition sits in the
 * "#if 0" branch, so it is not compiled.
 * Spot check: the corner weight plus the three weights half a window away
 * (0 + 0 + 0 + 64) sum to 64, i.e. OBMC_MAX.
 */
187 static const uint8_t obmc32[1024]={
188 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
189 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
190 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
191 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
192 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
193 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
194 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
195 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
196 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
197 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
198 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
199 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
200 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
201 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
202 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
203 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
204 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
205 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
206 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
207 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
208 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
209 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
210 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
211 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
212 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
213 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
214 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
215 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
216 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
217 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
218 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
219 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
220 //error:0.000022
221 };
/**
 * 16x16 weight window, "64*cubic" variant. This definition sits in the
 * "#if 0" branch, so it is not compiled.
 * Spot check: the corner weight plus the three weights half a window away
 * (0 + 1 + 1 + 62) sum to 64, i.e. OBMC_MAX.
 */
222 static const uint8_t obmc16[256]={
223 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
224 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
225 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
226 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
227 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
228 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
229 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
230 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
231 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
232 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
233 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
234 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
235 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
236 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
237 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
238 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
239 //error:0.000033
240 };
241 #elif 1 // 64*linear
/**
 * 32x32 weight window, "64*linear" variant. This is the definition selected
 * by the "#elif 1" branch, so it is the one that gets compiled.
 * Spot check: the corner weight plus the three weights half a window away
 * (0 + 2 + 2 + 60) sum to 64, i.e. OBMC_MAX.
 */
242 static const uint8_t obmc32[1024]={
243 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
244 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
245 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
246 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
247 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
248 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
249 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
250 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
251 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
252 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
253 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
254 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
255 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
256 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
257 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
258 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
259 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
260 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
261 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
262 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
263 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
264 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
265 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
266 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
267 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
268 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
269 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
270 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
271 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
272 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
273 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
274 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
275 //error:0.000020
276 };
/**
 * 16x16 weight window, "64*linear" variant. This is the definition selected
 * by the "#elif 1" branch, so it is the one that gets compiled.
 * Spot check: the corner weight plus the three weights half a window away
 * (0 + 4 + 4 + 56) sum to 64, i.e. OBMC_MAX.
 */
277 static const uint8_t obmc16[256]={
278 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
279 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
280 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
281 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
282 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
283 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
284 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
285 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
286 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
287 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
288 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
289 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
290 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
291 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
292 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
293 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
294 //error:0.000015
295 };
296 #else //64*cos
/**
 * 32x32 weight window, "64*cos" variant. This definition sits in the "#else"
 * branch after an "#elif 1", so it is not compiled.
 * Spot check: the corner weight plus the three weights half a window away
 * (0 + 0 + 0 + 64) sum to 64, i.e. OBMC_MAX.
 */
297 static const uint8_t obmc32[1024]={
298 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
299 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
300 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
301 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
302 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
303 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
304 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
305 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
306 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
307 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
308 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
309 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
310 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
311 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
312 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
313 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
314 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
315 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
316 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
317 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
318 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
319 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
320 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
321 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
322 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
323 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
324 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
325 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
326 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
327 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
328 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
329 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
330 //error:0.000022
331 };
/**
 * 16x16 weight window, "64*cos" variant. This definition sits in the "#else"
 * branch after an "#elif 1", so it is not compiled.
 * Spot check: the corner weight plus the three weights half a window away
 * (0 + 1 + 0 + 63) sum to 64, i.e. OBMC_MAX.
 */
332 static const uint8_t obmc16[256]={
333 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
334 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
335 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
336 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
337 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
338 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
339 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
340 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
341 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
342 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
343 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
344 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
345 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
346 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
347 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
348 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
349 //error:0.000022
350 };
351 #endif
352
353 //linear *64
/* 8x8 weight window. Each entry is the product of the 1-D ramp 1,3,5,7,7,5,3,1
 * for its row and column; a weight plus the three weights half a window away
 * (e.g. 1 + 7 + 7 + 49) sums to 64, i.e. OBMC_MAX. */
354 static const uint8_t obmc8[64]={
355 1, 3, 5, 7, 7, 5, 3, 1,
356 3, 9,15,21,21,15, 9, 3,
357 5,15,25,35,35,25,15, 5,
358 7,21,35,49,49,35,21, 7,
359 7,21,35,49,49,35,21, 7,
360 5,15,25,35,35,25,15, 5,
361 3, 9,15,21,21,15, 9, 3,
362 1, 3, 5, 7, 7, 5, 3, 1,
363 //error:0.000000
364 };
365
366 //linear *64
/* 4x4 weight window. A weight plus the three weights half a window away
 * (4 + 12 + 12 + 36) sums to 64, i.e. OBMC_MAX. */
367 static const uint8_t obmc4[16]={
368 4,12,12, 4,
369 12,36,36,12,
370 12,36,36,12,
371 4,12,12, 4,
372 //error:0.000000
373 };
374
/* Weight windows ordered from the largest (32x32) to the smallest (4x4), so
 * each step in the index halves the window side.
 * NOTE(review): how callers derive the index is not visible in this chunk. */
375 static const uint8_t *obmc_tab[4]={
376 obmc32, obmc16, obmc8, obmc4
377 };
378
/**
 * Per-block record; SnowContext.block points at objects of this type.
 * NOTE(review): the code that fills these fields is not in this part of the
 * file, so the field notes below are inferred from names only.
 */
379 typedef struct BlockNode{
380 int16_t mx; ///< presumably the horizontal motion vector component -- TODO confirm
381 int16_t my; ///< presumably the vertical motion vector component -- TODO confirm
382 uint8_t color[3]; ///< three 8-bit values, presumably one per color plane -- TODO confirm
383 uint8_t type; ///< flag byte; BLOCK_INTRA is the only flag currently enabled below
384 //#define TYPE_SPLIT 1
385 #define BLOCK_INTRA 1
386 //#define TYPE_NOCOLOR 4
387 uint8_t level; //FIXME merge into type?
388 }BlockNode;
389
/* MB_SIZE evaluates to 1<<4 = 16. */
390 #define LOG2_MB_SIZE 4
391 #define MB_SIZE (1<<LOG2_MB_SIZE)
392
/**
 * Pair of a 16-bit signed position and a 16-bit unsigned coefficient;
 * SubBand.x_coeff points at objects of this type.
 * NOTE(review): presumably a sparse per-line list of non-zero coefficients --
 * confirm against the code that fills it (not in this chunk).
 */
393 typedef struct x_and_coeff{
394 int16_t x;
395 uint16_t coeff;
396 } x_and_coeff;
397
/**
 * Description of one sub-band; Plane.band holds a [MAX_DECOMPOSITIONS][4]
 * array of these.
 * NOTE(review): only the layout is visible here; the semantics of the
 * uncommented fields are inferred from their names.
 */
398 typedef struct SubBand{
399 int level;
400 int stride;
401 int width;
402 int height;
403 int qlog; ///< log(qscale)/log[2^(1/6)]
404 DWTELEM *buf;
405 int buf_x_offset;
406 int buf_y_offset;
407 int stride_line; ///< Stride measured in lines, not pixels.
408 x_and_coeff * x_coeff;
409 struct SubBand *parent; ///< link to another SubBand; presumably the same orientation at the next coarser level -- TODO confirm
410 uint8_t state[/*7*2*/ 7 + 512][32]; ///< 519 rows of 32 bytes; presumably range coder context states -- TODO confirm
411 }SubBand;
412
/**
 * One image plane: its dimensions and up to MAX_DECOMPOSITIONS levels of
 * four sub-bands each. SnowContext.plane holds MAX_PLANES of these.
 */
413 typedef struct Plane{
414 int width;
415 int height;
416 SubBand band[MAX_DECOMPOSITIONS][4];
417 }Plane;
417 }Plane;
418
419 /** Used to minimize the amount of memory used in order to optimize cache performance. **/
/* A fixed pool of line buffers is allocated once by slice_buffer_init();
 * slice_buffer_load_line() pops a buffer from the pool and attaches it to a
 * line, slice_buffer_release() detaches it and pushes it back. */
420 typedef struct {
421 DWTELEM * * line; ///< For use by idwt and predict_slices.
422 DWTELEM * * data_stack; ///< Used for internal purposes.
423 int data_stack_top; ///< index of the topmost free buffer in data_stack; -1 when the pool is empty
424 int line_count; ///< number of entries in line[]
425 int line_width; ///< number of DWTELEM elements in each line buffer
426 int data_count; ///< total number of line buffers owned by the pool
427 DWTELEM * base_buffer; ///< Buffer that this structure is caching.
428 } slice_buffer;
429
/**
 * Top-level codec state.
 * NOTE(review): almost none of these fields are used in the visible part of
 * the file, so only relationships that the declarations themselves establish
 * are annotated.
 */
430 typedef struct SnowContext{
431 // MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
432
433 AVCodecContext *avctx;
434 RangeCoder c;
435 DSPContext dsp;
436 AVFrame input_picture;
437 AVFrame current_picture;
438 AVFrame last_picture;
439 AVFrame mconly_picture;
440 // uint8_t q_context[16];
441 uint8_t header_state[32];
442 uint8_t block_state[128 + 32*128];
443 int keyframe;
444 int always_reset;
445 int version;
446 int spatial_decomposition_type;
447 int temporal_decomposition_type;
448 int spatial_decomposition_count;
449 int temporal_decomposition_count;
450 DWTELEM *spatial_dwt_buffer;
451 int colorspace_type;
452 int chroma_h_shift;
453 int chroma_v_shift;
454 int spatial_scalability;
455 int qlog;
456 int lambda;
457 int lambda2;
458 int mv_scale;
459 int qbias;
460 #define QBIAS_SHIFT 3
461 int b_width;
462 int b_height;
463 int block_max_depth;
464 Plane plane[MAX_PLANES]; ///< one Plane per image plane, MAX_PLANES slots
465 BlockNode *block; ///< points at BlockNode records; allocation is not visible in this chunk
466 slice_buffer sb; ///< line-buffer pool, see the slice_buffer_* functions
467
468 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
469 }SnowContext;
470
/**
 * Four DWTELEM pointers plus an integer y.
 * NOTE(review): presumably the rolling line window and the current row of an
 * incremental inverse wavelet transform -- the code using it is not in this
 * chunk, confirm there.
 */
471 typedef struct {
472 DWTELEM *b0;
473 DWTELEM *b1;
474 DWTELEM *b2;
475 DWTELEM *b3;
476 int y;
477 } dwt_compose_t;
478
/**
 * Yield the buffer attached to line line_num of slice_buf: the cached pointer
 * when one is already attached, otherwise the result of
 * slice_buffer_load_line(). Both arguments are expanded more than once, so
 * they must not have side effects.
 */
479 #define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
480 //#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
481
482 static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, DWTELEM * base_buffer)
483 {
484 int i;
485
486 buf->base_buffer = base_buffer;
487 buf->line_count = line_count;
488 buf->line_width = line_width;
489 buf->data_count = max_allocated_lines;
490 buf->line = (DWTELEM * *) av_mallocz (sizeof(DWTELEM *) * line_count);
491 buf->data_stack = (DWTELEM * *) av_malloc (sizeof(DWTELEM *) * max_allocated_lines);
492
493 for (i = 0; i < max_allocated_lines; i++)
494 {
495 buf->data_stack[i] = (DWTELEM *) av_malloc (sizeof(DWTELEM) * line_width);
496 }
497
498 buf->data_stack_top = max_allocated_lines - 1;
499 }
500
501 static DWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
502 {
503 int i;
504 int offset;
505 DWTELEM * buffer;
506
507 // av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line);
508
509 assert(buf->data_stack_top >= 0);
510 // assert(!buf->line[line]);
511 if (buf->line[line])
512 return buf->line[line];
513
514 offset = buf->line_width * line;
515 buffer = buf->data_stack[buf->data_stack_top];
516 buf->data_stack_top--;
517 buf->line[line] = buffer;
518
519 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
520
521 return buffer;
522 }
523
524 static void slice_buffer_release(slice_buffer * buf, int line)
525 {
526 int i;
527 int offset;
528 DWTELEM * buffer;
529
530 assert(line >= 0 && line < buf->line_count);
531 assert(buf->line[line]);
532
533 offset = buf->line_width * line;
534 buffer = buf->line[line];
535 buf->data_stack_top++;
536 buf->data_stack[buf->data_stack_top] = buffer;
537 buf->line[line] = NULL;
538
539 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_release: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
540 }
541
542 static void slice_buffer_flush(slice_buffer * buf)
543 {
544 int i;
545 for (i = 0; i < buf->line_count; i++)
546 {
547 if (buf->line[i])
548 {
549 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_flush: line: %d \n", i);
550 slice_buffer_release(buf, i);
551 }
552 }
553 }
554
555 static void slice_buffer_destroy(slice_buffer * buf)
556 {
557 int i;
558 slice_buffer_flush(buf);
559
560 for (i = buf->data_count - 1; i >= 0; i--)
561 {
562 assert(buf->data_stack[i]);
563 av_free(buf->data_stack[i]);
564 }
565 assert(buf->data_stack);
566 av_free(buf->data_stack);
567 assert(buf->line);
568 av_free(buf->line);
569 }
570
571 #ifdef __sgi
572 // Avoid a name clash on SGI IRIX
573 #undef qexp
574 #endif
/* With FRAC_BITS == 8, QEXPSHIFT evaluates to 7-8+8 = 7. */
575 #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
/* QROOT (8) entries; the code that fills the table is not in this chunk. */
576 static uint8_t qexp[QROOT];
577
/**
 * Reflect an index into the range [0, m] by mirroring once at either end.
 *
 * @param v index to fold
 * @param m highest valid index
 * @return -v when v is negative, 2*m-v when v exceeds m, v otherwise.
 *         Only one reflection is applied, so v below -m or above 2*m still
 *         yields a value outside [0, m].
 */
static inline int mirror(int v, int m){
    if (v < 0)
        return -v;
    return v > m ? 2*m - v : v;
}
583
584 static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
585 int i;
586
587 if(v){
588 const int a= ABS(v);
589 const int e= av_log2(a);
590 #if 1
591 const int el= FFMIN(e, 10);
592 put_rac(c, state+0, 0);
593
594 for(i=0; i<el; i++){
595 put_rac(c, state+1+i, 1); //1..10
596 }
597 for(; i<e; i++){
598 put_rac(c, state+1+9, 1); //1..10
599 }
600 put_rac(c, state+1+FFMIN(i,9), 0);
601
602 for(i=e-1; i>=el; i--){
603 put_rac(c, state+22+9, (a>>i)&1); //22..31
604 }
605 for(; i>=0; i--){
606 put_rac(c, state+22+i, (a>>i)&1); //22..31
607 }
608
609 if(is_signed)
610 put_rac(c, state+11 + el, v < 0); //11..21
611 #else
612
613 put_rac(c, state+0, 0);
614 if(e<=9){
615 for(i=0; i<e; i++){
616 put_rac(c, state+1+i, 1); //1..10
617 }
618 put_rac(c, state+1+i, 0);
619
620 for(i=e-1; i>=0; i--){
621 put_rac(c, state+22+i, (a>>i)&1); //22..31
622 }
623
624 if(is_signed)
625 put_rac(c, state+11 + e, v < 0); //11..21
626 }else{
627 for(i=0; i<e; i++){
628 put_rac(c, state+1+FFMIN(i,9), 1); //1..10
629 }
630 put_rac(c, state+1+FFMIN(i,9), 0);
631
632 for(i=e-1; i>=0; i--){
633 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
634 }
635
636 if(is_signed)
637 put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
638 }
639 #endif
640 }else{
641 put_rac(c, state+0, 1);
642 }
643 }
644
645 static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
646 if(get_rac(c, state+0))
647 return 0;
648 else{
649 int i, e, a;
650 e= 0;
651 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
652 e++;
653 }
654
655 a= 1;
656 for(i=e-1; i>=0; i--){
657 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
658 }
659
660 if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
661 return -a;
662 else
663 return a;
664 }
665 }
666
667 static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
668 int i;
669 int r= log2>=0 ? 1<<log2 : 1;
670
671 assert(v>=0);
672 assert(log2>=-4);
673
674 while(v >= r){
675 put_rac(c, state+4+log2, 1);
676 v -= r;
677 log2++;
678 if(log2>0) r+=r;
679 }
680 put_rac(c, state+4+log2, 0);
681
682 for(i=log2-1; i>=0; i--){
683 put_rac(c, state+31-i, (v>>i)&1);
684 }
685 }
686
687 static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
688 int i;
689 int r= log2>=0 ? 1<<log2 : 1;
690 int v=0;
691
692 assert(log2>=-4);
693
694 while(get_rac(c, state+4+log2)){
695 v+= r;
696 log2++;
697 if(log2>0) r+=r;
698 }
699
700 for(i=log2-1; i>=0; i--){
701 v+= get_rac(c, state+31-i)<<i;
702 }
703
704 return v;
705 }
706
707 static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
708 const int mirror_left= !highpass;
709 const int mirror_right= (width&1) ^ highpass;
710 const int w= (width>>1) - 1 + (highpass & width);
711 int i;
712
713 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
714 if(mirror_left){
715 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
716 dst += dst_step;
717 src += src_step;
718 }
719
720 for(i=0; i<w; i++){
721 dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse);
722 }
723
724 if(mirror_right){
725 dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse);
726 }
727 }
728
729 static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
730 const int mirror_left= !highpass;
731 const int mirror_right= (width&1) ^ highpass;
732 const int w= (width>>1) - 1 + (highpass & width);
733 int i;
734
735 if(mirror_left){
736 int r= 3*2*ref[0];
737 r += r>>4;
738 r += r>>8;
739 dst[0] = LIFT(src[0], ((r+add)>>shift), inverse);
740 dst += dst_step;
741 src += src_step;
742 }
743
744 for(i=0; i<w; i++){
745 int r= 3*(ref[i*ref_step] + ref[(i+1)*ref_step]);
746 r += r>>4;
747 r += r>>8;
748 dst[i*dst_step] = LIFT(src[i*src_step], ((r+add)>>shift), inverse);
749 }
750
751 if(mirror_right){
752 int r= 3*2*ref[w*ref_step];
753 r += r>>4;
754 r += r>>8;
755 dst[w*dst_step] = LIFT(src[w*src_step], ((r+add)>>shift), inverse);
756 }
757 }
758
759
760 static void inplace_lift(DWTELEM *dst, int width, int *coeffs, int n, int shift, int start, int inverse){
761 int x, i;
762
763 for(x=start; x<width; x+=2){
764 int64_t sum=0;
765
766 for(i=0; i<n; i++){
767 int x2= x + 2*i - n + 1;
768 if (x2< 0) x2= -x2;
769 else if(x2>=width) x2= 2*width-x2-2;
770 sum += coeffs[i]*(int64_t)dst[x2];
771 }
772 if(inverse) dst[x] -= (sum + (1<<shift)/2)>>shift;
773 else dst[x] += (sum + (1<<shift)/2)>>shift;
774 }
775 }
776
777 static void inplace_liftV(DWTELEM *dst, int width, int height, int stride, int *coeffs, int n, int shift, int start, int inverse){
778 int x, y, i;
779 for(y=start; y<height; y+=2){
780 for(x=0; x<width; x++){
781 int64_t sum=0;
782
783 for(i=0; i<n; i++){
784 int y2= y + 2*i - n + 1;
785 if (y2< 0) y2= -y2;
786 else if(y2>=height) y2= 2*height-y2-2;
787 sum += coeffs[i]*(int64_t)dst[x + y2*stride];
788 }
789 if(inverse) dst[x + y*stride] -= (sum + (1<<shift)/2)>>shift;
790 else dst[x + y*stride] += (sum + (1<<shift)/2)>>shift;
791 }
792 }
793 }
794
/*
 * Compile-time configuration of the generic "X" wavelet used by
 * horizontal_decomposeX()/horizontal_composeX() and
 * spatial_decomposeX()/spatial_composeX().
 *
 * For each of the four lifting steps k = 1..4:
 *   Nk      - number of filter taps (0 makes the step a no-op loop)
 *   SHIFTk  - right shift applied to the weighted sum
 *   COEFFSk - tap values, passed as the coeffs argument
 * LX0/LX1 are the start positions handed to inplace_lift()/inplace_liftV(),
 * and SCALEX is the factor samples are multiplied by before decomposition
 * and divided by after composition.
 *
 * Only the first branch whose condition is true is compiled; with the
 * conditions as written that is the "13/7 CRF" branch, and every later
 * "#elif 1" branch is an inactive alternative.
 */
#define SCALEX 1
#define LX0 0
#define LX1 1

#if 0 // more accurate 9/7
#define N1 2
#define SHIFT1 14
#define COEFFS1 (int[]){-25987,-25987}
#define N2 2
#define SHIFT2 19
#define COEFFS2 (int[]){-27777,-27777}
#define N3 2
#define SHIFT3 15
#define COEFFS3 (int[]){28931,28931}
#define N4 2
#define SHIFT4 15
#define COEFFS4 (int[]){14533,14533}
#elif 1 // 13/7 CRF
/* Active configuration: two 4-tap steps, steps 3 and 4 disabled (N=0). */
#define N1 4
#define SHIFT1 4
#define COEFFS1 (int[]){1,-9,-9,1}
#define N2 4
#define SHIFT2 4
#define COEFFS2 (int[]){-1,5,5,-1}
#define N3 0
#define SHIFT3 1
#define COEFFS3 NULL
#define N4 0
#define SHIFT4 1
#define COEFFS4 NULL
#elif 1 // 3/5
/* NOTE(review): this branch redefines LX0, LX1 and SCALEX without #undef,
 * and SCALEX 0.5 would be applied to integer samples - check before
 * enabling. */
#define LX0 1
#define LX1 0
#define SCALEX 0.5
#define N1 2
#define SHIFT1 1
#define COEFFS1 (int[]){1,1}
#define N2 2
#define SHIFT2 2
#define COEFFS2 (int[]){-1,-1}
#define N3 0
#define SHIFT3 0
#define COEFFS3 NULL
#define N4 0
#define SHIFT4 0
#define COEFFS4 NULL
#elif 1 // 11/5
/* NOTE(review): redefines SCALEX without #undef - check before enabling. */
#define N1 0
#define SHIFT1 1
#define COEFFS1 NULL
#define N2 2
#define SHIFT2 2
#define COEFFS2 (int[]){-1,-1}
#define N3 2
#define SHIFT3 0
#define COEFFS3 (int[]){-1,-1}
#define N4 4
#define SHIFT4 7
#define COEFFS4 (int[]){-5,29,29,-5}
#define SCALEX 4
#elif 1 // 9/7 CDF
#define N1 2
#define SHIFT1 7
#define COEFFS1 (int[]){-203,-203}
#define N2 2
#define SHIFT2 12
#define COEFFS2 (int[]){-217,-217}
#define N3 2
#define SHIFT3 7
#define COEFFS3 (int[]){113,113}
#define N4 2
#define SHIFT4 9
#define COEFFS4 (int[]){227,227}
#define SCALEX 1
#elif 1 // 7/5 CDF
#define N1 0
#define SHIFT1 1
#define COEFFS1 NULL
#define N2 2
#define SHIFT2 2
#define COEFFS2 (int[]){-1,-1}
#define N3 2
#define SHIFT3 0
#define COEFFS3 (int[]){-1,-1}
#define N4 2
#define SHIFT4 4
#define COEFFS4 (int[]){3,3}
#elif 1 // 9/7 MN
#define N1 4
#define SHIFT1 4
#define COEFFS1 (int[]){1,-9,-9,1}
#define N2 2
#define SHIFT2 2
#define COEFFS2 (int[]){1,1}
#define N3 0
#define SHIFT3 1
#define COEFFS3 NULL
#define N4 0
#define SHIFT4 1
#define COEFFS4 NULL
#else // 13/7 CRF
#define N1 4
#define SHIFT1 4
#define COEFFS1 (int[]){1,-9,-9,1}
#define N2 4
#define SHIFT2 4
#define COEFFS2 (int[]){-1,5,5,-1}
#define N3 0
#define SHIFT3 1
#define COEFFS3 NULL
#define N4 0
#define SHIFT4 1
#define COEFFS4 NULL
#endif
909 static void horizontal_decomposeX(DWTELEM *b, int width){
910 DWTELEM temp[width];
911 const int width2= width>>1;
912 const int w2= (width+1)>>1;
913 int A1,A2,A3,A4, x;
914
915 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 0);
916 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 0);
917 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 0);
918 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 0);
919
920 for(x=0; x<width2; x++){
921 temp[x ]= b[2*x ];
922 temp[x+w2]= b[2*x + 1];
923 }
924 if(width&1)
925 temp[x ]= b[2*x ];
926 memcpy(b, temp, width*sizeof(int));
927 }
928
929 static void horizontal_composeX(DWTELEM *b, int width){
930 DWTELEM temp[width];
931 const int width2= width>>1;
932 int A1,A2,A3,A4, x;
933 const int w2= (width+1)>>1;
934
935 memcpy(temp, b, width*sizeof(int));
936 for(x=0; x<width2; x++){
937 b[2*x ]= temp[x ];
938 b[2*x + 1]= temp[x+w2];
939 }
940 if(width&1)
941 b[2*x ]= temp[x ];
942
943 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 1);
944 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 1);
945 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 1);
946 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 1);
947 }
948
949 static void spatial_decomposeX(DWTELEM *buffer, int width, int height, int stride){
950 int x, y;
951
952 for(y=0; y<height; y++){
953 for(x=0; x<width; x++){
954 buffer[y*stride + x] *= SCALEX;
955 }
956 }
957
958 for(y=0; y<height; y++){
959 horizontal_decomposeX(buffer + y*stride, width);
960 }
961
962 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 0);
963 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0);
964 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0);
965 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0);
966 }
967
968 static void spatial_composeX(DWTELEM *buffer, int width, int height, int stride){
969 int x, y;
970
971 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1);
972 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1);
973 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1);
974 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 1);
975
976 for(y=0; y<height; y++){
977 horizontal_composeX(buffer + y*stride, width);
978 }
979
980 for(y=0; y<height; y++){
981 for(x=0; x<width; x++){
982 buffer[y*stride + x] /= SCALEX;
983 }
984 }
985 }
986
987 static void horizontal_decompose53i(DWTELEM *b, int width){
988 DWTELEM temp[width];
989 const int width2= width>>1;
990 int A1,A2,A3,A4, x;
991 const int w2= (width+1)>>1;
992
993 for(x=0; x<width2; x++){
994 temp[x ]= b[2*x ];
995 temp[x+w2]= b[2*x + 1];
996 }
997 if(width&1)
998 temp[x ]= b[2*x ];
999 #if 0
1000 A2= temp[1 ];
1001 A4= temp[0 ];
1002 A1= temp[0+width2];
1003 A1 -= (A2 + A4)>>1;
1004 A4 += (A1 + 1)>>1;
1005 b[0+width2] = A1;
1006 b[0 ] = A4;
1007 for(x=1; x+1<width2; x+=2){
1008 A3= temp[x+width2];
1009 A4= temp[x+1 ];
1010 A3 -= (A2 + A4)>>1;
1011 A2 += (A1 + A3 + 2)>>2;
1012 b[x+width2] = A3;
1013 b[x ] = A2;
1014
1015 A1= temp[x+1+width2];
1016 A2= temp[x+2 ];
1017 A1 -= (A2 + A4)>>1;
1018 A4 += (A1 + A3 + 2)>>2;
1019 b[x+1+width2] = A1;
1020 b[x+1 ] = A4;
1021 }
1022 A3= temp[width-1];
1023 A3 -= A2;
1024 A2 += (A1 + A3 + 2)>>2;
1025 b[width -1] = A3;
1026 b[width2-1] = A2;
1027 #else
1028 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
1029 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0);
1030 #endif
1031 }
1032
1033 static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1034 int i;
1035
1036 for(i=0; i<width; i++){
1037 b1[i] -= (b0[i] + b2[i])>>1;
1038 }
1039 }
1040
1041 static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1042 int i;
1043
1044 for(i=0; i<width; i++){
1045 b1[i] += (b0[i] + b2[i] + 2)>>2;
1046 }
1047 }
1048
1049 static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
1050 int y;
1051 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
1052 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride;
1053
1054 for(y=-2; y<height; y+=2){
1055 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1056 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1057
1058 {START_TIMER
1059 if(b1 <= b3) horizontal_decompose53i(b2, width);
1060 if(y+2 < height) horizontal_decompose53i(b3, width);
1061 STOP_TIMER("horizontal_decompose53i")}
1062
1063 {START_TIMER
1064 if(b1 <= b3) vertical_decompose53iH0(b1, b2, b3, width);
1065 if(b0 <= b2) vertical_decompose53iL0(b0, b1, b2, width);
1066 STOP_TIMER("vertical_decompose53i*")}
1067
1068 b0=b2;
1069 b1=b3;
1070 }
1071 }
1072
/*
 * Weights of the four lifting steps (A, B, C, D) of the integer 9/7 wavelet.
 * For each step X: W_XM is the multiplier, W_XO the rounding offset added
 * before the shift and W_XS the right shift.
 *
 * "lift5" is first aliased to lift(); the active "#if 1" branch removes the
 * alias again, so there lift5 names the shift-and-add function and the
 * "#ifdef lift5" sections of the vertical steps take their #else path.
 * In that configuration W_CM is only passed as the (unread) mul argument of
 * lift5(), which is presumably why it holds the placeholder value 9999.
 * The inactive branches keep the alias, so step C uses a real multiplier.
 */
#define lift5 lift
#if 1
#define W_AM 3
#define W_AO 0
#define W_AS 1

#define W_BM 1
#define W_BO 8
#define W_BS 4

#undef lift5
#define W_CM 9999
#define W_CO 2
#define W_CS 2

#define W_DM 15
#define W_DO 16
#define W_DS 5
#elif 0
#define W_AM 55
#define W_AO 16
#define W_AS 5

#define W_BM 3
#define W_BO 32
#define W_BS 6

#define W_CM 127
#define W_CO 64
#define W_CS 7

#define W_DM 7
#define W_DO 8
#define W_DS 4
#elif 0
#define W_AM 97
#define W_AO 32
#define W_AS 6

#define W_BM 63
#define W_BO 512
#define W_BS 10

#define W_CM 13
#define W_CO 8
#define W_CS 4

#define W_DM 15
#define W_DO 16
#define W_DS 5

#else

#define W_AM 203
#define W_AO 64
#define W_AS 7

#define W_BM 217
#define W_BO 2048
#define W_BS 12

#define W_CM 113
#define W_CO 64
#define W_CS 7

#define W_DM 227
#define W_DO 128
#define W_DS 9
#endif
1142 static void horizontal_decompose97i(DWTELEM *b, int width){
1143 DWTELEM temp[width];
1144 const int w2= (width+1)>>1;
1145
1146 lift (temp+w2, b +1, b , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0);
1147 lift (temp , b , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0);
1148 lift5(b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
1149 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
1150 }
1151
1152
1153 static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1154 int i;
1155
1156 for(i=0; i<width; i++){
1157 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1158 }
1159 }
1160
1161 static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1162 int i;
1163
1164 for(i=0; i<width; i++){
1165 #ifdef lift5
1166 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1167 #else
1168 int r= 3*(b0[i] + b2[i]);
1169 r+= r>>4;
1170 r+= r>>8;
1171 b1[i] += (r+W_CO)>>W_CS;
1172 #endif
1173 }
1174 }
1175
1176 static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1177 int i;
1178
1179 for(i=0; i<width; i++){
1180 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1181 }
1182 }
1183
1184 static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1185 int i;
1186
1187 for(i=0; i<width; i++){
1188 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1189 }
1190 }
1191
1192 static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
1193 int y;
1194 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
1195 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride;
1196 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
1197 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
1198
1199 for(y=-4; y<height; y+=2){
1200 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1201 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1202
1203 {START_TIMER
1204 if(b3 <= b5) horizontal_decompose97i(b4, width);
1205 if(y+4 < height) horizontal_decompose97i(b5, width);
1206 if(width>400){
1207 STOP_TIMER("horizontal_decompose97i")
1208 }}
1209
1210 {START_TIMER
1211 if(b3 <= b5) vertical_decompose97iH0(b3, b4, b5, width);
1212 if(b2 <= b4) vertical_decompose97iL0(b2, b3, b4, width);
1213 if(b1 <= b3) vertical_decompose97iH1(b1, b2, b3, width);
1214 if(b0 <= b2) vertical_decompose97iL1(b0, b1, b2, width);
1215
1216 if(width>400){
1217 STOP_TIMER("vertical_decompose97i")
1218 }}
1219
1220 b0=b2;
1221 b1=b3;
1222 b2=b4;
1223 b3=b5;
1224 }
1225 }
1226
1227 void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1228 int level;
1229
1230 for(level=0; level<decomposition_count; level++){
1231 switch(type){
1232 case 0: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1233 case 1: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1234 case 2: spatial_decomposeX (buffer, width>>level, height>>level, stride<<level); break;
1235 }
1236 }
1237 }
1238
1239 static void horizontal_compose53i(DWTELEM *b, int width){
1240 DWTELEM temp[width];
1241 const int width2= width>>1;
1242 const int w2= (width+1)>>1;
1243 int A1,A2,A3,A4, x;
1244
1245 #if 0
1246 A2= temp[1 ];
1247 A4= temp[0 ];
1248 A1= temp[0+width2];
1249 A1 -= (A2 + A4)>>1;
1250 A4 += (A1 + 1)>>1;
1251 b[0+width2] = A1;
1252 b[0 ] = A4;
1253 for(x=1; x+1<width2; x+=2){
1254 A3= temp[x+width2];
1255 A4= temp[x+1 ];
1256 A3 -= (A2 + A4)>>1;
1257 A2 += (A1 + A3 + 2)>>2;
1258 b[x+width2] = A3;
1259 b[x ] = A2;
1260
1261 A1= temp[x+1+width2];
1262 A2= temp[x+2 ];
1263 A1 -= (A2 + A4)>>1;
1264 A4 += (A1 + A3 + 2)>>2;
1265 b[x+1+width2] = A1;
1266 b[x+1 ] = A4;
1267 }
1268 A3= temp[width-1];
1269 A3 -= A2;
1270 A2 += (A1 + A3 + 2)>>2;
1271 b[width -1] = A3;
1272 b[width2-1] = A2;
1273 #else
1274 lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1);
1275 lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1276 #endif
1277 for(x=0; x<width2; x++){
1278 b[2*x ]= temp[x ];
1279 b[2*x + 1]= temp[x+w2];
1280 }
1281 if(width&1)
1282 b[2*x ]= temp[x ];
1283 }
1284
1285 static void vertical_compose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1286 int i;
1287
1288 for(i=0; i<width; i++){
1289 b1[i] += (b0[i] + b2[i])>>1;
1290 }
1291 }
1292
1293 static void vertical_compose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1294 int i;
1295
1296 for(i=0; i<width; i++){
1297 b1[i] -= (b0[i] + b2[i] + 2)>>2;
1298 }
1299 }
1300
1301 static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1302 cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1303 cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line);
1304 cs->y = -1;
1305 }
1306
1307 static void spatial_compose53i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1308 cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1309 cs->b1 = buffer + mirror(-1 , height-1)*stride;
1310 cs->y = -1;
1311 }
1312
1313 static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1314 int y= cs->y;
1315 int mirror0 = mirror(y-1, height-1);
1316 int mirror1 = mirror(y , height-1);
1317 int mirror2 = mirror(y+1, height-1);
1318 int mirror3 = mirror(y+2, height-1);
1319
1320 DWTELEM *b0= cs->b0;
1321 DWTELEM *b1= cs->b1;
1322 DWTELEM *b2= slice_buffer_get_line(sb, mirror2 * stride_line);
1323 DWTELEM *b3= slice_buffer_get_line(sb, mirror3 * stride_line);
1324
1325 {START_TIMER
1326 if(mirror1 <= mirror3) vertical_compose53iL0(b1, b2, b3, width);
1327 if(mirror0 <= mirror2) vertical_compose53iH0(b0, b1, b2, width);
1328 STOP_TIMER("vertical_compose53i*")}
1329
1330 {START_TIMER
1331 if(y-1 >= 0) horizontal_compose53i(b0, width);
1332 if(mirror0 <= mirror2) horizontal_compose53i(b1, width);
1333 STOP_TIMER("horizontal_compose53i")}
1334
1335 cs->b0 = b2;
1336 cs->b1 = b3;
1337 cs->y += 2;
1338 }
1339
1340 static void spatial_compose53i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1341 int y= cs->y;
1342 DWTELEM *b0= cs->b0;
1343 DWTELEM *b1= cs->b1;
1344 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1345 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1346
1347 {START_TIMER
1348 if(b1 <= b3) vertical_compose53iL0(b1, b2, b3, width);
1349 if(b0 <= b2) vertical_compose53iH0(b0, b1, b2, width);
1350 STOP_TIMER("vertical_compose53i*")}
1351
1352 {START_TIMER
1353 if(y-1 >= 0) horizontal_compose53i(b0, width);
1354 if(b0 <= b2) horizontal_compose53i(b1, width);
1355 STOP_TIMER("horizontal_compose53i")}
1356
1357 cs->b0 = b2;
1358 cs->b1 = b3;
1359 cs->y += 2;
1360 }
1361
1362 static void spatial_compose53i(DWTELEM *buffer, int width, int height, int stride){
1363 dwt_compose_t cs;
1364 spatial_compose53i_init(&cs, buffer, height, stride);
1365 while(cs.y <= height)
1366 spatial_compose53i_dy(&cs, buffer, width, height, stride);
1367 }
1368
1369
1370 static void horizontal_compose97i(DWTELEM *b, int width){
1371 DWTELEM temp[width];
1372 const int w2= (width+1)>>1;
1373
1374 lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
1375 lift5(temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
1376 lift (b , temp , temp+w2, 2, 1, 1, width, -W_BM, W_BO, W_BS, 0, 1);
1377 lift (b+1 , temp+w2, b , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1);
1378 }
1379
1380 static void vertical_compose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1381 int i;
1382
1383 for(i=0; i<width; i++){
1384 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1385 }
1386 }
1387
1388 static void vertical_compose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1389 int i;
1390
1391 for(i=0; i<width; i++){
1392 #ifdef lift5
1393 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1394 #else
1395 int r= 3*(b0[i] + b2[i]);
1396 r+= r>>4;
1397 r+= r>>8;
1398 b1[i] -= (r+W_CO)>>W_CS;
1399 #endif
1400 }
1401 }
1402
1403 static void vertical_compose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1404 int i;
1405
1406 for(i=0; i<width; i++){
1407 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1408 }
1409 }
1410
1411 static void vertical_compose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1412 int i;
1413
1414 for(i=0; i<width; i++){
1415 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1416 }
1417 }
1418
1419 static void vertical_compose97i(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width){
1420 int i;
1421
1422 for(i=0; i<width; i++){
1423 int r;
1424 b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
1425 #ifdef lift5
1426 b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
1427 #else
1428 r= 3*(b2[i] + b4[i]);
1429 r+= r>>4;
1430 r+= r>>8;
1431 b3[i] -= (r+W_CO)>>W_CS;
1432 #endif
1433 b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
1434 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1435 }
1436 }
1437
1438 static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1439 cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1440 cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line);
1441 cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1442 cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
1443 cs->y = -3;
1444 }
1445
1446 static void spatial_compose97i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1447 cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1448 cs->b1 = buffer + mirror(-3 , height-1)*stride;
1449 cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1450 cs->b3 = buffer + mirror(-3+2, height-1)*stride;
1451 cs->y = -3;
1452 }
1453
1454 static void spatial_compose97i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1455 int y = cs->y;
1456
1457 int mirror0 = mirror(y - 1, height - 1);
1458 int mirror1 = mirror(y + 0, height - 1);
1459 int mirror2 = mirror(y + 1, height - 1);
1460 int mirror3 = mirror(y + 2, height - 1);
1461 int mirror4 = mirror(y + 3, height - 1);
1462 int mirror5 = mirror(y + 4, height - 1);
1463 DWTELEM *b0= cs->b0;
1464 DWTELEM *b1= cs->b1;
1465 DWTELEM *b2= cs->b2;
1466 DWTELEM *b3= cs->b3;
1467 DWTELEM *b4= slice_buffer_get_line(sb, mirror4 * stride_line);
1468 DWTELEM *b5= slice_buffer_get_line(sb, mirror5 * stride_line);
1469
1470 {START_TIMER
1471 if(y>0 && y+4<height){
1472 vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
1473 }else{
1474 if(mirror3 <= mirror5) vertical_compose97iL1(b3, b4, b5, width);
1475 if(mirror2 <= mirror4) vertical_compose97iH1(b2, b3, b4, width);
1476 if(mirror1 <= mirror3) vertical_compose97iL0(b1, b2, b3, width);
1477 if(mirror0 <= mirror2) vertical_compose97iH0(b0, b1, b2, width);
1478 }
1479 if(width>400){
1480 STOP_TIMER("vertical_compose97i")}}
1481
1482 {START_TIMER
1483 if(y-1>= 0) horizontal_compose97i(b0, width);
1484 if(mirror0 <= mirror2) horizontal_compose97i(b1, width);
1485 if(width>400 && mirror0 <= mirror2){
1486 STOP_TIMER("horizontal_compose97i")}}
1487
1488 cs->b0=b2;
1489 cs->b1=b3;
1490 cs->b2=b4;
1491 cs->b3=b5;
1492 cs->y += 2;
1493 }
1494
1495 static void spatial_compose97i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1496 int y = cs->y;
1497 DWTELEM *b0= cs->b0;
1498 DWTELEM *b1= cs->b1;
1499 DWTELEM *b2= cs->b2;
1500 DWTELEM *b3= cs->b3;
1501 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1502 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1503
1504 if(stride == width && y+4 < height && 0){
1505 int x;
1506 for(x=0; x<width/2; x++)
1507 b5[x] += 64*2;
1508 for(; x<width; x++)
1509 b5[x] += 169*2;
1510 }
1511
1512 {START_TIMER
1513 if(b3 <= b5) vertical_compose97iL1(b3, b4, b5, width);
1514 if(b2 <= b4) vertical_compose97iH1(b2, b3, b4, width);
1515 if(b1 <= b3) vertical_compose97iL0(b1, b2, b3, width);
1516 if(b0 <= b2) vertical_compose97iH0(b0, b1, b2, width);
1517 if(width>400){
1518 STOP_TIMER("vertical_compose97i")}}
1519
1520 {START_TIMER
1521 if(y-1>= 0) horizontal_compose97i(b0, width);
1522 if(b0 <= b2) horizontal_compose97i(b1, width);
1523 if(width>400 && b0 <= b2){
1524 STOP_TIMER("horizontal_compose97i")}}
1525
1526 cs->b0=b2;
1527 cs->b1=b3;
1528 cs->b2=b4;
1529 cs->b3=b5;
1530 cs->y += 2;
1531 }
1532
1533 static void spatial_compose97i(DWTELEM *buffer, int width, int height, int stride){
1534 dwt_compose_t cs;
1535 spatial_compose97i_init(&cs, buffer, height, stride);
1536 while(cs.y <= height)
1537 spatial_compose97i_dy(&cs, buffer, width, height, stride);
1538 }
1539
1540 void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
1541 int level;
1542 for(level=decomposition_count-1; level>=0; level--){
1543 switch(type){
1544 case 0: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1545 case 1: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1546 /* not slicified yet */
1547 case 2: /*spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;*/
1548 av_log(NULL, AV_LOG_ERROR, "spatial_composeX neither buffered nor slicified yet.\n"); break;
1549 }
1550 }
1551 }
1552
1553 void ff_spatial_idwt_init(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1554 int level;
1555 for(level=decomposition_count-1; level>=0; level--){
1556 switch(type){
1557 case 0: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1558 case 1: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
1559 /* not slicified yet */
1560 case 2: spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;
1561 }
1562 }
1563 }
1564
1565 void ff_spatial_idwt_slice(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
1566 const int support = type==1 ? 3 : 5;
1567 int level;
1568 if(type==2) return;
1569
1570 for(level=decomposition_count-1; level>=0; level--){
1571 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1572 switch(type){
1573 case 0: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1574 break;
1575 case 1: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1576 break;
1577 case 2: break;
1578 }
1579 }
1580 }
1581 }
1582
1583 void ff_spatial_idwt_buffered_slice(dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
1584 const int support = type==1 ? 3 : 5;
1585 int level;
1586 if(type==2) return;
1587
1588 for(level=decomposition_count-1; level>=0; level--){
1589 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1590 switch(type){
1591 case 0: spatial_compose97i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1592 break;
1593 case 1: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1594 break;
1595 case 2: break;
1596 }
1597 }
1598 }
1599 }
1600
1601 void ff_spatial_idwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1602 if(type==2){
1603 int level;
1604 for(level=decomposition_count-1; level>=0; level--)
1605 spatial_composeX (buffer, width>>level, height>>level, stride<<level);
1606 }else{
1607 dwt_compose_t cs[MAX_DECOMPOSITIONS];
1608 int y;
1609 ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1610 for(y=0; y<height; y+=4)
1611 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
1612 }
1613 }
1614
/**
 * Encodes the coefficients of one subband with the range coder.
 *
 * Each coefficient is coded in the context of its already-coded neighbours
 * (left l, top-left lt, top t, top-right rt) and the co-located parent
 * coefficient p.  Where all of these are zero, significance is not coded per
 * coefficient; instead the lengths of runs of zero coefficients are coded.
 *
 * The work is done in two passes over the subband: the first collects the
 * run lengths, the second writes the symbols.
 *
 * @param s      codec context; symbols are written to s->c
 * @param b      subband being coded (dimensions and adaptive states)
 * @param src    coefficients of this subband, rows stride apart
 * @param parent coefficients of the parent subband, or NULL if there is none
 * @param orientation only referenced by commented-out code
 * @return 0 on success, -1 if the output buffer is running out of space
 */
static int encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
    const int w= b->width;
    const int h= b->height;
    int x, y;

    if(1){
        int run=0;
        /* NOTE(review): variable-length array of w*h ints on the stack;
         * could be large for big subbands - confirm the stack budget. */
        int runs[w*h];
        int run_index=0;

        /* pass 1: record the length of every zero run that occurs in
         * all-zero context */
        for(y=0; y<h; y++){
            for(x=0; x<w; x++){
                int v, p=0;
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
                v= src[x + y*stride];

                if(y){
                    t= src[x + (y-1)*stride];
                    if(x){
                        lt= src[x - 1 + (y-1)*stride];
                    }
                    if(x + 1 < w){
                        rt= src[x + 1 + (y-1)*stride];
                    }
                }
                if(x){
                    l= src[x - 1 + y*stride];
                    /*if(x > 1){
                        if(orientation==1) ll= src[y + (x-2)*stride];
                        else               ll= src[x - 2 + y*stride];
                    }*/
                }
                if(parent){
                    /* parent band is addressed at half resolution with
                     * twice the row stride */
                    int px= x>>1;
                    int py= y>>1;
                    if(px<b->parent->width && py<b->parent->height)
                        p= parent[px + py*2*stride];
                }
                if(!(/*ll|*/l|lt|t|rt|p)){
                    if(v){
                        runs[run_index++]= run;
                        run=0;
                    }else{
                        run++;
                    }
                }
            }
        }
        /* the trailing run is stored too, then reading restarts at the
         * first entry */
        runs[run_index++]= run;
        run_index=0;
        run= runs[run_index++];

        put_symbol2(&s->c, b->state[1], run, 3);

        /* pass 2: emit the symbols */
        for(y=0; y<h; y++){
            /* give up when fewer than w*40 bytes of output space remain */
            if(s->c.bytestream_end - s->c.bytestream < w*40){
                av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
                return -1;
            }
            for(x=0; x<w; x++){
                int v, p=0;
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
                v= src[x + y*stride];

                if(y){
                    t= src[x + (y-1)*stride];
                    if(x){
                        lt= src[x - 1 + (y-1)*stride];
                    }
                    if(x + 1 < w){
                        rt= src[x + 1 + (y-1)*stride];
                    }
                }
                if(x){
                    l= src[x - 1 + y*stride];
                    /*if(x > 1){
                        if(orientation==1) ll= src[y + (x-2)*stride];
                        else               ll= src[x - 2 + y*stride];
                    }*/
                }
                if(parent){
                    int px= x>>1;
                    int py= y>>1;
                    if(px<b->parent->width && py<b->parent->height)
                        p= parent[px + py*2*stride];
                }
                if(/*ll|*/l|lt|t|rt|p){
                    /* non-zero context: code the significance flag */
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));

                    put_rac(&s->c, &b->state[0][context], !!v);
                }else{
                    /* zero context: significance is implied by the runs */
                    if(!run){
                        run= runs[run_index++];

                        put_symbol2(&s->c, b->state[1], run, 3);
                        assert(v);
                    }else{
                        run--;
                        assert(!v);
                    }
                }
                if(v){
                    /* magnitude minus one, then the sign with a context
                     * derived from the left and top neighbours */
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
                    int l2= 2*ABS(l) + (l<0);
                    int t2= 2*ABS(t) + (t<0);

                    put_symbol2(&s->c, b->state[context + 2], ABS(v)-1, context-4);
                    put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
                }
            }
        }
    }
    return 0;
}
1729
1730 static int encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1731 // encode_subband_qtree(s, b, src, parent, stride, orientation);
1732 // encode_subband_z0run(s, b, src, parent, stride, orientation);
1733 return encode_subband_c0run(s, b, src, parent, stride, orientation);
1734 // encode_subband_dzr(s, b, src, parent, stride, orientation);
1735 }
1736
/**
 * Decodes the coefficients of one subband from the range coder into the
 * sparse list b->x_coeff.
 *
 * Only non-zero coefficients are stored, as (x, coeff) pairs in row order.
 * coeff holds 2*magnitude with the sign flag in bit 0.  Every row is
 * terminated by an entry whose x is w+1, and one more such entry is appended
 * after the last row.
 *
 * The contexts mirror the encoder: left (l), top-left (lt), top (t) and
 * top-right (rt) neighbours plus the parent coefficient (p), all read back
 * from the sparse lists of the previous row and of the parent band.
 *
 * NOTE(review): nothing visible here bounds index against the capacity of
 * b->x_coeff - confirm at the allocation site.
 *
 * @param s      codec context; symbols are read from s->c
 * @param b      subband to fill
 * @param parent already unpacked parent subband, or NULL if there is none
 * @param orientation not referenced in this function
 */
static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
    const int w= b->width;
    const int h= b->height;
    int x,y;

    if(1){
        int run;
        int index=0;                /* next free entry in b->x_coeff */
        int prev_index=-1;          /* read cursor in the previous row's entries; only read when y != 0 */
        int prev2_index=0;          /* start of the current row's entries */
        int parent_index= 0;        /* read cursor in the parent's entries */
        int prev_parent_index= 0;   /* start of the current parent row's entries */

        run= get_symbol2(&s->c, b->state[1], 3);
        for(y=0; y<h; y++){
            int v=0;
            int lt=0, t=0, rt=0;

            /* prime rt with the previous row's coefficient at x == 0 */
            if(y && b->x_coeff[prev_index].x == 0){
                rt= b->x_coeff[prev_index].coeff;
            }
            for(x=0; x<w; x++){
                int p=0;
                const int l= v;

                /* shift the top neighbours one column to the left */
                lt= t; t= rt;

                if(y){
                    if(b->x_coeff[prev_index].x <= x)
                        prev_index++;
                    if(b->x_coeff[prev_index].x == x + 1)
                        rt= b->x_coeff[prev_index].coeff;
                    else
                        rt=0;
                }
                if(parent){
                    /* the parent band is addressed at half resolution */
                    if(x>>1 > parent->x_coeff[parent_index].x){
                        parent_index++;
                    }
                    if(x>>1 == parent->x_coeff[parent_index].x){
                        p= parent->x_coeff[parent_index].coeff;
                    }
                }
                if(/*ll|*/l|lt|t|rt|p){
                    /* stored values carry the sign in bit 0, hence the
                     * >>1 (and &~1 for the doubled top weight) */
                    int context= av_log2(/*ABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));

                    v=get_rac(&s->c, &b->state[0][context]);
                }else{
                    if(!run){
                        run= get_symbol2(&s->c, b->state[1], 3);
                        v=1;
                    }else{
                        run--;
                        v=0;

                        if(y && parent){
                            int max_run;

                            /* skip ahead inside the zero run, limited by
                             * the remaining run length and by the next
                             * entry of the previous row and of the parent
                             * (presumably so no non-zero context is jumped
                             * over - confirm) */
                            max_run= FFMIN(run, b->x_coeff[prev_index].x - x - 2);
                            max_run= FFMIN(max_run, 2*parent->x_coeff[parent_index].x - x - 1);
                            x+= max_run;
                            run-= max_run;
                        }
                    }
                }
                if(v){
                    /* magnitude, then the sign flag into bit 0 */
                    int context= av_log2(/*ABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
                    v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
                    v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);

                    b->x_coeff[index].x=x;
                    b->x_coeff[index++].coeff= v;
                }
            }
            b->x_coeff[index++].x= w+1; //end marker
            prev_index= prev2_index;
            prev2_index= index;

            if(parent){
                if(y&1){
                    /* after an odd row move on to the next parent row:
                     * skip past its end marker */
                    while(parent->x_coeff[parent_index].x != parent->width+1)
                        parent_index++;
                    parent_index++;
                    prev_parent_index= parent_index;
                }else{
                    /* after an even row rewind to the start of the same
                     * parent row */
                    parent_index= prev_parent_index;
                }
            }
        }

        b->x_coeff[index++].x= w+1; //end marker
    }
}
1830
1831 static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1832 const int w= b->width;
1833 int x,y;
1834 const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
1835 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1836 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1837 int new_index = 0;
1838
1839 START_TIMER
1840
1841 if(b->buf == s->spatial_dwt_buffer || s->qlog == LOSSLESS_QLOG){
1842 qadd= 0;
1843 qmul= 1<<QEXPSHIFT;
1844 }
1845
1846 /* If we are on the second or later slice, restore our index. */
1847 if (start_y != 0)
1848 new_index = save_state[0];
1849
1850
1851 for(y=start_y; y<h; y++){
1852 int x = 0;
1853 int v;
1854 DWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1855 memset(line, 0, b->width*sizeof(DWTELEM));
1856 v = b->x_coeff[new_index].coeff;
1857 x = b->x_coeff[new_index++].x;
1858 while(x < w)
1859 {
1860 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1861 register int u= -(v&1);
1862 line[x] = (t^u) - u;
1863
1864 v = b->x_coeff[new_index].coeff;
1865 x = b->x_coeff[new_index++].x;
1866 }
1867 }
1868 if(w > 200 && start_y != 0/*level+1 == s->spatial_decomposition_count*/){
1869 STOP_TIMER("decode_subband")
1870 }
1871
1872 /* Save our variables for the next slice. */
1873 save_state[0] = new_index;
1874
1875 return;
1876 }
1877
1878 static void reset_contexts(SnowContext *s){
1879 int plane_index, level, orientation;
1880
1881 for(plane_index=0; plane_index<3; plane_index++){
1882 for(level=0; level<s->spatial_decomposition_count; level++){
1883 for(orientation=level ? 1:0; orientation<4; orientation++){
1884 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
1885 }
1886 }
1887 }
1888 memset(s->header_state, MID_STATE, sizeof(s->header_state));
1889 memset(s->block_state, MID_STATE, sizeof(s->block_state));
1890 }
1891
1892 static int alloc_blocks(SnowContext *s){
1893 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1894 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1895
1896 s->b_width = w;
1897 s->b_height= h;
1898
1899 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
1900 return 0;
1901 }
1902
1903 static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1904 uint8_t *bytestream= d->bytestream;
1905 uint8_t *bytestream_start= d->bytestream_start;
1906 *d= *s;
1907 d->bytestream= bytestream;
1908 d->bytestream_start= bytestream_start;
1909 }
1910
//near copy & paste from dsputil, FIXME
/**
 * Sums all samples of a w x w square.
 *
 * @param pix       top-left sample of the square
 * @param line_size distance between the starts of two consecutive rows
 * @param w         width and height of the square
 * @return the sum of the w*w samples (0 when w <= 0)
 */
static int pix_sum(uint8_t * pix, int line_size, int w)
{
    int total = 0;
    int row, col;

    for (row = 0; row < w; row++) {
        const uint8_t *p = pix + row * line_size;

        for (col = 0; col < w; col++)
            total += p[col];
    }
    return total;
}
1926
1927 //near copy & paste from dsputil, FIXME
1928 static int pix_norm1(uint8_t * pix, int line_size, int w)
1929 {
1930 int s, i, j;
1931 uint32_t *sq = squareTbl + 256;
1932
1933 s = 0;
1934 for (i = 0; i < w; i++) {
1935 for (j = 0; j < w; j ++) {
1936 s += sq[pix[0]];
1937 pix ++;
1938 }
1939 pix += line_size - w;
1940 }
1941 return s;
1942 }
1943
1944 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int type){
1945 const int w= s->b_width << s->block_max_depth;
1946 const int rem_depth= s->block_max_depth - level;
1947 const int index= (x + y*w) << rem_depth;
1948 const int block_w= 1<<rem_depth;
1949 BlockNode block;
1950 int i,j;
1951
1952 block.color[0]= l;
1953 block.color[1]= cb;
1954 block.color[2]= cr;
1955 block.mx= mx;
1956 block.my= my;
1957 block.type= type;
1958 block.level= level;
1959
1960 for(j=0; j<block_w; j++){
1961 for(i=0; i<block_w; i++){
1962 s->block[index + i + j*w]= block;
1963 }
1964 }
1965 }
1966
1967 static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
1968 const int offset[3]= {
1969 y*c-> stride + x,
1970 ((y*c->uvstride + x)>>1),
1971 ((y*c->uvstride + x)>>1),
1972 };
1973 int i;
1974 for(i=0; i<3; i++){
1975 c->src[0][i]= src [i];
1976 c->ref[0][i]= ref [i] + offset[i];
1977 }
1978 assert(!ref_index);
1979 }
1980
//FIXME copy&paste
/* Named slots of the predictor array P (declared as int P[10][2] in
 * encode_q_branch()); element [0] of a slot holds the x component and
 * element [1] the y component of a motion vector predictor. */
#define P_LEFT P[1]      // motion vector of the left neighbour
#define P_TOP P[2]       // motion vector of the top neighbour
#define P_TOPRIGHT P[3]  // motion vector of the top-right neighbour
#define P_MEDIAN P[4]    // component-wise median of the three slots above
#define P_MV1 P[9]       // NOTE(review): no use visible in the nearby code
#define FLAG_QPEL 1 //must be 1
1988
/**
 * Chooses and writes the coding of one block of the block quadtree.
 *
 * For the block (x, y) of the given level three alternatives are evaluated:
 *  - inter: a motion vector found by motion search,
 *  - intra: one mean value per plane,
 *  - split: four child blocks of level+1 (only while level is below
 *           s->block_max_depth), evaluated by recursion.
 * Inter and intra are trial-encoded into local buffers with private copies of
 * the range coder and of the block states; their bit cost, weighted by
 * s->lambda2, is added to the respective distortion score. The cheapest
 * alternative ends up in s->c, s->block_state and s->block.
 *
 * In keyframes nothing is written: the covered nodes are set to BLOCK_INTRA
 * with the predicted values and 0 is returned.
 *
 * @param s     codec context
 * @param level quadtree level of the block (0 is the largest block size)
 * @param x, y  block position in units of blocks of that level
 * @return the score of the chosen alternative (0 in keyframes)
 */
static int encode_q_branch(SnowContext *s, int level, int x, int y){
    /* scratch output and state copies for the inter (p_) and intra (i_) trial */
    uint8_t p_buffer[1024];
    uint8_t i_buffer[1024];
    uint8_t p_state[sizeof(s->block_state)];
    uint8_t i_state[sizeof(s->block_state)];
    RangeCoder pc, ic;
    /* output position on entry; the winning trial is copied back to it */
    uint8_t *pbbak= s->c.bytestream;
    uint8_t *pbbak_start= s->c.bytestream_start;
    int score, score2, iscore, i_len, p_len, block_s, sum;
    /* size of the block array in finest-level nodes */
    const int w= s->b_width << s->block_max_depth;
    const int h= s->b_height << s->block_max_depth;
    const int rem_depth= s->block_max_depth - level;
    /* index of the top-left finest-level node of this block */
    const int index= (x + y*w) << rem_depth;
    /* block edge length in luma samples */
    const int block_w= 1<<(LOG2_MB_SIZE - level);
    /* stands in for neighbours outside the picture */
    static BlockNode null_block= { //FIXME add border maybe
        .color= {128,128,128},
        .mx= 0,
        .my= 0,
        .type= 0,
        .level= 0,
    };
    int trx= (x+1)<<rem_depth;
    int try= (y+1)<<rem_depth;
    BlockNode *left = x ? &s->block[index-1] : &null_block;
    BlockNode *top = y ? &s->block[index-w] : &null_block;
    BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
    BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
    BlockNode *tl = y && x ? &s->block[index-w-1] : left;
    /* top-right is used only for even x or at level 0, otherwise tl replaces it */
    BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
    /* predictions: colors from the left neighbour, motion vector as median */
    int pl = left->color[0];
    int pcb= left->color[1];
    int pcr= left->color[2];
    int pmx= mid_pred(left->mx, top->mx, tr->mx);
    int pmy= mid_pred(left->my, top->my, tr->my);
    int mx=0, my=0;
    int l,cr,cb, i;
    const int stride= s->current_picture.linesize[0];
    const int uvstride= s->current_picture.linesize[1];
    const int instride= s->input_picture.linesize[0];
    const int uvinstride= s->input_picture.linesize[1];
    uint8_t *new_l = s->input_picture.data[0] + (x + y* instride)*block_w;
    uint8_t *new_cb= s->input_picture.data[1] + (x + y*uvinstride)*block_w/2;
    uint8_t *new_cr= s->input_picture.data[2] + (x + y*uvinstride)*block_w/2;
    /* local copy of the input block, one stride*block_w sized area per plane */
    uint8_t current_mb[3][stride*block_w];
    uint8_t *current_data[3]= {&current_mb[0][0], &current_mb[1][0], &current_mb[2][0]};
    int P[10][2];
    int16_t last_mv[3][2];
    int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
    const int shift= 1+qpel;
    MotionEstContext *c= &s->m.me;
    /* context selectors for the motion vector symbols and the split flag */
    int mx_context= av_log2(2*ABS(left->mx - top->mx));
    int my_context= av_log2(2*ABS(left->my - top->my));
    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;

    assert(sizeof(s->block_state) >= 256);
    /* keyframe: nothing is coded, only the block array is filled */
    if(s->keyframe){
        set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, BLOCK_INTRA);
        return 0;
    }

    /* copy the input block of all three planes into current_mb */
    //FIXME optimize
    for(i=0; i<block_w; i++)
        memcpy(&current_mb[0][0] + stride*i, new_l + instride*i, block_w);
    for(i=0; i<block_w>>1; i++)
        memcpy(&current_mb[1][0] + uvstride*i, new_cb + uvinstride*i, block_w>>1);
    for(i=0; i<block_w>>1; i++)
        memcpy(&current_mb[2][0] + uvstride*i, new_cr + uvinstride*i, block_w>>1);

    // clip predictors / edge ?

    /* spatial predictors handed to the motion search */
    P_LEFT[0]= left->mx;
    P_LEFT[1]= left->my;
    P_TOP [0]= top->mx;
    P_TOP [1]= top->my;
    P_TOPRIGHT[0]= tr->mx;
    P_TOPRIGHT[1]= tr->my;

    /* further candidates: the vectors currently stored for this block and
       for its right and bottom neighbours */
    last_mv[0][0]= s->block[index].mx;
    last_mv[0][1]= s->block[index].my;
    last_mv[1][0]= right->mx;
    last_mv[1][1]= right->my;
    last_mv[2][0]= bottom->mx;
    last_mv[2][1]= bottom->my;

    s->m.mb_stride=2;
    s->m.mb_x=
    s->m.mb_y= 0;
    s->m.me.skip= 0;

    /* source is the local block copy, reference is the last picture at the
       block position */
    init_ref(c, current_data, s->last_picture.data, NULL, block_w*x, block_w*y, 0);

    assert(s->m.me. stride == stride);
    assert(s->m.me.uvstride == uvstride);

    c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
    c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
    c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
    c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;

    /* search window relative to the block position: the block may leave the
       area covered by the block grid by at most 16-2 samples on each side */
    c->xmin = - x*block_w - 16+2;
    c->ymin = - y*block_w - 16+2;
    c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
    c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;

    /* limit the predictors to the search window (not every bound of every
       predictor is checked) */
    if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
    if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
    if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
    if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
    if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
    if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
    if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);

    P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
    P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);

    /* in the first row the left vector is the prediction, otherwise the median */
    if (!y) {
        c->pred_x= P_LEFT[0];
        c->pred_y= P_LEFT[1];
    } else {
        c->pred_x = P_MEDIAN[0];
        c->pred_y = P_MEDIAN[1];
    }

    score= ff_epzs_motion_search(&s->m, &mx, &my, P, 0, /*ref_index*/ 0, last_mv,
                                 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);

    assert(mx >= c->xmin);
    assert(mx <= c->xmax);
    assert(my >= c->ymin);
    assert(my <= c->ymax);

    // subpel search
    score= s->m.me.sub_motion_search(&s->m, &mx, &my, score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
    /* the value kept as inter distortion is the one from ff_get_mb_score() */
    score= ff_get_mb_score(&s->m, mx, my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
    //FIXME if mb_cmp != SSE then intra cant be compared currently and mb_penalty vs. lambda2

    /* trial-encode the inter alternative into p_buffer */
    pc= s->c;
    pc.bytestream_start=
    pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
    memcpy(p_state, s->block_state, sizeof(s->block_state));

    if(level!=s->block_max_depth)
        put_rac(&pc, &p_state[4 + s_context], 1);          /* 1: not split */
    put_rac(&pc, &p_state[1 + left->type + top->type], 0); /* 0: not intra */
    put_symbol(&pc, &p_state[128 + 32*mx_context], mx - pmx, 1);
    put_symbol(&pc, &p_state[128 + 32*my_context], my - pmy, 1);
    p_len= pc.bytestream - pc.bytestream_start;
    /* bit estimate: whole bytes written, change in outstanding bytes and the
       change of log2(range) relative to the coder state on entry */
    score += (s->lambda2*(p_len*8
              + (pc.outstanding_count - s->c.outstanding_count)*8
              + (-av_log2(pc.range) + av_log2(s->c.range))
             ))>>FF_LAMBDA_SHIFT;

    /* intra alternative: l is the rounded mean of the luma block, iscore the
       sum of squared differences from it (sum(p^2) - 2*l*sum(p) + l^2*n) */
    block_s= block_w*block_w;
    sum = pix_sum(&current_mb[0][0], stride, block_w);
    l= (sum + block_s/2)/block_s;
    iscore = pix_norm1(&current_mb[0][0], stride, block_w) - 2*l*sum + l*l*block_s;

    /* chroma means; the chroma planes do not contribute to iscore */
    block_s= block_w*block_w>>2;
    sum = pix_sum(&current_mb[1][0], uvstride, block_w>>1);
    cb= (sum + block_s/2)/block_s;
//    iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
    sum = pix_sum(&current_mb[2][0], uvstride, block_w>>1);
    cr= (sum + block_s/2)/block_s;
//    iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;

    /* trial-encode the intra alternative into i_buffer */
    ic= s->c;
    ic.bytestream_start=
    ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
    memcpy(i_state, s->block_state, sizeof(s->block_state));
    if(level!=s->block_max_depth)
        put_rac(&ic, &i_state[4 + s_context], 1);          /* 1: not split */
    put_rac(&ic, &i_state[1 + left->type + top->type], 1); /* 1: intra */
    put_symbol(&ic, &i_state[32], l-pl , 1);
    put_symbol(&ic, &i_state[64], cb-pcb, 1);
    put_symbol(&ic, &i_state[96], cr-pcr, 1);
    i_len= ic.bytestream - ic.bytestream_start;
    iscore += (s->lambda2*(i_len*8
              + (ic.outstanding_count - s->c.outstanding_count)*8
              + (-av_log2(ic.range) + av_log2(s->c.range))
             ))>>FF_LAMBDA_SHIFT;

//    assert(score==256*256*256*64-1);
    assert(iscore < 255*255*256 + s->lambda2*10);
    assert(iscore >= 0);
    assert(l>=0 && l<=255);
    assert(pl>=0 && pl<=255);

    /* scene change statistics are collected for top-level blocks only */
    if(level==0){
        int varc= iscore >> 8;
        int vard= score >> 8;
        if (vard <= 64 || vard < varc)
            c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
        else
            c->scene_change_score+= s->m.qscale;
    }

    /* split alternative: written directly into s->c; when it does not win,
       the branches below overwrite the output from pbbak on and restore the
       coder and the states from the trial copies taken before the split */
    if(level!=s->block_max_depth){
        put_rac(&s->c, &s->block_state[4 + s_context], 0); /* 0: split */
        score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
        score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
        score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead

        if(score2 < score && score2 < iscore)
            return score2;
    }

    if(iscore < score){
        /* intra wins: commit the intra trial */
        memcpy(pbbak, i_buffer, i_len);
        s->c= ic;
        s->c.bytestream_start= pbbak_start;
        s->c.bytestream= pbbak + i_len;
        set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, BLOCK_INTRA);
        memcpy(s->block_state, i_state, sizeof(s->block_state));
        return iscore;
    }else{
        /* inter wins: commit the inter trial */
        memcpy(pbbak, p_buffer, p_len);
        s->c= pc;
        s->c.bytestream_start= pbbak_start;
        s->c.bytestream= pbbak + p_len;
        set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, 0);
        memcpy(s->block_state, p_state, sizeof(s->block_state));
        return score;
    }
}
2215
2216 static void decode_q_branch(SnowContext *s, int level, int x, int y){
2217 const int w= s->b_width << s->block_max_depth;
2218 const int rem_depth= s->block_max_depth - level;
2219 const int index= (x + y*w) << rem_depth;
2220 static BlockNode null_block= { //FIXME add border maybe
2221 .color= {128,128,128},
2222 .mx= 0,
2223 .my= 0,
2224 .type= 0,
2225 .level= 0,
2226 };
2227 int trx= (x+1)<<rem_depth;
2228 BlockNode *left = x ? &s->block[index-1] : &null_block;
2229 BlockNode *top = y ? &s->block[index-w] : &null_block;
2230 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2231 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2232 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2233
2234 if(s->keyframe){
2235 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, BLOCK_INTRA);
2236 return;
2237 }
2238
2239 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
2240 int type;
2241 int l = left->color[0];
2242 int cb= left->color[1];
2243 int cr= left->color[2];
2244 int mx= mid_pred(left->mx, top->mx, tr->mx);
2245 int my= mid_pred(left->my, top->my, tr->my);
2246 int mx_context= av_log2(2*ABS(left->mx - top->mx)) + 0*av_log2(2*ABS(tr->mx - top->mx));
2247 int my_context= av_log2(2*ABS(left->my - top->my)) + 0*av_log2(2*ABS(tr->my - top->my));
2248
2249 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
2250
2251 if(type){
2252 l += get_symbol(&s->c, &s->block_state[32], 1);
2253 cb+= get_symbol(&s->c, &s->block_state[64], 1);
2254 cr+= get_symbol(&s->c, &s->block_state[96], 1);
2255 }else{
2256 mx+= get_symbol(&s->c, &s->block_state[128 + 32*mx_context], 1);
2257 my+= get_symbol(&s->c, &s->block_state[128 + 32*my_context], 1);
2258 }
2259 set_blocks(s, level, x, y, l, cb, cr, mx, my, type);
2260 }else{
2261 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
2262 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
2263 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
2264 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
2265 }
2266 }
2267
2268 static void encode_blocks(SnowContext *s){
2269 int x, y;
2270 int w= s->b_width;
2271 int h= s->b_height;
2272
2273 for(y=0; y<h; y++){
2274 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
2275 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2276 return;
2277 }
2278 for(x=0; x<w; x++){
2279 encode_q_branch(s, 0, x, y);
2280 }
2281 }
2282 }
2283
2284 static void decode_blocks(SnowContext *s){
2285 int x, y;
2286 int w= s->b_width;
2287 int h= s->b_height;
2288
2289 for(y=0; y<h; y++){
2290 for(x=0; x<w; x++){
2291 decode_q_branch(s, 0, x, y);
2292 }
2293 }
2294 }
2295
2296 static void mc_block(uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
2297 int x, y;
2298 START_TIMER
2299 for(y=0; y < b_h+5; y++){
2300 for(x=0; x < b_w; x++){
2301 int a0= src[x ];
2302 int a1= src[x + 1];
2303 int a2= src[x + 2];
2304 int a3= src[x + 3];
2305 int a4= src[x + 4];
2306 int a5= src[x + 5];
2307 // int am= 9*(a1+a2) - (a0+a3);
2308 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2309 // int am= 18*(a2+a3) - 2*(a1+a4);
2310 // int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2311 // int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;
2312
2313 // if(b_w==16) am= 8*(a1+a2);
2314
2315 if(dx<8) tmp[x]= (32*a2*( 8-dx) + am* dx + 128)>>8;
2316 else tmp[x]= ( am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
2317
2318 /* if (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) + aL* dx + 32)>>6;
2319 else if(dx< 8) tmp[x + y*stride]= ( aL*( 8-dx) + am*(dx- 4) + 32)>>6;
2320 else if(dx<12) tmp[x + y*stride]= ( am*(12-dx) + aR*(dx- 8) + 32)>>6;
2321 else tmp[x + y*stride]= ( aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/
2322 }
2323 tmp += stride;
2324 src += stride;
2325 }
2326 tmp -= (b_h+5)*stride;
2327
2328 for(y=0; y < b_h; y++){
2329 for(x=0; x < b_w; x++){
2330 int a0= tmp[x + 0*stride];
2331 int a1= tmp[x + 1*stride];
2332 int a2= tmp[x + 2*stride];
2333 int a3= tmp[x + 3*stride];
2334 int a4= tmp[x + 4*stride];
2335 int a5= tmp[x + 5*stride];
2336 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2337 // int am= 18*(a2+a3) - 2*(a1+a4);
2338 /* int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2339 int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/