20% faster predict_plane()
[libav.git] / libavcodec / snow.c
1 /*
2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18
19 #include "avcodec.h"
20 #include "common.h"
21 #include "dsputil.h"
22 #include "cabac.h"
23
24 #include "mpegvideo.h"
25
26 #undef NDEBUG
27 #include <assert.h>
28
29 #define MAX_DECOMPOSITIONS 8
30 #define MAX_PLANES 4
31 #define DWTELEM int
32 #define QROOT 8
33 #define LOSSLESS_QLOG -128
34
35 static const int8_t quant3[256]={
36 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
37 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
43 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
44 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
45 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
46 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
47 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
48 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
49 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
50 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
51 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
52 };
53 static const int8_t quant3b[256]={
54 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
55 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
57 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
60 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
61 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
62 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
63 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
64 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
65 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
66 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
68 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
69 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
70 };
71 static const int8_t quant5[256]={
72 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
73 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
74 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
75 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
76 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
77 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
78 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
79 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
80 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
81 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
82 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
83 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
84 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
85 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
86 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
87 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
88 };
89 static const int8_t quant7[256]={
90 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
91 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
92 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
93 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
94 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
95 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
96 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
97 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
98 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
99 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
100 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
101 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
102 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
103 -3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
104 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
105 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
106 };
107 static const int8_t quant9[256]={
108 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
109 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
110 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
111 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
112 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
113 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
114 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
115 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
116 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
117 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
118 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
119 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
120 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
121 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
122 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
123 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
124 };
125 static const int8_t quant11[256]={
126 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
127 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
128 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
129 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
130 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
131 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
132 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
133 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
134 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
135 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
136 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
137 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
138 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
139 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
140 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
141 -4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
142 };
143 static const int8_t quant13[256]={
144 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
145 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
146 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
147 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
148 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
149 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
150 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
151 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
152 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
153 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
154 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
155 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
156 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
157 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
158 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
159 -4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
160 };
161
162 #define OBMC_MAX 64
163 #if 0 //64*cubic
164 static const uint8_t obmc32[1024]={
165 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
166 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
167 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
168 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
169 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
170 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
171 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
172 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
173 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
174 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
175 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
176 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
177 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
178 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
179 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
180 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
181 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
182 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
183 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
184 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
185 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
186 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
187 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
188 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
189 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
190 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
191 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
192 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
193 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
194 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
195 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
196 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
197 //error:0.000022
198 };
199 static const uint8_t obmc16[256]={
200 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
201 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
202 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
203 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
204 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
205 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
206 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
207 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
208 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
209 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
210 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
211 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
212 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
213 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
214 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
215 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
216 //error:0.000033
217 };
218 #elif 1 // 64*linear
219 static const uint8_t obmc32[1024]={
220 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
221 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
222 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
223 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
224 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
225 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
226 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
227 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
228 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
229 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
230 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
231 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
232 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
233 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
234 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
235 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
236 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
237 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
238 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
239 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
240 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
241 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
242 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
243 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
244 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
245 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
246 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
247 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
248 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
249 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
250 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
251 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
252 //error:0.000020
253 };
254 static const uint8_t obmc16[256]={
255 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
256 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
257 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
258 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
259 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
260 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
261 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
262 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
263 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
264 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
265 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
266 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
267 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
268 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
269 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
270 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
271 //error:0.000015
272 };
273 #else //64*cos
274 static const uint8_t obmc32[1024]={
275 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
276 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
277 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
278 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
279 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
280 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
281 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
282 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
283 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
284 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
285 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
286 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
287 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
288 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
289 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
290 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
291 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
292 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
293 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
294 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
295 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
296 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
297 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
298 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
299 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
300 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
301 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
302 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
303 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
304 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
305 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
306 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
307 //error:0.000022
308 };
309 static const uint8_t obmc16[256]={
310 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
311 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
312 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
313 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
314 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
315 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
316 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
317 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
318 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
319 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
320 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
321 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
322 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
323 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
324 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
325 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
326 //error:0.000022
327 };
328 #endif
329
330 //linear *64
331 static const uint8_t obmc8[64]={
332 1, 3, 5, 7, 7, 5, 3, 1,
333 3, 9,15,21,21,15, 9, 3,
334 5,15,25,35,35,25,15, 5,
335 7,21,35,49,49,35,21, 7,
336 7,21,35,49,49,35,21, 7,
337 5,15,25,35,35,25,15, 5,
338 3, 9,15,21,21,15, 9, 3,
339 1, 3, 5, 7, 7, 5, 3, 1,
340 //error:0.000000
341 };
342
343 //linear *64
344 static const uint8_t obmc4[16]={
345 4,12,12, 4,
346 12,36,36,12,
347 12,36,36,12,
348 4,12,12, 4,
349 //error:0.000000
350 };
351
352 static const uint8_t *obmc_tab[4]={
353 obmc32, obmc16, obmc8, obmc4
354 };
355
356 typedef struct BlockNode{
357 int16_t mx;
358 int16_t my;
359 uint8_t color[3];
360 uint8_t type;
361 //#define TYPE_SPLIT 1
362 #define BLOCK_INTRA 1
363 //#define TYPE_NOCOLOR 4
364 uint8_t level; //FIXME merge into type?
365 }BlockNode;
366
367 #define LOG2_MB_SIZE 4
368 #define MB_SIZE (1<<LOG2_MB_SIZE)
369
370 typedef struct SubBand{
371 int level;
372 int stride;
373 int width;
374 int height;
375 int qlog; ///< log(qscale)/log[2^(1/6)]
376 DWTELEM *buf;
377 int16_t *x;
378 DWTELEM *coeff;
379 struct SubBand *parent;
380 uint8_t state[/*7*2*/ 7 + 512][32];
381 }SubBand;
382
383 typedef struct Plane{
384 int width;
385 int height;
386 SubBand band[MAX_DECOMPOSITIONS][4];
387 }Plane;
388
389 typedef struct SnowContext{
390 // MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
391
392 AVCodecContext *avctx;
393 CABACContext c;
394 DSPContext dsp;
395 AVFrame input_picture;
396 AVFrame current_picture;
397 AVFrame last_picture;
398 AVFrame mconly_picture;
399 // uint8_t q_context[16];
400 uint8_t header_state[32];
401 uint8_t block_state[128 + 32*128];
402 int keyframe;
403 int always_reset;
404 int version;
405 int spatial_decomposition_type;
406 int temporal_decomposition_type;
407 int spatial_decomposition_count;
408 int temporal_decomposition_count;
409 DWTELEM *spatial_dwt_buffer;
410 DWTELEM *pred_buffer;
411 int colorspace_type;
412 int chroma_h_shift;
413 int chroma_v_shift;
414 int spatial_scalability;
415 int qlog;
416 int lambda;
417 int lambda2;
418 int mv_scale;
419 int qbias;
420 #define QBIAS_SHIFT 3
421 int b_width;
422 int b_height;
423 int block_max_depth;
424 Plane plane[MAX_PLANES];
425 BlockNode *block;
426
427 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
428 }SnowContext;
429
430 #define QEXPSHIFT 7 //FIXME try to change this to 0
431 static const uint8_t qexp[8]={
432 128, 140, 152, 166, 181, 197, 215, 235
433 // 64, 70, 76, 83, 91, 99, 108, 117
434 // 32, 35, 38, 41, 45, 49, 54, 59
435 // 16, 17, 19, 21, 23, 25, 27, 29
436 // 8, 9, 10, 10, 11, 12, 13, 15
437 };
438
439 static inline int mirror(int v, int m){
440 if (v<0) return -v;
441 else if(v>m) return 2*m-v;
442 else return v;
443 }
444
445 static inline void put_symbol(CABACContext *c, uint8_t *state, int v, int is_signed){
446 int i;
447
448 if(v){
449 const int a= ABS(v);
450 const int e= av_log2(a);
451 #if 1
452 const int el= FFMIN(e, 10);
453 put_cabac(c, state+0, 0);
454
455 for(i=0; i<el; i++){
456 put_cabac(c, state+1+i, 1); //1..10
457 }
458 for(; i<e; i++){
459 put_cabac(c, state+1+9, 1); //1..10
460 }
461 put_cabac(c, state+1+FFMIN(i,9), 0);
462
463 for(i=e-1; i>=el; i--){
464 put_cabac(c, state+22+9, (a>>i)&1); //22..31
465 }
466 for(; i>=0; i--){
467 put_cabac(c, state+22+i, (a>>i)&1); //22..31
468 }
469
470 if(is_signed)
471 put_cabac(c, state+11 + el, v < 0); //11..21
472 #else
473
474 put_cabac(c, state+0, 0);
475 if(e<=9){
476 for(i=0; i<e; i++){
477 put_cabac(c, state+1+i, 1); //1..10
478 }
479 put_cabac(c, state+1+i, 0);
480
481 for(i=e-1; i>=0; i--){
482 put_cabac(c, state+22+i, (a>>i)&1); //22..31
483 }
484
485 if(is_signed)
486 put_cabac(c, state+11 + e, v < 0); //11..21
487 }else{
488 for(i=0; i<e; i++){
489 put_cabac(c, state+1+FFMIN(i,9), 1); //1..10
490 }
491 put_cabac(c, state+1+FFMIN(i,9), 0);
492
493 for(i=e-1; i>=0; i--){
494 put_cabac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
495 }
496
497 if(is_signed)
498 put_cabac(c, state+11 + FFMIN(e,10), v < 0); //11..21
499 }
500 #endif
501 }else{
502 put_cabac(c, state+0, 1);
503 }
504 }
505
506 static inline int get_symbol(CABACContext *c, uint8_t *state, int is_signed){
507 if(get_cabac(c, state+0))
508 return 0;
509 else{
510 int i, e, a, el;
511 //FIXME try to merge loops with FFMIN() maybe they are equally fast and they are surly cuter
512 for(e=0; e<10; e++){
513 if(get_cabac(c, state + 1 + e)==0) // 1..10
514 break;
515 }
516 el= e;
517
518 if(e==10){
519 while(get_cabac(c, state + 1 + 9)) //10
520 e++;
521 }
522 a= 1;
523 for(i=e-1; i>=el; i--){
524 a += a + get_cabac(c, state+22+9); //31
525 }
526 for(; i>=0; i--){
527 a += a + get_cabac(c, state+22+i); //22..31
528 }
529
530 if(is_signed && get_cabac(c, state+11 + el)) //11..21
531 return -a;
532 else
533 return a;
534 }
535 }
536
537 static inline void put_symbol2(CABACContext *c, uint8_t *state, int v, int log2){
538 int i;
539 int r= log2>=0 ? 1<<log2 : 1;
540
541 assert(v>=0);
542 assert(log2>=-4);
543
544 while(v >= r){
545 put_cabac(c, state+4+log2, 1);
546 v -= r;
547 log2++;
548 if(log2>0) r+=r;
549 }
550 put_cabac(c, state+4+log2, 0);
551
552 for(i=log2-1; i>=0; i--){
553 put_cabac(c, state+31-i, (v>>i)&1);
554 }
555 }
556
557 static inline int get_symbol2(CABACContext *c, uint8_t *state, int log2){
558 int i;
559 int r= log2>=0 ? 1<<log2 : 1;
560 int v=0;
561
562 assert(log2>=-4);
563
564 while(get_cabac(c, state+4+log2)){
565 v+= r;
566 log2++;
567 if(log2>0) r+=r;
568 }
569
570 for(i=log2-1; i>=0; i--){
571 v+= get_cabac(c, state+31-i)<<i;
572 }
573
574 return v;
575 }
576
577 static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
578 const int mirror_left= !highpass;
579 const int mirror_right= (width&1) ^ highpass;
580 const int w= (width>>1) - 1 + (highpass & width);
581 int i;
582
583 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
584 if(mirror_left){
585 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
586 dst += dst_step;
587 src += src_step;
588 }
589
590 for(i=0; i<w; i++){
591 dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse);
592 }
593
594 if(mirror_right){
595 dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse);
596 }
597 }
598
599 static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
600 const int mirror_left= !highpass;
601 const int mirror_right= (width&1) ^ highpass;
602 const int w= (width>>1) - 1 + (highpass & width);
603 int i;
604
605 if(mirror_left){
606 int r= 3*2*ref[0];
607 r += r>>4;
608 r += r>>8;
609 dst[0] = LIFT(src[0], ((r+add)>>shift), inverse);
610 dst += dst_step;
611 src += src_step;
612 }
613
614 for(i=0; i<w; i++){
615 int r= 3*(ref[i*ref_step] + ref[(i+1)*ref_step]);
616 r += r>>4;
617 r += r>>8;
618 dst[i*dst_step] = LIFT(src[i*src_step], ((r+add)>>shift), inverse);
619 }
620
621 if(mirror_right){
622 int r= 3*2*ref[w*ref_step];
623 r += r>>4;
624 r += r>>8;
625 dst[w*dst_step] = LIFT(src[w*src_step], ((r+add)>>shift), inverse);
626 }
627 }
628
629
630 static void inplace_lift(int *dst, int width, int *coeffs, int n, int shift, int start, int inverse){
631 int x, i;
632
633 for(x=start; x<width; x+=2){
634 int64_t sum=0;
635
636 for(i=0; i<n; i++){
637 int x2= x + 2*i - n + 1;
638 if (x2< 0) x2= -x2;
639 else if(x2>=width) x2= 2*width-x2-2;
640 sum += coeffs[i]*(int64_t)dst[x2];
641 }
642 if(inverse) dst[x] -= (sum + (1<<shift)/2)>>shift;
643 else dst[x] += (sum + (1<<shift)/2)>>shift;
644 }
645 }
646
647 static void inplace_liftV(int *dst, int width, int height, int stride, int *coeffs, int n, int shift, int start, int inverse){
648 int x, y, i;
649 for(y=start; y<height; y+=2){
650 for(x=0; x<width; x++){
651 int64_t sum=0;
652
653 for(i=0; i<n; i++){
654 int y2= y + 2*i - n + 1;
655 if (y2< 0) y2= -y2;
656 else if(y2>=height) y2= 2*height-y2-2;
657 sum += coeffs[i]*(int64_t)dst[x + y2*stride];
658 }
659 if(inverse) dst[x + y*stride] -= (sum + (1<<shift)/2)>>shift;
660 else dst[x + y*stride] += (sum + (1<<shift)/2)>>shift;
661 }
662 }
663 }
664
665 #define SCALEX 1
666 #define LX0 0
667 #define LX1 1
668
669 #if 0 // more accurate 9/7
670 #define N1 2
671 #define SHIFT1 14
672 #define COEFFS1 (int[]){-25987,-25987}
673 #define N2 2
674 #define SHIFT2 19
675 #define COEFFS2 (int[]){-27777,-27777}
676 #define N3 2
677 #define SHIFT3 15
678 #define COEFFS3 (int[]){28931,28931}
679 #define N4 2
680 #define SHIFT4 15
681 #define COEFFS4 (int[]){14533,14533}
682 #elif 1 // 13/7 CRF
683 #define N1 4
684 #define SHIFT1 4
685 #define COEFFS1 (int[]){1,-9,-9,1}
686 #define N2 4
687 #define SHIFT2 4
688 #define COEFFS2 (int[]){-1,5,5,-1}
689 #define N3 0
690 #define SHIFT3 1
691 #define COEFFS3 NULL
692 #define N4 0
693 #define SHIFT4 1
694 #define COEFFS4 NULL
695 #elif 1 // 3/5
696 #define LX0 1
697 #define LX1 0
698 #define SCALEX 0.5
699 #define N1 2
700 #define SHIFT1 1
701 #define COEFFS1 (int[]){1,1}
702 #define N2 2
703 #define SHIFT2 2
704 #define COEFFS2 (int[]){-1,-1}
705 #define N3 0
706 #define SHIFT3 0
707 #define COEFFS3 NULL
708 #define N4 0
709 #define SHIFT4 0
710 #define COEFFS4 NULL
711 #elif 1 // 11/5
712 #define N1 0
713 #define SHIFT1 1
714 #define COEFFS1 NULL
715 #define N2 2
716 #define SHIFT2 2
717 #define COEFFS2 (int[]){-1,-1}
718 #define N3 2
719 #define SHIFT3 0
720 #define COEFFS3 (int[]){-1,-1}
721 #define N4 4
722 #define SHIFT4 7
723 #define COEFFS4 (int[]){-5,29,29,-5}
724 #define SCALEX 4
725 #elif 1 // 9/7 CDF
726 #define N1 2
727 #define SHIFT1 7
728 #define COEFFS1 (int[]){-203,-203}
729 #define N2 2
730 #define SHIFT2 12
731 #define COEFFS2 (int[]){-217,-217}
732 #define N3 2
733 #define SHIFT3 7
734 #define COEFFS3 (int[]){113,113}
735 #define N4 2
736 #define SHIFT4 9
737 #define COEFFS4 (int[]){227,227}
738 #define SCALEX 1
739 #elif 1 // 7/5 CDF
740 #define N1 0
741 #define SHIFT1 1
742 #define COEFFS1 NULL
743 #define N2 2
744 #define SHIFT2 2
745 #define COEFFS2 (int[]){-1,-1}
746 #define N3 2
747 #define SHIFT3 0
748 #define COEFFS3 (int[]){-1,-1}
749 #define N4 2
750 #define SHIFT4 4
751 #define COEFFS4 (int[]){3,3}
752 #elif 1 // 9/7 MN
753 #define N1 4
754 #define SHIFT1 4
755 #define COEFFS1 (int[]){1,-9,-9,1}
756 #define N2 2
757 #define SHIFT2 2
758 #define COEFFS2 (int[]){1,1}
759 #define N3 0
760 #define SHIFT3 1
761 #define COEFFS3 NULL
762 #define N4 0
763 #define SHIFT4 1
764 #define COEFFS4 NULL
765 #else // 13/7 CRF
766 #define N1 4
767 #define SHIFT1 4
768 #define COEFFS1 (int[]){1,-9,-9,1}
769 #define N2 4
770 #define SHIFT2 4
771 #define COEFFS2 (int[]){-1,5,5,-1}
772 #define N3 0
773 #define SHIFT3 1
774 #define COEFFS3 NULL
775 #define N4 0
776 #define SHIFT4 1
777 #define COEFFS4 NULL
778 #endif
779 static void horizontal_decomposeX(int *b, int width){
780 int temp[width];
781 const int width2= width>>1;
782 const int w2= (width+1)>>1;
783 int A1,A2,A3,A4, x;
784
785 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 0);
786 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 0);
787 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 0);
788 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 0);
789
790 for(x=0; x<width2; x++){
791 temp[x ]= b[2*x ];
792 temp[x+w2]= b[2*x + 1];
793 }
794 if(width&1)
795 temp[x ]= b[2*x ];
796 memcpy(b, temp, width*sizeof(int));
797 }
798
799 static void horizontal_composeX(int *b, int width){
800 int temp[width];
801 const int width2= width>>1;
802 int A1,A2,A3,A4, x;
803 const int w2= (width+1)>>1;
804
805 memcpy(temp, b, width*sizeof(int));
806 for(x=0; x<width2; x++){
807 b[2*x ]= temp[x ];
808 b[2*x + 1]= temp[x+w2];
809 }
810 if(width&1)
811 b[2*x ]= temp[x ];
812
813 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 1);
814 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 1);
815 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 1);
816 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 1);
817 }
818
819 static void spatial_decomposeX(int *buffer, int width, int height, int stride){
820 int x, y;
821
822 for(y=0; y<height; y++){
823 for(x=0; x<width; x++){
824 buffer[y*stride + x] *= SCALEX;
825 }
826 }
827
828 for(y=0; y<height; y++){
829 horizontal_decomposeX(buffer + y*stride, width);
830 }
831
832 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 0);
833 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0);
834 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0);
835 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0);
836 }
837
838 static void spatial_composeX(int *buffer, int width, int height, int stride){
839 int x, y;
840
841 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1);
842 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1);
843 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1);
844 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 1);
845
846 for(y=0; y<height; y++){
847 horizontal_composeX(buffer + y*stride, width);
848 }
849
850 for(y=0; y<height; y++){
851 for(x=0; x<width; x++){
852 buffer[y*stride + x] /= SCALEX;
853 }
854 }
855 }
856
857 static void horizontal_decompose53i(int *b, int width){
858 int temp[width];
859 const int width2= width>>1;
860 int A1,A2,A3,A4, x;
861 const int w2= (width+1)>>1;
862
863 for(x=0; x<width2; x++){
864 temp[x ]= b[2*x ];
865 temp[x+w2]= b[2*x + 1];
866 }
867 if(width&1)
868 temp[x ]= b[2*x ];
869 #if 0
870 A2= temp[1 ];
871 A4= temp[0 ];
872 A1= temp[0+width2];
873 A1 -= (A2 + A4)>>1;
874 A4 += (A1 + 1)>>1;
875 b[0+width2] = A1;
876 b[0 ] = A4;
877 for(x=1; x+1<width2; x+=2){
878 A3= temp[x+width2];
879 A4= temp[x+1 ];
880 A3 -= (A2 + A4)>>1;
881 A2 += (A1 + A3 + 2)>>2;
882 b[x+width2] = A3;
883 b[x ] = A2;
884
885 A1= temp[x+1+width2];
886 A2= temp[x+2 ];
887 A1 -= (A2 + A4)>>1;
888 A4 += (A1 + A3 + 2)>>2;
889 b[x+1+width2] = A1;
890 b[x+1 ] = A4;
891 }
892 A3= temp[width-1];
893 A3 -= A2;
894 A2 += (A1 + A3 + 2)>>2;
895 b[width -1] = A3;
896 b[width2-1] = A2;
897 #else
898 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
899 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0);
900 #endif
901 }
902
903 static void vertical_decompose53iH0(int *b0, int *b1, int *b2, int width){
904 int i;
905
906 for(i=0; i<width; i++){
907 b1[i] -= (b0[i] + b2[i])>>1;
908 }
909 }
910
911 static void vertical_decompose53iL0(int *b0, int *b1, int *b2, int width){
912 int i;
913
914 for(i=0; i<width; i++){
915 b1[i] += (b0[i] + b2[i] + 2)>>2;
916 }
917 }
918
919 static void spatial_decompose53i(int *buffer, int width, int height, int stride){
920 int y;
921 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
922 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride;
923
924 for(y=-2; y<height; y+=2){
925 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
926 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
927
928 {START_TIMER
929 if(b1 <= b3) horizontal_decompose53i(b2, width);
930 if(y+2 < height) horizontal_decompose53i(b3, width);
931 STOP_TIMER("horizontal_decompose53i")}
932
933 {START_TIMER
934 if(b1 <= b3) vertical_decompose53iH0(b1, b2, b3, width);
935 if(b0 <= b2) vertical_decompose53iL0(b0, b1, b2, width);
936 STOP_TIMER("vertical_decompose53i*")}
937
938 b0=b2;
939 b1=b3;
940 }
941 }
942
943 #define lift5 lift
944 #if 1
945 #define W_AM 3
946 #define W_AO 0
947 #define W_AS 1
948
949 #define W_BM 1
950 #define W_BO 8
951 #define W_BS 4
952
953 #undef lift5
954 #define W_CM 9999
955 #define W_CO 2
956 #define W_CS 2
957
958 #define W_DM 15
959 #define W_DO 16
960 #define W_DS 5
961 #elif 0
962 #define W_AM 55
963 #define W_AO 16
964 #define W_AS 5
965
966 #define W_BM 3
967 #define W_BO 32
968 #define W_BS 6
969
970 #define W_CM 127
971 #define W_CO 64
972 #define W_CS 7
973
974 #define W_DM 7
975 #define W_DO 8
976 #define W_DS 4
977 #elif 0
978 #define W_AM 97
979 #define W_AO 32
980 #define W_AS 6
981
982 #define W_BM 63
983 #define W_BO 512
984 #define W_BS 10
985
986 #define W_CM 13
987 #define W_CO 8
988 #define W_CS 4
989
990 #define W_DM 15
991 #define W_DO 16
992 #define W_DS 5
993
994 #else
995
996 #define W_AM 203
997 #define W_AO 64
998 #define W_AS 7
999
1000 #define W_BM 217
1001 #define W_BO 2048
1002 #define W_BS 12
1003
1004 #define W_CM 113
1005 #define W_CO 64
1006 #define W_CS 7
1007
1008 #define W_DM 227
1009 #define W_DO 128
1010 #define W_DS 9
1011 #endif
1012 static void horizontal_decompose97i(int *b, int width){
1013 int temp[width];
1014 const int w2= (width+1)>>1;
1015
1016 lift (temp+w2, b +1, b , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0);
1017 lift (temp , b , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0);
1018 lift5(b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
1019 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
1020 }
1021
1022
1023 static void vertical_decompose97iH0(int *b0, int *b1, int *b2, int width){
1024 int i;
1025
1026 for(i=0; i<width; i++){
1027 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1028 }
1029 }
1030
1031 static void vertical_decompose97iH1(int *b0, int *b1, int *b2, int width){
1032 int i;
1033
1034 for(i=0; i<width; i++){
1035 #ifdef lift5
1036 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1037 #else
1038 int r= 3*(b0[i] + b2[i]);
1039 r+= r>>4;
1040 r+= r>>8;
1041 b1[i] += (r+W_CO)>>W_CS;
1042 #endif
1043 }
1044 }
1045
1046 static void vertical_decompose97iL0(int *b0, int *b1, int *b2, int width){
1047 int i;
1048
1049 for(i=0; i<width; i++){
1050 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1051 }
1052 }
1053
1054 static void vertical_decompose97iL1(int *b0, int *b1, int *b2, int width){
1055 int i;
1056
1057 for(i=0; i<width; i++){
1058 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1059 }
1060 }
1061
1062 static void spatial_decompose97i(int *buffer, int width, int height, int stride){
1063 int y;
1064 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
1065 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride;
1066 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
1067 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
1068
1069 for(y=-4; y<height; y+=2){
1070 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1071 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1072
1073 {START_TIMER
1074 if(b3 <= b5) horizontal_decompose97i(b4, width);
1075 if(y+4 < height) horizontal_decompose97i(b5, width);
1076 if(width>400){
1077 STOP_TIMER("horizontal_decompose97i")
1078 }}
1079
1080 {START_TIMER
1081 if(b3 <= b5) vertical_decompose97iH0(b3, b4, b5, width);
1082 if(b2 <= b4) vertical_decompose97iL0(b2, b3, b4, width);
1083 if(b1 <= b3) vertical_decompose97iH1(b1, b2, b3, width);
1084 if(b0 <= b2) vertical_decompose97iL1(b0, b1, b2, width);
1085
1086 if(width>400){
1087 STOP_TIMER("vertical_decompose97i")
1088 }}
1089
1090 b0=b2;
1091 b1=b3;
1092 b2=b4;
1093 b3=b5;
1094 }
1095 }
1096
1097 void ff_spatial_dwt(int *buffer, int width, int height, int stride, int type, int decomposition_count){
1098 int level;
1099
1100 for(level=0; level<decomposition_count; level++){
1101 switch(type){
1102 case 0: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1103 case 1: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1104 case 2: spatial_decomposeX (buffer, width>>level, height>>level, stride<<level); break;
1105 }
1106 }
1107 }
1108
1109 static void horizontal_compose53i(int *b, int width){
1110 int temp[width];
1111 const int width2= width>>1;
1112 const int w2= (width+1)>>1;
1113 int A1,A2,A3,A4, x;
1114
1115 #if 0
1116 A2= temp[1 ];
1117 A4= temp[0 ];
1118 A1= temp[0+width2];
1119 A1 -= (A2 + A4)>>1;
1120 A4 += (A1 + 1)>>1;
1121 b[0+width2] = A1;
1122 b[0 ] = A4;
1123 for(x=1; x+1<width2; x+=2){
1124 A3= temp[x+width2];
1125 A4= temp[x+1 ];
1126 A3 -= (A2 + A4)>>1;
1127 A2 += (A1 + A3 + 2)>>2;
1128 b[x+width2] = A3;
1129 b[x ] = A2;
1130
1131 A1= temp[x+1+width2];
1132 A2= temp[x+2 ];
1133 A1 -= (A2 + A4)>>1;
1134 A4 += (A1 + A3 + 2)>>2;
1135 b[x+1+width2] = A1;
1136 b[x+1 ] = A4;
1137 }
1138 A3= temp[width-1];
1139 A3 -= A2;
1140 A2 += (A1 + A3 + 2)>>2;
1141 b[width -1] = A3;
1142 b[width2-1] = A2;
1143 #else
1144 lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1);
1145 lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1146 #endif
1147 for(x=0; x<width2; x++){
1148 b[2*x ]= temp[x ];
1149 b[2*x + 1]= temp[x+w2];
1150 }
1151 if(width&1)
1152 b[2*x ]= temp[x ];
1153 }
1154
1155 static void vertical_compose53iH0(int *b0, int *b1, int *b2, int width){
1156 int i;
1157
1158 for(i=0; i<width; i++){
1159 b1[i] += (b0[i] + b2[i])>>1;
1160 }
1161 }
1162
1163 static void vertical_compose53iL0(int *b0, int *b1, int *b2, int width){
1164 int i;
1165
1166 for(i=0; i<width; i++){
1167 b1[i] -= (b0[i] + b2[i] + 2)>>2;
1168 }
1169 }
1170
1171 static void spatial_compose53i(int *buffer, int width, int height, int stride){
1172 int y;
1173 DWTELEM *b0= buffer + mirror(-1-1, height-1)*stride;
1174 DWTELEM *b1= buffer + mirror(-1 , height-1)*stride;
1175
1176 for(y=-1; y<=height; y+=2){
1177 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1178 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1179
1180 {START_TIMER
1181 if(b1 <= b3) vertical_compose53iL0(b1, b2, b3, width);
1182 if(b0 <= b2) vertical_compose53iH0(b0, b1, b2, width);
1183 STOP_TIMER("vertical_compose53i*")}
1184
1185 {START_TIMER
1186 if(y-1 >= 0) horizontal_compose53i(b0, width);
1187 if(b0 <= b2) horizontal_compose53i(b1, width);
1188 STOP_TIMER("horizontal_compose53i")}
1189
1190 b0=b2;
1191 b1=b3;
1192 }
1193 }
1194
1195
1196 static void horizontal_compose97i(int *b, int width){
1197 int temp[width];
1198 const int w2= (width+1)>>1;
1199
1200 lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
1201 lift5(temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
1202 lift (b , temp , temp+w2, 2, 1, 1, width, -W_BM, W_BO, W_BS, 0, 1);
1203 lift (b+1 , temp+w2, b , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1);
1204 }
1205
1206 static void vertical_compose97iH0(int *b0, int *b1, int *b2, int width){
1207 int i;
1208
1209 for(i=0; i<width; i++){
1210 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1211 }
1212 }
1213
1214 static void vertical_compose97iH1(int *b0, int *b1, int *b2, int width){
1215 int i;
1216
1217 for(i=0; i<width; i++){
1218 #ifdef lift5
1219 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1220 #else
1221 int r= 3*(b0[i] + b2[i]);
1222 r+= r>>4;
1223 r+= r>>8;
1224 b1[i] -= (r+W_CO)>>W_CS;
1225 #endif
1226 }
1227 }
1228
1229 static void vertical_compose97iL0(int *b0, int *b1, int *b2, int width){
1230 int i;
1231
1232 for(i=0; i<width; i++){
1233 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1234 }
1235 }
1236
1237 static void vertical_compose97iL1(int *b0, int *b1, int *b2, int width){
1238 int i;
1239
1240 for(i=0; i<width; i++){
1241 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1242 }
1243 }
1244
1245 static void spatial_compose97i(int *buffer, int width, int height, int stride){
1246 int y;
1247 DWTELEM *b0= buffer + mirror(-3-1, height-1)*stride;
1248 DWTELEM *b1= buffer + mirror(-3 , height-1)*stride;
1249 DWTELEM *b2= buffer + mirror(-3+1, height-1)*stride;
1250 DWTELEM *b3= buffer + mirror(-3+2, height-1)*stride;
1251
1252 for(y=-3; y<=height; y+=2){
1253 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1254 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1255
1256 if(stride == width && y+4 < height && 0){
1257 int x;
1258 for(x=0; x<width/2; x++)
1259 b5[x] += 64*2;
1260 for(; x<width; x++)
1261 b5[x] += 169*2;
1262 }
1263
1264 {START_TIMER
1265 if(b3 <= b5) vertical_compose97iL1(b3, b4, b5, width);
1266 if(b2 <= b4) vertical_compose97iH1(b2, b3, b4, width);
1267 if(b1 <= b3) vertical_compose97iL0(b1, b2, b3, width);
1268 if(b0 <= b2) vertical_compose97iH0(b0, b1, b2, width);
1269 if(width>400){
1270 STOP_TIMER("vertical_compose97i")}}
1271
1272 {START_TIMER
1273 if(y-1>= 0) horizontal_compose97i(b0, width);
1274 if(b0 <= b2) horizontal_compose97i(b1, width);
1275 if(width>400 && b0 <= b2){
1276 STOP_TIMER("horizontal_compose97i")}}
1277
1278 b0=b2;
1279 b1=b3;
1280 b2=b4;
1281 b3=b5;
1282 }
1283 }
1284
1285 void ff_spatial_idwt(int *buffer, int width, int height, int stride, int type, int decomposition_count){
1286 int level;
1287
1288 for(level=decomposition_count-1; level>=0; level--){
1289 switch(type){
1290 case 0: spatial_compose97i(buffer, width>>level, height>>level, stride<<level); break;
1291 case 1: spatial_compose53i(buffer, width>>level, height>>level, stride<<level); break;
1292 case 2: spatial_composeX (buffer, width>>level, height>>level, stride<<level); break;
1293 }
1294 }
1295 }
1296
1297 static void encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1298 const int w= b->width;
1299 const int h= b->height;
1300 int x, y;
1301
1302 if(1){
1303 int run=0;
1304 int runs[w*h];
1305 int run_index=0;
1306
1307 for(y=0; y<h; y++){
1308 for(x=0; x<w; x++){
1309 int v, p=0;
1310 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1311 v= src[x + y*stride];
1312
1313 if(y){
1314 t= src[x + (y-1)*stride];
1315 if(x){
1316 lt= src[x - 1 + (y-1)*stride];
1317 }
1318 if(x + 1 < w){
1319 rt= src[x + 1 + (y-1)*stride];
1320 }
1321 }
1322 if(x){
1323 l= src[x - 1 + y*stride];
1324 /*if(x > 1){
1325 if(orientation==1) ll= src[y + (x-2)*stride];
1326 else ll= src[x - 2 + y*stride];
1327 }*/
1328 }
1329 if(parent){
1330 int px= x>>1;
1331 int py= y>>1;
1332 if(px<b->parent->width && py<b->parent->height)
1333 p= parent[px + py*2*stride];
1334 }
1335 if(!(/*ll|*/l|lt|t|rt|p)){
1336 if(v){
1337 runs[run_index++]= run;
1338 run=0;
1339 }else{
1340 run++;
1341 }
1342 }
1343 }
1344 }
1345 runs[run_index++]= run;
1346 run_index=0;
1347 run= runs[run_index++];
1348
1349 put_symbol2(&s->c, b->state[1], run, 3);
1350
1351 for(y=0; y<h; y++){
1352 for(x=0; x<w; x++){
1353 int v, p=0;
1354 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1355 v= src[x + y*stride];
1356
1357 if(y){
1358 t= src[x + (y-1)*stride];
1359 if(x){
1360 lt= src[x - 1 + (y-1)*stride];
1361 }
1362 if(x + 1 < w){
1363 rt= src[x + 1 + (y-1)*stride];
1364 }
1365 }
1366 if(x){
1367 l= src[x - 1 + y*stride];
1368 /*if(x > 1){
1369 if(orientation==1) ll= src[y + (x-2)*stride];
1370 else ll= src[x - 2 + y*stride];
1371 }*/
1372 }
1373 if(parent){
1374 int px= x>>1;
1375 int py= y>>1;
1376 if(px<b->parent->width && py<b->parent->height)
1377 p= parent[px + py*2*stride];
1378 }
1379 if(/*ll|*/l|lt|t|rt|p){
1380 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1381
1382 put_cabac(&s->c, &b->state[0][context], !!v);
1383 }else{
1384 if(!run){
1385 run= runs[run_index++];
1386
1387 put_symbol2(&s->c, b->state[1], run, 3);
1388 assert(v);
1389 }else{
1390 run--;
1391 assert(!v);
1392 }
1393 }
1394 if(v){
1395 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1396
1397 put_symbol2(&s->c, b->state[context + 2], ABS(v)-1, context-4);
1398 put_cabac(&s->c, &b->state[0][16 + 1 + 3 + quant3b[l&0xFF] + 3*quant3b[t&0xFF]], v<0);
1399 }
1400 }
1401 }
1402 }
1403 }
1404
1405 static void encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1406 // encode_subband_qtree(s, b, src, parent, stride, orientation);
1407 // encode_subband_z0run(s, b, src, parent, stride, orientation);
1408 encode_subband_c0run(s, b, src, parent, stride, orientation);
1409 // encode_subband_dzr(s, b, src, parent, stride, orientation);
1410 }
1411
1412 static inline void decode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1413 const int w= b->width;
1414 const int h= b->height;
1415 int x,y;
1416 const int qlog= clip(s->qlog + b->qlog, 0, 128);
1417 int qmul= qexp[qlog&7]<<(qlog>>3);
1418 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1419
1420 START_TIMER
1421
1422 if(b->buf == s->spatial_dwt_buffer || s->qlog == LOSSLESS_QLOG){
1423 qadd= 0;
1424 qmul= 1<<QEXPSHIFT;
1425 }
1426
1427 if(1){
1428 int run;
1429 int index=0;
1430 int prev_index=-1;
1431 int prev2_index=0;
1432 int parent_index= 0;
1433 int prev_parent_index= 0;
1434
1435 for(y=0; y<b->height; y++)
1436 memset(&src[y*stride], 0, b->width*sizeof(DWTELEM));
1437
1438 run= get_symbol2(&s->c, b->state[1], 3);
1439 for(y=0; y<h; y++){
1440 int v=0;
1441 int lt=0, t=0, rt=0;
1442
1443 if(y && b->x[prev_index] == 0){
1444 rt= b->coeff[prev_index];
1445 }
1446 for(x=0; x<w; x++){
1447 int p=0;
1448 const int l= v;
1449
1450 lt= t; t= rt;
1451
1452 if(y){
1453 if(b->x[prev_index] <= x)
1454 prev_index++;
1455 if(b->x[prev_index] == x + 1)
1456 rt= b->coeff[prev_index];
1457 else
1458 rt=0;
1459 }
1460 if(parent){
1461 if(x>>1 > b->parent->x[parent_index]){
1462 parent_index++;
1463 }
1464 if(x>>1 == b->parent->x[parent_index]){
1465 p= b->parent->coeff[parent_index];
1466 }
1467 }
1468 if(/*ll|*/l|lt|t|rt|p){
1469 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1470
1471 v=get_cabac(&s->c, &b->state[0][context]);
1472 }else{
1473 if(!run){
1474 run= get_symbol2(&s->c, b->state[1], 3);
1475 v=1;
1476 }else{
1477 run--;
1478 v=0;
1479
1480 if(y && parent){
1481 int max_run;
1482
1483 max_run= FFMIN(run, b->x[prev_index] - x - 2);
1484 max_run= FFMIN(max_run, 2*b->parent->x[parent_index] - x - 1);
1485 x+= max_run;
1486 run-= max_run;
1487 }
1488 }
1489 }
1490 if(v){
1491 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1492 v= get_symbol2(&s->c, b->state[context + 2], context-4) + 1;
1493 if(get_cabac(&s->c, &b->state[0][16 + 1 + 3 + quant3b[l&0xFF] + 3*quant3b[t&0xFF]])){
1494 src[x + y*stride]=-(( v*qmul + qadd)>>(QEXPSHIFT));
1495 v= -v;
1496 }else{
1497 src[x + y*stride]= (( v*qmul + qadd)>>(QEXPSHIFT));
1498 }
1499 b->x[index]=x; //FIXME interleave x/coeff
1500 b->coeff[index++]= v;
1501 }
1502 }
1503 b->x[index++]= w+1; //end marker
1504 prev_index= prev2_index;
1505 prev2_index= index;
1506
1507 if(parent){
1508 while(b->parent->x[parent_index] != b->parent->width+1)
1509 parent_index++;
1510 parent_index++;
1511 if(y&1){
1512 prev_parent_index= parent_index;
1513 }else{
1514 parent_index= prev_parent_index;
1515 }
1516 }
1517 }
1518 b->x[index++]= w+1; //end marker
1519 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
1520 STOP_TIMER("decode_subband")
1521 }
1522
1523 return;
1524 }
1525 }
1526
1527 static void reset_contexts(SnowContext *s){
1528 int plane_index, level, orientation;
1529
1530 for(plane_index=0; plane_index<3; plane_index++){
1531 for(level=0; level<s->spatial_decomposition_count; level++){
1532 for(orientation=level ? 1:0; orientation<4; orientation++){
1533 memset(s->plane[plane_index].band[level][orientation].state, 0, sizeof(s->plane[plane_index].band[level][orientation].state));
1534 }
1535 }
1536 }
1537 memset(s->header_state, 0, sizeof(s->header_state));
1538 memset(s->block_state, 0, sizeof(s->block_state));
1539 }
1540
1541 static int alloc_blocks(SnowContext *s){
1542 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1543 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1544
1545 s->b_width = w;
1546 s->b_height= h;
1547
1548 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
1549 return 0;
1550 }
1551
1552 static inline void copy_cabac_state(CABACContext *d, CABACContext *s){
1553 PutBitContext bak= d->pb;
1554 *d= *s;
1555 d->pb= bak;
1556 }
1557
1558 //near copy & paste from dsputil, FIXME
1559 static int pix_sum(uint8_t * pix, int line_size, int w)
1560 {
1561 int s, i, j;
1562
1563 s = 0;
1564 for (i = 0; i < w; i++) {
1565 for (j = 0; j < w; j++) {
1566 s += pix[0];
1567 pix ++;
1568 }
1569 pix += line_size - w;
1570 }
1571 return s;
1572 }
1573
1574 //near copy & paste from dsputil, FIXME
1575 static int pix_norm1(uint8_t * pix, int line_size, int w)
1576 {
1577 int s, i, j;
1578 uint32_t *sq = squareTbl + 256;
1579
1580 s = 0;
1581 for (i = 0; i < w; i++) {
1582 for (j = 0; j < w; j ++) {
1583 s += sq[pix[0]];
1584 pix ++;
1585 }
1586 pix += line_size - w;
1587 }
1588 return s;
1589 }
1590
1591 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int type){
1592 const int w= s->b_width << s->block_max_depth;
1593 const int rem_depth= s->block_max_depth - level;
1594 const int index= (x + y*w) << rem_depth;
1595 const int block_w= 1<<rem_depth;
1596 BlockNode block;
1597 int i,j;
1598
1599 block.color[0]= l;
1600 block.color[1]= cb;
1601 block.color[2]= cr;
1602 block.mx= mx;
1603 block.my= my;
1604 block.type= type;
1605 block.level= level;
1606
1607 for(j=0; j<block_w; j++){
1608 for(i=0; i<block_w; i++){
1609 s->block[index + i + j*w]= block;
1610 }
1611 }
1612 }
1613
1614 static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
1615 const int offset[3]= {
1616 y*c-> stride + x,
1617 ((y*c->uvstride + x)>>1),
1618 ((y*c->uvstride + x)>>1),
1619 };
1620 int i;
1621 for(i=0; i<3; i++){
1622 c->src[0][i]= src [i];
1623 c->ref[0][i]= ref [i] + offset[i];
1624 }
1625 assert(!ref_index);
1626 }
1627
1628 //FIXME copy&paste
1629 #define P_LEFT P[1]
1630 #define P_TOP P[2]
1631 #define P_TOPRIGHT P[3]
1632 #define P_MEDIAN P[4]
1633 #define P_MV1 P[9]
1634 #define FLAG_QPEL 1 //must be 1
1635
1636 static int encode_q_branch(SnowContext *s, int level, int x, int y){
1637 uint8_t p_buffer[1024];
1638 uint8_t i_buffer[1024];
1639 uint8_t p_state[sizeof(s->block_state)];
1640 uint8_t i_state[sizeof(s->block_state)];
1641 CABACContext pc, ic;
1642 PutBitContext pbbak= s->c.pb;
1643 int score, score2, iscore, i_len, p_len, block_s, sum;
1644 const int w= s->b_width << s->block_max_depth;
1645 const int h= s->b_height << s->block_max_depth;
1646 const int rem_depth= s->block_max_depth - level;
1647 const int index= (x + y*w) << rem_depth;
1648 const int block_w= 1<<(LOG2_MB_SIZE - level);
1649 static BlockNode null_block= { //FIXME add border maybe
1650 .color= {128,128,128},
1651 .mx= 0,
1652 .my= 0,
1653 .type= 0,
1654 .level= 0,
1655 };
1656 int trx= (x+1)<<rem_depth;
1657 int try= (y+1)<<rem_depth;
1658 BlockNode *left = x ? &s->block[index-1] : &null_block;
1659 BlockNode *top = y ? &s->block[index-w] : &null_block;
1660 BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
1661 BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
1662 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
1663 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
1664 int pl = left->color[0];
1665 int pcb= left->color[1];
1666 int pcr= left->color[2];
1667 int pmx= mid_pred(left->mx, top->mx, tr->mx);
1668 int pmy= mid_pred(left->my, top->my, tr->my);
1669 int mx=0, my=0;
1670 int l,cr,cb, i;
1671 const int stride= s->current_picture.linesize[0];
1672 const int uvstride= s->current_picture.linesize[1];
1673 const int instride= s->input_picture.linesize[0];
1674 const int uvinstride= s->input_picture.linesize[1];
1675 uint8_t *new_l = s->input_picture.data[0] + (x + y* instride)*block_w;
1676 uint8_t *new_cb= s->input_picture.data[1] + (x + y*uvinstride)*block_w/2;
1677 uint8_t *new_cr= s->input_picture.data[2] + (x + y*uvinstride)*block_w/2;
1678 uint8_t current_mb[3][stride*block_w];
1679 uint8_t *current_data[3]= {&current_mb[0][0], &current_mb[1][0], &current_mb[2][0]};
1680 int P[10][2];
1681 int16_t last_mv[3][2];
1682 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
1683 const int shift= 1+qpel;
1684 MotionEstContext *c= &s->m.me;
1685 int mx_context= av_log2(2*ABS(left->mx - top->mx));
1686 int my_context= av_log2(2*ABS(left->my - top->my));
1687 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1688
1689 assert(sizeof(s->block_state) >= 256);
1690 if(s->keyframe){
1691 set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, BLOCK_INTRA);
1692 return 0;
1693 }
1694
1695 //FIXME optimize
1696 for(i=0; i<block_w; i++)
1697 memcpy(&current_mb[0][0] + stride*i, new_l + instride*i, block_w);
1698 for(i=0; i<block_w>>1; i++)
1699 memcpy(&current_mb[1][0] + uvstride*i, new_cb + uvinstride*i, block_w>>1);
1700 for(i=0; i<block_w>>1; i++)
1701 memcpy(&current_mb[2][0] + uvstride*i, new_cr + uvinstride*i, block_w>>1);
1702
1703 // clip predictors / edge ?
1704
1705 P_LEFT[0]= left->mx;
1706 P_LEFT[1]= left->my;
1707 P_TOP [0]= top->mx;
1708 P_TOP [1]= top->my;
1709 P_TOPRIGHT[0]= tr->mx;
1710 P_TOPRIGHT[1]= tr->my;
1711
1712 last_mv[0][0]= s->block[index].mx;
1713 last_mv[0][1]= s->block[index].my;
1714 last_mv[1][0]= right->mx;
1715 last_mv[1][1]= right->my;
1716 last_mv[2][0]= bottom->mx;
1717 last_mv[2][1]= bottom->my;
1718
1719 s->m.mb_stride=2;
1720 s->m.mb_x=
1721 s->m.mb_y= 0;
1722 s->m.me.skip= 0;
1723
1724 init_ref(c, current_data, s->last_picture.data, NULL, block_w*x, block_w*y, 0);
1725
1726 assert(s->m.me. stride == stride);
1727 assert(s->m.me.uvstride == uvstride);
1728
1729 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
1730 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
1731 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
1732 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
1733
1734 c->xmin = - x*block_w - 16+2;
1735 c->ymin = - y*block_w - 16+2;
1736 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
1737 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
1738
1739 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
1740 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
1741 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
1742 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
1743 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
1744 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
1745 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
1746
1747 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
1748 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
1749
1750 if (!y) {
1751 c->pred_x= P_LEFT[0];
1752 c->pred_y= P_LEFT[1];
1753 } else {
1754 c->pred_x = P_MEDIAN[0];
1755 c->pred_y = P_MEDIAN[1];
1756 }
1757
1758 score= ff_epzs_motion_search(&s->m, &mx, &my, P, 0, /*ref_index*/ 0, last_mv,
1759 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
1760
1761 assert(mx >= c->xmin);
1762 assert(mx <= c->xmax);
1763 assert(my >= c->ymin);
1764 assert(my <= c->ymax);
1765
1766 score= s->m.me.sub_motion_search(&s->m, &mx, &my, score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
1767 score= ff_get_mb_score(&s->m, mx, my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
1768 //FIXME if mb_cmp != SSE then intra cant be compared currently and mb_penalty vs. lambda2
1769
1770 // subpel search
1771 pc= s->c;
1772 init_put_bits(&pc.pb, p_buffer, sizeof(p_buffer));
1773 memcpy(p_state, s->block_state, sizeof(s->block_state));
1774
1775 if(level!=s->block_max_depth)
1776 put_cabac(&pc, &p_state[4 + s_context], 1);
1777 put_cabac(&pc, &p_state[1 + left->type + top->type], 0);
1778 put_symbol(&pc, &p_state[128 + 32*mx_context], mx - pmx, 1);
1779 put_symbol(&pc, &p_state[128 + 32*my_context], my - pmy, 1);
1780 p_len= put_bits_count(&pc.pb);
1781 score += (s->lambda2*(p_len + pc.outstanding_count - s->c.outstanding_count))>>FF_LAMBDA_SHIFT;
1782
1783 block_s= block_w*block_w;
1784 sum = pix_sum(&current_mb[0][0], stride, block_w);
1785 l= (sum + block_s/2)/block_s;
1786 iscore = pix_norm1(&current_mb[0][0], stride, block_w) - 2*l*sum + l*l*block_s;
1787
1788 block_s= block_w*block_w>>2;
1789 sum = pix_sum(&current_mb[1][0], uvstride, block_w>>1);
1790 cb= (sum + block_s/2)/block_s;
1791 // iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
1792 sum = pix_sum(&current_mb[2][0], uvstride, block_w>>1);
1793 cr= (sum + block_s/2)/block_s;
1794 // iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
1795
1796 ic= s->c;
1797 init_put_bits(&ic.pb, i_buffer, sizeof(i_buffer));
1798 memcpy(i_state, s->block_state, sizeof(s->block_state));
1799 if(level!=s->block_max_depth)
1800 put_cabac(&ic, &i_state[4 + s_context], 1);
1801 put_cabac(&ic, &i_state[1 + left->type + top->type], 1);
1802 put_symbol(&ic, &i_state[32], l-pl , 1);
1803 put_symbol(&ic, &i_state[64], cb-pcb, 1);
1804 put_symbol(&ic, &i_state[96], cr-pcr, 1);
1805 i_len= put_bits_count(&ic.pb);
1806 iscore += (s->lambda2*(i_len + ic.outstanding_count - s->c.outstanding_count))>>FF_LAMBDA_SHIFT;
1807
1808 // assert(score==256*256*256*64-1);
1809 assert(iscore < 255*255*256 + s->lambda2*10);
1810 assert(iscore >= 0);
1811 assert(l>=0 && l<=255);
1812 assert(pl>=0 && pl<=255);
1813
1814 if(level==0){
1815 int varc= iscore >> 8;
1816 int vard= score >> 8;
1817 if (vard <= 64 || vard < varc)
1818 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
1819 else
1820 c->scene_change_score+= s->m.qscale;
1821 }
1822
1823 if(level!=s->block_max_depth){
1824 put_cabac(&s->c, &s->block_state[4 + s_context], 0);
1825 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
1826 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
1827 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
1828 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
1829 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
1830
1831 if(score2 < score && score2 < iscore)
1832 return score2;
1833 }
1834
1835 if(iscore < score){
1836 flush_put_bits(&ic.pb);
1837 ff_copy_bits(&pbbak, i_buffer, i_len);
1838 s->c= ic;
1839 s->c.pb= pbbak;
1840 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, BLOCK_INTRA);
1841 memcpy(s->block_state, i_state, sizeof(s->block_state));
1842 return iscore;
1843 }else{
1844 flush_put_bits(&pc.pb);
1845 ff_copy_bits(&pbbak, p_buffer, p_len);
1846 s->c= pc;
1847 s->c.pb= pbbak;
1848 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, 0);
1849 memcpy(s->block_state, p_state, sizeof(s->block_state));
1850 return score;
1851 }
1852 }
1853
1854 static void decode_q_branch(SnowContext *s, int level, int x, int y){
1855 const int w= s->b_width << s->block_max_depth;
1856 const int rem_depth= s->block_max_depth - level;
1857 const int index= (x + y*w) << rem_depth;
1858 static BlockNode null_block= { //FIXME add border maybe
1859 .color= {128,128,128},
1860 .mx= 0,
1861 .my= 0,
1862 .type= 0,
1863 .level= 0,
1864 };
1865 int trx= (x+1)<<rem_depth;
1866 BlockNode *left = x ? &s->block[index-1] : &null_block;
1867 BlockNode *top = y ? &s->block[index-w] : &null_block;
1868 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
1869 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
1870 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1871
1872 if(s->keyframe){
1873 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, BLOCK_INTRA);
1874 return;
1875 }
1876
1877 if(level==s->block_max_depth || get_cabac(&s->c, &s->block_state[4 + s_context])){
1878 int type;
1879 int l = left->color[0];
1880 int cb= left->color[1];
1881 int cr= left->color[2];
1882 int mx= mid_pred(left->mx, top->mx, tr->mx);
1883 int my= mid_pred(left->my, top->my, tr->my);
1884 int mx_context= av_log2(2*ABS(left->mx - top->mx)) + 0*av_log2(2*ABS(tr->mx - top->mx));
1885 int my_context= av_log2(2*ABS(left->my - top->my)) + 0*av_log2(2*ABS(tr->my - top->my));
1886
1887 type= get_cabac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
1888
1889 if(type){
1890 l += get_symbol(&s->c, &s->block_state[32], 1);
1891 cb+= get_symbol(&s->c, &s->block_state[64], 1);
1892 cr+= get_symbol(&s->c, &s->block_state[96], 1);
1893 }else{
1894 mx+= get_symbol(&s->c, &s->block_state[128 + 32*mx_context], 1);
1895 my+= get_symbol(&s->c, &s->block_state[128 + 32*my_context], 1);
1896 }
1897 set_blocks(s, level, x, y, l, cb, cr, mx, my, type);
1898 }else{
1899 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
1900 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
1901 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
1902 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
1903 }
1904 }
1905
1906 static void encode_blocks(SnowContext *s){
1907 int x, y;
1908 int w= s->b_width;
1909 int h= s->b_height;
1910
1911 for(y=0; y<h; y++){
1912 for(x=0; x<w; x++){
1913 encode_q_branch(s, 0, x, y);
1914 }
1915 }
1916 }
1917
1918 static void decode_blocks(SnowContext *s){
1919 int x, y;
1920 int w= s->b_width;
1921 int h= s->b_height;
1922
1923 for(y=0; y<h; y++){
1924 for(x=0; x<w; x++){
1925 decode_q_branch(s, 0, x, y);
1926 }
1927 }
1928 }
1929
1930 static void mc_block(uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
1931 int x, y;
1932 for(y=0; y < b_h+5; y++){
1933 for(x=0; x < b_w; x++){
1934 int a0= src[x + y*stride];
1935 int a1= src[x + 1 + y*stride];
1936 int a2= src[x + 2 + y*stride];
1937 int a3= src[x + 3 + y*stride];
1938 int a4= src[x + 4 + y*stride];
1939 int a5= src[x + 5 + y*stride];
1940 // int am= 9*(a1+a2) - (a0+a3);
1941 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
1942 // int am= 18*(a2+a3) - 2*(a1+a4);
1943 // int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
1944 // int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;
1945
1946 // if(b_w==16) am= 8*(a1+a2);
1947
1948 if(dx<8) tmp[x + y*stride]= (32*a2*( 8-dx) + am* dx + 128)>>8;
1949 else tmp[x + y*stride]= ( am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
1950
1951 /* if (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) + aL* dx + 32)>>6;
1952 else if(dx< 8) tmp[x + y*stride]= ( aL*( 8-dx) + am*(dx- 4) + 32)>>6;
1953 else if(dx<12) tmp[x + y*stride]= ( am*(12-dx) + aR*(dx- 8) + 32)>>6;
1954 else tmp[x + y*stride]= ( aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/
1955 }
1956 }
1957 for(y=0; y < b_h; y++){
1958 for(x=0; x < b_w; x++){
1959 int a0= tmp[x + y *stride];
1960 int a1= tmp[x + (y + 1)*stride];
1961 int a2= tmp[x + (y + 2)*stride];
1962 int a3= tmp[x + (y + 3)*stride];
1963 int a4= tmp[x + (y + 4)*stride];
1964 int a5= tmp[x + (y + 5)*stride];
1965 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
1966 // int am= 18*(a2+a3) - 2*(a1+a4);
1967 /* int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
1968 int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/
1969
1970 // if(b_w==16) am= 8*(a1+a2);
1971
1972 if(dy<8) dst[x + y*stride]= (32*a2*( 8-dy) + am* dy + 128)>>8;
1973 else dst[x + y*stride]= ( am*(16-dy) + 32*a3*(dy-8) + 128)>>8;
1974
1975 /* if (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) + aL* dy + 32)>>6;
1976 else if(dy< 8) tmp[x + y*stride]= ( aL*( 8-dy) + am*(dy- 4) + 32)>>6;
1977 else if(dy<12) tmp[x + y*stride]= ( am*(12-dy) + aR*(dy- 8) + 32)>>6;
1978 else tmp[x + y*stride]= ( aR*(16-dy) + 16*a2*(dy-12) + 32)>>6;*/
1979 }
1980 }
1981 }
1982
1983 #define mcb(dx,dy,b_w)\
1984 static void mc_block ## dx ## dy(uint8_t *dst, uint8_t *src, int stride){\
1985 uint8_t tmp[stride*(b_w+5)];\
1986 mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\
1987 }
1988
1989 mcb( 0, 0,16)
1990 mcb( 4, 0,16)
1991 mcb( 8, 0,16)
1992 mcb(12, 0,16)
1993 mcb( 0, 4,16)
1994 mcb( 4, 4,16)
1995 mcb( 8, 4,16)
1996 mcb(12, 4,16)
1997 mcb( 0, 8,16)
1998 mcb( 4, 8,16)
1999 mcb( 8, 8,16)
2000 mcb(12, 8,16)
2001 mcb( 0,12,16)
2002 mcb( 4,12,16)
2003 mcb( 8,12,16)
2004 mcb(12,12,16)
2005
2006 #define mca(dx,dy,b_w)\
2007 static void mc_block_hpel ## dx ## dy(uint8_t *dst, uint8_t *src, int stride, int h){\
2008 uint8_t tmp[stride*(b_w+5)];\
2009 assert(h==b_w);\
2010 mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\
2011 }
2012
2013 mca( 0, 0,16)
2014 mca( 8, 0,16)
2015 mca( 0, 8,16)
2016 mca( 8, 8,16)
2017
2018 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
2019 if(block->type){
2020 int x, y;
2021 const int color= block->color[plane_index];
2022 for(y=0; y < b_h; y++){
2023 for(x=0; x < b_w; x++){
2024 dst[x + y*stride]= color;
2025 }
2026 }
2027 }else{
2028 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
2029 int mx= block->mx*scale;
2030 int my= block->my*scale;
2031 sx += (mx>>4) - 2;
2032 sy += (my>>4) - 2;
2033 src += sx + sy*stride;
2034 if( (unsigned)sx >= w - b_w - 4
2035 || (unsigned)sy >= h - b_h - 4){
2036 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+5, b_h+5, sx, sy, w, h);
2037 src= tmp + MB_SIZE;
2038 }
2039 mc_block(dst, src, tmp, stride, b_w, b_h, mx&15, my&15);
2040 }
2041 }
2042
2043 static always_inline int same_block(BlockNode *a, BlockNode *b){
2044 return !((a->mx - b->mx) | (a->my - b->my) | a->type | b->type);
2045 }
2046
2047 //FIXME name clenup (b_w, block_w, b_width stuff)
2048 static always_inline void add_yblock(SnowContext *s, DWTELEM *dst, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){
2049 const int b_width = s->b_width << s->block_max_depth;
2050 const int b_height= s->b_height << s->block_max_depth;
2051 const int b_stride= b_width;
2052 BlockNode *lt= &s->block[b_x + b_y*b_stride];
2053 BlockNode *rt= lt+1;
2054 BlockNode *lb= lt+b_stride;
2055 BlockNode *rb= lb+1;
2056 uint8_t *block[4];
2057 uint8_t tmp[src_stride*(b_h+5)]; //FIXME align
2058 int x,y;
2059
2060 if(b_x<0){
2061 lt= rt;
2062 lb= rb;
2063 }else if(b_x + 1 >= b_width){
2064 rt= lt;
2065 rb= lb;
2066 }
2067 if(b_y<0){
2068 lt= lb;
2069 rt= rb;
2070 }else if(b_y + 1 >= b_height){
2071 lb= lt;
2072 rb= rt;
2073 }