fixing interlaced direct mode field select values
[libav.git] / libavcodec / snow.c
CommitLineData
791e7b83
MN
/*
 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
18
19#include "avcodec.h"
20#include "common.h"
21#include "dsputil.h"
059715a4 22#include "snow.h"
28869757
MN
23
24#include "rangecoder.h"
791e7b83
MN
25
26#include "mpegvideo.h"
27
28#undef NDEBUG
29#include <assert.h>
30
791e7b83
MN
/**
 * 256-entry lookup table with values in {-1, 0, 1}.
 * Entries 0 and 1 map to 0, 2..127 map to 1, 128..254 map to -1 and
 * entry 255 maps to 0.
 * NOTE(review): presumably indexed by a difference reduced to 8 bits, so that
 * indices 128..255 stand for negative inputs -- confirm at the use sites.
 */
static const int8_t quant3[256]={
 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
};
/**
 * 256-entry lookup table with values in {-1, 0, 1}.
 * Entry 0 maps to 0, 1..127 map to 1 and 128..255 map to -1.
 * NOTE(review): presumably indexed by a difference reduced to 8 bits, so that
 * indices 128..255 stand for negative inputs -- confirm at the use sites.
 */
static const int8_t quant3b[256]={
 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
};
538a3841
MN
/**
 * 256-entry lookup table with values in {-1, 0, 1}.
 * Entries 0 and 1 map to 0; from index 2 onwards even indices map to 1 and
 * odd indices map to -1.
 * NOTE(review): the alternating layout suggests the sign is carried in the
 * lowest index bit -- confirm at the use sites.
 */
static const int8_t quant3bA[256]={
 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
};
791e7b83
MN
/**
 * 256-entry lookup table with values in -2..2.
 * 0 -> 0, 1..3 -> 1, 4..127 -> 2, 128..252 -> -2, 253..255 -> -1.
 * NOTE(review): presumably indexed by a difference reduced to 8 bits, so that
 * indices 128..255 stand for negative inputs -- confirm at the use sites.
 */
static const int8_t quant5[256]={
 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
};
/**
 * 256-entry lookup table with values in -3..3.
 * 0 -> 0, 1..2 -> 1, 3..39 -> 2, 40..127 -> 3,
 * 128..216 -> -3, 217..253 -> -2, 254..255 -> -1.
 * NOTE(review): presumably indexed by a difference reduced to 8 bits, so that
 * indices 128..255 stand for negative inputs -- confirm at the use sites.
 */
static const int8_t quant7[256]={
 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
};
/**
 * 256-entry lookup table with values in -4..4.
 * 0 -> 0, 1..2 -> 1, 3..6 -> 2, 7..20 -> 3, 21..127 -> 4,
 * 128..235 -> -4, 236..249 -> -3, 250..253 -> -2, 254..255 -> -1.
 * NOTE(review): presumably indexed by a difference reduced to 8 bits, so that
 * indices 128..255 stand for negative inputs -- confirm at the use sites.
 */
static const int8_t quant9[256]={
 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
};
/**
 * 256-entry lookup table with values in -5..5.
 * 0 -> 0, 1 -> 1, 2..4 -> 2, 5..11 -> 3, 12..34 -> 4, 35..127 -> 5,
 * 128..221 -> -5, 222..244 -> -4, 245..251 -> -3, 252..254 -> -2, 255 -> -1.
 * NOTE(review): presumably indexed by a difference reduced to 8 bits, so that
 * indices 128..255 stand for negative inputs -- confirm at the use sites.
 */
static const int8_t quant11[256]={
 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
-4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
};
/**
 * 256-entry lookup table with values in -6..6.
 * 0 -> 0, 1 -> 1, 2..3 -> 2, 4..7 -> 3, 8..16 -> 4, 17..49 -> 5, 50..127 -> 6,
 * 128..206 -> -6, 207..239 -> -5, 240..248 -> -4, 249..252 -> -3,
 * 253..254 -> -2, 255 -> -1.
 * NOTE(review): presumably indexed by a difference reduced to 8 bits, so that
 * indices 128..255 stand for negative inputs -- confirm at the use sites.
 */
static const int8_t quant13[256]={
 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
};
175
791e7b83
MN
/*
 * obmc32 (32x32) and obmc16 (16x16) weight tables, stored row by row.
 * Three alternative shapes are kept; the preprocessor selects exactly one
 * pair. The "64*linear" variant is the one currently compiled in.
 * NOTE(review): "obmc" presumably stands for overlapped block motion
 * compensation -- confirm against the code that reads obmc_tab.
 */
#if 0 //64*cubic
static const uint8_t obmc32[1024]={
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
//error:0.000022
};
static const uint8_t obmc16[256]={
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
//error:0.000033
};
#elif 1 // 64*linear
static const uint8_t obmc32[1024]={
 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
 //error:0.000020
};
static const uint8_t obmc16[256]={
 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
//error:0.000015
};
#else //64*cos
static const uint8_t obmc32[1024]={
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
//error:0.000022
};
static const uint8_t obmc16[256]={
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
//error:0.000022
};
#endif
342
155ec6ed
MN
//linear *64
/* 8x8 weight table; every entry is the product of the row and column
 * weights {1,3,5,7,7,5,3,1}, and all 64 entries sum to 1024. */
static const uint8_t obmc8[64]={
 1, 3, 5, 7, 7, 5, 3, 1,
 3, 9,15,21,21,15, 9, 3,
 5,15,25,35,35,25,15, 5,
 7,21,35,49,49,35,21, 7,
 7,21,35,49,49,35,21, 7,
 5,15,25,35,35,25,15, 5,
 3, 9,15,21,21,15, 9, 3,
 1, 3, 5, 7, 7, 5, 3, 1,
//error:0.000000
};
355
//linear *64
/* 4x4 weight table; every entry is the product of the row and column
 * weights {2,6,6,2}, and all 16 entries sum to 256. */
static const uint8_t obmc4[16]={
 4,12,12, 4,
12,36,36,12,
12,36,36,12,
 4,12,12, 4,
//error:0.000000
};
364
365static const uint8_t *obmc_tab[4]={
366 obmc32, obmc16, obmc8, obmc4
367};
368
/**
 * Per-block record.
 * NOTE(review): mx/my look like motion-vector components and color[] like one
 * value per plane -- confirm against the code that fills these in.
 */
typedef struct BlockNode{
    int16_t mx;
    int16_t my;
    uint8_t color[3];
    uint8_t type;       ///< combination of the BLOCK_* flags below
//#define TYPE_SPLIT    1
#define BLOCK_INTRA   1
#define BLOCK_OPT     2
//#define TYPE_NOCOLOR  4
    uint8_t level; //FIXME merge into type?
}BlockNode;
380
51d6a3cf
MN
381static const BlockNode null_block= { //FIXME add border maybe
382 .color= {128,128,128},
383 .mx= 0,
384 .my= 0,
385 .type= 0,
386 .level= 0,
387};
388
155ec6ed
MN
/* MB_SIZE is 1<<LOG2_MB_SIZE, i.e. 16. */
#define LOG2_MB_SIZE 4
#define MB_SIZE (1<<LOG2_MB_SIZE)
391
a0d1931c
Y
/**
 * An (x, coeff) pair.
 * NOTE(review): presumably a column position together with a coefficient
 * value, stored per subband via SubBand.x_coeff -- confirm with the code
 * that fills it in.
 */
typedef struct x_and_coeff{
    int16_t x;
    uint16_t coeff;
} x_and_coeff;
396
791e7b83
MN
397typedef struct SubBand{
398 int level;
399 int stride;
400 int width;
401 int height;
402 int qlog; ///< log(qscale)/log[2^(1/6)]
403 DWTELEM *buf;
a0d1931c
Y
404 int buf_x_offset;
405 int buf_y_offset;
406 int stride_line; ///< Stride measured in lines, not pixels.
407 x_and_coeff * x_coeff;
791e7b83
MN
408 struct SubBand *parent;
409 uint8_t state[/*7*2*/ 7 + 512][32];
410}SubBand;
411
412typedef struct Plane{
413 int width;
414 int height;
415 SubBand band[MAX_DECOMPOSITIONS][4];
416}Plane;
417
418typedef struct SnowContext{
419// MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
420
421 AVCodecContext *avctx;
28869757 422 RangeCoder c;
791e7b83 423 DSPContext dsp;
51d6a3cf
MN
424 AVFrame new_picture;
425 AVFrame input_picture; ///< new_picture with the internal linesizes
791e7b83
MN
426 AVFrame current_picture;
427 AVFrame last_picture;
428 AVFrame mconly_picture;
429// uint8_t q_context[16];
430 uint8_t header_state[32];
155ec6ed 431 uint8_t block_state[128 + 32*128];
791e7b83 432 int keyframe;
19aa028d 433 int always_reset;
791e7b83
MN
434 int version;
435 int spatial_decomposition_type;
436 int temporal_decomposition_type;
437 int spatial_decomposition_count;
438 int temporal_decomposition_count;
439 DWTELEM *spatial_dwt_buffer;
791e7b83
MN
440 int colorspace_type;
441 int chroma_h_shift;
442 int chroma_v_shift;
443 int spatial_scalability;
444 int qlog;
155ec6ed
MN
445 int lambda;
446 int lambda2;
791e7b83
MN
447 int mv_scale;
448 int qbias;
449#define QBIAS_SHIFT 3
155ec6ed
MN
450 int b_width;
451 int b_height;
452 int block_max_depth;
791e7b83 453 Plane plane[MAX_PLANES];
155ec6ed 454 BlockNode *block;
51d6a3cf
MN
455#define ME_CACHE_SIZE 1024
456 int me_cache[ME_CACHE_SIZE];
457 int me_cache_generation;
a0d1931c 458 slice_buffer sb;
155ec6ed 459
791e7b83
MN
460 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
461}SnowContext;
462
f9e6ebf7
LM
463typedef struct {
464 DWTELEM *b0;
465 DWTELEM *b1;
466 DWTELEM *b2;
467 DWTELEM *b3;
468 int y;
469} dwt_compose_t;
470
a0d1931c
Y
/* Return the buffer for line_num, loading it only if it is not already
 * resident. Both arguments are expanded more than once, so they must be free
 * of side effects. */
#define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
//#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
473
51d6a3cf
MN
474static void iterative_me(SnowContext *s);
475
a0d1931c
Y
476static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, DWTELEM * base_buffer)
477{
478 int i;
115329f1 479
a0d1931c
Y
480 buf->base_buffer = base_buffer;
481 buf->line_count = line_count;
482 buf->line_width = line_width;
483 buf->data_count = max_allocated_lines;
484 buf->line = (DWTELEM * *) av_mallocz (sizeof(DWTELEM *) * line_count);
485 buf->data_stack = (DWTELEM * *) av_malloc (sizeof(DWTELEM *) * max_allocated_lines);
115329f1 486
a0d1931c
Y
487 for (i = 0; i < max_allocated_lines; i++)
488 {
489 buf->data_stack[i] = (DWTELEM *) av_malloc (sizeof(DWTELEM) * line_width);
490 }
115329f1 491
a0d1931c
Y
492 buf->data_stack_top = max_allocated_lines - 1;
493}
494
495static DWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
496{
a0d1931c
Y
497 int offset;
498 DWTELEM * buffer;
115329f1
DB
499
500// av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line);
501
a0d1931c
Y
502 assert(buf->data_stack_top >= 0);
503// assert(!buf->line[line]);
504 if (buf->line[line])
505 return buf->line[line];
115329f1 506
a0d1931c
Y
507 offset = buf->line_width * line;
508 buffer = buf->data_stack[buf->data_stack_top];
509 buf->data_stack_top--;
510 buf->line[line] = buffer;
115329f1 511
a0d1931c 512// av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
115329f1 513
a0d1931c
Y
514 return buffer;
515}
516
517static void slice_buffer_release(slice_buffer * buf, int line)
518{
a0d1931c
Y
519 int offset;
520 DWTELEM * buffer;
521
522 assert(line >= 0 && line < buf->line_count);
523 assert(buf->line[line]);
524
525 offset = buf->line_width * line;
526 buffer = buf->line[line];
527 buf->data_stack_top++;
528 buf->data_stack[buf->data_stack_top] = buffer;
529 buf->line[line] = NULL;
115329f1 530
a0d1931c
Y
531// av_log(NULL, AV_LOG_DEBUG, "slice_buffer_release: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
532}
533
534static void slice_buffer_flush(slice_buffer * buf)
535{
536 int i;
537 for (i = 0; i < buf->line_count; i++)
538 {
539 if (buf->line[i])
540 {
541// av_log(NULL, AV_LOG_DEBUG, "slice_buffer_flush: line: %d \n", i);
542 slice_buffer_release(buf, i);
543 }
544 }
545}
546
547static void slice_buffer_destroy(slice_buffer * buf)
548{
549 int i;
550 slice_buffer_flush(buf);
115329f1 551
a0d1931c
Y
552 for (i = buf->data_count - 1; i >= 0; i--)
553 {
554 assert(buf->data_stack[i]);
e7c8206e 555 av_freep(&buf->data_stack[i]);
a0d1931c
Y
556 }
557 assert(buf->data_stack);
e7c8206e 558 av_freep(&buf->data_stack);
a0d1931c 559 assert(buf->line);
e7c8206e 560 av_freep(&buf->line);
a0d1931c
Y
561}
562
bb270c08 563#ifdef __sgi
2554db9b 564// Avoid a name clash on SGI IRIX
bb270c08 565#undef qexp
2554db9b 566#endif
034aff03 567#define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
c97de57c 568static uint8_t qexp[QROOT];
791e7b83
MN
569
/**
 * Fold an index into the closed range [0, m] by repeated reflection at both
 * ends of the range.
 *
 * @param v index, possibly negative or larger than m
 * @param m largest valid index
 * @return the reflected index in [0, m]
 *
 * With m == 0 the only valid index is 0. That case is returned directly,
 * because the reflection loop below cannot make progress when 2*m is 0 and
 * would never terminate for v != 0.
 */
static inline int mirror(int v, int m){
    if(m == 0)
        return 0;

    /* The unsigned comparison is true for v < 0 as well as for v > m. */
    while((unsigned)v > (unsigned)m){
        v=-v;
        if(v<0) v+= 2*m;
    }
    return v;
}
577
28869757 578static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
791e7b83
MN
579 int i;
580
581 if(v){
582 const int a= ABS(v);
583 const int e= av_log2(a);
584#if 1
115329f1 585 const int el= FFMIN(e, 10);
28869757 586 put_rac(c, state+0, 0);
791e7b83
MN
587
588 for(i=0; i<el; i++){
28869757 589 put_rac(c, state+1+i, 1); //1..10
791e7b83
MN
590 }
591 for(; i<e; i++){
28869757 592 put_rac(c, state+1+9, 1); //1..10
791e7b83 593 }
28869757 594 put_rac(c, state+1+FFMIN(i,9), 0);
791e7b83
MN
595
596 for(i=e-1; i>=el; i--){
28869757 597 put_rac(c, state+22+9, (a>>i)&1); //22..31
791e7b83
MN
598 }
599 for(; i>=0; i--){
28869757 600 put_rac(c, state+22+i, (a>>i)&1); //22..31
791e7b83
MN
601 }
602
603 if(is_signed)
28869757 604 put_rac(c, state+11 + el, v < 0); //11..21
791e7b83 605#else
115329f1 606
28869757 607 put_rac(c, state+0, 0);
791e7b83
MN
608 if(e<=9){
609 for(i=0; i<e; i++){
28869757 610 put_rac(c, state+1+i, 1); //1..10
791e7b83 611 }
28869757 612 put_rac(c, state+1+i, 0);
791e7b83
MN
613
614 for(i=e-1; i>=0; i--){
28869757 615 put_rac(c, state+22+i, (a>>i)&1); //22..31
791e7b83
MN
616 }
617
618 if(is_signed)
28869757 619 put_rac(c, state+11 + e, v < 0); //11..21
791e7b83
MN
620 }else{
621 for(i=0; i<e; i++){
28869757 622 put_rac(c, state+1+FFMIN(i,9), 1); //1..10
791e7b83 623 }
28869757 624 put_rac(c, state+1+FFMIN(i,9), 0);
791e7b83
MN
625
626 for(i=e-1; i>=0; i--){
28869757 627 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
791e7b83
MN
628 }
629
630 if(is_signed)
28869757 631 put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
791e7b83
MN
632 }
633#endif
634 }else{
28869757 635 put_rac(c, state+0, 1);
791e7b83
MN
636 }
637}
638
28869757
MN
639static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
640 if(get_rac(c, state+0))
791e7b83
MN
641 return 0;
642 else{
7c2425d2
LM
643 int i, e, a;
644 e= 0;
28869757 645 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
7c2425d2 646 e++;
791e7b83 647 }
7c2425d2 648
791e7b83 649 a= 1;
7c2425d2 650 for(i=e-1; i>=0; i--){
28869757 651 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
791e7b83
MN
652 }
653
28869757 654 if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
791e7b83
MN
655 return -a;
656 else
657 return a;
658 }
659}
660
28869757 661static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
4f4e9633 662 int i;
0635cbfc 663 int r= log2>=0 ? 1<<log2 : 1;
4f4e9633
MN
664
665 assert(v>=0);
0635cbfc
MN
666 assert(log2>=-4);
667
668 while(v >= r){
28869757 669 put_rac(c, state+4+log2, 1);
0635cbfc 670 v -= r;
4f4e9633 671 log2++;
0635cbfc 672 if(log2>0) r+=r;
4f4e9633 673 }
28869757 674 put_rac(c, state+4+log2, 0);
115329f1 675
4f4e9633 676 for(i=log2-1; i>=0; i--){
28869757 677 put_rac(c, state+31-i, (v>>i)&1);
4f4e9633 678 }
4f4e9633
MN
679}
680
28869757 681static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
4f4e9633 682 int i;
0635cbfc 683 int r= log2>=0 ? 1<<log2 : 1;
4f4e9633
MN
684 int v=0;
685
0635cbfc
MN
686 assert(log2>=-4);
687
28869757 688 while(get_rac(c, state+4+log2)){
0635cbfc 689 v+= r;
4f4e9633 690 log2++;
0635cbfc 691 if(log2>0) r+=r;
4f4e9633 692 }
115329f1 693
4f4e9633 694 for(i=log2-1; i>=0; i--){
28869757 695 v+= get_rac(c, state+31-i)<<i;
4f4e9633
MN
696 }
697
698 return v;
699}
700
791e7b83
MN
701static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
702 const int mirror_left= !highpass;
703 const int mirror_right= (width&1) ^ highpass;
704 const int w= (width>>1) - 1 + (highpass & width);
705 int i;
706
707#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
708 if(mirror_left){
709 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
710 dst += dst_step;
711 src += src_step;
712 }
115329f1 713
791e7b83
MN
714 for(i=0; i<w; i++){
715 dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse);
716 }
115329f1 717
791e7b83
MN
718 if(mirror_right){
719 dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse);
720 }
721}
722
059715a4 723#ifndef lift5
791e7b83
MN
724static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
725 const int mirror_left= !highpass;
726 const int mirror_right= (width&1) ^ highpass;
727 const int w= (width>>1) - 1 + (highpass & width);
728 int i;
729
730 if(mirror_left){
731 int r= 3*2*ref[0];
732 r += r>>4;
733 r += r>>8;
734 dst[0] = LIFT(src[0], ((r+add)>>shift), inverse);
735 dst += dst_step;
736 src += src_step;
737 }
115329f1 738
791e7b83
MN
739 for(i=0; i<w; i++){
740 int r= 3*(ref[i*ref_step] + ref[(i+1)*ref_step]);
741 r += r>>4;
742 r += r>>8;
743 dst[i*dst_step] = LIFT(src[i*src_step], ((r+add)>>shift), inverse);
744 }
115329f1 745
791e7b83
MN
746 if(mirror_right){
747 int r= 3*2*ref[w*ref_step];
748 r += r>>4;
749 r += r>>8;
750 dst[w*dst_step] = LIFT(src[w*src_step], ((r+add)>>shift), inverse);
751 }
752}
059715a4 753#endif
791e7b83 754
059715a4 755#ifndef liftS
f5a71928
MN
756static always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
757 const int mirror_left= !highpass;
758 const int mirror_right= (width&1) ^ highpass;
759 const int w= (width>>1) - 1 + (highpass & width);
760 int i;
761
762 assert(shift == 4);
763#define LIFTS(src, ref, inv) ((inv) ? (src) - (((ref) - 4*(src))>>shift): (16*4*(src) + 4*(ref) + 8 + (5<<27))/(5*16) - (1<<23))
764 if(mirror_left){
765 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
766 dst += dst_step;
767 src += src_step;
768 }
115329f1 769
f5a71928
MN
770 for(i=0; i<w; i++){
771 dst[i*dst_step] = LIFTS(src[i*src_step], mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add, inverse);
772 }
115329f1 773
f5a71928
MN
774 if(mirror_right){
775 dst[w*dst_step] = LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
776 }
777}
059715a4 778#endif
f5a71928 779
791e7b83 780
aa25a462 781static void inplace_lift(DWTELEM *dst, int width, int *coeffs, int n, int shift, int start, int inverse){
791e7b83 782 int x, i;
115329f1 783
791e7b83
MN
784 for(x=start; x<width; x+=2){
785 int64_t sum=0;
786
787 for(i=0; i<n; i++){
788 int x2= x + 2*i - n + 1;
789 if (x2< 0) x2= -x2;
790 else if(x2>=width) x2= 2*width-x2-2;
791 sum += coeffs[i]*(int64_t)dst[x2];
792 }
793 if(inverse) dst[x] -= (sum + (1<<shift)/2)>>shift;
794 else dst[x] += (sum + (1<<shift)/2)>>shift;
795 }
796}
797
aa25a462 798static void inplace_liftV(DWTELEM *dst, int width, int height, int stride, int *coeffs, int n, int shift, int start, int inverse){
791e7b83
MN
799 int x, y, i;
800 for(y=start; y<height; y+=2){
801 for(x=0; x<width; x++){
802 int64_t sum=0;
115329f1 803
791e7b83
MN
804 for(i=0; i<n; i++){
805 int y2= y + 2*i - n + 1;
806 if (y2< 0) y2= -y2;
807 else if(y2>=height) y2= 2*height-y2-2;
808 sum += coeffs[i]*(int64_t)dst[x + y2*stride];
809 }
810 if(inverse) dst[x + y*stride] -= (sum + (1<<shift)/2)>>shift;
811 else dst[x + y*stride] += (sum + (1<<shift)/2)>>shift;
812 }
813 }
814}
815
/*
 * Filter table for the generic "X" transform (horizontal_decomposeX() and
 * friends). Each lifting pass k has Nk taps COEFFSk and a right shift SHIFTk;
 * LX0/LX1 are the start indices handed to inplace_lift()/inplace_liftV() and
 * SCALEX is the factor applied to the samples by spatial_decomposeX() and
 * removed again by spatial_composeX().
 *
 * Exactly one branch of the chain below is active (currently the first
 * "#elif 1", 13/7 CRF). Branches that need other values for LX0/LX1/SCALEX
 * #undef the defaults first, so that switching the selection does not run
 * into an incompatible macro redefinition.
 */
#define SCALEX 1
#define LX0 0
#define LX1 1

#if 0 // more accurate 9/7
#define N1 2
#define SHIFT1 14
#define COEFFS1 (int[]){-25987,-25987}
#define N2 2
#define SHIFT2 19
#define COEFFS2 (int[]){-27777,-27777}
#define N3 2
#define SHIFT3 15
#define COEFFS3 (int[]){28931,28931}
#define N4 2
#define SHIFT4 15
#define COEFFS4 (int[]){14533,14533}
#elif 1 // 13/7 CRF
#define N1 4
#define SHIFT1 4
#define COEFFS1 (int[]){1,-9,-9,1}
#define N2 4
#define SHIFT2 4
#define COEFFS2 (int[]){-1,5,5,-1}
#define N3 0
#define SHIFT3 1
#define COEFFS3 NULL
#define N4 0
#define SHIFT4 1
#define COEFFS4 NULL
#elif 1 // 3/5
#undef LX0
#undef LX1
#undef SCALEX
#define LX0 1
#define LX1 0
#define SCALEX 0.5
#define N1 2
#define SHIFT1 1
#define COEFFS1 (int[]){1,1}
#define N2 2
#define SHIFT2 2
#define COEFFS2 (int[]){-1,-1}
#define N3 0
#define SHIFT3 0
#define COEFFS3 NULL
#define N4 0
#define SHIFT4 0
#define COEFFS4 NULL
#elif 1 // 11/5
#define N1 0
#define SHIFT1 1
#define COEFFS1 NULL
#define N2 2
#define SHIFT2 2
#define COEFFS2 (int[]){-1,-1}
#define N3 2
#define SHIFT3 0
#define COEFFS3 (int[]){-1,-1}
#define N4 4
#define SHIFT4 7
#define COEFFS4 (int[]){-5,29,29,-5}
#undef SCALEX
#define SCALEX 4
#elif 1 // 9/7 CDF
#define N1 2
#define SHIFT1 7
#define COEFFS1 (int[]){-203,-203}
#define N2 2
#define SHIFT2 12
#define COEFFS2 (int[]){-217,-217}
#define N3 2
#define SHIFT3 7
#define COEFFS3 (int[]){113,113}
#define N4 2
#define SHIFT4 9
#define COEFFS4 (int[]){227,227}
#undef SCALEX
#define SCALEX 1
#elif 1 // 7/5 CDF
#define N1 0
#define SHIFT1 1
#define COEFFS1 NULL
#define N2 2
#define SHIFT2 2
#define COEFFS2 (int[]){-1,-1}
#define N3 2
#define SHIFT3 0
#define COEFFS3 (int[]){-1,-1}
#define N4 2
#define SHIFT4 4
#define COEFFS4 (int[]){3,3}
#elif 1 // 9/7 MN
#define N1 4
#define SHIFT1 4
#define COEFFS1 (int[]){1,-9,-9,1}
#define N2 2
#define SHIFT2 2
#define COEFFS2 (int[]){1,1}
#define N3 0
#define SHIFT3 1
#define COEFFS3 NULL
#define N4 0
#define SHIFT4 1
#define COEFFS4 NULL
#else // 13/7 CRF
#define N1 4
#define SHIFT1 4
#define COEFFS1 (int[]){1,-9,-9,1}
#define N2 4
#define SHIFT2 4
#define COEFFS2 (int[]){-1,5,5,-1}
#define N3 0
#define SHIFT3 1
#define COEFFS3 NULL
#define N4 0
#define SHIFT4 1
#define COEFFS4 NULL
#endif
aa25a462
RFI
930static void horizontal_decomposeX(DWTELEM *b, int width){
931 DWTELEM temp[width];
791e7b83
MN
932 const int width2= width>>1;
933 const int w2= (width+1)>>1;
62ab0b78 934 int x;
791e7b83
MN
935
936 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 0);
937 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 0);
938 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 0);
939 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 0);
115329f1 940
791e7b83
MN
941 for(x=0; x<width2; x++){
942 temp[x ]= b[2*x ];
943 temp[x+w2]= b[2*x + 1];
944 }
945 if(width&1)
946 temp[x ]= b[2*x ];
947 memcpy(b, temp, width*sizeof(int));
948}
949
aa25a462
RFI
950static void horizontal_composeX(DWTELEM *b, int width){
951 DWTELEM temp[width];
791e7b83 952 const int width2= width>>1;
62ab0b78 953 int x;
791e7b83
MN
954 const int w2= (width+1)>>1;
955
956 memcpy(temp, b, width*sizeof(int));
957 for(x=0; x<width2; x++){
958 b[2*x ]= temp[x ];
959 b[2*x + 1]= temp[x+w2];
960 }
961 if(width&1)
962 b[2*x ]= temp[x ];
963
964 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 1);
965 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 1);
966 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 1);
967 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 1);
968}
969
aa25a462 970static void spatial_decomposeX(DWTELEM *buffer, int width, int height, int stride){
791e7b83 971 int x, y;
115329f1 972
791e7b83
MN
973 for(y=0; y<height; y++){
974 for(x=0; x<width; x++){
975 buffer[y*stride + x] *= SCALEX;
976 }
977 }
978
979 for(y=0; y<height; y++){
980 horizontal_decomposeX(buffer + y*stride, width);
981 }
115329f1 982
791e7b83
MN
983 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 0);
984 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0);
985 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0);
115329f1 986 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0);
791e7b83
MN
987}
988
aa25a462 989static void spatial_composeX(DWTELEM *buffer, int width, int height, int stride){
791e7b83 990 int x, y;
115329f1 991
791e7b83
MN
992 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1);
993 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1);
994 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1);
995 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 1);
996
997 for(y=0; y<height; y++){
998 horizontal_composeX(buffer + y*stride, width);
999 }
1000
1001 for(y=0; y<height; y++){
1002 for(x=0; x<width; x++){
1003 buffer[y*stride + x] /= SCALEX;
1004 }
1005 }
1006}
1007
aa25a462
RFI
1008static void horizontal_decompose53i(DWTELEM *b, int width){
1009 DWTELEM temp[width];
791e7b83 1010 const int width2= width>>1;
62ab0b78 1011 int x;
791e7b83
MN
1012 const int w2= (width+1)>>1;
1013
1014 for(x=0; x<width2; x++){
1015 temp[x ]= b[2*x ];
1016 temp[x+w2]= b[2*x + 1];
1017 }
1018 if(width&1)
1019 temp[x ]= b[2*x ];
1020#if 0
62ab0b78
AJ
1021 {
1022 int A1,A2,A3,A4;
791e7b83
MN
1023 A2= temp[1 ];
1024 A4= temp[0 ];
1025 A1= temp[0+width2];
1026 A1 -= (A2 + A4)>>1;
1027 A4 += (A1 + 1)>>1;
1028 b[0+width2] = A1;
1029 b[0 ] = A4;
1030 for(x=1; x+1<width2; x+=2){
1031 A3= temp[x+width2];
1032 A4= temp[x+1 ];
1033 A3 -= (A2 + A4)>>1;
1034 A2 += (A1 + A3 + 2)>>2;
1035 b[x+width2] = A3;
1036 b[x ] = A2;
1037
1038 A1= temp[x+1+width2];
1039 A2= temp[x+2 ];
1040 A1 -= (A2 + A4)>>1;
1041 A4 += (A1 + A3 + 2)>>2;
1042 b[x+1+width2] = A1;
1043 b[x+1 ] = A4;
1044 }
1045 A3= temp[width-1];
1046 A3 -= A2;
1047 A2 += (A1 + A3 + 2)>>2;
1048 b[width -1] = A3;
1049 b[width2-1] = A2;
62ab0b78 1050 }
115329f1 1051#else
791e7b83
MN
1052 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
1053 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0);
1054#endif
1055}
1056
aa25a462 1057static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 1058 int i;
115329f1 1059
791e7b83
MN
1060 for(i=0; i<width; i++){
1061 b1[i] -= (b0[i] + b2[i])>>1;
1062 }
1063}
1064
aa25a462 1065static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 1066 int i;
115329f1 1067
791e7b83
MN
1068 for(i=0; i<width; i++){
1069 b1[i] += (b0[i] + b2[i] + 2)>>2;
1070 }
1071}
1072
aa25a462 1073static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
39c61bbb 1074 int y;
791e7b83
MN
1075 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
1076 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride;
115329f1 1077
791e7b83
MN
1078 for(y=-2; y<height; y+=2){
1079 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1080 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1081
1082{START_TIMER
13705b69
MN
1083 if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
1084 if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
791e7b83 1085STOP_TIMER("horizontal_decompose53i")}
115329f1 1086
791e7b83 1087{START_TIMER
13705b69
MN
1088 if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
1089 if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
791e7b83 1090STOP_TIMER("vertical_decompose53i*")}
115329f1 1091
791e7b83
MN
1092 b0=b2;
1093 b1=b3;
1094 }
1095}
1096
aa25a462
RFI
1097static void horizontal_decompose97i(DWTELEM *b, int width){
1098 DWTELEM temp[width];
791e7b83
MN
1099 const int w2= (width+1)>>1;
1100
1101 lift (temp+w2, b +1, b , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0);
f5a71928 1102 liftS(temp , b , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0);
791e7b83
MN
1103 lift5(b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
1104 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
1105}
1106
1107
aa25a462 1108static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 1109 int i;
115329f1 1110
791e7b83
MN
1111 for(i=0; i<width; i++){
1112 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1113 }
1114}
1115
aa25a462 1116static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 1117 int i;
115329f1 1118
791e7b83
MN
1119 for(i=0; i<width; i++){
1120#ifdef lift5
1121 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1122#else
1123 int r= 3*(b0[i] + b2[i]);
1124 r+= r>>4;
1125 r+= r>>8;
1126 b1[i] += (r+W_CO)>>W_CS;
1127#endif
1128 }
1129}
1130
aa25a462 1131static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 1132 int i;
115329f1 1133
791e7b83 1134 for(i=0; i<width; i++){
f5a71928 1135#ifdef liftS
791e7b83 1136 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
f5a71928
MN
1137#else
1138 b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + 8*5 + (5<<27)) / (5*16) - (1<<23);
1139#endif
791e7b83
MN
1140 }
1141}
1142
aa25a462 1143static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 1144 int i;
115329f1 1145
791e7b83
MN
1146 for(i=0; i<width; i++){
1147 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1148 }
1149}
1150
aa25a462 1151static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
39c61bbb 1152 int y;
791e7b83
MN
1153 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
1154 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride;
1155 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
1156 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
115329f1 1157
791e7b83
MN
1158 for(y=-4; y<height; y+=2){
1159 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1160 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1161
1162{START_TIMER
13705b69
MN
1163 if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
1164 if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
791e7b83
MN
1165if(width>400){
1166STOP_TIMER("horizontal_decompose97i")
1167}}
115329f1 1168
791e7b83 1169{START_TIMER
13705b69
MN
1170 if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
1171 if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
1172 if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
1173 if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
791e7b83
MN
1174
1175if(width>400){
1176STOP_TIMER("vertical_decompose97i")
1177}}
115329f1 1178
791e7b83
MN
1179 b0=b2;
1180 b1=b3;
1181 b2=b4;
1182 b3=b5;
1183 }
1184}
1185
aa25a462 1186void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
791e7b83 1187 int level;
115329f1 1188
46c281e8
MN
1189 for(level=0; level<decomposition_count; level++){
1190 switch(type){
791e7b83
MN
1191 case 0: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1192 case 1: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1193 case 2: spatial_decomposeX (buffer, width>>level, height>>level, stride<<level); break;
1194 }
1195 }
1196}
1197
aa25a462
RFI
1198static void horizontal_compose53i(DWTELEM *b, int width){
1199 DWTELEM temp[width];
791e7b83
MN
1200 const int width2= width>>1;
1201 const int w2= (width+1)>>1;
62ab0b78 1202 int x;
791e7b83
MN
1203
1204#if 0
62ab0b78 1205 int A1,A2,A3,A4;
791e7b83
MN
1206 A2= temp[1 ];
1207 A4= temp[0 ];
1208 A1= temp[0+width2];
1209 A1 -= (A2 + A4)>>1;
1210 A4 += (A1 + 1)>>1;
1211 b[0+width2] = A1;
1212 b[0 ] = A4;
1213 for(x=1; x+1<width2; x+=2){
1214 A3= temp[x+width2];
1215 A4= temp[x+1 ];
1216 A3 -= (A2 + A4)>>1;
1217 A2 += (A1 + A3 + 2)>>2;
1218 b[x+width2] = A3;
1219 b[x ] = A2;
1220
1221 A1= temp[x+1+width2];
1222 A2= temp[x+2 ];
1223 A1 -= (A2 + A4)>>1;
1224 A4 += (A1 + A3 + 2)>>2;
1225 b[x+1+width2] = A1;
1226 b[x+1 ] = A4;
1227 }
1228 A3= temp[width-1];
1229 A3 -= A2;
1230 A2 += (A1 + A3 + 2)>>2;
1231 b[width -1] = A3;
1232 b[width2-1] = A2;
115329f1 1233#else
791e7b83
MN
1234 lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1);
1235 lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1236#endif
1237 for(x=0; x<width2; x++){
1238 b[2*x ]= temp[x ];
1239 b[2*x + 1]= temp[x+w2];
1240 }
1241 if(width&1)
1242 b[2*x ]= temp[x ];
1243}
1244
aa25a462 1245static void vertical_compose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 1246 int i;
115329f1 1247
791e7b83
MN
1248 for(i=0; i<width; i++){
1249 b1[i] += (b0[i] + b2[i])>>1;
1250 }
1251}
1252
aa25a462 1253static void vertical_compose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 1254 int i;
115329f1 1255
791e7b83
MN
1256 for(i=0; i<width; i++){
1257 b1[i] -= (b0[i] + b2[i] + 2)>>2;
1258 }
1259}
1260
a0d1931c
Y
1261static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1262 cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1263 cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line);
1264 cs->y = -1;
1265}
1266
f9e6ebf7
LM
1267static void spatial_compose53i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1268 cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1269 cs->b1 = buffer + mirror(-1 , height-1)*stride;
1270 cs->y = -1;
1271}
1272
a0d1931c
Y
1273static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1274 int y= cs->y;
115329f1 1275
a0d1931c
Y
1276 DWTELEM *b0= cs->b0;
1277 DWTELEM *b1= cs->b1;
3b6ab26c
MN
1278 DWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
1279 DWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
a0d1931c
Y
1280
1281{START_TIMER
13705b69
MN
1282 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1283 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
a0d1931c
Y
1284STOP_TIMER("vertical_compose53i*")}
1285
1286{START_TIMER
13705b69
MN
1287 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1288 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
a0d1931c
Y
1289STOP_TIMER("horizontal_compose53i")}
1290
1291 cs->b0 = b2;
1292 cs->b1 = b3;
1293 cs->y += 2;
1294}
1295
f9e6ebf7
LM
1296static void spatial_compose53i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1297 int y= cs->y;
1298 DWTELEM *b0= cs->b0;
1299 DWTELEM *b1= cs->b1;
1300 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1301 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
791e7b83
MN
1302
1303{START_TIMER
13705b69
MN
1304 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1305 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
791e7b83
MN
1306STOP_TIMER("vertical_compose53i*")}
1307
1308{START_TIMER
13705b69
MN
1309 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1310 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
791e7b83
MN
1311STOP_TIMER("horizontal_compose53i")}
1312
f9e6ebf7
LM
1313 cs->b0 = b2;
1314 cs->b1 = b3;
1315 cs->y += 2;
1316}
1317
1318static void spatial_compose53i(DWTELEM *buffer, int width, int height, int stride){
1319 dwt_compose_t cs;
1320 spatial_compose53i_init(&cs, buffer, height, stride);
1321 while(cs.y <= height)
1322 spatial_compose53i_dy(&cs, buffer, width, height, stride);
115329f1
DB
1323}
1324
791e7b83 1325
059715a4 1326void ff_snow_horizontal_compose97i(DWTELEM *b, int width){
aa25a462 1327 DWTELEM temp[width];
791e7b83
MN
1328 const int w2= (width+1)>>1;
1329
1330 lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
1331 lift5(temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
f5a71928 1332 liftS(b , temp , temp+w2, 2, 1, 1, width, -W_BM, W_BO, W_BS, 0, 1);
791e7b83
MN
1333 lift (b+1 , temp+w2, b , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1);
1334}
1335
aa25a462 1336static void vertical_compose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 1337 int i;
115329f1 1338
791e7b83
MN
1339 for(i=0; i<width; i++){
1340 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1341 }
1342}
1343
aa25a462 1344static void vertical_compose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 1345 int i;
115329f1 1346
791e7b83
MN
1347 for(i=0; i<width; i++){
1348#ifdef lift5
1349 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1350#else
1351 int r= 3*(b0[i] + b2[i]);
1352 r+= r>>4;
1353 r+= r>>8;
1354 b1[i] -= (r+W_CO)>>W_CS;
1355#endif
1356 }
1357}
1358
aa25a462 1359static void vertical_compose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 1360 int i;
115329f1 1361
791e7b83 1362 for(i=0; i<width; i++){
f5a71928 1363#ifdef liftS
791e7b83 1364 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
f5a71928
MN
1365#else
1366 b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
1367#endif
791e7b83
MN
1368 }
1369}
1370
aa25a462 1371static void vertical_compose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 1372 int i;
115329f1 1373
791e7b83
MN
1374 for(i=0; i<width; i++){
1375 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1376 }
1377}
1378
059715a4 1379void ff_snow_vertical_compose97i(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width){
565a45ac 1380 int i;
115329f1 1381
565a45ac 1382 for(i=0; i<width; i++){
62ab0b78 1383#ifndef lift5
565a45ac 1384 int r;
62ab0b78 1385#endif
565a45ac
MN
1386 b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
1387#ifdef lift5
1388 b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
1389#else
1390 r= 3*(b2[i] + b4[i]);
1391 r+= r>>4;
1392 r+= r>>8;
1393 b3[i] -= (r+W_CO)>>W_CS;
1394#endif
f5a71928 1395#ifdef liftS
565a45ac 1396 b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
f5a71928
MN
1397#else
1398 b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
1399#endif
565a45ac
MN
1400 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1401 }
1402}
1403
a0d1931c
Y
1404static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1405 cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1406 cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line);
1407 cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1408 cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
1409 cs->y = -3;
1410}
1411
f9e6ebf7
LM
1412static void spatial_compose97i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1413 cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1414 cs->b1 = buffer + mirror(-3 , height-1)*stride;
1415 cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1416 cs->b3 = buffer + mirror(-3+2, height-1)*stride;
1417 cs->y = -3;
1418}
791e7b83 1419
059715a4 1420static void spatial_compose97i_dy_buffered(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
a0d1931c 1421 int y = cs->y;
115329f1 1422
a0d1931c
Y
1423 DWTELEM *b0= cs->b0;
1424 DWTELEM *b1= cs->b1;
1425 DWTELEM *b2= cs->b2;
1426 DWTELEM *b3= cs->b3;
3b6ab26c
MN
1427 DWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
1428 DWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
115329f1 1429
a0d1931c 1430{START_TIMER
565a45ac 1431 if(y>0 && y+4<height){
059715a4 1432 dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
565a45ac 1433 }else{
13705b69
MN
1434 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1435 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1436 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1437 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
565a45ac 1438 }
a0d1931c
Y
1439if(width>400){
1440STOP_TIMER("vertical_compose97i")}}
a0d1931c
Y
1441
1442{START_TIMER
059715a4
RE
1443 if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width);
1444 if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width);
3b6ab26c 1445if(width>400 && y+0<(unsigned)height){
a0d1931c
Y
1446STOP_TIMER("horizontal_compose97i")}}
1447
1448 cs->b0=b2;
1449 cs->b1=b3;
1450 cs->b2=b4;
1451 cs->b3=b5;
1452 cs->y += 2;
1453}
1454
f9e6ebf7
LM
1455static void spatial_compose97i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1456 int y = cs->y;
1457 DWTELEM *b0= cs->b0;
1458 DWTELEM *b1= cs->b1;
1459 DWTELEM *b2= cs->b2;
1460 DWTELEM *b3= cs->b3;
1461 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1462 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
791e7b83 1463
791e7b83 1464{START_TIMER
13705b69
MN
1465 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1466 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1467 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1468 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
791e7b83
MN
1469if(width>400){
1470STOP_TIMER("vertical_compose97i")}}
1471
1472{START_TIMER
059715a4
RE
1473 if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width);
1474 if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width);
791e7b83
MN
1475if(width>400 && b0 <= b2){
1476STOP_TIMER("horizontal_compose97i")}}
f9e6ebf7
LM
1477
1478 cs->b0=b2;
1479 cs->b1=b3;
1480 cs->b2=b4;
1481 cs->b3=b5;
1482 cs->y += 2;
1483}
1484
1485static void spatial_compose97i(DWTELEM *buffer, int width, int height, int stride){
1486 dwt_compose_t cs;
1487 spatial_compose97i_init(&cs, buffer, height, stride);
1488 while(cs.y <= height)
1489 spatial_compose97i_dy(&cs, buffer, width, height, stride);
1490}
1491
ceaf1909 1492static void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
a0d1931c
Y
1493 int level;
1494 for(level=decomposition_count-1; level>=0; level--){
1495 switch(type){
1496 case 0: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1497 case 1: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1498 /* not slicified yet */
1499 case 2: /*spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;*/
1500 av_log(NULL, AV_LOG_ERROR, "spatial_composeX neither buffered nor slicified yet.\n"); break;
1501 }
1502 }
1503}
1504
ceaf1909 1505static void ff_spatial_idwt_init(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
f9e6ebf7
LM
1506 int level;
1507 for(level=decomposition_count-1; level>=0; level--){
1508 switch(type){
1509 case 0: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1510 case 1: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
1511 /* not slicified yet */
1512 case 2: spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;
1513 }
791e7b83
MN
1514 }
1515}
1516
ceaf1909 1517static void ff_spatial_idwt_slice(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
f9e6ebf7 1518 const int support = type==1 ? 3 : 5;
791e7b83 1519 int level;
f9e6ebf7 1520 if(type==2) return;
791e7b83 1521
46c281e8 1522 for(level=decomposition_count-1; level>=0; level--){
f9e6ebf7
LM
1523 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1524 switch(type){
1525 case 0: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1526 break;
1527 case 1: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1528 break;
1529 case 2: break;
1530 }
791e7b83
MN
1531 }
1532 }
1533}
1534
059715a4 1535static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
a0d1931c
Y
1536 const int support = type==1 ? 3 : 5;
1537 int level;
1538 if(type==2) return;
1539
1540 for(level=decomposition_count-1; level>=0; level--){
1541 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1542 switch(type){
059715a4 1543 case 0: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
a0d1931c
Y
1544 break;
1545 case 1: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1546 break;
1547 case 2: break;
1548 }
1549 }
1550 }
1551}
1552
ceaf1909 1553static void ff_spatial_idwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
f9e6ebf7
LM
1554 if(type==2){
1555 int level;
1556 for(level=decomposition_count-1; level>=0; level--)
1557 spatial_composeX (buffer, width>>level, height>>level, stride<<level);
1558 }else{
1559 dwt_compose_t cs[MAX_DECOMPOSITIONS];
1560 int y;
1561 ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1562 for(y=0; y<height; y+=4)
1563 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
1564 }
1565}
1566
0ecca7a4 1567static int encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
4f4e9633
MN
1568 const int w= b->width;
1569 const int h= b->height;
1570 int x, y;
1571
791e7b83 1572 if(1){
791e7b83 1573 int run=0;
a8d73e56 1574 int runs[w*h];
791e7b83 1575 int run_index=0;
b44985ba 1576 int max_index;
115329f1 1577
791e7b83
MN
1578 for(y=0; y<h; y++){
1579 for(x=0; x<w; x++){
78486403 1580 int v, p=0;
6b2f6646 1581 int /*ll=0, */l=0, lt=0, t=0, rt=0;
a8d73e56 1582 v= src[x + y*stride];
791e7b83
MN
1583
1584 if(y){
a8d73e56 1585 t= src[x + (y-1)*stride];
791e7b83 1586 if(x){
a8d73e56 1587 lt= src[x - 1 + (y-1)*stride];
791e7b83
MN
1588 }
1589 if(x + 1 < w){
a8d73e56 1590 rt= src[x + 1 + (y-1)*stride];
791e7b83
MN
1591 }
1592 }
1593 if(x){
a8d73e56 1594 l= src[x - 1 + y*stride];
6b2f6646
MN
1595 /*if(x > 1){
1596 if(orientation==1) ll= src[y + (x-2)*stride];
1597 else ll= src[x - 2 + y*stride];
791e7b83
MN
1598 }*/
1599 }
78486403 1600 if(parent){
a8d73e56
MN
1601 int px= x>>1;
1602 int py= y>>1;
115329f1 1603 if(px<b->parent->width && py<b->parent->height)
78486403
MN
1604 p= parent[px + py*2*stride];
1605 }
1606 if(!(/*ll|*/l|lt|t|rt|p)){
791e7b83
MN
1607 if(v){
1608 runs[run_index++]= run;
1609 run=0;
1610 }else{
1611 run++;
1612 }
1613 }
1614 }
1615 }
b44985ba 1616 max_index= run_index;
791e7b83
MN
1617 runs[run_index++]= run;
1618 run_index=0;
1619 run= runs[run_index++];
1620
b44985ba
MN
1621 put_symbol2(&s->c, b->state[30], max_index, 0);
1622 if(run_index <= max_index)
1623 put_symbol2(&s->c, b->state[1], run, 3);
115329f1 1624
791e7b83 1625 for(y=0; y<h; y++){
d06c75a8 1626 if(s->c.bytestream_end - s->c.bytestream < w*40){
0ecca7a4
MN
1627 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1628 return -1;
1629 }
791e7b83 1630 for(x=0; x<w; x++){
78486403 1631 int v, p=0;
6b2f6646 1632 int /*ll=0, */l=0, lt=0, t=0, rt=0;
a8d73e56 1633 v= src[x + y*stride];
791e7b83
MN
1634
1635 if(y){
a8d73e56 1636 t= src[x + (y-1)*stride];
791e7b83 1637 if(x){
a8d73e56 1638 lt= src[x - 1 + (y-1)*stride];
791e7b83
MN
1639 }
1640 if(x + 1 < w){
a8d73e56 1641 rt= src[x + 1 + (y-1)*stride];
791e7b83
MN
1642 }
1643 }
1644 if(x){
a8d73e56 1645 l= src[x - 1 + y*stride];
6b2f6646
MN
1646 /*if(x > 1){
1647 if(orientation==1) ll= src[y + (x-2)*stride];
1648 else ll= src[x - 2 + y*stride];
791e7b83
MN
1649 }*/
1650 }
78486403 1651 if(parent){
a8d73e56
MN
1652 int px= x>>1;
1653 int py= y>>1;
115329f1 1654 if(px<b->parent->width && py<b->parent->height)
78486403
MN
1655 p= parent[px + py*2*stride];
1656 }
1657 if(/*ll|*/l|lt|t|rt|p){
1658 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
6b2f6646 1659
28869757 1660 put_rac(&s->c, &b->state[0][context], !!v);
791e7b83
MN
1661 }else{
1662 if(!run){
1663 run= runs[run_index++];
4f4e9633 1664
b44985ba
MN
1665 if(run_index <= max_index)
1666 put_symbol2(&s->c, b->state[1], run, 3);
791e7b83
MN
1667 assert(v);
1668 }else{
1669 run--;
1670 assert(!v);
1671 }
1672 }
1673 if(v){
78486403 1674 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
538a3841
MN
1675 int l2= 2*ABS(l) + (l<0);
1676 int t2= 2*ABS(t) + (t<0);
6b2f6646 1677
0635cbfc 1678 put_symbol2(&s->c, b->state[context + 2], ABS(v)-1, context-4);
538a3841 1679 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
791e7b83
MN
1680 }
1681 }
1682 }
791e7b83 1683 }
0ecca7a4 1684 return 0;
791e7b83
MN
1685}
1686
115329f1 1687static int encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
4f4e9633
MN
1688// encode_subband_qtree(s, b, src, parent, stride, orientation);
1689// encode_subband_z0run(s, b, src, parent, stride, orientation);
0ecca7a4 1690 return encode_subband_c0run(s, b, src, parent, stride, orientation);
4f4e9633
MN
1691// encode_subband_dzr(s, b, src, parent, stride, orientation);
1692}
1693
/**
 * Decode the coefficients of subband b from the range coder s->c into the
 * sparse list b->x_coeff.
 *
 * Only non-zero coefficients are stored, as (x, coeff) pairs in raster order.
 * Every row is closed by an end marker whose x is w+1 and one more marker is
 * written after the last row. coeff holds 2*magnitude with an extra flag in
 * bit 0 (decode_subband_slice_buffered() negates the value when it is set).
 * The coding context of a position is built from the already decoded left,
 * top-left, top and top-right neighbours and, if parent is non-NULL, from the
 * parent band coefficient at x>>1.
 * orientation is not used by this function.
 */
a0d1931c 1694static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
791e7b83
MN
1695 const int w= b->width;
1696 const int h= b->height;
1697 int x,y;
115329f1 1698
791e7b83 1699 if(1){
b44985ba 1700 int run, runs;
cbb1d2b1
MN
// xc: write cursor; prev_xc: read cursor into the previous row (NULL for y==0);
// prev2_xc: start of the row currently being written
1701 x_and_coeff *xc= b->x_coeff;
1702 x_and_coeff *prev_xc= NULL;
1703 x_and_coeff *prev2_xc= xc;
1704 x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
1705 x_and_coeff *prev_parent_xc= parent_xc;
791e7b83 1706
b44985ba
MN
// number of coded runs, then the first run; INT_MAX stands for "no run left"
1707 runs= get_symbol2(&s->c, b->state[30], 0);
1708 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1709 else run= INT_MAX;
1710
791e7b83 1711 for(y=0; y<h; y++){
0cea8a03
MN
1712 int v=0;
1713 int lt=0, t=0, rt=0;
1714
cbb1d2b1
MN
// a coefficient at x==0 of the previous row is loaded into rt; it becomes t for x==0 below
1715 if(y && prev_xc->x == 0){
1716 rt= prev_xc->coeff;
0cea8a03 1717 }
791e7b83 1718 for(x=0; x<w; x++){
0cea8a03
MN
1719 int p=0;
// the value decoded for the previous column is the left neighbour
1720 const int l= v;
115329f1 1721
// slide the top neighbours one column to the right
0cea8a03 1722 lt= t; t= rt;
791e7b83 1723
ff765159 1724 if(y){
cbb1d2b1
MN
// move the previous-row cursor past column x, then pick up a coefficient at x+1 if there is one
1725 if(prev_xc->x <= x)
1726 prev_xc++;
1727 if(prev_xc->x == x + 1)
1728 rt= prev_xc->coeff;
ff765159
MN
1729 else
1730 rt=0;
1731 }
cbb1d2b1
MN
// the parent band is looked up at column x>>1
1732 if(parent_xc){
1733 if(x>>1 > parent_xc->x){
1734 parent_xc++;
7b49c309 1735 }
cbb1d2b1
MN
1736 if(x>>1 == parent_xc->x){
1737 p= parent_xc->coeff;
ff765159 1738 }
78486403
MN
1739 }
// some neighbour is non-zero: read a "non-zero" flag in a context derived from the neighbours
1740 if(/*ll|*/l|lt|t|rt|p){
538a3841 1741 int context= av_log2(/*ABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
6b2f6646 1742
28869757 1743 v=get_rac(&s->c, &b->state[0][context]);
3c096ac7
MN
1744 if(v){
// magnitude-1 is coded as a symbol, then one more flag goes into bit 0
1745 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
1746 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
115329f1 1747
cbb1d2b1
MN
1748 xc->x=x;
1749 (xc++)->coeff= v;
3c096ac7 1750 }
791e7b83
MN
// all neighbours are zero: zeros are covered by run lengths
1751 }else{
1752 if(!run){
b44985ba
MN
// the run ended here, so this position holds a coefficient; fetch the next run first
1753 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1754 else run= INT_MAX;
3c096ac7
MN
1755 v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
1756 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
115329f1 1757
cbb1d2b1
MN
1758 xc->x=x;
1759 (xc++)->coeff= v;
791e7b83 1760 }else{
99cd59e5 1761 int max_run;
791e7b83
MN
1762 run--;
1763 v=0;
3c1adccd 1764
// jump over further zero columns at once; the jump is bounded by the remaining run,
// by the next entry of the previous row (or the row end for y==0) and by the next parent entry
cbb1d2b1 1765 if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
99cd59e5 1766 else max_run= FFMIN(run, w-x-1);
cbb1d2b1
MN
1767 if(parent_xc)
1768 max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
99cd59e5
MN
1769 x+= max_run;
1770 run-= max_run;
791e7b83
MN
1771 }
1772 }
7b49c309 1773 }
cbb1d2b1
MN
// close the row; the row just written becomes the "previous row" for the next y
1774 (xc++)->x= w+1; //end marker
1775 prev_xc= prev2_xc;
1776 prev2_xc= xc;
115329f1 1777
cbb1d2b1 1778 if(parent_xc){
// after an odd y step to the next parent row (skip to and past its end marker);
// after an even y rewind so the same parent row is used again
7b49c309 1779 if(y&1){
cbb1d2b1
MN
1780 while(parent_xc->x != parent->width+1)
1781 parent_xc++;
1782 parent_xc++;
1783 prev_parent_xc= parent_xc;
7b49c309 1784 }else{
cbb1d2b1 1785 parent_xc= prev_parent_xc;
791e7b83
MN
1786 }
1787 }
1788 }
a0d1931c 1789
cbb1d2b1 1790 (xc++)->x= w+1; //end marker
a0d1931c
Y
1791 }
1792}
1793
1794static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1795 const int w= b->width;
62ab0b78 1796 int y;
c97de57c
MN
1797 const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
1798 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
a0d1931c
Y
1799 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1800 int new_index = 0;
115329f1 1801
a0d1931c
Y
1802 START_TIMER
1803
1804 if(b->buf == s->spatial_dwt_buffer || s->qlog == LOSSLESS_QLOG){
1805 qadd= 0;
1806 qmul= 1<<QEXPSHIFT;
1807 }
1808
1809 /* If we are on the second or later slice, restore our index. */
1810 if (start_y != 0)
1811 new_index = save_state[0];
1812
115329f1 1813
a0d1931c
Y
1814 for(y=start_y; y<h; y++){
1815 int x = 0;
1816 int v;
1817 DWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1818 memset(line, 0, b->width*sizeof(DWTELEM));
1819 v = b->x_coeff[new_index].coeff;
1820 x = b->x_coeff[new_index++].x;
1821 while(x < w)
1822 {
538a3841
MN
1823 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1824 register int u= -(v&1);
1825 line[x] = (t^u) - u;
1826
a0d1931c
Y
1827 v = b->x_coeff[new_index].coeff;
1828 x = b->x_coeff[new_index++].x;
1829 }
791e7b83 1830 }
a0d1931c
Y
1831 if(w > 200 && start_y != 0/*level+1 == s->spatial_decomposition_count*/){
1832 STOP_TIMER("decode_subband")
1833 }
115329f1 1834
a0d1931c
Y
1835 /* Save our variables for the next slice. */
1836 save_state[0] = new_index;
115329f1 1837
a0d1931c 1838 return;
791e7b83
MN
1839}
1840
1841static void reset_contexts(SnowContext *s){
1842 int plane_index, level, orientation;
1843
19aa028d 1844 for(plane_index=0; plane_index<3; plane_index++){
791e7b83
MN
1845 for(level=0; level<s->spatial_decomposition_count; level++){
1846 for(orientation=level ? 1:0; orientation<4; orientation++){
28869757 1847 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
791e7b83
MN
1848 }
1849 }
1850 }
28869757
MN
1851 memset(s->header_state, MID_STATE, sizeof(s->header_state));
1852 memset(s->block_state, MID_STATE, sizeof(s->block_state));
155ec6ed
MN
1853}
1854
1855static int alloc_blocks(SnowContext *s){
1856 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1857 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
115329f1 1858
155ec6ed
MN
1859 s->b_width = w;
1860 s->b_height= h;
115329f1 1861
155ec6ed
MN
1862 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
1863 return 0;
1864}
1865
28869757
MN
1866static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1867 uint8_t *bytestream= d->bytestream;
1868 uint8_t *bytestream_start= d->bytestream_start;
155ec6ed 1869 *d= *s;
28869757
MN
1870 d->bytestream= bytestream;
1871 d->bytestream_start= bytestream_start;
155ec6ed
MN
1872}
1873
//near copy & paste from dsputil, FIXME
/**
 * Sum all samples of a square w x w block.
 * @param pix       top-left sample of the block
 * @param line_size distance between the starts of two rows
 * @param w         width and height of the block
 */
static int pix_sum(uint8_t * pix, int line_size, int w)
{
    int total = 0;
    int row;

    for (row = 0; row < w; row++) {
        const uint8_t *p   = pix + row * line_size;
        const uint8_t *end = p + w;

        while (p < end)
            total += *p++;
    }
    return total;
}
1889
1890//near copy & paste from dsputil, FIXME
1891static int pix_norm1(uint8_t * pix, int line_size, int w)
1892{
1893 int s, i, j;
1894 uint32_t *sq = squareTbl + 256;
1895
1896 s = 0;
1897 for (i = 0; i < w; i++) {
1898 for (j = 0; j < w; j ++) {
1899 s += sq[pix[0]];
1900 pix ++;
1901 }
1902 pix += line_size - w;
1903 }
1904 return s;
1905}
1906
1907static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int type){
1908 const int w= s->b_width << s->block_max_depth;
1909 const int rem_depth= s->block_max_depth - level;
1910 const int index= (x + y*w) << rem_depth;
1911 const int block_w= 1<<rem_depth;
1912 BlockNode block;
1913 int i,j;
115329f1 1914
155ec6ed
MN
1915 block.color[0]= l;
1916 block.color[1]= cb;
1917 block.color[2]= cr;
1918 block.mx= mx;
1919 block.my= my;
1920 block.type= type;
1921 block.level= level;
1922
1923 for(j=0; j<block_w; j++){
1924 for(i=0; i<block_w; i++){
1925 s->block[index + i + j*w]= block;
1926 }
1927 }
1928}
1929
1930static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
1931 const int offset[3]= {
1932 y*c-> stride + x,
1933 ((y*c->uvstride + x)>>1),
1934 ((y*c->uvstride + x)>>1),
1935 };
1936 int i;
1937 for(i=0; i<3; i++){
1938 c->src[0][i]= src [i];
1939 c->ref[0][i]= ref [i] + offset[i];
1940 }
1941 assert(!ref_index);
1942}
1943
/* Names for rows of a local predictor array that must be called P at the
 * point of use; encode_q_branch() below fills P_LEFT, P_TOP, P_TOPRIGHT and
 * P_MEDIAN with motion vector candidates (index 0 = x, index 1 = y). */
1944//FIXME copy&paste
1945#define P_LEFT P[1]
1946#define P_TOP P[2]
1947#define P_TOPRIGHT P[3]
1948#define P_MEDIAN P[4]
1949#define P_MV1 P[9]
1950#define FLAG_QPEL 1 //must be 1
1951
/**
 * Choose and write the coding of the block at (x, y) of quadtree level `level`.
 *
 * On keyframes the block is only marked intra with predicted values, nothing
 * is written and 0 is returned. Otherwise an inter candidate (motion search
 * against s->last_picture) and an intra candidate (mean color of
 * s->input_picture) are each trial-encoded into a private buffer with a
 * private copy of s->block_state. Below the maximum depth the four-way split
 * is additionally encoded, recursively, into the real coder s->c. The split is
 * kept if its score beats both candidates; otherwise the output is rewound
 * and the cheaper of intra / inter is written instead.
 *
 * @return the score of the coding that was kept
 */
1952static int encode_q_branch(SnowContext *s, int level, int x, int y){
// output and state copies for the inter (p_) and intra (i_) trial encodes
1953 uint8_t p_buffer[1024];
1954 uint8_t i_buffer[1024];
1955 uint8_t p_state[sizeof(s->block_state)];
1956 uint8_t i_state[sizeof(s->block_state)];
28869757
MN
1957 RangeCoder pc, ic;
// output position of the real coder on entry; the chosen trial output is copied here at the end
1958 uint8_t *pbbak= s->c.bytestream;
1959 uint8_t *pbbak_start= s->c.bytestream_start;
155ec6ed
MN
1960 int score, score2, iscore, i_len, p_len, block_s, sum;
1961 const int w= s->b_width << s->block_max_depth;
1962 const int h= s->b_height << s->block_max_depth;
1963 const int rem_depth= s->block_max_depth - level;
1964 const int index= (x + y*w) << rem_depth;
1965 const int block_w= 1<<(LOG2_MB_SIZE - level);
155ec6ed
MN
1966 int trx= (x+1)<<rem_depth;
1967 int try= (y+1)<<rem_depth;
// neighbouring block nodes; null_block is used where the neighbour lies outside the grid
1968 BlockNode *left = x ? &s->block[index-1] : &null_block;
1969 BlockNode *top = y ? &s->block[index-w] : &null_block;
1970 BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
1971 BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
1972 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
1973 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
// predictions: colors come from the left block, the motion vector is the median of left, top and top-right
1974 int pl = left->color[0];
1975 int pcb= left->color[1];
1976 int pcr= left->color[2];
1977 int pmx= mid_pred(left->mx, top->mx, tr->mx);
1978 int pmy= mid_pred(left->my, top->my, tr->my);
1979 int mx=0, my=0;
51d6a3cf 1980 int l,cr,cb;
155ec6ed
MN
1981 const int stride= s->current_picture.linesize[0];
1982 const int uvstride= s->current_picture.linesize[1];
51d6a3cf
MN
// top-left sample of this block in each plane of the input picture
1983 uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w,
1984 s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
1985 s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
155ec6ed
MN
1986 int P[10][2];
1987 int16_t last_mv[3][2];
1988 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
1989 const int shift= 1+qpel;
1990 MotionEstContext *c= &s->m.me;
// coder contexts derived from the neighbours (same formulas as in decode_q_branch())
1991 int mx_context= av_log2(2*ABS(left->mx - top->mx));
1992 int my_context= av_log2(2*ABS(left->my - top->my));
1993 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1994
1995 assert(sizeof(s->block_state) >= 256);
// keyframe: no decision to make and nothing to write
1996 if(s->keyframe){
1997 set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, BLOCK_INTRA);
1998 return 0;
1999 }
2000
155ec6ed
MN
2001// clip predictors / edge ?
2002
// motion vector candidates handed to the motion search
2003 P_LEFT[0]= left->mx;
2004 P_LEFT[1]= left->my;
2005 P_TOP [0]= top->mx;
2006 P_TOP [1]= top->my;
2007 P_TOPRIGHT[0]= tr->mx;
2008 P_TOPRIGHT[1]= tr->my;
115329f1 2009
155ec6ed
MN
// vectors currently stored for this block and for its right and bottom neighbours
2010 last_mv[0][0]= s->block[index].mx;
2011 last_mv[0][1]= s->block[index].my;
2012 last_mv[1][0]= right->mx;
2013 last_mv[1][1]= right->my;
2014 last_mv[2][0]= bottom->mx;
2015 last_mv[2][1]= bottom->my;
115329f1 2016
155ec6ed 2017 s->m.mb_stride=2;
115329f1 2018 s->m.mb_x=
155ec6ed
MN
2019 s->m.mb_y= 0;
2020 s->m.me.skip= 0;
2021
2022 init_ref(c, current_data, s->last_picture.data, NULL, block_w*x, block_w*y, 0);
115329f1 2023
155ec6ed
MN
2024 assert(s->m.me. stride == stride);
2025 assert(s->m.me.uvstride == uvstride);
115329f1 2026
155ec6ed
MN
2027 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
2028 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
2029 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
2030 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
115329f1 2031
ff158dc9
MN
// search range relative to the block position: the block grid area extended by 16-2 on every side
2032 c->xmin = - x*block_w - 16+2;
2033 c->ymin = - y*block_w - 16+2;
2034 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
2035 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
155ec6ed
MN
2036
// limit the candidates to the search range (scaled by shift)
2037 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
115329f1 2038 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
155ec6ed
MN
2039 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
2040 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
2041 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
2042 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
2043 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
2044
2045 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
2046 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
2047
// the first block row uses the left candidate as search predictor, all others the median
2048 if (!y) {
2049 c->pred_x= P_LEFT[0];
2050 c->pred_y= P_LEFT[1];
2051 } else {
2052 c->pred_x = P_MEDIAN[0];
2053 c->pred_y = P_MEDIAN[1];
2054 }
2055
115329f1 2056 score= ff_epzs_motion_search(&s->m, &mx, &my, P, 0, /*ref_index*/ 0, last_mv,
155ec6ed
MN
2057 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
2058
2059 assert(mx >= c->xmin);
2060 assert(mx <= c->xmax);
2061 assert(my >= c->ymin);
2062 assert(my <= c->ymax);
115329f1 2063
155ec6ed
MN
// refine the vector, then recompute the score of the final vector
2064 score= s->m.me.sub_motion_search(&s->m, &mx, &my, score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
2065 score= ff_get_mb_score(&s->m, mx, my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
2066 //FIXME if mb_cmp != SSE then intra cant be compared currently and mb_penalty vs. lambda2
115329f1 2067
155ec6ed
MN
2068 // subpel search
// inter trial encode: clone the real coder, redirect its output to p_buffer
2069 pc= s->c;
28869757
MN
2070 pc.bytestream_start=
2071 pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
155ec6ed
MN
2072 memcpy(p_state, s->block_state, sizeof(s->block_state));
2073
// flag 1 = this block is not split (decode_q_branch() treats a set flag as a leaf)
2074 if(level!=s->block_max_depth)
28869757
MN
2075 put_rac(&pc, &p_state[4 + s_context], 1);
// type flag 0 = inter, followed by the motion vector difference to the prediction
2076 put_rac(&pc, &p_state[1 + left->type + top->type], 0);
155ec6ed
MN
2077 put_symbol(&pc, &p_state[128 + 32*mx_context], mx - pmx, 1);
2078 put_symbol(&pc, &p_state[128 + 32*my_context], my - pmy, 1);
28869757
MN
2079 p_len= pc.bytestream - pc.bytestream_start;
// add the estimated size of the trial output in bits, weighted by lambda2
2080 score += (s->lambda2*(p_len*8
2081 + (pc.outstanding_count - s->c.outstanding_count)*8
2082 + (-av_log2(pc.range) + av_log2(s->c.range))
2083 ))>>FF_LAMBDA_SHIFT;
155ec6ed
MN
2084
// intra candidate: l, cb, cr are the rounded means of the block in each plane;
// iscore is the sum over the luma block of (sample - l)^2, expanded into three terms
2085 block_s= block_w*block_w;
51d6a3cf 2086 sum = pix_sum(current_data[0], stride, block_w);
155ec6ed 2087 l= (sum + block_s/2)/block_s;
51d6a3cf 2088 iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
115329f1 2089
155ec6ed 2090 block_s= block_w*block_w>>2;
51d6a3cf 2091 sum = pix_sum(current_data[1], uvstride, block_w>>1);
155ec6ed
MN
2092 cb= (sum + block_s/2)/block_s;
2093// iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
51d6a3cf 2094 sum = pix_sum(current_data[2], uvstride, block_w>>1);
155ec6ed
MN
2095 cr= (sum + block_s/2)/block_s;
2096// iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
2097
// intra trial encode into i_buffer, mirroring the inter one
2098 ic= s->c;
28869757
MN
2099 ic.bytestream_start=
2100 ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
155ec6ed
MN
2101 memcpy(i_state, s->block_state, sizeof(s->block_state));
2102 if(level!=s->block_max_depth)
28869757
MN
2103 put_rac(&ic, &i_state[4 + s_context], 1);
// type flag 1 = intra, followed by the color differences to the prediction
2104 put_rac(&ic, &i_state[1 + left->type + top->type], 1);
155ec6ed
MN
2105 put_symbol(&ic, &i_state[32], l-pl , 1);
2106 put_symbol(&ic, &i_state[64], cb-pcb, 1);
2107 put_symbol(&ic, &i_state[96], cr-pcr, 1);
28869757
MN
2108 i_len= ic.bytestream - ic.bytestream_start;
2109 iscore += (s->lambda2*(i_len*8
2110 + (ic.outstanding_count - s->c.outstanding_count)*8
2111 + (-av_log2(ic.range) + av_log2(s->c.range))
2112 ))>>FF_LAMBDA_SHIFT;
155ec6ed
MN
2113
2114// assert(score==256*256*256*64-1);
2115 assert(iscore < 255*255*256 + s->lambda2*10);
2116 assert(iscore >= 0);
2117 assert(l>=0 && l<=255);
2118 assert(pl>=0 && pl<=255);
2119
// top level blocks also feed the scene change score of the motion estimator
2120 if(level==0){
2121 int varc= iscore >> 8;
2122 int vard= score >> 8;
2123 if (vard <= 64 || vard < varc)
2124 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
2125 else
2126 c->scene_change_score+= s->m.qscale;
2127 }
115329f1 2128
// try the split: flag 0 and the four children go straight into the real coder;
// if the split loses, that output is overwritten from pbbak below
155ec6ed 2129 if(level!=s->block_max_depth){
28869757 2130 put_rac(&s->c, &s->block_state[4 + s_context], 0);
155ec6ed
MN
2131 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
2132 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
2133 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
2134 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
2135 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
115329f1 2136
155ec6ed
MN
2137 if(score2 < score && score2 < iscore)
2138 return score2;
2139 }
115329f1 2140
// commit the cheaper trial: copy its bytes to the entry position, adopt its coder
// and block states, and store the block parameters (this also replaces any children set above)
155ec6ed 2141 if(iscore < score){
28869757 2142 memcpy(pbbak, i_buffer, i_len);
155ec6ed 2143 s->c= ic;
28869757
MN
2144 s->c.bytestream_start= pbbak_start;
2145 s->c.bytestream= pbbak + i_len;
155ec6ed
MN
2146 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, BLOCK_INTRA);
2147 memcpy(s->block_state, i_state, sizeof(s->block_state));
2148 return iscore;
2149 }else{
28869757 2150 memcpy(pbbak, p_buffer, p_len);
155ec6ed 2151 s->c= pc;
28869757
MN
2152 s->c.bytestream_start= pbbak_start;
2153 s->c.bytestream= pbbak + p_len;
155ec6ed
MN
2154 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, 0);
2155 memcpy(s->block_state, p_state, sizeof(s->block_state));
2156 return score;
2157 }
2158}
2159
51d6a3cf
MN
2160static always_inline int same_block(BlockNode *a, BlockNode *b){
2161 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
2162 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
2163 }else{
2164 return !((a->mx - b->mx) | (a->my - b->my) | ((a->type ^ b->type)&BLOCK_INTRA));
2165 }
2166}
2167
2168static void encode_q_branch2(SnowContext *s, int level, int x, int y){
2169 const int w= s->b_width << s->block_max_depth;
2170 const int rem_depth= s->block_max_depth - level;
2171 const int index= (x + y*w) << rem_depth;
2172 int trx= (x+1)<<rem_depth;
2173 BlockNode *b= &s->block[index];
2174 BlockNode *left = x ? &s->block[index-1] : &null_block;
2175 BlockNode *top = y ? &s->block[index-w] : &null_block;
2176 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2177 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2178 int pl = left->color[0];
2179 int pcb= left->color[1];
2180 int pcr= left->color[2];
2181 int pmx= mid_pred(left->mx, top->mx, tr->mx);
2182 int pmy= mid_pred(left->my, top->my, tr->my);
2183 int mx_context= av_log2(2*ABS(left->mx - top->mx));
2184 int my_context= av_log2(2*ABS(left->my - top->my));
2185 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2186
2187 if(s->keyframe){
2188 set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, BLOCK_INTRA);
2189 return;
2190 }
2191
2192 if(level!=s->block_max_depth){
2193 if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
d6f41eed
MN
2194 put_rac(&s->c, &s->block_state[4 + s_context], 1);
2195 }else{
51d6a3cf
MN
2196 put_rac(&s->c, &s->block_state[4 + s_context], 0);
2197 encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
2198 encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
2199 encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
2200 encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
2201 return;
51d6a3cf
MN
2202 }
2203 }
2204 if(b->type & BLOCK_INTRA){
2205 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
2206 put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
2207 put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
2208 put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
2209 set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, BLOCK_INTRA);
2210 }else{
2211 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
2212 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
2213 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
2214 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, 0);
2215 }
2216}
2217
155ec6ed
MN
2218static void decode_q_branch(SnowContext *s, int level, int x, int y){
2219 const int w= s->b_width << s->block_max_depth;
155ec6ed
MN
2220 const int rem_depth= s->block_max_depth - level;
2221 const int index= (x + y*w) << rem_depth;
155ec6ed 2222 int trx= (x+1)<<rem_depth;
155ec6ed
MN
2223 BlockNode *left = x ? &s->block[index-1] : &null_block;
2224 BlockNode *top = y ? &s->block[index-w] : &null_block;
2225 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2226 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2227 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
115329f1 2228
155ec6ed
MN
2229 if(s->keyframe){
2230 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, BLOCK_INTRA);
2231 return;
2232 }
2233
28869757 2234 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
155ec6ed
MN
2235 int type;
2236 int l = left->color[0];
2237 int cb= left->color[1];
2238 int cr= left->color[2];
2239 int mx= mid_pred(left->mx, top->mx, tr->mx);
2240 int my= mid_pred(left->my, top->my, tr->my);
2241 int mx_context= av_log2(2*ABS(left->mx - top->mx)) + 0*av_log2(2*ABS(tr->mx - top->mx));
2242 int my_context= av_log2(2*ABS(left->my - top->my)) + 0*av_log2(2*ABS(tr->my - top->my));
115329f1 2243
28869757 2244 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
155ec6ed
MN
2245
2246 if(type){
2247 l += get_symbol(&s->c, &s->block_state[32], 1);
2248 cb+= get_symbol(&s->c, &s->block_state[64], 1);
2249 cr+= get_symbol(&s->c, &s->block_state[96], 1);
2250 }else{
2251 mx+= get_symbol(&s->c, &s->block_state[128 + 32*mx_context], 1);
2252 my+= get_symbol(&s->c, &s->block_state[128 + 32*my_context], 1);
2253 }
2254 set_blocks(s, level, x, y, l, cb, cr, mx, my, type);
2255 }else{
2256 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
2257 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
2258 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
2259 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
2260 }
2261}
2262
2263static void encode_blocks(SnowContext *s){
2264 int x, y;
2265 int w= s->b_width;
2266 int h= s->b_height;
2267
51d6a3cf
MN
2268 if(s->avctx->me_method == ME_ITER && !s->keyframe)
2269 iterative_me(s);
2270
155ec6ed 2271 for(y=0; y<h; y++){
d06c75a8 2272 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
0ecca7a4
MN
2273 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2274 return;
2275 }
155ec6ed 2276 for(x=0; x<w; x++){
51d6a3cf
MN
2277 if(s->avctx->me_method == ME_ITER)
2278 encode_q_branch2(s, 0, x, y);
2279 else
2280 encode_q_branch (s, 0, x, y);
155ec6ed
MN
2281 }
2282 }
2283}
2284
2285static void decode_blocks(SnowContext *s){
2286 int x, y;
2287 int w= s->b_width;
2288 int h= s->b_height;
2289
2290 for(y=0; y<h; y++){
2291 for(x=0; x<w; x++){
2292 decode_q_branch(s, 0, x, y);
2293 }
2294 }
791e7b83
MN
2295}
2296
/**
 * Interpolate a b_w x b_h block at a sub-pixel position.
 *
 * The first pass filters src horizontally into tmp for b_h+5 rows, the second
 * pass filters tmp vertically into dst. Both passes apply the 6-tap kernel
 * (1,-5,20,20,-5,1) around the samples a2/a3 and then blend the result with
 * a2 (dx or dy below 8) or a3 (8 and above), so that 0 reproduces a2 and 8
 * gives the pure kernel output. Results are clipped to 0..255.
 * src, tmp and dst share the same stride; src is read up to 5 columns to the
 * right of the block and 5 rows below it.
 * The callers in this file pass dx, dy in the range 0..15.
 */
2297static void mc_block(uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
2298 int x, y;
3924dac4 2299START_TIMER
791e7b83
MN
// horizontal pass: src -> tmp, 5 extra rows for the vertical kernel
2300 for(y=0; y < b_h+5; y++){
2301 for(x=0; x < b_w; x++){
3924dac4
MN
2302 int a0= src[x ];
2303 int a1= src[x + 1];
2304 int a2= src[x + 2];
2305 int a3= src[x + 3];
2306 int a4= src[x + 4];
2307 int a5= src[x + 5];
791e7b83
MN
2308// int am= 9*(a1+a2) - (a0+a3);
2309 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2310// int am= 18*(a2+a3) - 2*(a1+a4);
2311// int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2312// int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;
2313
2314// if(b_w==16) am= 8*(a1+a2);
2315
8c2515bb
Y
// blend the kernel output (scaled by 32) with the nearer full sample, with rounding
2316 if(dx<8) am = (32*a2*( 8-dx) + am* dx + 128)>>8;
2317 else am = ( am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
115329f1 2318
8c2515bb
Y
2319 /* FIXME Try increasing tmp buffer to 16 bits and not clipping here. Should give marginally better results. - Robert*/
// clip: out of range values become 0 (negative) or -1, which is stored as 255 (too large)
2320 if(am&(~255)) am= ~(am>>31);
115329f1 2321
8c2515bb 2322 tmp[x] = am;
791e7b83
MN
2323
2324/* if (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) + aL* dx + 32)>>6;
2325 else if(dx< 8) tmp[x + y*stride]= ( aL*( 8-dx) + am*(dx- 4) + 32)>>6;
2326 else if(dx<12) tmp[x + y*stride]= ( am*(12-dx) + aR*(dx- 8) + 32)>>6;
2327 else tmp[x + y*stride]= ( aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/
2328 }
3924dac4
MN
2329 tmp += stride;
2330 src += stride;
791e7b83 2331 }
// rewind tmp to its first row for the vertical pass
3924dac4 2332 tmp -= (b_h+5)*stride;
115329f1 2333
791e7b83
MN
// vertical pass: tmp -> dst, same kernel and blending along the columns using dy
2334 for(y=0; y < b_h; y++){
2335 for(x=0; x < b_w; x++){
3924dac4
MN
2336 int a0= tmp[x + 0*stride];
2337 int a1= tmp[x + 1*stride];
2338 int a2= tmp[x + 2*stride];
2339 int a3= tmp[x + 3*stride];
2340 int a4= tmp[x + 4*stride];
2341 int a5= tmp[x + 5*stride];
791e7b83
MN
2342 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2343// int am= 18*(a2+a3) - 2*(a1+a4);
2344/* int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2345 int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/
115329f1 2346
791e7b83
MN
2347// if(b_w==16) am= 8*(a1+a2);
2348
8c2515bb
Y
2349 if(dy<8) am = (32*a2*( 8-dy) + am* dy + 128)>>8;
2350 else am = ( am*(16-dy) + 32*a3*(dy-8) + 128)>>8;
791e7b83 2351
8c2515bb 2352 if(am&(~255)) am= ~(am>>31);
115329f1 2353
8c2515bb 2354 dst[x] = am;
791e7b83
MN
2355/* if (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) + aL* dy + 32)>>6;
2356 else if(dy< 8) tmp[x + y*stride]= ( aL*( 8-dy) + am*(dy- 4) + 32)>>6;
2357 else if(dy<12) tmp[x + y*stride]= ( am*(12-dy) + aR*(dy- 8) + 32)>>6;
2358 else tmp[x + y*stride]= ( aR*(16-dy) + 16*a2*(dy-12) + 32)>>6;*/
2359 }
3924dac4
MN
2360 dst += stride;
2361 tmp += stride;
791e7b83 2362 }
3924dac4 2363STOP_TIMER("mc_block")
791e7b83
MN
2364}
2365
/* mca(dx,dy,b_w) defines mc_block_hpel<dx><dy><b_w>(dst, src, stride, h):
 * a wrapper that runs mc_block() on a square b_w x b_w block (h must equal
 * b_w) with the fixed sub-pixel offsets dx, dy and a temporary buffer of
 * stride*(b_w+5) bytes on the stack. src is moved back by 2 columns and
 * 2 rows before the call. */
791e7b83 2366#define mca(dx,dy,b_w)\
d92b5807 2367static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, uint8_t *src, int stride, int h){\
791e7b83
MN
2368 uint8_t tmp[stride*(b_w+5)];\
2369 assert(h==b_w);\
2370 mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\
2371}
2372
/* instances for offsets 0 and 8 in each direction, block sizes 16 and 8 */
2373mca( 0, 0,16)
2374mca( 8, 0,16)
2375mca( 0, 8,16)
2376mca( 8, 8,16)
d92b5807
MN
2377mca( 0, 0,8)
2378mca( 8, 0,8)
2379mca( 0, 8,8)
2380mca( 8, 8,8)
791e7b83 2381
ff158dc9 2382static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
51d6a3cf 2383 if(block->type & BLOCK_INTRA){
ff158dc9 2384 int x, y;
2692ceab
MN
2385 const int color = block->color[plane_index];
2386 const int color4= color*0x01010101;
1015631b
LM
2387 if(b_w==32){
2388 for(y=0; y < b_h; y++){
2389 *(uint32_t*)&dst[0 + y*stride]= color4;
2390 *(uint32_t*)&dst[4 + y*stride]= color4;
2391 *(uint32_t*)&dst[8 + y*stride]= color4;
2392 *(uint32_t*)&dst[12+ y*stride]= color4;
2393 *(uint32_t*)&dst[16+ y*stride]= color4;
2394 *(uint32_t*)&dst[20+ y*stride]= color4;
2395 *(uint32_t*)&dst[24+ y*stride]= color4;
2396 *(uint32_t*)&dst[28+ y*stride]= color4;
2397 }
2398 }else if(b_w==16){
2692ceab
MN
2399 for(y=0; y < b_h; y++){
2400 *(uint32_t*)&dst[0 + y*stride]= color4;
2401 *(uint32_t*)&dst[4 + y*stride]= color4;
2402 *(uint32_t*)&dst[8 + y*stride]= color4;
2403 *(uint32_t*)&dst[12+ y*stride]= color4;
2404 }
2405 }else if(b_w==8){
2406 for(y=0; y < b_h; y++){
2407 *(uint32_t*)&dst[0 + y*stride]= color4;
2408 *(uint32_t*)&dst[4 + y*stride]= color4;
2409 }
2410 }else if(b_w==4){
2411 for(y=0; y < b_h; y++){
2412 *(uint32_t*)&dst[0 + y*stride]= color4;
2413 }
2414 }else{
2415 for(y=0; y < b_h; y++){
2416 for(x=0; x < b_w; x++){
2417 dst[x + y*stride]= color;
2418 }
ff158dc9
MN
2419 }
2420 }
2421 }else{
2422 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
2423 int mx= block->mx*scale;
2424 int my= block->my*scale;
ec697587
MN
2425 const int dx= mx&15;
2426 const int dy= my&15;
80e44bc3 2427 const int tab_index= 3 - (b_w>>2) + (b_w>>4);
ff158dc9
MN
2428 sx += (mx>>4) - 2;
2429 sy += (my>>4) - 2;
2430 src += sx + sy*stride;
2431 if( (unsigned)sx >= w - b_w - 4
2432 || (unsigned)sy >= h - b_h - 4){
2433 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+5, b_h+5, sx, sy, w, h);
2434 src= tmp + MB_SIZE;
2435 }
87f20c2f
MN
2436// assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
2437// assert(!(b_w&(b_w-1)));
2692ceab 2438 assert(b_w>1 && b_h>1);
1015631b 2439 assert(tab_index>=0 && tab_index<4 || b_w==32);
87f20c2f 2440 if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)))
ec697587 2441 mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy);
1015631b
LM
2442 else if(b_w==32){
2443 int y;
2444 for(y=0; y<b_h; y+=16){
2445 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 2 + (y+2)*stride,stride);
2446 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 18 + (y+2)*stride,stride);
2447 }
2448 }else if(b_w==b_h)
80e44bc3 2449 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 2 + 2*stride,stride);
2692ceab 2450 else if(b_w==2*b_h){
80e44bc3
MN
2451 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 2 + 2*stride,stride);
2452 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 2 + b_h + 2*stride,stride);
2692ceab
MN
2453 }else{
2454 assert(2*b_w==b_h);
80e44bc3
MN
2455 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 2 + 2*stride ,stride);
2456 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 2 + 2*stride+b_w*stride,stride);
2692ceab 2457 }
ff158dc9
MN
2458 }
2459}
2460
059715a4
RE
2461void ff_snow_inner_add_yblock(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
2462 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
2463 int y, x;
2464 DWTELEM * dst;
2465 for(y=0; y<b_h; y++){
2466 //FIXME ugly missue of obmc_stride
2467 uint8_t *obmc1= obmc + y*obmc_stride;
2468 uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2469 uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2470 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2471 dst = slice_buffer_get_line(sb, src_y + y);
2472 for(x=0; x<b_w; x++){
2473 int v= obmc1[x] * block[3][x + y*src_stride]
2474 +obmc2[x] * block[2][x + y*src_stride]
2475 +obmc3[x] * block[1][x + y*src_stride]
2476 +obmc4[x] * block[0][x + y*src_stride];
2477
2478 v <<= 8 - LOG2_OBMC_MAX;
2479 if(FRAC_BITS != 8){
2480 v += 1<<(7 - FRAC_BITS);
2481 v >>= 8 - FRAC_BITS;
2482 }
2483 if(add){
2484 v += dst[x + src_x];
2485 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2486 if(v&(~255)) v= ~(v>>31);
2487 dst8[x + y*src_stride] = v;
2488 }else{
2489 dst[x + src_x] -= v;
2490 }
2491 }
2492 }
2493}
2494
ff158dc9 2495//FIXME name cleanup (b_w, block_w, b_width stuff)
a0d1931c
Y
2496static always_inline void add_yblock_buffered(SnowContext *s, slice_buffer * sb, DWTELEM *old_dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){
2497 DWTELEM * dst = NULL;
2498 const int b_width = s->b_width << s->block_max_depth;
2499 const int b_height= s->b_height << s->block_max_depth;
2500 const int b_stride= b_width;
2501 BlockNode *lt= &s->block[b_x + b_y*b_stride];
2502 BlockNode *rt= lt+1;
2503 BlockNode *lb= lt+b_stride;
2504 BlockNode *rb= lb+1;
115329f1 2505 uint8_t *block[4];
cc884a35
MN
2506 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
2507 uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
2508 uint8_t *ptmp;
a0d1931c
Y
2509 int x,y;
2510
2511 if(b_x<0){
2512 lt= rt;
2513 lb= rb;
2514 }else if(b_x + 1 >= b_width){
2515 rt= lt;
2516 rb= lb;
2517 }
2518 if(b_y<0){
2519 lt= lb;
2520 rt= rb;
2521 }else if(b_y + 1 >= b_height){
2522 lb= lt;
2523 rb= rt;
2524 }
115329f1 2525
a0d1931c
Y
2526 if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
2527 obmc -= src_x;
2528 b_w += src_x;
2529 src_x=0;
2530 }else if(src_x + b_w > w){
2531 b_w = w - src_x;
2532 }
2533 if(src_y<0){
2534 obmc -= src_y*obmc_stride;
2535 b_h += src_y;
2536 src_y=0;
2537 }else if(src_y + b_h> h){
2538 b_h = h - src_y;
2539 }
115329f1 2540
a0d1931c
Y
2541 if(b_w<=0 || b_h<=0) return;
2542
cc884a35 2543assert(src_stride > 2*MB_SIZE + 5);
a0d1931c
Y
2544// old_dst += src_x + src_y*dst_stride;
2545 dst8+= src_x + src_y*src_stride;
2546// src += src_x + src_y*src_stride;
2547
cc884a35
MN
2548 ptmp= tmp + 3*tmp_step;
2549 block[0]= ptmp;
2550 ptmp+=tmp_step;
115329f1 2551 pred_block(s, block[0], src, tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
a0d1931c
Y
2552
2553 if(same_block(lt, rt)){
2554 block[1]= block[0];
2555 }else{
cc884a35
MN
2556 block[1]= ptmp;
2557 ptmp+=tmp_step;
a0d1931c
Y
2558 pred_block(s, block[1], src, tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2559 }
115329f1 2560
a0d1931c
Y
2561 if(same_block(lt, lb)){
2562 block[2]= block[0];
2563 }else if(same_block(rt, lb)){
2564 block[2]= block[1];
2565 }else{
cc884a35
MN
2566 block[2]= ptmp;
2567 ptmp+=tmp_step;
a0d1931c
Y
2568 pred_block(s, block[2], src, tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2569 }
2570
2571 if(same_block(lt, rb) ){
2572 block[3]= block[0];
2573 }else if(same_block(rt, rb)){
2574 block[3]= block[1];
2575 }else if(same_block(lb, rb)){
2576 block[3]= block[2];
2577 }else{
cc884a35 2578 block[3]= ptmp;
a0d1931c
Y
2579 pred_block(s, block[3], src, tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2580 }
2581#if 0
2582 for(y=0; y<b_h; y++){
2583 for(x=0; x<b_w; x++){
2584 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2585 if(add) dst[x + y*dst_stride] += v;
2586 else dst[x + y*dst_stride] -= v;
2587 }
2588 }
2589 for(y=0; y<b_h; y++){
2590 uint8_t *obmc2= obmc + (obmc_stride>>1);
2591 for(x=0; x<b_w; x++){
2592 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2593 if(add) dst[x + y*dst_stride] += v;
2594 else dst[x + y*dst_stride] -= v;
2595 }
2596 }
2597 for(y=0; y<b_h; y++){
2598 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2599 for(x=0; x<b_w; x++){
2600 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2601 if(add) dst[x + y*dst_stride] += v;
2602 else dst[x + y*dst_stride] -= v;
2603 }
2604 }
2605 for(y=0; y<b_h; y++){
2606 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2607 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2608 for(x=0; x<b_w; x++){
2609 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2610 if(add) dst[x + y*dst_stride] += v;
2611 else dst[x + y*dst_stride] -= v;
2612 }
2613 }
2614#else
2615{
2616
2617 START_TIMER
115329f1 2618
059715a4 2619 s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
a0d1931c
Y
2620 STOP_TIMER("Inner add y block")
2621}
2622#endif
2623}
2624
2625//FIXME name clenup (b_w, block_w, b_width stuff)
1015631b 2626static always_inline void add_yblock(SnowContext *s, DWTELEM *dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
ff158dc9
MN
2627 const int b_width = s->b_width << s->block_max_depth;
2628 const int b_height= s->b_height << s->block_max_depth;
2629 const int b_stride= b_width;
2630 BlockNode *lt= &s->block[b_x + b_y*b_stride];
2631 BlockNode *rt= lt+1;
2632 BlockNode *lb= lt+b_stride;
2633 BlockNode *rb= lb+1;
115329f1 2634 uint8_t *block[4];
cc884a35
MN
2635 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
2636 uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
2637 uint8_t *ptmp;
791e7b83
MN
2638 int x,y;
2639
ff158dc9
MN
2640 if(b_x<0){
2641 lt= rt;
2642 lb= rb;
2643 }else if(b_x + 1 >= b_width){
2644 rt= lt;
2645 rb= lb;
791e7b83 2646 }
ff158dc9
MN
2647 if(b_y<0){
2648 lt= lb;
2649 rt= rb;
2650 }else if(b_y + 1 >= b_height){
2651 lb= lt;
2652 rb= rt;
2653 }
115329f1 2654
ff158dc9
MN
2655 if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
2656 obmc -= src_x;
2657 b_w += src_x;
1015631b
LM
2658 if(!offset_dst)
2659 dst -= src_x;
ff158dc9
MN
2660 src_x=0;
2661 }else if(src_x + b_w > w){
2662 b_w = w - src_x;
2663 }
2664 if(src_y<0){
2665 obmc -= src_y*obmc_stride;
2666 b_h += src_y;
1015631b
LM
2667 if(!offset_dst)
2668 dst -= src_y*dst_stride;
ff158dc9
MN
2669 src_y=0;
2670 }else if(src_y + b_h> h){
2671 b_h = h - src_y;
791e7b83 2672 }
115329f1 2673
ff158dc9 2674 if(b_w<=0 || b_h<=0) return;
155ec6ed 2675
cc884a35 2676assert(src_stride > 2*MB_SIZE + 5);
1015631b
LM
2677 if(offset_dst)
2678 dst += src_x + src_y*dst_stride;
715a97f0 2679 dst8+= src_x + src_y*src_stride;
ff158dc9
MN
2680// src += src_x + src_y*src_stride;
2681
cc884a35
MN
2682 ptmp= tmp + 3*tmp_step;
2683 block[0]= ptmp;
2684 ptmp+=tmp_step;
115329f1 2685 pred_block(s, block[0], src, tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
ff158dc9
MN
2686
2687 if(same_block(lt, rt)){
2688 block[1]= block[0];
791e7b83 2689 }else{
cc884a35
MN
2690 block[1]= ptmp;
2691 ptmp+=tmp_step;
ff158dc9
MN
2692 pred_block(s, block[1], src, tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2693 }
115329f1 2694
ff158dc9
MN
2695 if(same_block(lt, lb)){
2696 block[2]= block[0];
2697 }else if(same_block(rt, lb)){
2698 block[2]= block[1];
2699 }else{
cc884a35
MN
2700 block[2]= ptmp;
2701 ptmp+=tmp_step;
ff158dc9
MN
2702 pred_block(s, block[2], src, tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2703 }
791e7b83 2704
ff158dc9
MN
2705 if(same_block(lt, rb) ){
2706 block[3]= block[0];
2707 }else if(same_block(rt, rb)){
2708 block[3]= block[1];
2709 }else if(same_block(lb, rb)){
2710 block[3]= block[2];
2711 }else{
cc884a35 2712 block[3]= ptmp;
ff158dc9
MN
2713 pred_block(s, block[3], src, tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2714 }
2715#if 0
2716 for(y=0; y<b_h; y++){
2717 for(x=0; x<b_w; x++){
2718 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2719 if(add) dst[x + y*dst_stride] += v;
2720 else dst[x + y*dst_stride] -= v;
2721 }
2722 }
2723 for(y=0; y<b_h; y++){
2724 uint8_t *obmc2= obmc + (obmc_stride>>1);
2725 for(x=0; x<b_w; x++){
2726 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2727 if(add) dst[x + y*dst_stride] += v;
2728 else dst[x + y*dst_stride] -= v;
2729 }
2730 }
2731 for(y=0; y<b_h; y++){
2732 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2733 for(x=0; x<b_w; x++){
2734 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2735 if(add) dst[x + y*dst_stride] += v;
2736 else dst[x + y*dst_stride] -= v;
2737 }
2738 }
2739 for(y=0; y<b_h; y++){
2740 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2741 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2742 for(x=0; x<b_w; x++){
2743 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2744 if(add) dst[x + y*dst_stride] += v;
2745 else dst[x + y*dst_stride] -= v;
2746 }
2747 }
2748#else
2749 for(y=0; y<b_h; y++){
2750 //FIXME ugly missue of obmc_stride
2751 uint8_t *obmc1= obmc + y*obmc_stride;
2752 uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2753 uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2754 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2755 for(x=0; x<b_w; x++){
2756 int v= obmc1[x] * block[3][x + y*src_stride]
2757 +obmc2[x] * block[2][x + y*src_stride]
2758 +obmc3[x] * block[1][x + y*src_stride]
2759 +obmc4[x] * block[0][x + y*src_stride];
115329f1 2760
715a97f0 2761 v <<= 8 - LOG2_OBMC_MAX;
034aff03
MN
2762 if(FRAC_BITS != 8){
2763 v += 1<<(7 - FRAC_BITS);
2764 v >>= 8 - FRAC_BITS;
2765 }
715a97f0
MN
2766 if(add){
2767 v += dst[x + y*dst_stride];
2768 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2769 if(v&(~255)) v= ~(v>>31);
2770 dst8[x + y*src_stride] = v;
2771 }else{
2772 dst[x + y*dst_stride] -= v;
2773 }
791e7b83
MN
2774 }
2775 }
ff158dc9 2776#endif
791e7b83
MN
2777}
2778
a0d1931c
Y
2779static always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, DWTELEM * old_buffer, int plane_index, int add, int mb_y){
2780 Plane *p= &s->plane[plane_index];
2781 const int mb_w= s->b_width << s->block_max_depth;
2782 const int mb_h= s->b_height << s->block_max_depth;
2783 int x, y, mb_x;
2784 int block_size = MB_SIZE >> s->block_max_depth;
2785 int block_w = plane_index ? block_size/2 : block_size;
2786 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2787 int obmc_stride= plane_index ? block_size : 2*block_size;
2788 int ref_stride= s->current_picture.linesize[plane_index];
2789 uint8_t *ref = s->last_picture.data[plane_index];
2790 uint8_t *dst8= s->current_picture.data[plane_index];
2791 int w= p->width;
2792 int h= p->height;
2793 START_TIMER
115329f1 2794
a0d1931c
Y
2795 if(s->keyframe || (s->avctx->debug&512)){
2796 if(mb_y==mb_h)
2797 return;
2798
2799 if(add){
86e59cc0 2800 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
a0d1931c
Y
2801 {
2802// DWTELEM * line = slice_buffer_get_line(sb, y);
2803 DWTELEM * line = sb->line[y];
2804 for(x=0; x<w; x++)
2805 {
2806// int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2807 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2808 v >>= FRAC_BITS;
2809 if(v&(~255)) v= ~(v>>31);
2810 dst8[x + y*ref_stride]= v;
2811 }
2812 }
2813 }else{
86e59cc0 2814 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
a0d1931c
Y
2815 {
2816// DWTELEM * line = slice_buffer_get_line(sb, y);
2817 DWTELEM * line = sb->line[y];
2818 for(x=0; x<w; x++)
2819 {
2820 line[x] -= 128 << FRAC_BITS;
2821// buf[x + y*w]-= 128<<FRAC_BITS;
2822 }
2823 }
2824 }
2825
2826 return;
2827 }
115329f1 2828
a0d1931c
Y
2829 for(mb_x=0; mb_x<=mb_w; mb_x++){
2830 START_TIMER
2831
115329f1 2832 add_yblock_buffered(s, sb, old_buffer, dst8, ref, obmc,
a0d1931c
Y
2833 block_w*mb_x - block_w/2,
2834 block_w*mb_y - block_w/2,
2835 block_w, block_w,
2836 w, h,
2837 w, ref_stride, obmc_stride,
2838 mb_x - 1, mb_y - 1,
2839 add, plane_index);
115329f1 2840
a0d1931c
Y
2841 STOP_TIMER("add_yblock")
2842 }
115329f1 2843
a0d1931c
Y
2844 STOP_TIMER("predict_slice")
2845}
2846
f9e6ebf7 2847static always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_index, int add, int mb_y){
791e7b83 2848 Plane *p= &s->plane[plane_index];
155ec6ed
MN
2849 const int mb_w= s->b_width << s->block_max_depth;
2850 const int mb_h= s->b_height << s->block_max_depth;
f9e6ebf7 2851 int x, y, mb_x;
155ec6ed
MN
2852 int block_size = MB_SIZE >> s->block_max_depth;
2853 int block_w = plane_index ? block_size/2 : block_size;
ff158dc9 2854 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
51d6a3cf 2855 const int obmc_stride= plane_index ? block_size : 2*block_size;
715a97f0 2856 int ref_stride= s->current_picture.linesize[plane_index];
791e7b83 2857 uint8_t *ref = s->last_picture.data[plane_index];
715a97f0 2858 uint8_t *dst8= s->current_picture.data[plane_index];
791e7b83
MN
2859 int w= p->width;
2860 int h= p->height;
fff6d4ea 2861 START_TIMER
115329f1 2862
ff158dc9 2863 if(s->keyframe || (s->avctx->debug&512)){
f9e6ebf7
LM
2864 if(mb_y==mb_h)
2865 return;
2866
715a97f0 2867 if(add){
86e59cc0 2868 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
715a97f0
MN
2869 for(x=0; x<w; x++){
2870 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2871 v >>= FRAC_BITS;
2872 if(v&(~255)) v= ~(v>>31);
2873 dst8[x + y*ref_stride]= v;
2874 }
2875 }
2876 }else{
86e59cc0 2877 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
715a97f0
MN
2878 for(x=0; x<w; x++){
2879 buf[x + y*w]-= 128<<FRAC_BITS;
2880 }
ff158dc9 2881 }
791e7b83 2882 }
ff158dc9
MN
2883
2884 return;
791e7b83 2885 }
115329f1 2886
ff158dc9 2887 for(mb_x=0; mb_x<=mb_w; mb_x++){
fff6d4ea 2888 START_TIMER
ff158dc9 2889
115329f1 2890 add_yblock(s, buf, dst8, ref, obmc,
ff158dc9 2891 block_w*mb_x - block_w/2,
791e7b83 2892 block_w*mb_y - block_w/2,
ff158dc9 2893 block_w, block_w,
791e7b83 2894 w, h,
ff158dc9
MN
2895 w, ref_stride, obmc_stride,
2896 mb_x - 1, mb_y - 1,
1015631b 2897 add, 1, plane_index);
115329f1 2898
ff158dc9 2899 STOP_TIMER("add_yblock")
791e7b83 2900 }
115329f1 2901
f9e6ebf7
LM
2902 STOP_TIMER("predict_slice")
2903}
2904
2905static always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){
2906 const int mb_h= s->b_height << s->block_max_depth;
2907 int mb_y;
2908 for(mb_y=0; mb_y<=mb_h; mb_y++)
2909 predict_slice(s, buf, plane_index, add, mb_y);
791e7b83
MN
2910}
2911
51d6a3cf
MN
2912static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
2913 int i, x2, y2;
2914 Plane *p= &s->plane[plane_index];
2915 const int block_size = MB_SIZE >> s->block_max_depth;
2916 const int block_w = plane_index ? block_size/2 : block_size;
2917 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2918 const int obmc_stride= plane_index ? block_size : 2*block_size;
2919 const int ref_stride= s->current_picture.linesize[plane_index];
2920 uint8_t *ref= s-> last_picture.data[plane_index];
51d6a3cf 2921 uint8_t *src= s-> input_picture.data[plane_index];
1015631b 2922 DWTELEM *dst= (DWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
51d6a3cf
MN
2923 const int b_stride = s->b_width << s->block_max_depth;
2924 const int w= p->width;
2925 const int h= p->height;
2926 int index= mb_x + mb_y*b_stride;
2927 BlockNode *b= &s->block[index];
2928 BlockNode backup= *b;
2929 int ab=0;
2930 int aa=0;
2931
2932 b->type|= BLOCK_INTRA;
2933 b->color[plane_index]= 0;
1015631b 2934 memset(dst, 0, obmc_stride*obmc_stride*sizeof(DWTELEM));
51d6a3cf
MN
2935
2936 for(i=0; i<4; i++){
2937 int mb_x2= mb_x + (i &1) - 1;
2938 int mb_y2= mb_y + (i>>1) - 1;
2939 int x= block_w*mb_x2 + block_w/2;
2940 int y= block_w*mb_y2 + block_w/2;
2941
4f59b684 2942 add_yblock(s, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, ref, obmc,
1015631b 2943 x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
51d6a3cf
MN
2944
2945 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
2946 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
2947 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
2948 int obmc_v= obmc[index];
1015631b 2949 int d;
51d6a3cf
MN
2950 if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
2951 if(x<0) obmc_v += obmc[index + block_w];
2952 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
2953 if(x+block_w>w) obmc_v += obmc[index - block_w];
2954 //FIXME precalc this or simplify it somehow else
2955
1015631b
LM
2956 d = -dst[index] + (1<<(FRAC_BITS-1));
2957 dst[index] = d;
2958 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
51d6a3cf
MN
2959 aa += obmc_v * obmc_v; //FIXME precalclate this
2960 }
2961 }
2962 }
2963 *b= backup;
2964
2965 return clip(((ab<<6) + aa/2)/aa, 0, 255); //FIXME we shouldnt need cliping
2966}
2967
b104969f
LM
2968static inline int get_block_bits(SnowContext *s, int x, int y, int w){
2969 const int b_stride = s->b_width << s->block_max_depth;
2970 const int b_height = s->b_height<< s->block_max_depth;
2971 int index= x + y*b_stride;
2972 BlockNode *b = &s->block[index];
2973 BlockNode *left = x ? &s->block[index-1] : &null_block;
2974 BlockNode *top = y ? &s->block[index-b_stride] : &null_block;
2975 BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left;
2976 BlockNode *tr = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
2977 int dmx, dmy;
2978// int mx_context= av_log2(2*ABS(left->mx - top->mx));
2979// int my_context= av_log2(2*ABS(left->my - top->my));
2980
2981 if(x<0 || x>=b_stride || y>=b_height)
2982 return 0;
2983 dmx= b->mx - mid_pred(left->mx, top->mx, tr->mx);
2984 dmy= b->my - mid_pred(left->my, top->my, tr->my);
2985/*
29861 0 0
298701X 1-2 1
2988001XX 3-6 2-3
29890001XXX 7-14 4-7
299000001XXXX 15-30 8-15
2991*/
2992//FIXME try accurate rate
2993//FIXME intra and inter predictors if surrounding blocks arent the same type
2994 if(b->type & BLOCK_INTRA){
2995 return 3+2*( av_log2(2*ABS(left->color[0] - b->color[0]))
2996 + av_log2(2*ABS(left->color[1] - b->color[1]))
2997 + av_log2(2*ABS(left->color[2] - b->color[2])));
2998 }else
2999 return 2*(1 + av_log2(2*ABS(dmx))
3000 + av_log2(2*ABS(dmy))); //FIXME kill the 2* can be merged in lambda
3001}
3002
1015631b 3003static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
51d6a3cf
MN
3004 Plane *p= &s->plane[plane_index];
3005 const int block_size = MB_SIZE >> s->block_max_depth;
3006 const int block_w = plane_index ? block_size/2 : block_size;
3007 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
3008 const int obmc_stride= plane_index ? block_size : 2*block_size;
3009 const int ref_stride= s->current_picture.linesize[plane_index];
3010 uint8_t *ref= s-> last_picture.data[plane_index];
3011 uint8_t *dst= s->current_picture.data[plane_index];
1015631b
LM
3012 uint8_t *src= s-> input_picture.data[plane_index];
3013 DWTELEM *pred= (DWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
3014 uint8_t cur[ref_stride*2*MB_SIZE]; //FIXME alignment
3015 uint8_t tmp[ref_stride*(2*MB_SIZE+5)];
51d6a3cf
MN
3016 const int b_stride = s->b_width << s->block_max_depth;
3017 const int b_height = s->b_height<< s->block_max_depth;
3018 const int w= p->width;
3019 const int h= p->height;
1015631b 3020 int distortion;
51d6a3cf
MN
3021 int rate= 0;
3022 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
1015631b
LM
3023 int sx= block_w*mb_x - block_w/2;
3024 int sy= block_w*mb_y - block_w/2;
3025 const int x0= FFMAX(0,-sx);
3026 const int y0= FFMAX(0,-sy);
3027 const int x1= FFMIN(block_w*2, w-sx);
3028 const int y1= FFMIN(block_w*2, h-sy);
3029 int i,x,y;
3030
3031 pred_block(s, cur, ref, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
3032
3033 for(y=y0; y<y1; y++){
3034 const uint8_t *obmc1= obmc_edged + y*obmc_stride;
3035 const DWTELEM *pred1 = pred + y*obmc_stride;
3036 uint8_t *cur1 = cur + y*ref_stride;
3037 uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
3038 for(x=x0; x<x1; x++){
3039 int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
3040 v = (v + pred1[x]) >> FRAC_BITS;
3041 if(v&(~255)) v= ~(v>>31);
3042 dst1[x] = v;
51d6a3cf 3043 }
1015631b 3044 }
51d6a3cf 3045
1015631b
LM
3046 //FIXME sad/ssd can be broken up, but wavelet cmp should be one 32x32 block
3047 if(block_w==16){
3048 distortion = 0;
3049 for(i=0; i<4; i++){
3050 int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
3051 distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
3052 }
3053 }else{
3054 assert(block_w==8);
3055 distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
51d6a3cf
MN
3056 }
3057
3058 if(plane_index==0){
3059 for(i=0; i<4; i++){
3060/* ..RRr
3061 * .RXx.
3062 * rxx..
3063 */
b104969f
LM
3064 rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
3065 }
48d1b9a1
LM
3066 if(mb_x == b_stride-2)
3067 rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
b104969f
LM
3068 }
3069 return distortion + rate*penalty_factor;
3070}
3071
3072static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
3073 int i, y2;
3074 Plane *p= &s->plane[plane_index];
3075 const int block_size = MB_SIZE >> s->block_max_depth;
3076 const int block_w = plane_index ? block_size/2 : block_size;
3077 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
3078 const int obmc_stride= plane_index ? block_size : 2*block_size;
3079 const int ref_stride= s->current_picture.linesize[plane_index];
3080 uint8_t *ref= s-> last_picture.data[plane_index];
3081 uint8_t *dst= s->current_picture.data[plane_index];
3082 uint8_t *src= s-> input_picture.data[plane_index];
3083 const static DWTELEM zero_dst[4096]; //FIXME
3084 const int b_stride = s->b_width << s->block_max_depth;
3085 const int b_height = s->b_height<< s->block_max_depth;
3086 const int w= p->width;
3087 const int h= p->height;
3088 int distortion= 0;
3089 int rate= 0;
3090 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
3091
3092 for(i=0; i<9; i++){
3093 int mb_x2= mb_x + (i%3) - 1;
3094 int mb_y2= mb_y + (i/3) - 1;
3095 int x= block_w*mb_x2 + block_w/2;
3096 int y= block_w*mb_y2 + block_w/2;
3097
4f59b684 3098 add_yblock(s, zero_dst, dst, ref, obmc,
b104969f
LM
3099 x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
3100
3101 //FIXME find a cleaner/simpler way to skip the outside stuff
3102 for(y2= y; y2<0; y2++)
3103 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
3104 for(y2= h; y2<y+block_w; y2++)
3105 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
3106 if(x<0){
3107 for(y2= y; y2<y+block_w; y2++)
3108 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
51d6a3cf 3109 }
b104969f
LM
3110 if(x+block_w > w){
3111 for(y2= y; y2<y+block_w; y2++)
3112 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
3113 }
3114
3115 assert(block_w== 8 || block_w==16);
3116 distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
51d6a3cf
MN
3117 }
3118
b104969f
LM
3119 if(plane_index==0){
3120 BlockNode *b= &s->block[mb_x+mb_y*b_stride];
3121 int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
3122
3123/* ..RRRr
3124 * .RXXx.
3125 * .RXXx.
3126 * rxxx.
3127 */
3128 if(merged)
3129 rate = get_block_bits(s, mb_x, mb_y, 2);
3130 for(i=merged?4:0; i<9; i++){
3131 static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
3132 rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
3133 }
3134 }
51d6a3cf
MN
3135 return distortion + rate*penalty_factor;
3136}
3137
1015631b 3138static always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
51d6a3cf
MN
3139 const int b_stride= s->b_width << s->block_max_depth;
3140 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
3141 BlockNode backup= *block;
3142 int rd, index, value;
3143
3144 assert(mb_x>=0 && mb_y>=0);
735f9f34 3145 assert(mb_x<b_stride);
51d6a3cf
MN
3146
3147 if(intra){
3148 block->color[0] = p[0];
3149 block->color[1] = p[1];
3150 block->color[2] = p[2];
3151 block->type |= BLOCK_INTRA;
3152 }else{
3153 index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
3154 value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6);
3155 if(s->me_cache[index] == value)
3156 return 0;
3157 s->me_cache[index]= value;
3158
3159 block->mx= p[0];
3160 block->my= p[1];
3161 block->type &= ~BLOCK_INTRA;
3162 }
3163
1015631b 3164 rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
51d6a3cf
MN
3165
3166//FIXME chroma
3167 if(rd < *best_rd){
3168 *best_rd= rd;
3169 return 1;
3170 }else{
3171 *block= backup;
3172 return 0;
3173 }
3174}
3175
52137f2f 3176/* special case for int[2] args we discard afterward, fixes compilation prob with gcc 2.95 */
fc8c4992 3177static always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
52137f2f 3178 int p[2] = {p0, p1};
fc8c4992 3179 return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
52137f2f
FR
3180}
3181
b104969f
LM
3182static always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int *best_rd){
3183 const int b_stride= s->b_width << s->block_max_depth;
3184 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
3185 BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
3186 int rd, index, value;
3187
3188 assert(mb_x>=0 && mb_y>=0);
3189 assert(mb_x<b_stride);
3190 assert(((mb_x|mb_y)&1) == 0);
3191
3192 index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
3193 value= s->me_cache_generation + (p0>>10) + (p1<<6);
3194 if(s->me_cache[index] == value)
3195 return 0;
3196 s->me_cache[index]= value;
3197
3198 block->mx= p0;
3199 block->my= p1;
3200 block->type &= ~BLOCK_INTRA;
3201 block[1]= block[b_stride]= block[b_stride+1]= *block;
3202
3203 rd= get_4block_rd(s, mb_x, mb_y, 0);
3204
3205//FIXME chroma
3206 if(rd < *best_rd){
3207 *best_rd= rd;
3208 return 1;
3209 }else{
3210 block[0]= backup[0];
3211 block[1]= backup[1];
3212 block[b_stride]= backup[2];
3213 block[b_stride+1]= backup[3];
3214 return 0;
3215 }
3216}
3217
51d6a3cf
MN
3218static void iterative_me(SnowContext *s){
3219 int pass, mb_x, mb_y;
3220 const int b_width = s->b_width << s->block_max_depth;
3221 const int b_height= s->b_height << s->block_max_depth;
3222 const int b_stride= b_width;
3223 int color[3];
3224
8f8ae495
LM
3225 {
3226 RangeCoder r = s->c;
3227 uint8_t state[sizeof(s->block_state)];
3228 memcpy(state, s->block_state, sizeof(s->block_state));
3229 for(mb_y= 0; mb_y<s->b_height; mb_y++)
3230 for(mb_x= 0; mb_x<s->b_width; mb_x++)
3231 encode_q_branch(s, 0, mb_x, mb_y);
3232 s->c = r;
3233 memcpy(s->block_state, state, sizeof(s->block_state));
3234 }
3235
51d6a3cf
MN
3236 for(pass=0; pass<50; pass++){
3237 int change= 0;
3238
3239 for(mb_y= 0; mb_y<b_height; mb_y++){
3240 for(mb_x= 0; mb_x<b_width; mb_x++){
3241 int dia_change, i, j;
3242 int best_rd= INT_MAX;
3243 BlockNode backup;
3244 const int index= mb_x + mb_y * b_stride;
3245 BlockNode *block= &s->block[index];
c8a596d2
MN
3246 BlockNode *tb = mb_y ? &s->block[index-b_stride ] : &null_block;
3247 BlockNode *lb = mb_x ? &s->block[index -1] : &null_block;
3248 BlockNode *rb = mb_x+1<b_width ? &s->block[index +1] : &null_block;
3249 BlockNode *bb = mb_y+1<b_height ? &s->block[index+b_stride ] : &null_block;
3250 BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : &null_block;
3251 BlockNode *trb= mb_x+1<b_width && mb_y ? &s->block[index-b_stride+1] : &null_block;
3252 BlockNode *blb= mb_x && mb_y+1<b_height ? &s->block[index+b_stride-1] : &null_block;
3253 BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : &null_block;
1015631b
LM
3254 const int b_w= (MB_SIZE >> s->block_max_depth);
3255 uint8_t obmc_edged[b_w*2][b_w*2];
51d6a3cf
MN
3256
3257 if(pass && (block->type & BLOCK_OPT))
3258 continue;
3259 block->type |= BLOCK_OPT;
3260
3261 backup= *block;
3262
3263 if(!s->me_cache_generation)
3264 memset(s->me_cache, 0, sizeof(s->me_cache));
3265 s->me_cache_generation += 1<<22;
3266
1015631b
LM
3267 //FIXME precalc
3268 {
3269 int x, y;
3270 memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
3271 if(mb_x==0)
3272 for(y=0; y<b_w*2; y++)
3273 memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
3274 if(mb_x==b_stride-1)
3275 for(y=0; y<b_w*2; y++)
3276 memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
3277 if(mb_y==0){
3278 for(x=0; x<b_w*2; x++)
3279 obmc_edged[0][x] += obmc_edged[b_w-1][x];
3280 for(y=1; y<b_w; y++)
3281 memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
3282 }
3283 if(mb_y==b_height-1){
3284 for(x=0; x<b_w*2; x++)
3285 obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
3286 for(y=b_w; y<b_w*2-1; y++)
3287 memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
3288 }
3289 }
3290
3291 //skip stuff outside the picture
3292 if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1)
3293 {
3294 uint8_t *src= s-> input_picture.data[0];
3295 uint8_t *dst= s->current_picture.data[0];
3296 const int stride= s->current_picture.linesize[0];
3297 const int block_w= MB_SIZE >> s->block_max_depth;
3298 const int sx= block_w*mb_x - block_w/2;
3299 const int sy= block_w*mb_y - block_w/2;
3300 const int w= s->plane[0].width;
3301 const int h= s->plane[0].height;
3302 int y;
3303
3304 for(y=sy; y<0; y++)
3305 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3306 for(y=h; y<sy+block_w*2; y++)
3307 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3308 if(sx<0){
3309 for(y=sy; y<sy+block_w*2; y++)
3310 memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
3311 }
3312 if(sx+block_w*2 > w){
3313 for(y=sy; y<sy+block_w*2; y++)
3314 memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
3315 }
3316 }
3317
3318 // intra(black) = neighbors' contribution to the current block
3319 for(i=0; i<3; i++)
3320 color[i]= get_dc(s, mb_x, mb_y, i);
3321
51d6a3cf 3322 // get previous score (cant be cached due to OBMC)
48d1b9a1
LM
3323 if(pass > 0 && (block->type&BLOCK_INTRA)){
3324 int color0[3]= {block->color[0], block->color[1], block->color[2]};
3325 check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
3326 }else
fc8c4992 3327 check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);
48d1b9a1 3328
fc8c4992
MN
3329 check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
3330 check_block_inter(s, mb_x, mb_y, tb->mx, tb->my, *obmc_edged, &best_rd);
3331 check_block_inter(s, mb_x, mb_y, lb->mx, lb->my, *obmc_edged, &best_rd);
3332 check_block_inter(s, mb_x, mb_y, rb->mx, rb->my, *obmc_edged, &best_rd);
3333 check_block_inter(s, mb_x, mb_y, bb->mx, bb->my, *obmc_edged, &best_rd);
51d6a3cf
MN
3334
3335 /* fullpel ME */
3336 //FIXME avoid subpel interpol / round to nearest integer
3337 do{
3338 dia_change=0;
3339 for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
3340 for(j=0; j<i; j++){
fc8c4992
MN
3341 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3342 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3343 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3344 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
51d6a3cf
MN
3345 }
3346 }
3347 }while(dia_change);
3348 /* subpel ME */
3349 do{
3350 static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
3351 dia_change=0;
3352 for(i=0; i<8; i++)
fc8c4992 3353 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
51d6a3cf
MN
3354 }while(dia_change);
3355 //FIXME or try the standard 2 pass qpel or similar
13705b69 3356#if 1
1015631b 3357 check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
51d6a3cf 3358 //FIXME RD style color selection
13705b69 3359#endif
51d6a3cf
MN
3360 if(!same_block(block, &backup)){
3361 if(tb != &null_block) tb ->type &= ~BLOCK_OPT;
3362 if(lb != &null_block) lb ->type &= ~BLOCK_OPT;
3363 if(rb != &null_block) rb ->type &= ~BLOCK_OPT;
3364 if(bb != &null_block) bb ->type &= ~BLOCK_OPT;
3365 if(tlb!= &null_block) tlb->type &= ~BLOCK_OPT;
3366 if(trb!= &null_block) trb->type &= ~BLOCK_OPT;
3367 if(blb!= &null_block) blb->type &= ~BLOCK_OPT;
3368 if(brb!= &null_block) brb->type &= ~BLOCK_OPT;
3369 change ++;
3370 }
3371 }
3372 }
3373 av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
3374 if(!change)
3375 break;
3376 }
b104969f
LM
3377
3378 if(s->block_max_depth == 1){
3379 int change= 0;
3380 for(mb_y= 0; mb_y<b_height; mb_y+=2){
3381 for(mb_x= 0; mb_x<b_width; mb_x+=2){
3382 int dia_change, i, j;
3383 int best_rd, init_rd;
3384 const int index= mb_x + mb_y * b_stride;
3385 BlockNode *b[4];
3386
3387 b[0]= &s->block[index];
3388 b[1]= b[0]+1;
3389 b[2]= b[0]+b_stride;
3390 b[3]= b[2]+1;
3391 if(same_block(b[0], b[1]) &&
3392 same_block(b[0], b[2]) &&
3393 same_block(b[0], b[3]))
3394 continue;
3395
3396 if(!s->me_cache_generation)
3397 memset(s->me_cache, 0, sizeof(s->me_cache));
3398 s->me_cache_generation += 1<<22;
3399
3400 init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
3401
3402 check_4block_inter(s, mb_x, mb_y,
3403 (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
3404 (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, &best_rd);
3405
3406 for(i=0; i<4; i++)
3407 if(!(b[i]->type&BLOCK_INTRA))
3408 check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, &best_rd);
3409
3410 if(init_rd != best_rd)
3411 change++;
3412 }
3413 }
3414 av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
3415 }
51d6a3cf
MN
3416}
3417
791e7b83
MN
3418static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int bias){
3419 const int level= b->level;
3420 const int w= b->width;
3421 const int h= b->height;
c97de57c
MN
3422 const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
3423 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
da66b631 3424 int x,y, thres1, thres2;
62ab0b78 3425// START_TIMER
791e7b83 3426
93fbdb5a 3427 if(s->qlog == LOSSLESS_QLOG) return;
115329f1 3428
791e7b83 3429 bias= bias ? 0 : (3*qmul)>>3;
da66b631
MN
3430 thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
3431 thres2= 2*thres1;
115329f1 3432
791e7b83
MN
3433 if(!bias){
3434 for(y=0; y<h; y++){
3435 for(x=0; x<w; x++){
da66b631 3436 int i= src[x + y*stride];
115329f1 3437
da66b631
MN
3438 if((unsigned)(i+thres1) > thres2){
3439 if(i>=0){
3440 i<<= QEXPSHIFT;
3441 i/= qmul; //FIXME optimize
3442 src[x + y*stride]= i;
3443 }else{
3444 i= -i;
3445 i<<= QEXPSHIFT;
3446 i/= qmul; //FIXME optimize
3447 src[x + y*stride]= -i;
3448 }
3449 }else
3450 src[x + y*stride]= 0;
791e7b83
MN
3451 }
3452 }
3453 }else{
3454 for(y=0; y<h; y++){
3455 for(x=0; x<w; x++){
115329f1
DB
3456 int i= src[x + y*stride];
3457
da66b631
MN
3458 if((unsigned)(i+thres1) > thres2){
3459 if(i>=0){
3460 i<<= QEXPSHIFT;
3461 i= (i + bias) / qmul; //FIXME optimize
3462 src[x + y*stride]= i;
3463 }else{
3464 i= -i;
3465 i<<= QEXPSHIFT;
3466 i= (i + bias) / qmul; //FIXME optimize
3467 src[x + y*stride]= -i;
3468 }
3469 }else
3470 src[x + y*stride]= 0;
791e7b83
MN
3471 }
3472 }
3473 }
da66b631
MN
3474 if(level+1 == s->spatial_decomposition_count){
3475// STOP_TIMER("quantize")
3476 }
791e7b83
MN
3477}
3478
66b32bf2 3479static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int start_y, int end_y){
a0d1931c 3480 const int w= b->width;
c97de57c
MN
3481 const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
3482 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
a0d1931c
Y
3483 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3484 int x,y;
3485 START_TIMER
115329f1 3486
a0d1931c 3487 if(s->qlog == LOSSLESS_QLOG) return;
115329f1 3488
66b32bf2 3489 for(y=start_y; y<end_y; y++){
a0d1931c
Y
3490// DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3491 DWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3492 for(x=0; x<w; x++){
3493 int i= line[x];
3494 if(i<0){
3495 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3496 }else if(i>0){
3497 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT));
3498 }