whitespace/indentation cosmetics
[libav.git] / libavcodec / snow.c
CommitLineData
791e7b83
MN
1/*
2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3 *
b78e7197
DB
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
791e7b83
MN
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
b78e7197 9 * version 2.1 of the License, or (at your option) any later version.
791e7b83 10 *
b78e7197 11 * FFmpeg is distributed in the hope that it will be useful,
791e7b83
MN
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
b78e7197 17 * License along with FFmpeg; if not, write to the Free Software
5509bffa 18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
791e7b83
MN
19 */
20
21#include "avcodec.h"
791e7b83 22#include "dsputil.h"
059715a4 23#include "snow.h"
28869757
MN
24
25#include "rangecoder.h"
791e7b83
MN
26
27#include "mpegvideo.h"
28
29#undef NDEBUG
30#include <assert.h>
31
791e7b83
MN
32static const int8_t quant3[256]={
33 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
34 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
35 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
36 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
37 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
42-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
43-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
44-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
45-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
46-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
47-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
48-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
49};
50static const int8_t quant3b[256]={
51 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
52 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
53 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
54 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
55 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
57 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
60-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
61-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
62-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
63-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
64-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
65-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
66-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67};
538a3841
MN
68static const int8_t quant3bA[256]={
69 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
70 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
71 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
72 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
73 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
74 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
75 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
76 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
77 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
78 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
79 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
80 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
81 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
82 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
83 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
84 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
85};
791e7b83
MN
86static const int8_t quant5[256]={
87 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
88 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
89 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
90 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
91 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
92 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
93 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
94 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
95-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
96-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
97-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
98-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
99-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
100-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
101-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
102-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
103};
104static const int8_t quant7[256]={
105 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
106 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
107 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
108 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
109 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
110 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
111 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
112 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
113-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
114-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
115-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
116-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
117-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
118-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
119-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
120-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
121};
122static const int8_t quant9[256]={
123 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
124 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
125 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
126 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
127 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
128 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
129 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
130 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
131-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
132-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
133-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
134-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
135-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
136-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
137-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
138-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
139};
140static const int8_t quant11[256]={
141 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
142 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
143 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
144 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
145 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
146 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
147 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
148 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
149-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
150-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
151-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
152-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
153-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
154-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
155-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
156-4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
157};
158static const int8_t quant13[256]={
159 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
160 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
161 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
162 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
163 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
164 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
165 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
166 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
167-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
168-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
169-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
170-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
171-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
172-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
173-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
174-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
175};
176
791e7b83
MN
177#if 0 //64*cubic
178static const uint8_t obmc32[1024]={
179 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
180 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
181 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
182 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
183 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
184 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
185 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
186 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
187 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
188 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
189 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
190 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
191 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
192 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
193 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
194 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
195 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
196 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
197 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
198 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
199 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
200 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
201 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
202 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
203 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
204 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
205 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
206 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
207 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
208 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
209 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
210 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
211//error:0.000022
212};
213static const uint8_t obmc16[256]={
214 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
215 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
216 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
217 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
218 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
219 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
220 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
221 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
222 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
223 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
224 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
225 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
226 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
227 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
228 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
229 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
230//error:0.000033
231};
232#elif 1 // 64*linear
233static const uint8_t obmc32[1024]={
561a18d3
RE
234 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
235 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
236 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
237 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
238 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
239 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
240 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
241 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
242 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
243 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
244 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
245 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
246 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
247 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
248 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
249 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
250 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
251 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
252 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
253 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
254 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
255 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
256 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
257 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
258 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
259 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
260 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
261 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
262 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
263 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
264 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
265 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
791e7b83
MN
266 //error:0.000020
267};
268static const uint8_t obmc16[256]={
561a18d3
RE
269 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
270 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
271 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
272 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
273 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
274 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
275 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
276 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
277 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
278 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
279 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
280 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
281 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
282 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
283 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
284 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
791e7b83
MN
285//error:0.000015
286};
287#else //64*cos
288static const uint8_t obmc32[1024]={
289 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
290 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
291 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
292 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
293 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
294 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
295 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
296 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
297 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
298 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
299 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
300 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
301 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
302 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
303 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
304 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
305 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
306 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
307 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
308 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
309 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
310 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
311 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
312 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
313 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
314 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
315 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
316 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
317 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
318 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
319 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
320 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
321//error:0.000022
322};
323static const uint8_t obmc16[256]={
324 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
325 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
326 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
327 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
328 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
329 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
330 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
331 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
332 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
333 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
334 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
335 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
336 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
337 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
338 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
339 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
340//error:0.000022
341};
342#endif
343
155ec6ed
MN
344//linear *64
345static const uint8_t obmc8[64]={
561a18d3
RE
346 4, 12, 20, 28, 28, 20, 12, 4,
347 12, 36, 60, 84, 84, 60, 36, 12,
348 20, 60,100,140,140,100, 60, 20,
349 28, 84,140,196,196,140, 84, 28,
350 28, 84,140,196,196,140, 84, 28,
351 20, 60,100,140,140,100, 60, 20,
352 12, 36, 60, 84, 84, 60, 36, 12,
353 4, 12, 20, 28, 28, 20, 12, 4,
155ec6ed
MN
354//error:0.000000
355};
356
357//linear *64
358static const uint8_t obmc4[16]={
561a18d3
RE
359 16, 48, 48, 16,
360 48,144,144, 48,
361 48,144,144, 48,
362 16, 48, 48, 16,
155ec6ed
MN
363//error:0.000000
364};
365
366static const uint8_t *obmc_tab[4]={
367 obmc32, obmc16, obmc8, obmc4
368};
369
85fc0e75
LM
370static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
371
155ec6ed
MN
372typedef struct BlockNode{
373 int16_t mx;
374 int16_t my;
8c36eaaa 375 uint8_t ref;
155ec6ed
MN
376 uint8_t color[3];
377 uint8_t type;
378//#define TYPE_SPLIT 1
379#define BLOCK_INTRA 1
51d6a3cf 380#define BLOCK_OPT 2
155ec6ed
MN
381//#define TYPE_NOCOLOR 4
382 uint8_t level; //FIXME merge into type?
383}BlockNode;
384
51d6a3cf
MN
385static const BlockNode null_block= { //FIXME add border maybe
386 .color= {128,128,128},
387 .mx= 0,
388 .my= 0,
8c36eaaa 389 .ref= 0,
51d6a3cf
MN
390 .type= 0,
391 .level= 0,
392};
393
155ec6ed
MN
394#define LOG2_MB_SIZE 4
395#define MB_SIZE (1<<LOG2_MB_SIZE)
b538791b 396#define ENCODER_EXTRA_BITS 4
155ec6ed 397
a0d1931c
Y
398typedef struct x_and_coeff{
399 int16_t x;
538a3841 400 uint16_t coeff;
a0d1931c
Y
401} x_and_coeff;
402
791e7b83
MN
403typedef struct SubBand{
404 int level;
405 int stride;
406 int width;
407 int height;
408 int qlog; ///< log(qscale)/log[2^(1/6)]
409 DWTELEM *buf;
a0d1931c
Y
410 int buf_x_offset;
411 int buf_y_offset;
412 int stride_line; ///< Stride measured in lines, not pixels.
413 x_and_coeff * x_coeff;
791e7b83
MN
414 struct SubBand *parent;
415 uint8_t state[/*7*2*/ 7 + 512][32];
416}SubBand;
417
418typedef struct Plane{
419 int width;
420 int height;
421 SubBand band[MAX_DECOMPOSITIONS][4];
422}Plane;
423
424typedef struct SnowContext{
eafcac6a 425// MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
791e7b83
MN
426
427 AVCodecContext *avctx;
28869757 428 RangeCoder c;
791e7b83 429 DSPContext dsp;
51d6a3cf
MN
430 AVFrame new_picture;
431 AVFrame input_picture; ///< new_picture with the internal linesizes
791e7b83 432 AVFrame current_picture;
8c36eaaa 433 AVFrame last_picture[MAX_REF_FRAMES];
791e7b83
MN
434 AVFrame mconly_picture;
435// uint8_t q_context[16];
436 uint8_t header_state[32];
155ec6ed 437 uint8_t block_state[128 + 32*128];
791e7b83 438 int keyframe;
19aa028d 439 int always_reset;
791e7b83
MN
440 int version;
441 int spatial_decomposition_type;
396a5e68 442 int last_spatial_decomposition_type;
791e7b83
MN
443 int temporal_decomposition_type;
444 int spatial_decomposition_count;
445 int temporal_decomposition_count;
8c36eaaa
LM
446 int max_ref_frames;
447 int ref_frames;
448 int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
449 uint32_t *ref_scores[MAX_REF_FRAMES];
791e7b83 450 DWTELEM *spatial_dwt_buffer;
791e7b83
MN
451 int colorspace_type;
452 int chroma_h_shift;
453 int chroma_v_shift;
454 int spatial_scalability;
455 int qlog;
396a5e68 456 int last_qlog;
155ec6ed
MN
457 int lambda;
458 int lambda2;
4e64bead 459 int pass1_rc;
791e7b83 460 int mv_scale;
396a5e68 461 int last_mv_scale;
791e7b83 462 int qbias;
396a5e68 463 int last_qbias;
791e7b83 464#define QBIAS_SHIFT 3
155ec6ed
MN
465 int b_width;
466 int b_height;
467 int block_max_depth;
396a5e68 468 int last_block_max_depth;
791e7b83 469 Plane plane[MAX_PLANES];
155ec6ed 470 BlockNode *block;
51d6a3cf
MN
471#define ME_CACHE_SIZE 1024
472 int me_cache[ME_CACHE_SIZE];
473 int me_cache_generation;
a0d1931c 474 slice_buffer sb;
155ec6ed 475
eafcac6a 476 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
791e7b83
MN
477}SnowContext;
478
f9e6ebf7
LM
479typedef struct {
480 DWTELEM *b0;
481 DWTELEM *b1;
482 DWTELEM *b2;
483 DWTELEM *b3;
484 int y;
485} dwt_compose_t;
486
a0d1931c
Y
487#define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
488//#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
489
51d6a3cf
MN
490static void iterative_me(SnowContext *s);
491
a0d1931c
Y
492static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, DWTELEM * base_buffer)
493{
494 int i;
115329f1 495
a0d1931c
Y
496 buf->base_buffer = base_buffer;
497 buf->line_count = line_count;
498 buf->line_width = line_width;
499 buf->data_count = max_allocated_lines;
500 buf->line = (DWTELEM * *) av_mallocz (sizeof(DWTELEM *) * line_count);
501 buf->data_stack = (DWTELEM * *) av_malloc (sizeof(DWTELEM *) * max_allocated_lines);
115329f1 502
a0d1931c
Y
503 for (i = 0; i < max_allocated_lines; i++)
504 {
505 buf->data_stack[i] = (DWTELEM *) av_malloc (sizeof(DWTELEM) * line_width);
506 }
115329f1 507
a0d1931c
Y
508 buf->data_stack_top = max_allocated_lines - 1;
509}
510
511static DWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
512{
a0d1931c
Y
513 int offset;
514 DWTELEM * buffer;
115329f1
DB
515
516// av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line);
517
a0d1931c
Y
518 assert(buf->data_stack_top >= 0);
519// assert(!buf->line[line]);
520 if (buf->line[line])
521 return buf->line[line];
115329f1 522
a0d1931c
Y
523 offset = buf->line_width * line;
524 buffer = buf->data_stack[buf->data_stack_top];
525 buf->data_stack_top--;
526 buf->line[line] = buffer;
115329f1 527
a0d1931c 528// av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
115329f1 529
a0d1931c
Y
530 return buffer;
531}
532
533static void slice_buffer_release(slice_buffer * buf, int line)
534{
a0d1931c
Y
535 int offset;
536 DWTELEM * buffer;
537
538 assert(line >= 0 && line < buf->line_count);
539 assert(buf->line[line]);
540
541 offset = buf->line_width * line;
542 buffer = buf->line[line];
543 buf->data_stack_top++;
544 buf->data_stack[buf->data_stack_top] = buffer;
545 buf->line[line] = NULL;
115329f1 546
a0d1931c
Y
547// av_log(NULL, AV_LOG_DEBUG, "slice_buffer_release: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
548}
549
550static void slice_buffer_flush(slice_buffer * buf)
551{
552 int i;
553 for (i = 0; i < buf->line_count; i++)
554 {
555 if (buf->line[i])
556 {
557// av_log(NULL, AV_LOG_DEBUG, "slice_buffer_flush: line: %d \n", i);
558 slice_buffer_release(buf, i);
559 }
560 }
561}
562
563static void slice_buffer_destroy(slice_buffer * buf)
564{
565 int i;
566 slice_buffer_flush(buf);
115329f1 567
a0d1931c
Y
568 for (i = buf->data_count - 1; i >= 0; i--)
569 {
570 assert(buf->data_stack[i]);
e7c8206e 571 av_freep(&buf->data_stack[i]);
a0d1931c
Y
572 }
573 assert(buf->data_stack);
e7c8206e 574 av_freep(&buf->data_stack);
a0d1931c 575 assert(buf->line);
e7c8206e 576 av_freep(&buf->line);
a0d1931c
Y
577}
578
bb270c08 579#ifdef __sgi
2554db9b 580// Avoid a name clash on SGI IRIX
bb270c08 581#undef qexp
2554db9b 582#endif
034aff03 583#define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
c97de57c 584static uint8_t qexp[QROOT];
791e7b83
MN
585
586static inline int mirror(int v, int m){
13705b69
MN
587 while((unsigned)v > (unsigned)m){
588 v=-v;
589 if(v<0) v+= 2*m;
590 }
591 return v;
791e7b83
MN
592}
593
28869757 594static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
791e7b83
MN
595 int i;
596
597 if(v){
c26abfa5 598 const int a= FFABS(v);
791e7b83
MN
599 const int e= av_log2(a);
600#if 1
115329f1 601 const int el= FFMIN(e, 10);
28869757 602 put_rac(c, state+0, 0);
791e7b83
MN
603
604 for(i=0; i<el; i++){
28869757 605 put_rac(c, state+1+i, 1); //1..10
791e7b83
MN
606 }
607 for(; i<e; i++){
28869757 608 put_rac(c, state+1+9, 1); //1..10
791e7b83 609 }
28869757 610 put_rac(c, state+1+FFMIN(i,9), 0);
791e7b83
MN
611
612 for(i=e-1; i>=el; i--){
28869757 613 put_rac(c, state+22+9, (a>>i)&1); //22..31
791e7b83
MN
614 }
615 for(; i>=0; i--){
28869757 616 put_rac(c, state+22+i, (a>>i)&1); //22..31
791e7b83
MN
617 }
618
619 if(is_signed)
28869757 620 put_rac(c, state+11 + el, v < 0); //11..21
791e7b83 621#else
115329f1 622
28869757 623 put_rac(c, state+0, 0);
791e7b83
MN
624 if(e<=9){
625 for(i=0; i<e; i++){
28869757 626 put_rac(c, state+1+i, 1); //1..10
791e7b83 627 }
28869757 628 put_rac(c, state+1+i, 0);
791e7b83
MN
629
630 for(i=e-1; i>=0; i--){
28869757 631 put_rac(c, state+22+i, (a>>i)&1); //22..31
791e7b83
MN
632 }
633
634 if(is_signed)
28869757 635 put_rac(c, state+11 + e, v < 0); //11..21
791e7b83
MN
636 }else{
637 for(i=0; i<e; i++){
28869757 638 put_rac(c, state+1+FFMIN(i,9), 1); //1..10
791e7b83 639 }
28869757 640 put_rac(c, state+1+FFMIN(i,9), 0);
791e7b83
MN
641
642 for(i=e-1; i>=0; i--){
28869757 643 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
791e7b83
MN
644 }
645
646 if(is_signed)
28869757 647 put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
791e7b83
MN
648 }
649#endif
650 }else{
28869757 651 put_rac(c, state+0, 1);
791e7b83
MN
652 }
653}
654
28869757
MN
655static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
656 if(get_rac(c, state+0))
791e7b83
MN
657 return 0;
658 else{
7c2425d2
LM
659 int i, e, a;
660 e= 0;
28869757 661 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
7c2425d2 662 e++;
791e7b83 663 }
7c2425d2 664
791e7b83 665 a= 1;
7c2425d2 666 for(i=e-1; i>=0; i--){
28869757 667 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
791e7b83
MN
668 }
669
28869757 670 if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
791e7b83
MN
671 return -a;
672 else
673 return a;
674 }
675}
676
28869757 677static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
4f4e9633 678 int i;
0635cbfc 679 int r= log2>=0 ? 1<<log2 : 1;
4f4e9633
MN
680
681 assert(v>=0);
0635cbfc
MN
682 assert(log2>=-4);
683
684 while(v >= r){
28869757 685 put_rac(c, state+4+log2, 1);
0635cbfc 686 v -= r;
4f4e9633 687 log2++;
0635cbfc 688 if(log2>0) r+=r;
4f4e9633 689 }
28869757 690 put_rac(c, state+4+log2, 0);
115329f1 691
4f4e9633 692 for(i=log2-1; i>=0; i--){
28869757 693 put_rac(c, state+31-i, (v>>i)&1);
4f4e9633 694 }
4f4e9633
MN
695}
696
28869757 697static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
4f4e9633 698 int i;
0635cbfc 699 int r= log2>=0 ? 1<<log2 : 1;
4f4e9633
MN
700 int v=0;
701
0635cbfc
MN
702 assert(log2>=-4);
703
28869757 704 while(get_rac(c, state+4+log2)){
0635cbfc 705 v+= r;
4f4e9633 706 log2++;
0635cbfc 707 if(log2>0) r+=r;
4f4e9633 708 }
115329f1 709
4f4e9633 710 for(i=log2-1; i>=0; i--){
28869757 711 v+= get_rac(c, state+31-i)<<i;
4f4e9633
MN
712 }
713
714 return v;
715}
716
849f1035 717static av_always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
791e7b83
MN
718 const int mirror_left= !highpass;
719 const int mirror_right= (width&1) ^ highpass;
720 const int w= (width>>1) - 1 + (highpass & width);
721 int i;
722
723#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
724 if(mirror_left){
725 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
726 dst += dst_step;
727 src += src_step;
728 }
115329f1 729
791e7b83
MN
730 for(i=0; i<w; i++){
731 dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse);
732 }
115329f1 733
791e7b83
MN
734 if(mirror_right){
735 dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse);
736 }
737}
738
059715a4 739#ifndef lift5
849f1035 740static av_always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
791e7b83
MN
741 const int mirror_left= !highpass;
742 const int mirror_right= (width&1) ^ highpass;
743 const int w= (width>>1) - 1 + (highpass & width);
744 int i;
745
746 if(mirror_left){
747 int r= 3*2*ref[0];
748 r += r>>4;
749 r += r>>8;
750 dst[0] = LIFT(src[0], ((r+add)>>shift), inverse);
751 dst += dst_step;
752 src += src_step;
753 }
115329f1 754
791e7b83
MN
755 for(i=0; i<w; i++){
756 int r= 3*(ref[i*ref_step] + ref[(i+1)*ref_step]);
757 r += r>>4;
758 r += r>>8;
759 dst[i*dst_step] = LIFT(src[i*src_step], ((r+add)>>shift), inverse);
760 }
115329f1 761
791e7b83
MN
762 if(mirror_right){
763 int r= 3*2*ref[w*ref_step];
764 r += r>>4;
765 r += r>>8;
766 dst[w*dst_step] = LIFT(src[w*src_step], ((r+add)>>shift), inverse);
767 }
768}
059715a4 769#endif
791e7b83 770
059715a4 771#ifndef liftS
849f1035 772static av_always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
f5a71928
MN
773 const int mirror_left= !highpass;
774 const int mirror_right= (width&1) ^ highpass;
775 const int w= (width>>1) - 1 + (highpass & width);
776 int i;
777
778 assert(shift == 4);
81717747 779#define LIFTS(src, ref, inv) ((inv) ? (src) + (((ref) + 4*(src))>>shift): -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
f5a71928
MN
780 if(mirror_left){
781 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
782 dst += dst_step;
783 src += src_step;
784 }
115329f1 785
f5a71928
MN
786 for(i=0; i<w; i++){
787 dst[i*dst_step] = LIFTS(src[i*src_step], mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add, inverse);
788 }
115329f1 789
f5a71928
MN
790 if(mirror_right){
791 dst[w*dst_step] = LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
792 }
793}
059715a4 794#endif
f5a71928 795
791e7b83 796
aa25a462 797static void inplace_lift(DWTELEM *dst, int width, int *coeffs, int n, int shift, int start, int inverse){
791e7b83 798 int x, i;
115329f1 799
791e7b83
MN
800 for(x=start; x<width; x+=2){
801 int64_t sum=0;
802
803 for(i=0; i<n; i++){
804 int x2= x + 2*i - n + 1;
805 if (x2< 0) x2= -x2;
806 else if(x2>=width) x2= 2*width-x2-2;
807 sum += coeffs[i]*(int64_t)dst[x2];
808 }
809 if(inverse) dst[x] -= (sum + (1<<shift)/2)>>shift;
810 else dst[x] += (sum + (1<<shift)/2)>>shift;
811 }
812}
813
aa25a462 814static void inplace_liftV(DWTELEM *dst, int width, int height, int stride, int *coeffs, int n, int shift, int start, int inverse){
791e7b83
MN
815 int x, y, i;
816 for(y=start; y<height; y+=2){
817 for(x=0; x<width; x++){
818 int64_t sum=0;
115329f1 819
791e7b83
MN
820 for(i=0; i<n; i++){
821 int y2= y + 2*i - n + 1;
822 if (y2< 0) y2= -y2;
823 else if(y2>=height) y2= 2*height-y2-2;
824 sum += coeffs[i]*(int64_t)dst[x + y2*stride];
825 }
826 if(inverse) dst[x + y*stride] -= (sum + (1<<shift)/2)>>shift;
827 else dst[x + y*stride] += (sum + (1<<shift)/2)>>shift;
828 }
829 }
830}
831
832#define SCALEX 1
833#define LX0 0
834#define LX1 1
835
de890c9b 836#if 0 // more accurate 9/7
791e7b83
MN
837#define N1 2
838#define SHIFT1 14
839#define COEFFS1 (int[]){-25987,-25987}
840#define N2 2
841#define SHIFT2 19
842#define COEFFS2 (int[]){-27777,-27777}
843#define N3 2
844#define SHIFT3 15
845#define COEFFS3 (int[]){28931,28931}
846#define N4 2
847#define SHIFT4 15
848#define COEFFS4 (int[]){14533,14533}
849#elif 1 // 13/7 CRF
850#define N1 4
851#define SHIFT1 4
852#define COEFFS1 (int[]){1,-9,-9,1}
de890c9b 853#define N2 4
791e7b83
MN
854#define SHIFT2 4
855#define COEFFS2 (int[]){-1,5,5,-1}
856#define N3 0
857#define SHIFT3 1
858#define COEFFS3 NULL
859#define N4 0
860#define SHIFT4 1
861#define COEFFS4 NULL
862#elif 1 // 3/5
863#define LX0 1
864#define LX1 0
865#define SCALEX 0.5
866#define N1 2
867#define SHIFT1 1
868#define COEFFS1 (int[]){1,1}
869#define N2 2
870#define SHIFT2 2
871#define COEFFS2 (int[]){-1,-1}
872#define N3 0
873#define SHIFT3 0
874#define COEFFS3 NULL
875#define N4 0
876#define SHIFT4 0
877#define COEFFS4 NULL
115329f1 878#elif 1 // 11/5
791e7b83
MN
879#define N1 0
880#define SHIFT1 1
881#define COEFFS1 NULL
882#define N2 2
883#define SHIFT2 2
884#define COEFFS2 (int[]){-1,-1}
885#define N3 2
886#define SHIFT3 0
887#define COEFFS3 (int[]){-1,-1}
888#define N4 4
889#define SHIFT4 7
890#define COEFFS4 (int[]){-5,29,29,-5}
891#define SCALEX 4
892#elif 1 // 9/7 CDF
893#define N1 2
894#define SHIFT1 7
895#define COEFFS1 (int[]){-203,-203}
896#define N2 2
897#define SHIFT2 12
898#define COEFFS2 (int[]){-217,-217}
899#define N3 2
900#define SHIFT3 7
901#define COEFFS3 (int[]){113,113}
902#define N4 2
903#define SHIFT4 9
904#define COEFFS4 (int[]){227,227}
905#define SCALEX 1
906#elif 1 // 7/5 CDF
907#define N1 0
908#define SHIFT1 1
909#define COEFFS1 NULL
910#define N2 2
911#define SHIFT2 2
912#define COEFFS2 (int[]){-1,-1}
913#define N3 2
914#define SHIFT3 0
915#define COEFFS3 (int[]){-1,-1}
916#define N4 2
917#define SHIFT4 4
918#define COEFFS4 (int[]){3,3}
919#elif 1 // 9/7 MN
920#define N1 4
921#define SHIFT1 4
922#define COEFFS1 (int[]){1,-9,-9,1}
923#define N2 2
924#define SHIFT2 2
925#define COEFFS2 (int[]){1,1}
926#define N3 0
927#define SHIFT3 1
928#define COEFFS3 NULL
929#define N4 0
930#define SHIFT4 1
931#define COEFFS4 NULL
932#else // 13/7 CRF
933#define N1 4
934#define SHIFT1 4
935#define COEFFS1 (int[]){1,-9,-9,1}
936#define N2 4
937#define SHIFT2 4
938#define COEFFS2 (int[]){-1,5,5,-1}
939#define N3 0
940#define SHIFT3 1
941#define COEFFS3 NULL
942#define N4 0
943#define SHIFT4 1
944#define COEFFS4 NULL
945#endif
aa25a462
RFI
946static void horizontal_decomposeX(DWTELEM *b, int width){
947 DWTELEM temp[width];
791e7b83
MN
948 const int width2= width>>1;
949 const int w2= (width+1)>>1;
62ab0b78 950 int x;
791e7b83
MN
951
952 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 0);
953 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 0);
954 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 0);
955 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 0);
115329f1 956
791e7b83
MN
957 for(x=0; x<width2; x++){
958 temp[x ]= b[2*x ];
959 temp[x+w2]= b[2*x + 1];
960 }
961 if(width&1)
962 temp[x ]= b[2*x ];
963 memcpy(b, temp, width*sizeof(int));
964}
965
aa25a462
RFI
966static void horizontal_composeX(DWTELEM *b, int width){
967 DWTELEM temp[width];
791e7b83 968 const int width2= width>>1;
62ab0b78 969 int x;
791e7b83
MN
970 const int w2= (width+1)>>1;
971
972 memcpy(temp, b, width*sizeof(int));
973 for(x=0; x<width2; x++){
974 b[2*x ]= temp[x ];
975 b[2*x + 1]= temp[x+w2];
976 }
977 if(width&1)
978 b[2*x ]= temp[x ];
979
980 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 1);
981 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 1);
982 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 1);
983 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 1);
984}
985
aa25a462 986static void spatial_decomposeX(DWTELEM *buffer, int width, int height, int stride){
791e7b83 987 int x, y;
115329f1 988
791e7b83
MN
989 for(y=0; y<height; y++){
990 for(x=0; x<width; x++){
991 buffer[y*stride + x] *= SCALEX;
992 }
993 }
994
995 for(y=0; y<height; y++){
996 horizontal_decomposeX(buffer + y*stride, width);
997 }
115329f1 998
791e7b83
MN
999 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 0);
1000 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0);
1001 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0);
115329f1 1002 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0);
791e7b83
MN
1003}
1004
aa25a462 1005static void spatial_composeX(DWTELEM *buffer, int width, int height, int stride){
791e7b83 1006 int x, y;
115329f1 1007
791e7b83
MN
1008 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1);
1009 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1);
1010 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1);
1011 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 1);
1012
1013 for(y=0; y<height; y++){
1014 horizontal_composeX(buffer + y*stride, width);
1015 }
1016
1017 for(y=0; y<height; y++){
1018 for(x=0; x<width; x++){
1019 buffer[y*stride + x] /= SCALEX;
1020 }
1021 }
1022}
1023
aa25a462
RFI
1024static void horizontal_decompose53i(DWTELEM *b, int width){
1025 DWTELEM temp[width];
791e7b83 1026 const int width2= width>>1;
62ab0b78 1027 int x;
791e7b83
MN
1028 const int w2= (width+1)>>1;
1029
1030 for(x=0; x<width2; x++){
1031 temp[x ]= b[2*x ];
1032 temp[x+w2]= b[2*x + 1];
1033 }
1034 if(width&1)
1035 temp[x ]= b[2*x ];
1036#if 0
62ab0b78
AJ
1037 {
1038 int A1,A2,A3,A4;
791e7b83
MN
1039 A2= temp[1 ];
1040 A4= temp[0 ];
1041 A1= temp[0+width2];
1042 A1 -= (A2 + A4)>>1;
1043 A4 += (A1 + 1)>>1;
1044 b[0+width2] = A1;
1045 b[0 ] = A4;
1046 for(x=1; x+1<width2; x+=2){
1047 A3= temp[x+width2];
1048 A4= temp[x+1 ];
1049 A3 -= (A2 + A4)>>1;
1050 A2 += (A1 + A3 + 2)>>2;
1051 b[x+width2] = A3;
1052 b[x ] = A2;
1053
1054 A1= temp[x+1+width2];
1055 A2= temp[x+2 ];
1056 A1 -= (A2 + A4)>>1;
1057 A4 += (A1 + A3 + 2)>>2;
1058 b[x+1+width2] = A1;
1059 b[x+1 ] = A4;
1060 }
1061 A3= temp[width-1];
1062 A3 -= A2;
1063 A2 += (A1 + A3 + 2)>>2;
1064 b[width -1] = A3;
1065 b[width2-1] = A2;
62ab0b78 1066 }
115329f1 1067#else
791e7b83
MN
1068 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
1069 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0);
1070#endif
1071}
1072
aa25a462 1073static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 1074 int i;
115329f1 1075
791e7b83
MN
1076 for(i=0; i<width; i++){
1077 b1[i] -= (b0[i] + b2[i])>>1;
1078 }
1079}
1080
aa25a462 1081static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 1082 int i;
115329f1 1083
791e7b83
MN
1084 for(i=0; i<width; i++){
1085 b1[i] += (b0[i] + b2[i] + 2)>>2;
1086 }
1087}
1088
aa25a462 1089static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
39c61bbb 1090 int y;
791e7b83
MN
1091 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
1092 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride;
115329f1 1093
791e7b83
MN
1094 for(y=-2; y<height; y+=2){
1095 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1096 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1097
1098{START_TIMER
13705b69
MN
1099 if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
1100 if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
791e7b83 1101STOP_TIMER("horizontal_decompose53i")}
115329f1 1102
791e7b83 1103{START_TIMER
13705b69
MN
1104 if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
1105 if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
791e7b83 1106STOP_TIMER("vertical_decompose53i*")}
115329f1 1107
791e7b83
MN
1108 b0=b2;
1109 b1=b3;
1110 }
1111}
1112
aa25a462
RFI
1113static void horizontal_decompose97i(DWTELEM *b, int width){
1114 DWTELEM temp[width];
791e7b83
MN
1115 const int w2= (width+1)>>1;
1116
ce611a27
MN
1117 lift (temp+w2, b +1, b , 1, 2, 2, width, W_AM, W_AO, W_AS, 1, 1);
1118 liftS(temp , b , temp+w2, 1, 2, 1, width, W_BM, W_BO, W_BS, 0, 0);
791e7b83
MN
1119 lift5(b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
1120 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
1121}
1122
1123
aa25a462 1124static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 1125 int i;
115329f1 1126
791e7b83
MN
1127 for(i=0; i<width; i++){
1128 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1129 }
1130}
1131
aa25a462 1132static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 1133 int i;
115329f1 1134
791e7b83
MN
1135 for(i=0; i<width; i++){
1136#ifdef lift5
1137 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1138#else
1139 int r= 3*(b0[i] + b2[i]);
1140 r+= r>>4;
1141 r+= r>>8;
1142 b1[i] += (r+W_CO)>>W_CS;
1143#endif
1144 }
1145}
1146
aa25a462 1147static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 1148 int i;
115329f1 1149
791e7b83 1150 for(i=0; i<width; i++){
f5a71928 1151#ifdef liftS
791e7b83 1152 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
f5a71928 1153#else
ce611a27 1154 b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + W_BO*5 + (5<<27)) / (5*16) - (1<<23);
f5a71928 1155#endif
791e7b83
MN
1156 }
1157}
1158
aa25a462 1159static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 1160 int i;
115329f1 1161
791e7b83
MN
1162 for(i=0; i<width; i++){
1163 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1164 }
1165}
1166
aa25a462 1167static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
39c61bbb 1168 int y;
791e7b83
MN
1169 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
1170 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride;
1171 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
1172 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
115329f1 1173
791e7b83
MN
1174 for(y=-4; y<height; y+=2){
1175 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1176 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1177
1178{START_TIMER
13705b69
MN
1179 if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
1180 if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
791e7b83
MN
1181if(width>400){
1182STOP_TIMER("horizontal_decompose97i")
1183}}
115329f1 1184
791e7b83 1185{START_TIMER
13705b69
MN
1186 if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
1187 if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
1188 if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
1189 if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
791e7b83
MN
1190
1191if(width>400){
1192STOP_TIMER("vertical_decompose97i")
1193}}
115329f1 1194
791e7b83
MN
1195 b0=b2;
1196 b1=b3;
1197 b2=b4;
1198 b3=b5;
1199 }
1200}
1201
aa25a462 1202void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
791e7b83 1203 int level;
115329f1 1204
46c281e8
MN
1205 for(level=0; level<decomposition_count; level++){
1206 switch(type){
d4b287ed
LM
1207 case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1208 case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1209 case DWT_X: spatial_decomposeX (buffer, width>>level, height>>level, stride<<level); break;
791e7b83
MN
1210 }
1211 }
1212}
1213
aa25a462
RFI
1214static void horizontal_compose53i(DWTELEM *b, int width){
1215 DWTELEM temp[width];
791e7b83
MN
1216 const int width2= width>>1;
1217 const int w2= (width+1)>>1;
62ab0b78 1218 int x;
791e7b83
MN
1219
1220#if 0
62ab0b78 1221 int A1,A2,A3,A4;
791e7b83
MN
1222 A2= temp[1 ];
1223 A4= temp[0 ];
1224 A1= temp[0+width2];
1225 A1 -= (A2 + A4)>>1;
1226 A4 += (A1 + 1)>>1;
1227 b[0+width2] = A1;
1228 b[0 ] = A4;
1229 for(x=1; x+1<width2; x+=2){
1230 A3= temp[x+width2];
1231 A4= temp[x+1 ];
1232 A3 -= (A2 + A4)>>1;
1233 A2 += (A1 + A3 + 2)>>2;
1234 b[x+width2] = A3;
1235 b[x ] = A2;
1236
1237 A1= temp[x+1+width2];
1238 A2= temp[x+2 ];
1239 A1 -= (A2 + A4)>>1;
1240 A4 += (A1 + A3 + 2)>>2;
1241 b[x+1+width2] = A1;
1242 b[x+1 ] = A4;
1243 }
1244 A3= temp[width-1];
1245 A3 -= A2;
1246 A2 += (A1 + A3 + 2)>>2;
1247 b[width -1] = A3;
1248 b[width2-1] = A2;
115329f1 1249#else
791e7b83
MN
1250 lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1);
1251 lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1252#endif
1253 for(x=0; x<width2; x++){
1254 b[2*x ]= temp[x ];
1255 b[2*x + 1]= temp[x+w2];
1256 }
1257 if(width&1)
1258 b[2*x ]= temp[x ];
1259}
1260
aa25a462 1261static void vertical_compose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 1262 int i;
115329f1 1263
791e7b83
MN
1264 for(i=0; i<width; i++){
1265 b1[i] += (b0[i] + b2[i])>>1;
1266 }
1267}
1268
aa25a462 1269static void vertical_compose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 1270 int i;
115329f1 1271
791e7b83
MN
1272 for(i=0; i<width; i++){
1273 b1[i] -= (b0[i] + b2[i] + 2)>>2;
1274 }
1275}
1276
a0d1931c
Y
1277static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1278 cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1279 cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line);
1280 cs->y = -1;
1281}
1282
f9e6ebf7
LM
1283static void spatial_compose53i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1284 cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1285 cs->b1 = buffer + mirror(-1 , height-1)*stride;
1286 cs->y = -1;
1287}
1288
a0d1931c
Y
1289static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1290 int y= cs->y;
115329f1 1291
a0d1931c
Y
1292 DWTELEM *b0= cs->b0;
1293 DWTELEM *b1= cs->b1;
3b6ab26c
MN
1294 DWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
1295 DWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
a0d1931c
Y
1296
1297{START_TIMER
13705b69
MN
1298 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1299 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
a0d1931c
Y
1300STOP_TIMER("vertical_compose53i*")}
1301
1302{START_TIMER
13705b69
MN
1303 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1304 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
a0d1931c
Y
1305STOP_TIMER("horizontal_compose53i")}
1306
1307 cs->b0 = b2;
1308 cs->b1 = b3;
1309 cs->y += 2;
1310}
1311
f9e6ebf7
LM
1312static void spatial_compose53i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1313 int y= cs->y;
1314 DWTELEM *b0= cs->b0;
1315 DWTELEM *b1= cs->b1;
1316 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1317 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
791e7b83
MN
1318
1319{START_TIMER
13705b69
MN
1320 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1321 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
791e7b83
MN
1322STOP_TIMER("vertical_compose53i*")}
1323
1324{START_TIMER
13705b69
MN
1325 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1326 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
791e7b83
MN
1327STOP_TIMER("horizontal_compose53i")}
1328
f9e6ebf7
LM
1329 cs->b0 = b2;
1330 cs->b1 = b3;
1331 cs->y += 2;
1332}
1333
1334static void spatial_compose53i(DWTELEM *buffer, int width, int height, int stride){
1335 dwt_compose_t cs;
1336 spatial_compose53i_init(&cs, buffer, height, stride);
1337 while(cs.y <= height)
1338 spatial_compose53i_dy(&cs, buffer, width, height, stride);
115329f1
DB
1339}
1340
791e7b83 1341
059715a4 1342void ff_snow_horizontal_compose97i(DWTELEM *b, int width){
aa25a462 1343 DWTELEM temp[width];
791e7b83
MN
1344 const int w2= (width+1)>>1;
1345
1346 lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
1347 lift5(temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
ce611a27
MN
1348 liftS(b , temp , temp+w2, 2, 1, 1, width, W_BM, W_BO, W_BS, 0, 1);
1349 lift (b+1 , temp+w2, b , 2, 1, 2, width, W_AM, W_AO, W_AS, 1, 0);
791e7b83
MN
1350}
1351
aa25a462 1352static void vertical_compose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 1353 int i;
115329f1 1354
791e7b83
MN
1355 for(i=0; i<width; i++){
1356 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1357 }
1358}
1359
aa25a462 1360static void vertical_compose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 1361 int i;
115329f1 1362
791e7b83
MN
1363 for(i=0; i<width; i++){
1364#ifdef lift5
1365 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1366#else
1367 int r= 3*(b0[i] + b2[i]);
1368 r+= r>>4;
1369 r+= r>>8;
1370 b1[i] -= (r+W_CO)>>W_CS;
1371#endif
1372 }
1373}
1374
aa25a462 1375static void vertical_compose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 1376 int i;
115329f1 1377
791e7b83 1378 for(i=0; i<width; i++){
f5a71928 1379#ifdef liftS
791e7b83 1380 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
f5a71928
MN
1381#else
1382 b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
1383#endif
791e7b83
MN
1384 }
1385}
1386
aa25a462 1387static void vertical_compose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 1388 int i;
115329f1 1389
791e7b83
MN
1390 for(i=0; i<width; i++){
1391 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1392 }
1393}
1394
059715a4 1395void ff_snow_vertical_compose97i(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width){
565a45ac 1396 int i;
115329f1 1397
565a45ac 1398 for(i=0; i<width; i++){
62ab0b78 1399#ifndef lift5
565a45ac 1400 int r;
62ab0b78 1401#endif
565a45ac
MN
1402 b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
1403#ifdef lift5
1404 b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
1405#else
1406 r= 3*(b2[i] + b4[i]);
1407 r+= r>>4;
1408 r+= r>>8;
1409 b3[i] -= (r+W_CO)>>W_CS;
1410#endif
f5a71928 1411#ifdef liftS
565a45ac 1412 b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
f5a71928
MN
1413#else
1414 b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
1415#endif
565a45ac
MN
1416 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1417 }
1418}
1419
a0d1931c
Y
1420static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1421 cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1422 cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line);
1423 cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1424 cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
1425 cs->y = -3;
1426}
1427
f9e6ebf7
LM
1428static void spatial_compose97i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1429 cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1430 cs->b1 = buffer + mirror(-3 , height-1)*stride;
1431 cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1432 cs->b3 = buffer + mirror(-3+2, height-1)*stride;
1433 cs->y = -3;
1434}
791e7b83 1435
059715a4 1436static void spatial_compose97i_dy_buffered(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
a0d1931c 1437 int y = cs->y;
115329f1 1438
a0d1931c
Y
1439 DWTELEM *b0= cs->b0;
1440 DWTELEM *b1= cs->b1;
1441 DWTELEM *b2= cs->b2;
1442 DWTELEM *b3= cs->b3;
3b6ab26c
MN
1443 DWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
1444 DWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
115329f1 1445
a0d1931c 1446{START_TIMER
565a45ac 1447 if(y>0 && y+4<height){
059715a4 1448 dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
565a45ac 1449 }else{
13705b69
MN
1450 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1451 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1452 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1453 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
565a45ac 1454 }
a0d1931c
Y
1455if(width>400){
1456STOP_TIMER("vertical_compose97i")}}
a0d1931c
Y
1457
1458{START_TIMER
059715a4
RE
1459 if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width);
1460 if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width);
3b6ab26c 1461if(width>400 && y+0<(unsigned)height){
a0d1931c
Y
1462STOP_TIMER("horizontal_compose97i")}}
1463
1464 cs->b0=b2;
1465 cs->b1=b3;
1466 cs->b2=b4;
1467 cs->b3=b5;
1468 cs->y += 2;
1469}
1470
f9e6ebf7
LM
1471static void spatial_compose97i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1472 int y = cs->y;
1473 DWTELEM *b0= cs->b0;
1474 DWTELEM *b1= cs->b1;
1475 DWTELEM *b2= cs->b2;
1476 DWTELEM *b3= cs->b3;
1477 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1478 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
791e7b83 1479
791e7b83 1480{START_TIMER
13705b69
MN
1481 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1482 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1483 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1484 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
791e7b83
MN
1485if(width>400){
1486STOP_TIMER("vertical_compose97i")}}
1487
1488{START_TIMER
059715a4
RE
1489 if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width);
1490 if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width);
791e7b83
MN
1491if(width>400 && b0 <= b2){
1492STOP_TIMER("horizontal_compose97i")}}
f9e6ebf7
LM
1493
1494 cs->b0=b2;
1495 cs->b1=b3;
1496 cs->b2=b4;
1497 cs->b3=b5;
1498 cs->y += 2;
1499}
1500
1501static void spatial_compose97i(DWTELEM *buffer, int width, int height, int stride){
1502 dwt_compose_t cs;
1503 spatial_compose97i_init(&cs, buffer, height, stride);
1504 while(cs.y <= height)
1505 spatial_compose97i_dy(&cs, buffer, width, height, stride);
1506}
1507
ceaf1909 1508static void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
a0d1931c
Y
1509 int level;
1510 for(level=decomposition_count-1; level>=0; level--){
1511 switch(type){
d4b287ed
LM
1512 case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1513 case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
a0d1931c 1514 /* not slicified yet */
d4b287ed 1515 case DWT_X: /*spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;*/
a0d1931c
Y
1516 av_log(NULL, AV_LOG_ERROR, "spatial_composeX neither buffered nor slicified yet.\n"); break;
1517 }
1518 }
1519}
1520
ceaf1909 1521static void ff_spatial_idwt_init(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
f9e6ebf7
LM
1522 int level;
1523 for(level=decomposition_count-1; level>=0; level--){
1524 switch(type){
d4b287ed
LM
1525 case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1526 case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
f9e6ebf7 1527 /* not slicified yet */
d4b287ed 1528 case DWT_X: spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;
f9e6ebf7 1529 }
791e7b83
MN
1530 }
1531}
1532
ceaf1909 1533static void ff_spatial_idwt_slice(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
f9e6ebf7 1534 const int support = type==1 ? 3 : 5;
791e7b83 1535 int level;
f9e6ebf7 1536 if(type==2) return;
791e7b83 1537
46c281e8 1538 for(level=decomposition_count-1; level>=0; level--){
f9e6ebf7
LM
1539 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1540 switch(type){
d4b287ed 1541 case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
f9e6ebf7 1542 break;
d4b287ed 1543 case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
f9e6ebf7 1544 break;
d4b287ed 1545 case DWT_X: break;
f9e6ebf7 1546 }
791e7b83
MN
1547 }
1548 }
1549}
1550
059715a4 1551static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
a0d1931c
Y
1552 const int support = type==1 ? 3 : 5;
1553 int level;
1554 if(type==2) return;
1555
1556 for(level=decomposition_count-1; level>=0; level--){
1557 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1558 switch(type){
d4b287ed 1559 case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
a0d1931c 1560 break;
d4b287ed 1561 case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
a0d1931c 1562 break;
d4b287ed 1563 case DWT_X: break;
a0d1931c
Y
1564 }
1565 }
1566 }
1567}
1568
ceaf1909 1569static void ff_spatial_idwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
f9e6ebf7
LM
1570 if(type==2){
1571 int level;
1572 for(level=decomposition_count-1; level>=0; level--)
1573 spatial_composeX (buffer, width>>level, height>>level, stride<<level);
1574 }else{
1575 dwt_compose_t cs[MAX_DECOMPOSITIONS];
1576 int y;
1577 ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1578 for(y=0; y<height; y+=4)
1579 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
1580 }
1581}
1582
0ecca7a4 1583static int encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
4f4e9633
MN
1584 const int w= b->width;
1585 const int h= b->height;
1586 int x, y;
1587
791e7b83 1588 if(1){
791e7b83 1589 int run=0;
a8d73e56 1590 int runs[w*h];
791e7b83 1591 int run_index=0;
b44985ba 1592 int max_index;
115329f1 1593
791e7b83
MN
1594 for(y=0; y<h; y++){
1595 for(x=0; x<w; x++){
78486403 1596 int v, p=0;
6b2f6646 1597 int /*ll=0, */l=0, lt=0, t=0, rt=0;
a8d73e56 1598 v= src[x + y*stride];
791e7b83
MN
1599
1600 if(y){
a8d73e56 1601 t= src[x + (y-1)*stride];
791e7b83 1602 if(x){
a8d73e56 1603 lt= src[x - 1 + (y-1)*stride];
791e7b83
MN
1604 }
1605 if(x + 1 < w){
a8d73e56 1606 rt= src[x + 1 + (y-1)*stride];
791e7b83
MN
1607 }
1608 }
1609 if(x){
a8d73e56 1610 l= src[x - 1 + y*stride];
6b2f6646
MN
1611 /*if(x > 1){
1612 if(orientation==1) ll= src[y + (x-2)*stride];
1613 else ll= src[x - 2 + y*stride];
791e7b83
MN
1614 }*/
1615 }
78486403 1616 if(parent){
a8d73e56
MN
1617 int px= x>>1;
1618 int py= y>>1;
115329f1 1619 if(px<b->parent->width && py<b->parent->height)
78486403
MN
1620 p= parent[px + py*2*stride];
1621 }
1622 if(!(/*ll|*/l|lt|t|rt|p)){
791e7b83
MN
1623 if(v){
1624 runs[run_index++]= run;
1625 run=0;
1626 }else{
1627 run++;
1628 }
1629 }
1630 }
1631 }
b44985ba 1632 max_index= run_index;
791e7b83
MN
1633 runs[run_index++]= run;
1634 run_index=0;
1635 run= runs[run_index++];
1636
b44985ba
MN
1637 put_symbol2(&s->c, b->state[30], max_index, 0);
1638 if(run_index <= max_index)
1639 put_symbol2(&s->c, b->state[1], run, 3);
115329f1 1640
791e7b83 1641 for(y=0; y<h; y++){
d06c75a8 1642 if(s->c.bytestream_end - s->c.bytestream < w*40){
0ecca7a4
MN
1643 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1644 return -1;
1645 }
791e7b83 1646 for(x=0; x<w; x++){
78486403 1647 int v, p=0;
6b2f6646 1648 int /*ll=0, */l=0, lt=0, t=0, rt=0;
a8d73e56 1649 v= src[x + y*stride];
791e7b83
MN
1650
1651 if(y){
a8d73e56 1652 t= src[x + (y-1)*stride];
791e7b83 1653 if(x){
a8d73e56 1654 lt= src[x - 1 + (y-1)*stride];
791e7b83
MN
1655 }
1656 if(x + 1 < w){
a8d73e56 1657 rt= src[x + 1 + (y-1)*stride];
791e7b83
MN
1658 }
1659 }
1660 if(x){
a8d73e56 1661 l= src[x - 1 + y*stride];
6b2f6646
MN
1662 /*if(x > 1){
1663 if(orientation==1) ll= src[y + (x-2)*stride];
1664 else ll= src[x - 2 + y*stride];
791e7b83
MN
1665 }*/
1666 }
78486403 1667 if(parent){
a8d73e56
MN
1668 int px= x>>1;
1669 int py= y>>1;
115329f1 1670 if(px<b->parent->width && py<b->parent->height)
78486403
MN
1671 p= parent[px + py*2*stride];
1672 }
1673 if(/*ll|*/l|lt|t|rt|p){
c26abfa5 1674 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
6b2f6646 1675
28869757 1676 put_rac(&s->c, &b->state[0][context], !!v);
791e7b83
MN
1677 }else{
1678 if(!run){
1679 run= runs[run_index++];
4f4e9633 1680
b44985ba
MN
1681 if(run_index <= max_index)
1682 put_symbol2(&s->c, b->state[1], run, 3);
791e7b83
MN
1683 assert(v);
1684 }else{
1685 run--;
1686 assert(!v);
1687 }
1688 }
1689 if(v){
c26abfa5
DB
1690 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1691 int l2= 2*FFABS(l) + (l<0);
1692 int t2= 2*FFABS(t) + (t<0);
6b2f6646 1693
c26abfa5 1694 put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
538a3841 1695 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
791e7b83
MN
1696 }
1697 }
1698 }
791e7b83 1699 }
0ecca7a4 1700 return 0;
791e7b83
MN
1701}
1702
115329f1 1703static int encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
4f4e9633
MN
1704// encode_subband_qtree(s, b, src, parent, stride, orientation);
1705// encode_subband_z0run(s, b, src, parent, stride, orientation);
0ecca7a4 1706 return encode_subband_c0run(s, b, src, parent, stride, orientation);
4f4e9633
MN
1707// encode_subband_dzr(s, b, src, parent, stride, orientation);
1708}
1709
a0d1931c 1710static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
791e7b83
MN
1711 const int w= b->width;
1712 const int h= b->height;
1713 int x,y;
115329f1 1714
791e7b83 1715 if(1){
b44985ba 1716 int run, runs;
cbb1d2b1
MN
1717 x_and_coeff *xc= b->x_coeff;
1718 x_and_coeff *prev_xc= NULL;
1719 x_and_coeff *prev2_xc= xc;
1720 x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
1721 x_and_coeff *prev_parent_xc= parent_xc;
791e7b83 1722
b44985ba
MN
1723 runs= get_symbol2(&s->c, b->state[30], 0);
1724 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1725 else run= INT_MAX;
1726
791e7b83 1727 for(y=0; y<h; y++){
0cea8a03
MN
1728 int v=0;
1729 int lt=0, t=0, rt=0;
1730
cbb1d2b1
MN
1731 if(y && prev_xc->x == 0){
1732 rt= prev_xc->coeff;
0cea8a03 1733 }
791e7b83 1734 for(x=0; x<w; x++){
0cea8a03
MN
1735 int p=0;
1736 const int l= v;
115329f1 1737
0cea8a03 1738 lt= t; t= rt;
791e7b83 1739
ff765159 1740 if(y){
cbb1d2b1
MN
1741 if(prev_xc->x <= x)
1742 prev_xc++;
1743 if(prev_xc->x == x + 1)
1744 rt= prev_xc->coeff;
ff765159
MN
1745 else
1746 rt=0;
1747 }
cbb1d2b1
MN
1748 if(parent_xc){
1749 if(x>>1 > parent_xc->x){
1750 parent_xc++;
7b49c309 1751 }
cbb1d2b1
MN
1752 if(x>>1 == parent_xc->x){
1753 p= parent_xc->coeff;
ff765159 1754 }
78486403
MN
1755 }
1756 if(/*ll|*/l|lt|t|rt|p){
c26abfa5 1757 int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
6b2f6646 1758
28869757 1759 v=get_rac(&s->c, &b->state[0][context]);
3c096ac7
MN
1760 if(v){
1761 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
1762 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
115329f1 1763
cbb1d2b1
MN
1764 xc->x=x;
1765 (xc++)->coeff= v;
3c096ac7 1766 }
791e7b83
MN
1767 }else{
1768 if(!run){
b44985ba
MN
1769 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1770 else run= INT_MAX;
3c096ac7
MN
1771 v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
1772 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
115329f1 1773
cbb1d2b1
MN
1774 xc->x=x;
1775 (xc++)->coeff= v;
791e7b83 1776 }else{
99cd59e5 1777 int max_run;
791e7b83
MN
1778 run--;
1779 v=0;
3c1adccd 1780
cbb1d2b1 1781 if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
99cd59e5 1782 else max_run= FFMIN(run, w-x-1);
cbb1d2b1
MN
1783 if(parent_xc)
1784 max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
99cd59e5
MN
1785 x+= max_run;
1786 run-= max_run;
791e7b83
MN
1787 }
1788 }
7b49c309 1789 }
cbb1d2b1
MN
1790 (xc++)->x= w+1; //end marker
1791 prev_xc= prev2_xc;
1792 prev2_xc= xc;
115329f1 1793
cbb1d2b1 1794 if(parent_xc){
7b49c309 1795 if(y&1){
cbb1d2b1
MN
1796 while(parent_xc->x != parent->width+1)
1797 parent_xc++;
1798 parent_xc++;
1799 prev_parent_xc= parent_xc;
7b49c309 1800 }else{
cbb1d2b1 1801 parent_xc= prev_parent_xc;
791e7b83
MN
1802 }
1803 }
1804 }
a0d1931c 1805
cbb1d2b1 1806 (xc++)->x= w+1; //end marker
a0d1931c
Y
1807 }
1808}
1809
1810static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1811 const int w= b->width;
62ab0b78 1812 int y;
f66e4f5f 1813 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
c97de57c 1814 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
a0d1931c
Y
1815 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1816 int new_index = 0;
115329f1 1817
a0d1931c
Y
1818 START_TIMER
1819
1820 if(b->buf == s->spatial_dwt_buffer || s->qlog == LOSSLESS_QLOG){
1821 qadd= 0;
1822 qmul= 1<<QEXPSHIFT;
1823 }
1824
1825 /* If we are on the second or later slice, restore our index. */
1826 if (start_y != 0)
1827 new_index = save_state[0];
1828
115329f1 1829
a0d1931c
Y
1830 for(y=start_y; y<h; y++){
1831 int x = 0;
1832 int v;
1833 DWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1834 memset(line, 0, b->width*sizeof(DWTELEM));
1835 v = b->x_coeff[new_index].coeff;
1836 x = b->x_coeff[new_index++].x;
1837 while(x < w)
1838 {
538a3841
MN
1839 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1840 register int u= -(v&1);
1841 line[x] = (t^u) - u;
1842
a0d1931c
Y
1843 v = b->x_coeff[new_index].coeff;
1844 x = b->x_coeff[new_index++].x;
1845 }
791e7b83 1846 }
a0d1931c
Y
1847 if(w > 200 && start_y != 0/*level+1 == s->spatial_decomposition_count*/){
1848 STOP_TIMER("decode_subband")
1849 }
115329f1 1850
a0d1931c
Y
1851 /* Save our variables for the next slice. */
1852 save_state[0] = new_index;
115329f1 1853
a0d1931c 1854 return;
791e7b83
MN
1855}
1856
396a5e68 1857static void reset_contexts(SnowContext *s){ //FIXME better initial contexts
791e7b83
MN
1858 int plane_index, level, orientation;
1859
19aa028d 1860 for(plane_index=0; plane_index<3; plane_index++){
791e7b83
MN
1861 for(level=0; level<s->spatial_decomposition_count; level++){
1862 for(orientation=level ? 1:0; orientation<4; orientation++){
28869757 1863 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
791e7b83
MN
1864 }
1865 }
1866 }
28869757
MN
1867 memset(s->header_state, MID_STATE, sizeof(s->header_state));
1868 memset(s->block_state, MID_STATE, sizeof(s->block_state));
155ec6ed
MN
1869}
1870
1871static int alloc_blocks(SnowContext *s){
1872 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1873 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
115329f1 1874
155ec6ed
MN
1875 s->b_width = w;
1876 s->b_height= h;
115329f1 1877
155ec6ed
MN
1878 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
1879 return 0;
1880}
1881
28869757
MN
1882static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1883 uint8_t *bytestream= d->bytestream;
1884 uint8_t *bytestream_start= d->bytestream_start;
155ec6ed 1885 *d= *s;
28869757
MN
1886 d->bytestream= bytestream;
1887 d->bytestream_start= bytestream_start;
155ec6ed
MN
1888}
1889
1890//near copy & paste from dsputil, FIXME
1891static int pix_sum(uint8_t * pix, int line_size, int w)
1892{
1893 int s, i, j;
1894
1895 s = 0;
1896 for (i = 0; i < w; i++) {
1897 for (j = 0; j < w; j++) {
1898 s += pix[0];
1899 pix ++;
1900 }
1901 pix += line_size - w;
1902 }
1903 return s;
1904}
1905
1906//near copy & paste from dsputil, FIXME
1907static int pix_norm1(uint8_t * pix, int line_size, int w)
1908{
1909 int s, i, j;
1d503957 1910 uint32_t *sq = ff_squareTbl + 256;
155ec6ed
MN
1911
1912 s = 0;
1913 for (i = 0; i < w; i++) {
1914 for (j = 0; j < w; j ++) {
1915 s += sq[pix[0]];
1916 pix ++;
1917 }
1918 pix += line_size - w;
1919 }
1920 return s;
1921}
1922
8c36eaaa 1923static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
155ec6ed
MN
1924 const int w= s->b_width << s->block_max_depth;
1925 const int rem_depth= s->block_max_depth - level;
1926 const int index= (x + y*w) << rem_depth;
1927 const int block_w= 1<<rem_depth;
1928 BlockNode block;
1929 int i,j;
115329f1 1930
155ec6ed
MN
1931 block.color[0]= l;
1932 block.color[1]= cb;
1933 block.color[2]= cr;
1934 block.mx= mx;
1935 block.my= my;
8c36eaaa 1936 block.ref= ref;
155ec6ed
MN
1937 block.type= type;
1938 block.level= level;
1939
1940 for(j=0; j<block_w; j++){
1941 for(i=0; i<block_w; i++){
1942 s->block[index + i + j*w]= block;
1943 }
1944 }
1945}
1946
1947static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
1948 const int offset[3]= {
1949 y*c-> stride + x,
1950 ((y*c->uvstride + x)>>1),
1951 ((y*c->uvstride + x)>>1),
1952 };
1953 int i;
1954 for(i=0; i<3; i++){
1955 c->src[0][i]= src [i];
1956 c->ref[0][i]= ref [i] + offset[i];
1957 }
1958 assert(!ref_index);
1959}
1960
85fc0e75 1961static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
aadcc5ce 1962 const BlockNode *left, const BlockNode *top, const BlockNode *tr){
85fc0e75
LM
1963 if(s->ref_frames == 1){
1964 *mx = mid_pred(left->mx, top->mx, tr->mx);
1965 *my = mid_pred(left->my, top->my, tr->my);
1966 }else{
1967 const int *scale = scale_mv_ref[ref];
6884c36c
PI
1968 *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8,
1969 (top ->mx * scale[top ->ref] + 128) >>8,
1970 (tr ->mx * scale[tr ->ref] + 128) >>8);
1971 *my = mid_pred((left->my * scale[left->ref] + 128) >>8,
1972 (top ->my * scale[top ->ref] + 128) >>8,
1973 (tr ->my * scale[tr ->ref] + 128) >>8);
85fc0e75
LM
1974 }
1975}
1976
155ec6ed
MN
1977//FIXME copy&paste
1978#define P_LEFT P[1]
1979#define P_TOP P[2]
1980#define P_TOPRIGHT P[3]
1981#define P_MEDIAN P[4]
1982#define P_MV1 P[9]
1983#define FLAG_QPEL 1 //must be 1
1984
1985static int encode_q_branch(SnowContext *s, int level, int x, int y){
1986 uint8_t p_buffer[1024];
1987 uint8_t i_buffer[1024];
1988 uint8_t p_state[sizeof(s->block_state)];
1989 uint8_t i_state[sizeof(s->block_state)];
28869757
MN
1990 RangeCoder pc, ic;
1991 uint8_t *pbbak= s->c.bytestream;
1992 uint8_t *pbbak_start= s->c.bytestream_start;
1e6b5700 1993 int score, score2, iscore, i_len, p_len, block_s, sum, base_bits;
155ec6ed
MN
1994 const int w= s->b_width << s->block_max_depth;
1995 const int h= s->b_height << s->block_max_depth;
1996 const int rem_depth= s->block_max_depth - level;
1997 const int index= (x + y*w) << rem_depth;
1998 const int block_w= 1<<(LOG2_MB_SIZE - level);
155ec6ed
MN
1999 int trx= (x+1)<<rem_depth;
2000 int try= (y+1)<<rem_depth;
aadcc5ce
PI
2001 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2002 const BlockNode *top = y ? &s->block[index-w] : &null_block;
2003 const BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
2004 const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
2005 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2006 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
155ec6ed
MN
2007 int pl = left->color[0];
2008 int pcb= left->color[1];
2009 int pcr= left->color[2];
85fc0e75 2010 int pmx, pmy;
155ec6ed 2011 int mx=0, my=0;
51d6a3cf 2012 int l,cr,cb;
155ec6ed
MN
2013 const int stride= s->current_picture.linesize[0];
2014 const int uvstride= s->current_picture.linesize[1];
51d6a3cf
MN
2015 uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w,
2016 s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
2017 s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
155ec6ed
MN
2018 int P[10][2];
2019 int16_t last_mv[3][2];
2020 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
2021 const int shift= 1+qpel;
2022 MotionEstContext *c= &s->m.me;
8c36eaaa 2023 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
c26abfa5
DB
2024 int mx_context= av_log2(2*FFABS(left->mx - top->mx));
2025 int my_context= av_log2(2*FFABS(left->my - top->my));
155ec6ed 2026 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
8c36eaaa 2027 int ref, best_ref, ref_score, ref_mx, ref_my;
155ec6ed
MN
2028
2029 assert(sizeof(s->block_state) >= 256);
2030 if(s->keyframe){
85fc0e75 2031 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
155ec6ed
MN
2032 return 0;
2033 }
2034
155ec6ed
MN
2035// clip predictors / edge ?
2036
2037 P_LEFT[0]= left->mx;
2038 P_LEFT[1]= left->my;
2039 P_TOP [0]= top->mx;
2040 P_TOP [1]= top->my;
2041 P_TOPRIGHT[0]= tr->mx;
2042 P_TOPRIGHT[1]= tr->my;
115329f1 2043
155ec6ed
MN
2044 last_mv[0][0]= s->block[index].mx;
2045 last_mv[0][1]= s->block[index].my;
2046 last_mv[1][0]= right->mx;
2047 last_mv[1][1]= right->my;
2048 last_mv[2][0]= bottom->mx;
2049 last_mv[2][1]= bottom->my;
115329f1 2050
155ec6ed 2051 s->m.mb_stride=2;
115329f1 2052 s->m.mb_x=
155ec6ed 2053 s->m.mb_y= 0;
e2158da8 2054 c->skip= 0;
155ec6ed 2055
e2158da8
PI
2056 assert(c-> stride == stride);
2057 assert(c->uvstride == uvstride);
115329f1 2058
155ec6ed
MN
2059 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
2060 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
2061 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
2062 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
115329f1 2063
ff158dc9
MN
2064 c->xmin = - x*block_w - 16+2;
2065 c->ymin = - y*block_w - 16+2;
2066 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
2067 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
155ec6ed
MN
2068
2069 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
115329f1 2070 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
155ec6ed
MN
2071 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
2072 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
2073 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
2074 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
2075 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
2076
2077 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
2078 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
2079
2080 if (!y) {
2081 c->pred_x= P_LEFT[0];
2082 c->pred_y= P_LEFT[1];
2083 } else {
2084 c->pred_x = P_MEDIAN[0];
2085 c->pred_y = P_MEDIAN[1];
2086 }
2087
8c36eaaa
LM
2088 score= INT_MAX;
2089 best_ref= 0;
2090 for(ref=0; ref<s->ref_frames; ref++){
2091 init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);
2092
2093 ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
2094 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
155ec6ed 2095
8c36eaaa
LM
2096 assert(ref_mx >= c->xmin);
2097 assert(ref_mx <= c->xmax);
2098 assert(ref_my >= c->ymin);
2099 assert(ref_my <= c->ymax);
115329f1 2100
e2158da8 2101 ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
8c36eaaa
LM
2102 ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
2103 ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
2104 if(s->ref_mvs[ref]){
2105 s->ref_mvs[ref][index][0]= ref_mx;
2106 s->ref_mvs[ref][index][1]= ref_my;
2107 s->ref_scores[ref][index]= ref_score;
2108 }
2109 if(score > ref_score){
2110 score= ref_score;
2111 best_ref= ref;
2112 mx= ref_mx;
2113 my= ref_my;
2114 }
2115 }
755bfeab 2116 //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2
115329f1 2117
155ec6ed 2118 // subpel search
61d49d12 2119 base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start);
155ec6ed 2120 pc= s->c;
28869757
MN
2121 pc.bytestream_start=
2122 pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
155ec6ed
MN
2123 memcpy(p_state, s->block_state, sizeof(s->block_state));
2124
2125 if(level!=s->block_max_depth)
28869757
MN
2126 put_rac(&pc, &p_state[4 + s_context], 1);
2127 put_rac(&pc, &p_state[1 + left->type + top->type], 0);
8c36eaaa
LM
2128 if(s->ref_frames > 1)
2129 put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
85fc0e75 2130 pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
8c36eaaa
LM
2131 put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
2132 put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
28869757 2133 p_len= pc.bytestream - pc.bytestream_start;
1e6b5700 2134 score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT;
155ec6ed
MN
2135
2136 block_s= block_w*block_w;
51d6a3cf 2137 sum = pix_sum(current_data[0], stride, block_w);
155ec6ed 2138 l= (sum + block_s/2)/block_s;
51d6a3cf 2139 iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
115329f1 2140
155ec6ed 2141 block_s= block_w*block_w>>2;
51d6a3cf 2142 sum = pix_sum(current_data[1], uvstride, block_w>>1);
155ec6ed
MN
2143 cb= (sum + block_s/2)/block_s;
2144// iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
51d6a3cf 2145 sum = pix_sum(current_data[2], uvstride, block_w>>1);
155ec6ed
MN
2146 cr= (sum + block_s/2)/block_s;
2147// iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
2148
2149 ic= s->c;
28869757
MN
2150 ic.bytestream_start=
2151 ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
155ec6ed
MN
2152 memcpy(i_state, s->block_state, sizeof(s->block_state));
2153 if(level!=s->block_max_depth)
28869757
MN
2154 put_rac(&ic, &i_state[4 + s_context], 1);
2155 put_rac(&ic, &i_state[1 + left->type + top->type], 1);
155ec6ed
MN
2156 put_symbol(&ic, &i_state[32], l-pl , 1);
2157 put_symbol(&ic, &i_state[64], cb-pcb, 1);
2158 put_symbol(&ic, &i_state[96], cr-pcr, 1);
28869757 2159 i_len= ic.bytestream - ic.bytestream_start;
1e6b5700 2160 iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT;
155ec6ed
MN
2161
2162// assert(score==256*256*256*64-1);
2163 assert(iscore < 255*255*256 + s->lambda2*10);
2164 assert(iscore >= 0);
2165 assert(l>=0 && l<=255);
2166 assert(pl>=0 && pl<=255);
2167
2168 if(level==0){
2169 int varc= iscore >> 8;
2170 int vard= score >> 8;
2171 if (vard <= 64 || vard < varc)
2172 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
2173 else
2174 c->scene_change_score+= s->m.qscale;
2175 }
115329f1 2176
155ec6ed 2177 if(level!=s->block_max_depth){
28869757 2178 put_rac(&s->c, &s->block_state[4 + s_context], 0);
155ec6ed
MN
2179 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
2180 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
2181 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
2182 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
2183 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
115329f1 2184
155ec6ed
MN
2185 if(score2 < score && score2 < iscore)
2186 return score2;
2187 }
115329f1 2188
155ec6ed 2189 if(iscore < score){
85fc0e75 2190 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
28869757 2191 memcpy(pbbak, i_buffer, i_len);
155ec6ed 2192 s->c= ic;
28869757
MN
2193 s->c.bytestream_start= pbbak_start;
2194 s->c.bytestream= pbbak + i_len;
8c36eaaa 2195 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
155ec6ed
MN
2196 memcpy(s->block_state, i_state, sizeof(s->block_state));
2197 return iscore;
2198 }else{
28869757 2199 memcpy(pbbak, p_buffer, p_len);
155ec6ed 2200 s->c= pc;
28869757
MN
2201 s->c.bytestream_start= pbbak_start;
2202 s->c.bytestream= pbbak + p_len;
8c36eaaa 2203 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
155ec6ed
MN
2204 memcpy(s->block_state, p_state, sizeof(s->block_state));
2205 return score;
2206 }
2207}
2208
849f1035 2209static av_always_inline int same_block(BlockNode *a, BlockNode *b){
51d6a3cf
MN
2210 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
2211 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
2212 }else{
8c36eaaa 2213 return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
51d6a3cf
MN
2214 }
2215}
2216
2217static void encode_q_branch2(SnowContext *s, int level, int x, int y){
2218 const int w= s->b_width << s->block_max_depth;
2219 const int rem_depth= s->block_max_depth - level;
2220 const int index= (x + y*w) << rem_depth;
2221 int trx= (x+1)<<rem_depth;
2222 BlockNode *b= &s->block[index];
aadcc5ce
PI
2223 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2224 const BlockNode *top = y ? &s->block[index-w] : &null_block;
2225 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2226 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
51d6a3cf
MN
2227 int pl = left->color[0];
2228 int pcb= left->color[1];
2229 int pcr= left->color[2];
85fc0e75 2230 int pmx, pmy;
8c36eaaa 2231 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
c26abfa5
DB
2232 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref;
2233 int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref;
51d6a3cf
MN
2234 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2235
2236 if(s->keyframe){
85fc0e75 2237 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
51d6a3cf
MN
2238 return;
2239 }
2240
2241 if(level!=s->block_max_depth){
2242 if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
d6f41eed
MN
2243 put_rac(&s->c, &s->block_state[4 + s_context], 1);
2244 }else{
51d6a3cf
MN
2245 put_rac(&s->c, &s->block_state[4 + s_context], 0);
2246 encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
2247 encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
2248 encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
2249 encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
2250 return;
51d6a3cf
MN
2251 }
2252 }
2253 if(b->type & BLOCK_INTRA){
85fc0e75 2254 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
51d6a3cf
MN
2255 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
2256 put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
2257 put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
2258 put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
8c36eaaa 2259 set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
51d6a3cf 2260 }else{
85fc0e75 2261 pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
51d6a3cf 2262 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
8c36eaaa
LM
2263 if(s->ref_frames > 1)
2264 put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
51d6a3cf
MN
2265 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
2266 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
8c36eaaa 2267 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
51d6a3cf
MN
2268 }
2269}
2270
155ec6ed
MN
2271static void decode_q_branch(SnowContext *s, int level, int x, int y){
2272 const int w= s->b_width << s->block_max_depth;
155ec6ed
MN
2273 const int rem_depth= s->block_max_depth - level;
2274 const int index= (x + y*w) << rem_depth;
155ec6ed 2275 int trx= (x+1)<<rem_depth;
aadcc5ce
PI
2276 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2277 const BlockNode *top = y ? &s->block[index-w] : &null_block;
2278 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2279 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
155ec6ed 2280 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
115329f1 2281
155ec6ed 2282 if(s->keyframe){
8c36eaaa 2283 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
155ec6ed
MN
2284 return;
2285 }
2286
28869757 2287 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
1e90b34f 2288 int type, mx, my;
155ec6ed
MN
2289 int l = left->color[0];
2290 int cb= left->color[1];
2291 int cr= left->color[2];
8c36eaaa
LM
2292 int ref = 0;
2293 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
c26abfa5
DB
2294 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
2295 int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
115329f1 2296
28869757 2297 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
155ec6ed
MN
2298
2299 if(type){
85fc0e75 2300 pred_mv(s, &mx, &my, 0, left, top, tr);
155ec6ed
MN
2301 l += get_symbol(&s->c, &s->block_state[32], 1);
2302 cb+= get_symbol(&s->c, &s->block_state[64], 1);
2303 cr+= get_symbol(&s->c, &s->block_state[96], 1);
2304 }else{
8c36eaaa
LM
2305 if(s->ref_frames > 1)
2306 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
85fc0e75 2307 pred_mv(s, &mx, &my, ref, left, top, tr);
8c36eaaa
LM
2308 mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
2309 my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
155ec6ed 2310 }
8c36eaaa 2311 set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
155ec6ed
MN
2312 }else{
2313 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
2314 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
2315 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
2316 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
2317 }
2318}
2319
74e6a8aa 2320static void encode_blocks(SnowContext *s, int search){
155ec6ed
MN
2321 int x, y;
2322 int w= s->b_width;
2323 int h= s->b_height;
2324
74e6a8aa 2325 if(s->avctx->me_method == ME_ITER && !s->keyframe && search)
51d6a3cf
MN
2326 iterative_me(s);
2327
155ec6ed 2328 for(y=0; y<h; y++){
d06c75a8 2329 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
0ecca7a4
MN
2330 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2331 return;
2332 }
155ec6ed 2333 for(x=0; x<w; x++){
74e6a8aa 2334 if(s->avctx->me_method == ME_ITER || !search)
51d6a3cf
MN
2335 encode_q_branch2(s, 0, x, y);
2336 else
2337 encode_q_branch (s, 0, x, y);
155ec6ed
MN
2338 }
2339 }
2340}
2341
2342static void decode_blocks(SnowContext *s){
2343 int x, y;
2344 int w= s->b_width;
2345 int h= s->b_height;
2346
2347 for(y=0; y<h; y++){
2348 for(x=0; x<w; x++){
2349 decode_q_branch(s, 0, x, y);
2350 }
2351 }
791e7b83
MN
2352}
2353
bad700e3 2354static void mc_block(uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
791e7b83 2355 int x, y;
3924dac4 2356START_TIMER
791e7b83
MN
2357 for(y=0; y < b_h+5; y++){
2358 for(x=0; x < b_w; x++){
3924dac4
MN
2359 int a0= src[x ];
2360 int a1= src[x + 1];
2361 int a2= src[x + 2];
2362 int a3= src[x + 3];
2363 int a4= src[x + 4];
2364 int a5= src[x + 5];
791e7b83
MN
2365// int am= 9*(a1+a2) - (a0+a3);
2366 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2367// int am= 18*(a2+a3) - 2*(a1+a4);
2368// int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2369// int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;
2370
2371// if(b_w==16) am= 8*(a1+a2);
2372
8c2515bb
Y
2373 if(dx<8) am = (32*a2*( 8-dx) + am* dx + 128)>>8;
2374 else am = ( am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
115329f1 2375
8c2515bb
Y
2376 /* FIXME Try increasing tmp buffer to 16 bits and not clipping here. Should give marginally better results. - Robert*/
2377 if(am&(~255)) am= ~(am>>31);
115329f1 2378
8c2515bb 2379 tmp[x] = am;
791e7b83
MN
2380
2381/* if (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) + aL* dx + 32)>>6;
2382 else if(dx< 8) tmp[x + y*stride]= ( aL*( 8-dx) + am*(dx- 4) + 32)>>6;
2383 else if(dx<12) tmp[x + y*stride]= ( am*(12-dx) + aR*(dx- 8) + 32)>>6;
2384 else tmp[x + y*stride]= ( aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/
2385 }
3924dac4
MN
2386 tmp += stride;
2387 src += stride;
791e7b83 2388 }
3924dac4 2389 tmp -= (b_h+5)*stride;
115329f1 2390
791e7b83
MN
2391 for(y=0; y < b_h; y++){
2392 for(x=0; x < b_w; x++){
3924dac4
MN
2393 int a0= tmp[x + 0*stride];
2394 int a1= tmp[x + 1*stride];
2395 int a2= tmp[x + 2*stride];
2396 int a3= tmp[x + 3*stride];
2397 int a4= tmp[x + 4*stride];
2398 int a5= tmp[x + 5*stride];
791e7b83
MN
2399 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2400// int am= 18*(a2+a3) - 2*(a1+a4);
2401/* int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2402 int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/
115329f1 2403
791e7b83
MN
2404// if(b_w==16) am= 8*(a1+a2);
2405
8c2515bb
Y
2406 if(dy<8) am = (32*a2*( 8-dy) + am* dy + 128)>>8;
2407 else am = ( am*(16-dy) + 32*a3*(dy-8) + 128)>>8;
791e7b83 2408
8c2515bb 2409 if(am&(~255)) am= ~(am>>31);
115329f1 2410
8c2515bb 2411 dst[x] = am;
791e7b83
MN
2412/* if (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) + aL* dy + 32)>>6;
2413 else if(dy< 8) tmp[x + y*stride]= ( aL*( 8-dy) + am*(dy- 4) + 32)>>6;
2414 else if(dy<12) tmp[x + y*stride]= ( am*(12-dy) + aR*(dy- 8) + 32)>>6;
2415 else tmp[x + y*stride]= ( aR*(16-dy) + 16*a2*(dy-12) + 32)>>6;*/
2416 }
3924dac4
MN
2417 dst += stride;
2418 tmp += stride;
791e7b83 2419 }
3924dac4 2420STOP_TIMER("mc_block")
791e7b83
MN
2421}
2422
791e7b83 2423#define mca(dx,dy,b_w)\
bad700e3 2424static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
791e7b83
MN
2425 uint8_t tmp[stride*(b_w+5)];\
2426 assert(h==b_w);\
2427 mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\
2428}
2429
2430mca( 0, 0,16)
2431mca( 8, 0,16)
2432mca( 0, 8,16)
2433mca( 8, 8,16)
d92b5807
MN
2434mca( 0, 0,8)
2435mca( 8, 0,8)
2436mca( 0, 8,8)
2437mca( 8, 8,8)
791e7b83 2438
8c36eaaa 2439static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
51d6a3cf 2440 if(block->type & BLOCK_INTRA){
ff158dc9 2441 int x, y;
2692ceab
MN
2442 const int color = block->color[plane_index];
2443 const int color4= color*0x01010101;
1015631b
LM
2444 if(b_w==32){
2445 for(y=0; y < b_h; y++){
2446 *(uint32_t*)&dst[0 + y*stride]= color4;
2447 *(uint32_t*)&dst[4 + y*stride]= color4;
2448 *(uint32_t*)&dst[8 + y*stride]= color4;
2449 *(uint32_t*)&dst[12+ y*stride]= color4;
2450 *(uint32_t*)&dst[16+ y*stride]= color4;
2451 *(uint32_t*)&dst[20+ y*stride]= color4;
2452 *(uint32_t*)&dst[24+ y*stride]= color4;
2453 *(uint32_t*)&dst[28+ y*stride]= color4;
2454 }
2455 }else if(b_w==16){
2692ceab
MN
2456 for(y=0; y < b_h; y++){
2457 *(uint32_t*)&dst[0 + y*stride]= color4;
2458 *(uint32_t*)&dst[4 + y*stride]= color4;
2459 *(uint32_t*)&dst[8 + y*stride]= color4;
2460 *(uint32_t*)&dst[12+ y*stride]= color4;
2461 }
2462 }else if(b_w==8){
2463 for(y=0; y < b_h; y++){
2464 *(uint32_t*)&dst[0 + y*stride]= color4;
2465 *(uint32_t*)&dst[4 + y*stride]= color4;
2466 }
2467 }else if(b_w==4){
2468 for(y=0; y < b_h; y++){
2469 *(uint32_t*)&dst[0 + y*stride]= color4;
2470 }
2471 }else{
2472 for(y=0; y < b_h; y++){
2473 for(x=0; x < b_w; x++){
2474 dst[x + y*stride]= color;
2475 }
ff158dc9
MN
2476 }
2477 }
2478 }else{
8c36eaaa 2479 uint8_t *src= s->last_picture[block->ref].data[plane_index];
ff158dc9
MN
2480 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
2481 int mx= block->mx*scale;
2482 int my= block->my*scale;
ec697587
MN
2483 const int dx= mx&15;
2484 const int dy= my&15;
80e44bc3 2485 const int tab_index= 3 - (b_w>>2) + (b_w>>4);
ff158dc9
MN
2486 sx += (mx>>4) - 2;
2487 sy += (my>>4) - 2;
2488 src += sx + sy*stride;
2489 if( (unsigned)sx >= w - b_w - 4
2490 || (unsigned)sy >= h - b_h - 4){
2491 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+5, b_h+5, sx, sy, w, h);
2492 src= tmp + MB_SIZE;
2493 }
87f20c2f
MN
2494// assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
2495// assert(!(b_w&(b_w-1)));
2692ceab 2496 assert(b_w>1 && b_h>1);
1015631b 2497 assert(tab_index>=0 && tab_index<4 || b_w==32);
87f20c2f 2498 if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)))
ec697587 2499 mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy);
1015631b
LM
2500 else if(b_w==32){
2501 int y;
2502 for(y=0; y<b_h; y+=16){
2503 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 2 + (y+2)*stride,stride);
2504 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 18 + (y+2)*stride,stride);
2505 }
2506 }else if(b_w==b_h)
80e44bc3 2507 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 2 + 2*stride,stride);
2692ceab 2508 else if(b_w==2*b_h){
80e44bc3
MN
2509 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 2 + 2*stride,stride);
2510 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 2 + b_h + 2*stride,stride);
2692ceab
MN
2511 }else{
2512 assert(2*b_w==b_h);
80e44bc3
MN
2513 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 2 + 2*stride ,stride);
2514 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 2 + 2*stride+b_w*stride,stride);
2692ceab 2515 }
ff158dc9
MN
2516 }
2517}
2518
9dd6c804 2519void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
059715a4
RE
2520 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
2521 int y, x;
2522 DWTELEM * dst;
2523 for(y=0; y<b_h; y++){
19032450 2524 //FIXME ugly misuse of obmc_stride
9dd6c804
PI
2525 const uint8_t *obmc1= obmc + y*obmc_stride;
2526 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2527 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2528 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
059715a4
RE
2529 dst = slice_buffer_get_line(sb, src_y + y);
2530 for(x=0; x<b_w; x++){
2531 int v= obmc1[x] * block[3][x + y*src_stride]
2532 +obmc2[x] * block[2][x + y*src_stride]
2533 +obmc3[x] * block[1][x + y*src_stride]
2534 +obmc4[x] * block[0][x + y*src_stride];
2535
2536 v <<= 8 - LOG2_OBMC_MAX;
2537 if(FRAC_BITS != 8){
2538 v += 1<<(7 - FRAC_BITS);
2539 v >>= 8 - FRAC_BITS;
2540 }
2541 if(add){
2542 v += dst[x + src_x];
2543 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2544 if(v&(~255)) v= ~(v>>31);
2545 dst8[x + y*src_stride] = v;
2546 }else{
2547 dst[x + src_x] -= v;
2548 }
2549 }
2550 }
2551}
2552
ff158dc9 2553//FIXME name clenup (b_w, block_w, b_width stuff)
849f1035 2554static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, DWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
a0d1931c
Y
2555 const int b_width = s->b_width << s->block_max_depth;
2556 const int b_height= s->b_height << s->block_max_depth;
2557 const int b_stride= b_width;
2558 BlockNode *lt= &s->block[b_x + b_y*b_stride];
2559 BlockNode *rt= lt+1;
2560 BlockNode *lb= lt+b_stride;
2561 BlockNode *rb= lb+1;
115329f1 2562 uint8_t *block[4];
cc884a35
MN
2563 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
2564 uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
2565 uint8_t *ptmp;
a0d1931c
Y
2566 int x,y;
2567
2568 if(b_x<0){
2569 lt= rt;
2570 lb= rb;
2571 }else if(b_x + 1 >= b_width){
2572 rt= lt;
2573 rb= lb;
2574 }
2575 if(b_y<0){
2576 lt= lb;
2577 rt= rb;
2578 }else if(b_y + 1 >= b_height){
2579 lb= lt;
2580 rb= rt;
2581 }
115329f1 2582
a0d1931c
Y
2583 if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
2584 obmc -= src_x;
2585 b_w += src_x;
f7e89c73 2586 if(!sliced && !offset_dst)
1015631b 2587 dst -= src_x;
ff158dc9
MN
2588 src_x=0;
2589 }else if(src_x + b_w > w){
2590 b_w = w - src_x;
2591 }
2592 if(src_y<0){
2593 obmc -= src_y*obmc_stride;
2594 b_h += src_y;
f7e89c73 2595 if(!sliced && !offset_dst)
1015631b 2596 dst -= src_y*dst_stride;
ff158dc9
MN
2597 src_y=0;
2598 }else if(src_y + b_h> h){
2599 b_h = h - src_y;
791e7b83 2600 }
115329f1 2601
ff158dc9 2602 if(b_w<=0 || b_h<=0) return;
155ec6ed 2603
cc884a35 2604assert(src_stride > 2*MB_SIZE + 5);
f7e89c73 2605 if(!sliced && offset_dst)
1015631b 2606 dst += src_x + src_y*dst_stride;
715a97f0 2607 dst8+= src_x + src_y*src_stride;
ff158dc9
MN
2608// src += src_x + src_y*src_stride;
2609
cc884a35
MN
2610 ptmp= tmp + 3*tmp_step;
2611 block[0]= ptmp;
2612 ptmp+=tmp_step;
8c36eaaa 2613 pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
ff158dc9
MN
2614
2615 if(same_block(lt, rt)){
2616 block[1]= block[0];
791e7b83 2617 }else{
cc884a35
MN
2618 block[1]= ptmp;
2619 ptmp+=tmp_step;
8c36eaaa 2620 pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
ff158dc9 2621 }
115329f1 2622
ff158dc9
MN
2623 if(same_block(lt, lb)){
2624 block[2]= block[0];
2625 }else if(same_block(rt, lb)){
2626 block[2]= block[1];
2627 }else{
cc884a35
MN
2628 block[2]= ptmp;
2629 ptmp+=tmp_step;
8c36eaaa 2630 pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
ff158dc9 2631 }
791e7b83 2632
ff158dc9
MN
2633 if(same_block(lt, rb) ){
2634 block[3]= block[0];
2635 }else if(same_block(rt, rb)){
2636 block[3]= block[1];
2637 }else if(same_block(lb, rb)){
2638 block[3]= block[2];
2639 }else{
cc884a35 2640 block[3]= ptmp;
8c36eaaa 2641 pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
ff158dc9
MN
2642 }
2643#if 0
2644 for(y=0; y<b_h; y++){
2645 for(x=0; x<b_w; x++){
2646 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2647 if(add) dst[x + y*dst_stride] += v;
2648 else dst[x + y*dst_stride] -= v;
2649 }
2650 }
2651 for(y=0; y<b_h; y++){
2652 uint8_t *obmc2= obmc + (obmc_stride>>1);
2653 for(x=0; x<b_w; x++){
2654 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2655 if(add) dst[x + y*dst_stride] += v;
2656 else dst[x + y*dst_stride] -= v;
2657 }
2658 }
2659 for(y=0; y<b_h; y++){
2660 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2661 for(x=0; x<b_w; x++){
2662 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2663 if(add) dst[x + y*dst_stride] += v;
2664 else dst[x + y*dst_stride] -= v;
2665 }
2666 }
2667 for(y=0; y<b_h; y++){
2668 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2669 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2670 for(x=0; x<b_w; x++){
2671 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2672 if(add) dst[x + y*dst_stride] += v;
2673 else dst[x + y*dst_stride] -= v;
2674 }
2675 }
2676#else
f7e89c73
LM
2677 if(sliced){
2678 START_TIMER
2679
2680 s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
2681 STOP_TIMER("inner_add_yblock")
2682 }else
ff158dc9 2683 for(y=0; y<b_h; y++){
19032450 2684 //FIXME ugly misuse of obmc_stride
9dd6c804
PI
2685 const uint8_t *obmc1= obmc + y*obmc_stride;
2686 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2687 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2688 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
ff158dc9
MN
2689 for(x=0; x<b_w; x++){
2690 int v= obmc1[x] * block[3][x + y*src_stride]
2691 +obmc2[x] * block[2][x + y*src_stride]
2692 +obmc3[x] * block[1][x + y*src_stride]
2693 +obmc4[x] * block[0][x + y*src_stride];
115329f1 2694
715a97f0 2695 v <<= 8 - LOG2_OBMC_MAX;
034aff03
MN
2696 if(FRAC_BITS != 8){
2697 v += 1<<(7 - FRAC_BITS);
2698 v >>= 8 - FRAC_BITS;
2699 }
715a97f0
MN
2700 if(add){
2701 v += dst[x + y*dst_stride];
2702 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2703 if(v&(~255)) v= ~(v>>31);
2704 dst8[x + y*src_stride] = v;
2705 }else{
2706 dst[x + y*dst_stride] -= v;
2707 }
791e7b83
MN
2708 }
2709 }
ff158dc9 2710#endif
791e7b83
MN
2711}
2712
849f1035 2713static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, DWTELEM * old_buffer, int plane_index, int add, int mb_y){
a0d1931c
Y
2714 Plane *p= &s->plane[plane_index];
2715 const int mb_w= s->b_width << s->block_max_depth;
2716 const int mb_h= s->b_height << s->block_max_depth;
2717 int x, y, mb_x;
2718 int block_size = MB_SIZE >> s->block_max_depth;
2719 int block_w = plane_index ? block_size/2 : block_size;
2720 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2721 int obmc_stride= plane_index ? block_size : 2*block_size;
2722 int ref_stride= s->current_picture.linesize[plane_index];
a0d1931c
Y
2723 uint8_t *dst8= s->current_picture.data[plane_index];
2724 int w= p->width;
2725 int h= p->height;
2726 START_TIMER
115329f1 2727
a0d1931c
Y
2728 if(s->keyframe || (s->avctx->debug&512)){
2729 if(mb_y==mb_h)
2730 return;
2731
2732 if(add){
86e59cc0 2733 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
a0d1931c
Y
2734 {
2735// DWTELEM * line = slice_buffer_get_line(sb, y);
2736 DWTELEM * line = sb->line[y];
2737 for(x=0; x<w; x++)
2738 {
2739// int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2740 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2741 v >>= FRAC_BITS;
2742 if(v&(~255)) v= ~(v>>31);
2743 dst8[x + y*ref_stride]= v;
2744 }
2745 }
2746 }else{
86e59cc0 2747 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
a0d1931c
Y
2748 {
2749// DWTELEM * line = slice_buffer_get_line(sb, y);
2750 DWTELEM * line = sb->line[y];
2751 for(x=0; x<w; x++)
2752 {
2753 line[x] -= 128 << FRAC_BITS;
2754// buf[x + y*w]-= 128<<FRAC_BITS;
2755 }
2756 }
2757 }
2758
2759 return;
2760 }
115329f1 2761
a0d1931c
Y
2762 for(mb_x=0; mb_x<=mb_w; mb_x++){
2763 START_TIMER
2764
f7e89c73 2765 add_yblock(s, 1, sb, old_buffer, dst8, obmc,
a0d1931c
Y
2766 block_w*mb_x - block_w/2,
2767 block_w*mb_y - block_w/2,
2768 block_w, block_w,
2769 w, h,
2770 w, ref_stride, obmc_stride,
2771 mb_x - 1, mb_y - 1,
f7e89c73 2772 add, 0, plane_index);
115329f1 2773
a0d1931c
Y
2774 STOP_TIMER("add_yblock")
2775 }
115329f1 2776
a0d1931c
Y
2777 STOP_TIMER("predict_slice")
2778}
2779
849f1035 2780static av_always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_index, int add, int mb_y){
791e7b83 2781 Plane *p= &s->plane[plane_index];
155ec6ed
MN
2782 const int mb_w= s->b_width << s->block_max_depth;
2783 const int mb_h= s->b_height << s->block_max_depth;
f9e6ebf7 2784 int x, y, mb_x;
155ec6ed
MN
2785 int block_size = MB_SIZE >> s->block_max_depth;
2786 int block_w = plane_index ? block_size/2 : block_size;
ff158dc9 2787 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
51d6a3cf 2788 const int obmc_stride= plane_index ? block_size : 2*block_size;
715a97f0 2789 int ref_stride= s->current_picture.linesize[plane_index];
715a97f0 2790 uint8_t *dst8= s->current_picture.data[plane_index];
791e7b83
MN
2791 int w= p->width;
2792 int h= p->height;
fff6d4ea 2793 START_TIMER
115329f1 2794
ff158dc9 2795 if(s->keyframe || (s->avctx->debug&512)){
f9e6ebf7
LM
2796 if(mb_y==mb_h)
2797 return;
2798
715a97f0 2799 if(add){
86e59cc0 2800 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
715a97f0
MN
2801 for(x=0; x<w; x++){
2802 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2803 v >>= FRAC_BITS;
2804 if(v&(~255)) v= ~(v>>31);
2805 dst8[x + y*ref_stride]= v;
2806 }
2807 }
2808 }else{
86e59cc0 2809 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
715a97f0
MN
2810 for(x=0; x<w; x++){
2811 buf[x + y*w]-= 128<<FRAC_BITS;
2812 }
ff158dc9 2813 }
791e7b83 2814 }
ff158dc9
MN
2815
2816 return;
791e7b83 2817 }
115329f1 2818
ff158dc9 2819 for(mb_x=0; mb_x<=mb_w; mb_x++){
fff6d4ea 2820 START_TIMER
ff158dc9 2821
f7e89c73 2822 add_yblock(s, 0, NULL, buf, dst8, obmc,
ff158dc9 2823 block_w*mb_x - block_w/2,
791e7b83 2824 block_w*mb_y - block_w/2,
ff158dc9 2825 block_w, block_w,
791e7b83 2826 w, h,
ff158dc9
MN
2827 w, ref_stride, obmc_stride,
2828 mb_x - 1, mb_y - 1,
1015631b 2829 add, 1, plane_index);
115329f1 2830
ff158dc9 2831 STOP_TIMER("add_yblock")
791e7b83 2832 }
115329f1 2833
f9e6ebf7
LM
2834 STOP_TIMER("predict_slice")
2835}
2836
849f1035 2837static av_always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){
f9e6ebf7
LM
2838 const int mb_h= s->b_height << s->block_max_depth;
2839 int mb_y;
2840 for(mb_y=0; mb_y<=mb_h; mb_y++)
2841 predict_slice(s, buf, plane_index, add, mb_y);
791e7b83
MN
2842}
2843
51d6a3cf
MN
2844static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
2845 int i, x2, y2;
2846 Plane *p= &s->plane[plane_index];
2847 const int block_size = MB_SIZE >> s->block_max_depth;
2848 const int block_w = plane_index ? block_size/2 : block_size;
2849 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2850 const int obmc_stride= plane_index ? block_size : 2*block_size;
2851 const int ref_stride= s->current_picture.linesize[plane_index];
51d6a3cf 2852 uint8_t *src= s-> input_picture.data[plane_index];
1015631b 2853 DWTELEM *dst= (DWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
51d6a3cf
MN
2854 const int b_stride = s->b_width << s->block_max_depth;
2855 const int w= p->width;
2856 const int h= p->height;
2857 int index= mb_x + mb_y*b_stride;
2858 BlockNode *b= &s->block[index];
2859 BlockNode backup= *b;
2860 int ab=0;
2861 int aa=0;
2862
2863 b->type|= BLOCK_INTRA;
2864 b->color[plane_index]= 0;
1015631b 2865 memset(dst, 0, obmc_stride*obmc_stride*sizeof(DWTELEM));
51d6a3cf
MN
2866
2867 for(i=0; i<4; i++){
2868 int mb_x2= mb_x + (i &1) - 1;
2869 int mb_y2= mb_y + (i>>1) - 1;
2870 int x= block_w*mb_x2 + block_w/2;
2871 int y= block_w*mb_y2 + block_w/2;
2872
f7e89c73 2873 add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
1015631b 2874 x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
51d6a3cf
MN
2875
2876 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
2877 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
2878 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
2879 int obmc_v= obmc[index];
1015631b 2880 int d;
51d6a3cf
MN
2881 if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
2882 if(x<0) obmc_v += obmc[index + block_w];
2883 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
2884 if(x+block_w>w) obmc_v += obmc[index - block_w];
2885 //FIXME precalc this or simplify it somehow else
2886
1015631b
LM
2887 d = -dst[index] + (1<<(FRAC_BITS-1));
2888 dst[index] = d;
2889 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
51d6a3cf
MN
2890 aa += obmc_v * obmc_v; //FIXME precalclate this
2891 }
2892 }
2893 }
2894 *b= backup;
2895
755bfeab 2896 return av_clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we should not need clipping
51d6a3cf
MN
2897}
2898
b104969f
LM
2899static inline int get_block_bits(SnowContext *s, int x, int y, int w){
2900 const int b_stride = s->b_width << s->block_max_depth;
2901 const int b_height = s->b_height<< s->block_max_depth;
2902 int index= x + y*b_stride;
aadcc5ce
PI
2903 const BlockNode *b = &s->block[index];
2904 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2905 const BlockNode *top = y ? &s->block[index-b_stride] : &null_block;
2906 const BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left;
2907 const BlockNode *tr = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
b104969f 2908 int dmx, dmy;
c26abfa5
DB
2909// int mx_context= av_log2(2*FFABS(left->mx - top->mx));
2910// int my_context= av_log2(2*FFABS(left->my - top->my));
b104969f
LM
2911
2912 if(x<0 || x>=b_stride || y>=b_height)
2913 return 0;
b104969f
LM
2914/*
29151 0 0
291601X 1-2 1
2917001XX 3-6 2-3
29180001XXX 7-14 4-7
291900001XXXX 15-30 8-15
2920*/
2921//FIXME try accurate rate
2922//FIXME intra and inter predictors if surrounding blocks arent the same type
2923 if(b->type & BLOCK_INTRA){
c26abfa5
DB
2924 return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0]))
2925 + av_log2(2*FFABS(left->color[1] - b->color[1]))
2926 + av_log2(2*FFABS(left->color[2] - b->color[2])));
85fc0e75
LM
2927 }else{
2928 pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
2929 dmx-= b->mx;
2930 dmy-= b->my;
c26abfa5
DB
2931 return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda
2932 + av_log2(2*FFABS(dmy))
8c36eaaa 2933 + av_log2(2*b->ref));
85fc0e75 2934 }
b104969f
LM
2935}
2936
1015631b 2937static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
51d6a3cf
MN
2938 Plane *p= &s->plane[plane_index];
2939 const int block_size = MB_SIZE >> s->block_max_depth;
2940 const int block_w = plane_index ? block_size/2 : block_size;
51d6a3cf
MN
2941 const int obmc_stride= plane_index ? block_size : 2*block_size;
2942 const int ref_stride= s->current_picture.linesize[plane_index];
51d6a3cf 2943 uint8_t *dst= s->current_picture.data[plane_index];
1015631b
LM
2944 uint8_t *src= s-> input_picture.data[plane_index];
2945 DWTELEM *pred= (DWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
2946 uint8_t cur[ref_stride*2*MB_SIZE]; //FIXME alignment
2947 uint8_t tmp[ref_stride*(2*MB_SIZE+5)];
51d6a3cf
MN
2948 const int b_stride = s->b_width << s->block_max_depth;
2949 const int b_height = s->b_height<< s->block_max_depth;
2950 const int w= p->width;
2951 const int h= p->height;
1015631b 2952 int distortion;
51d6a3cf
MN
2953 int rate= 0;
2954 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
1015631b
LM
2955 int sx= block_w*mb_x - block_w/2;
2956 int sy= block_w*mb_y - block_w/2;
561a18d3
RE
2957 int x0= FFMAX(0,-sx);
2958 int y0= FFMAX(0,-sy);
2959 int x1= FFMIN(block_w*2, w-sx);
2960 int y1= FFMIN(block_w*2, h-sy);
1015631b
LM
2961 int i,x,y;
2962
8c36eaaa 2963 pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
1015631b
LM
2964
2965 for(y=y0; y<y1; y++){
2966 const uint8_t *obmc1= obmc_edged + y*obmc_stride;
2967 const DWTELEM *pred1 = pred + y*obmc_stride;
2968 uint8_t *cur1 = cur + y*ref_stride;
2969 uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
2970 for(x=x0; x<x1; x++){
2971 int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
2972 v = (v + pred1[x]) >> FRAC_BITS;
2973 if(v&(~255)) v= ~(v>>31);
2974 dst1[x] = v;
51d6a3cf 2975 }
1015631b 2976 }
51d6a3cf 2977
561a18d3
RE
2978 /* copy the regions where obmc[] = (uint8_t)256 */
2979 if(LOG2_OBMC_MAX == 8
2980 && (mb_x == 0 || mb_x == b_stride-1)
2981 && (mb_y == 0 || mb_y == b_height-1)){
2982 if(mb_x == 0)
2983 x1 = block_w;
2984 else
2985 x0 = block_w;
2986 if(mb_y == 0)
2987 y1 = block_w;
2988 else
2989 y0 = block_w;
2990 for(y=y0; y<y1; y++)
2991 memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
2992 }
2993
1015631b 2994 if(block_w==16){
871371a7
LM
2995 /* FIXME rearrange dsputil to fit 32x32 cmp functions */
2996 /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
2997 /* FIXME cmps overlap but don't cover the wavelet's whole support,
2998 * so improving the score of one block is not strictly guaranteed to
2999 * improve the score of the whole frame, so iterative motion est
3000 * doesn't always converge. */
3001 if(s->avctx->me_cmp == FF_CMP_W97)
486497e0 3002 distortion = w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
871371a7 3003 else if(s->avctx->me_cmp == FF_CMP_W53)
486497e0 3004 distortion = w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
871371a7
LM
3005 else{
3006 distortion = 0;
3007 for(i=0; i<4; i++){
3008 int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
3009 distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
3010 }
1015631b
LM
3011 }
3012 }else{
3013 assert(block_w==8);
3014 distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
51d6a3cf
MN
3015 }
3016
3017 if(plane_index==0){
3018 for(i=0; i<4; i++){
3019/* ..RRr
3020 * .RXx.
3021 * rxx..
3022 */
b104969f
LM
3023 rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
3024 }
48d1b9a1
LM
3025 if(mb_x == b_stride-2)
3026 rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
b104969f
LM
3027 }
3028 return distortion + rate*penalty_factor;
3029}
3030
3031static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
3032 int i, y2;
3033 Plane *p= &s->plane[plane_index];
3034 const int block_size = MB_SIZE >> s->block_max_depth;
3035 const int block_w = plane_index ? block_size/2 : block_size;
3036 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
3037 const int obmc_stride= plane_index ? block_size : 2*block_size;
3038 const int ref_stride= s->current_picture.linesize[plane_index];
b104969f
LM
3039 uint8_t *dst= s->current_picture.data[plane_index];
3040 uint8_t *src= s-> input_picture.data[plane_index];
ed4e20ac 3041 static const DWTELEM zero_dst[4096]; //FIXME
b104969f 3042 const int b_stride = s->b_width << s->block_max_depth;
b104969f
LM
3043 const int w= p->width;
3044 const int h= p->height;
3045 int distortion= 0;
3046 int rate= 0;
3047 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
3048
3049 for(i=0; i<9; i++){
3050 int mb_x2= mb_x + (i%3) - 1;
3051 int mb_y2= mb_y + (i/3) - 1;
3052 int x= block_w*mb_x2 + block_w/2;
3053 int y= block_w*mb_y2 + block_w/2;
3054
f7e89c73 3055 add_yblock(s, 0, NULL, zero_dst, dst, obmc,
b104969f
LM
3056 x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
3057
3058 //FIXME find a cleaner/simpler way to skip the outside stuff
3059 for(y2= y; y2<0; y2++)
3060 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
3061 for(y2= h; y2<y+block_w; y2++)
3062 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
3063 if(x<0){
3064 for(y2= y; y2<y+block_w; y2++)
3065 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
51d6a3cf 3066 }
b104969f
LM
3067 if(x+block_w > w){
3068 for(y2= y; y2<y+block_w; y2++)
3069 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
3070 }
3071
3072 assert(block_w== 8 || block_w==16);
3073 distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
51d6a3cf
MN
3074 }
3075
b104969f
LM
3076 if(plane_index==0){
3077 BlockNode *b= &s->block[mb_x+mb_y*b_stride];
3078 int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
3079
3080/* ..RRRr
3081 * .RXXx.
3082 * .RXXx.
3083 * rxxx.
3084 */
3085 if(merged)
3086 rate = get_block_bits(s, mb_x, mb_y, 2);
3087 for(i=merged?4:0; i<9; i++){
3088 static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
3089 rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
3090 }
3091 }
51d6a3cf
MN
3092 return distortion + rate*penalty_factor;
3093}
3094
849f1035 3095static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
51d6a3cf
MN
3096 const int b_stride= s->b_width << s->block_max_depth;
3097 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
3098 BlockNode backup= *block;
3099 int rd, index, value;
3100
3101 assert(mb_x>=0 && mb_y>=0);
735f9f34 3102 assert(mb_x<b_stride);
51d6a3cf
MN
3103
3104 if(intra){
3105 block->color[0] = p[0];
3106 block->color[1] = p[1];
3107 block->color[2] = p[2];
3108 block->type |= BLOCK_INTRA;
3109 }else{
3110 index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
8c36eaaa 3111 value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12);
51d6a3cf
MN
3112 if(s->me_cache[index] == value)
3113 return 0;
3114 s->me_cache[index]= value;
3115
3116 block->mx= p[0];
3117 block->my= p[1];
3118 block->type &= ~BLOCK_INTRA;
3119 }
3120
1015631b 3121 rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
51d6a3cf
MN
3122
3123//FIXME chroma
3124 if(rd < *best_rd){
3125 *best_rd= rd;
3126 return 1;
3127 }else{
3128 *block= backup;
3129 return 0;
3130 }
3131}
3132
52137f2f 3133/* special case for int[2] args we discard afterward, fixes compilation prob with gcc 2.95 */
849f1035 3134static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
52137f2f 3135 int p[2] = {p0, p1};
fc8c4992 3136 return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
52137f2f
FR
3137}
3138
849f1035 3139static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
b104969f
LM
3140 const int b_stride= s->b_width << s->block_max_depth;
3141 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
3142 BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
3143 int rd, index, value;
3144
3145 assert(mb_x>=0 && mb_y>=0);
3146 assert(mb_x<b_stride);
3147 assert(((mb_x|mb_y)&1) == 0);
3148
3149 index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
8c36eaaa 3150 value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12);
b104969f
LM
3151 if(s->me_cache[index] == value)
3152 return 0;
3153 s->me_cache[index]= value;
3154
3155 block->mx= p0;
3156 block->my= p1;
8c36eaaa 3157 block->ref= ref;
b104969f
LM
3158 block->type &= ~BLOCK_INTRA;
3159 block[1]= block[b_stride]= block[b_stride+1]= *block;
3160
3161 rd= get_4block_rd(s, mb_x, mb_y, 0);
3162
3163//FIXME chroma
3164 if(rd < *best_rd){
3165 *best_rd= rd;
3166 return 1;
3167 }else{
3168 block[0]= backup[0];
3169 block[1]= backup[1];
3170 block[b_stride]= backup[2];
3171 block[b_stride+1]= backup[3];
3172 return 0;
3173 }
3174}
3175
51d6a3cf
MN
3176static void iterative_me(SnowContext *s){
3177 int pass, mb_x, mb_y;
3178 const int b_width = s->b_width << s->block_max_depth;
3179 const int b_height= s->b_height << s->block_max_depth;
3180 const int b_stride= b_width;
3181 int color[3];
3182
8f8ae495
LM
3183 {
3184 RangeCoder r = s->c;
3185 uint8_t state[sizeof(s->block_state)];
3186 memcpy(state, s->block_state, sizeof(s->block_state));
3187 for(mb_y= 0; mb_y<s->b_height; mb_y++)
3188 for(mb_x= 0; mb_x<s->b_width; mb_x++)
3189 encode_q_branch(s, 0, mb_x, mb_y);
3190 s->c = r;
3191 memcpy(s->block_state, state, sizeof(s->block_state));
3192 }
3193
871371a7 3194 for(pass=0; pass<25; pass++){
51d6a3cf
MN
3195 int change= 0;
3196
3197 for(mb_y= 0; mb_y<b_height; mb_y++){
3198 for(mb_x= 0; mb_x<b_width; mb_x++){
8c36eaaa
LM
3199 int dia_change, i, j, ref;
3200 int best_rd= INT_MAX, ref_rd;
3201 BlockNode backup, ref_b;
51d6a3cf
MN
3202 const int index= mb_x + mb_y * b_stride;
3203 BlockNode *block= &s->block[index];
7f21a9a7
LM
3204 BlockNode *tb = mb_y ? &s->block[index-b_stride ] : NULL;
3205 BlockNode *lb = mb_x ? &s->block[index -1] : NULL;
3206 BlockNode *rb = mb_x+1<b_width ? &s->block[index +1] : NULL;
3207 BlockNode *bb = mb_y+1<b_height ? &s->block[index+b_stride ] : NULL;
3208 BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : NULL;
3209 BlockNode *trb= mb_x+1<b_width && mb_y ? &s->block[index-b_stride+1] : NULL;
3210 BlockNode *blb= mb_x && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
3211 BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
1015631b
LM
3212 const int b_w= (MB_SIZE >> s->block_max_depth);
3213 uint8_t obmc_edged[b_w*2][b_w*2];
51d6a3cf
MN
3214
3215 if(pass && (block->type & BLOCK_OPT))
3216 continue;
3217 block->type |= BLOCK_OPT;
3218
3219 backup= *block;
3220
3221 if(!s->me_cache_generation)
3222 memset(s->me_cache, 0, sizeof(s->me_cache));
3223 s->me_cache_generation += 1<<22;
3224
1015631b
LM
3225 //FIXME precalc
3226 {
3227 int x, y;
3228 memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
3229 if(mb_x==0)
3230 for(y=0; y<b_w*2; y++)
3231 memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
3232 if(mb_x==b_stride-1)
3233 for(y=0; y<b_w*2; y++)
3234 memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
3235 if(mb_y==0){
3236 for(x=0; x<b_w*2; x++)
3237 obmc_edged[0][x] += obmc_edged[b_w-1][x];
3238 for(y=1; y<b_w; y++)
3239 memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
3240 }
3241 if(mb_y==b_height-1){
3242 for(x=0; x<b_w*2; x++)
3243 obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
3244 for(y=b_w; y<b_w*2-1; y++)
3245 memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
3246 }
3247 }
3248
3249 //skip stuff outside the picture
3250 if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1)
3251 {
3252 uint8_t *src= s-> input_picture.data[0];
3253 uint8_t *dst= s->current_picture.data[0];
3254 const int stride= s->current_picture.linesize[0];
3255 const int block_w= MB_SIZE >> s->block_max_depth;
3256 const int sx= block_w*mb_x - block_w/2;
3257 const int sy= block_w*mb_y - block_w/2;
3258 const int w= s->plane[0].width;
3259 const int h= s->plane[0].height;
3260 int y;
3261
3262 for(y=sy; y<0; y++)
3263 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3264 for(y=h; y<sy+block_w*2; y++)
3265 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3266 if(sx<0){
3267 for(y=sy; y<sy+block_w*2; y++)
3268 memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
3269 }
3270 if(sx+block_w*2 > w){
3271 for(y=sy; y<sy+block_w*2; y++)
3272 memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
3273 }
3274 }
3275
3276 // intra(black) = neighbors' contribution to the current block
3277 for(i=0; i<3; i++)
3278 color[i]= get_dc(s, mb_x, mb_y, i);
3279
755bfeab 3280 // get previous score (cannot be cached due to OBMC)
48d1b9a1
LM
3281 if(pass > 0 && (block->type&BLOCK_INTRA)){
3282 int color0[3]= {block->color[0], block->color[1], block->color[2]};
3283 check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
3284 }else
fc8c4992 3285 check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);
48d1b9a1 3286
8c36eaaa
LM
3287 ref_b= *block;
3288 ref_rd= best_rd;
3289 for(ref=0; ref < s->ref_frames; ref++){
3290 int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
3291 if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
3292 continue;
3293 block->ref= ref;
3294 best_rd= INT_MAX;
3295
3296 check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd);
3297 check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
7f21a9a7 3298 if(tb)
8c36eaaa 3299 check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd);
7f21a9a7 3300 if(lb)
8c36eaaa 3301 check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd);
7f21a9a7 3302 if(rb)
8c36eaaa 3303 check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd);
7f21a9a7 3304 if(bb)
8c36eaaa
LM
3305 check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd);
3306
3307 /* fullpel ME */
3308 //FIXME avoid subpel interpol / round to nearest integer
3309 do{
3310 dia_change=0;
3311 for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
3312 for(j=0; j<i; j++){
3313 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3314 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3315 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3316 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3317 }
51d6a3cf 3318 }
8c36eaaa
LM
3319 }while(dia_change);
3320 /* subpel ME */
3321 do{
3322 static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
3323 dia_change=0;
3324 for(i=0; i<8; i++)
3325 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
3326 }while(dia_change);
3327 //FIXME or try the standard 2 pass qpel or similar
3328
3329 mvr[0][0]= block->mx;
3330 mvr[0][1]= block->my;
3331 if(ref_rd > best_rd){
3332 ref_rd= best_rd;
3333 ref_b= *block;
51d6a3cf 3334 }
8c36eaaa
LM
3335 }
3336 best_rd= ref_rd;
3337 *block= ref_b;
13705b69 3338#if 1
1015631b 3339 check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
51d6a3cf 3340 //FIXME RD style color selection
13705b69 3341#endif
51d6a3cf 3342 if(!same_block(block, &backup)){
7f21a9a7
LM
3343 if(tb ) tb ->type &= ~BLOCK_OPT;
3344 if(lb ) lb ->type &= ~BLOCK_OPT;
3345 if(rb ) rb ->type &= ~BLOCK_OPT;
3346 if(bb ) bb ->type &= ~BLOCK_OPT;
3347 if(tlb) tlb->type &= ~BLOCK_OPT;
3348 if(trb) trb->type &= ~BLOCK_OPT;
3349 if(blb) blb->type &= ~BLOCK_OPT;
3350 if(brb) brb->type &= ~BLOCK_OPT;
51d6a3cf
MN
3351 change ++;
3352 }
3353 }
3354 }
3355 av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
3356 if(!change)
3357 break;
3358 }
b104969f
LM
3359
3360 if(s->block_max_depth == 1){
3361 int change= 0;
3362 for(mb_y= 0; mb_y<b_height; mb_y+=2){
3363 for(mb_x= 0; mb_x<b_width; mb_x+=2){
7f21a9a7 3364 int i;