Add comments to some #endif directives.
[libav.git] / libavcodec / snow.c
CommitLineData
791e7b83
MN
1/*
2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3 *
b78e7197
DB
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
791e7b83
MN
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
b78e7197 9 * version 2.1 of the License, or (at your option) any later version.
791e7b83 10 *
b78e7197 11 * FFmpeg is distributed in the hope that it will be useful,
791e7b83
MN
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
b78e7197 17 * License along with FFmpeg; if not, write to the Free Software
5509bffa 18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
791e7b83
MN
19 */
20
21#include "avcodec.h"
791e7b83 22#include "dsputil.h"
059715a4 23#include "snow.h"
28869757
MN
24
25#include "rangecoder.h"
791e7b83
MN
26
27#include "mpegvideo.h"
28
29#undef NDEBUG
30#include <assert.h>
31
791e7b83
MN
32static const int8_t quant3[256]={
33 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
34 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
35 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
36 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
37 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
42-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
43-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
44-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
45-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
46-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
47-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
48-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
49};
50static const int8_t quant3b[256]={
51 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
52 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
53 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
54 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
55 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
57 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
60-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
61-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
62-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
63-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
64-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
65-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
66-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67};
538a3841
MN
68static const int8_t quant3bA[256]={
69 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
70 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
71 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
72 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
73 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
74 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
75 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
76 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
77 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
78 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
79 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
80 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
81 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
82 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
83 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
84 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
85};
791e7b83
MN
86static const int8_t quant5[256]={
87 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
88 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
89 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
90 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
91 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
92 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
93 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
94 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
95-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
96-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
97-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
98-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
99-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
100-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
101-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
102-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
103};
104static const int8_t quant7[256]={
105 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
106 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
107 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
108 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
109 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
110 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
111 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
112 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
113-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
114-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
115-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
116-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
117-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
118-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
119-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
120-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
121};
122static const int8_t quant9[256]={
123 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
124 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
125 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
126 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
127 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
128 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
129 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
130 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
131-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
132-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
133-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
134-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
135-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
136-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
137-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
138-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
139};
140static const int8_t quant11[256]={
141 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
142 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
143 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
144 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
145 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
146 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
147 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
148 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
149-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
150-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
151-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
152-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
153-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
154-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
155-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
156-4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
157};
158static const int8_t quant13[256]={
159 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
160 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
161 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
162 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
163 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
164 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
165 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
166 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
167-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
168-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
169-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
170-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
171-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
172-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
173-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
174-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
175};
176
791e7b83
MN
177#if 0 //64*cubic
178static const uint8_t obmc32[1024]={
fa731ccd
MN
179 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
180 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
181 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
182 0, 0, 4, 4, 8, 8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12, 8, 8, 4, 4, 0, 0,
183 0, 0, 4, 8, 8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12, 8, 8, 4, 0, 0,
184 0, 4, 4, 8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12, 8, 4, 4, 0,
185 0, 4, 4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12, 4, 4, 0,
186 0, 4, 8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16, 8, 4, 0,
187 0, 4, 8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16, 8, 4, 0,
188 0, 4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12, 4, 0,
189 0, 4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12, 4, 0,
190 0, 4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12, 4, 0,
191 0, 4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16, 4, 0,
192 0, 8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16, 8, 0,
193 0, 4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16, 4, 0,
194 1, 8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16, 8, 1,
195 1, 8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16, 8, 1,
196 0, 4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16, 4, 0,
197 0, 8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16, 8, 0,
198 0, 4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16, 4, 0,
199 0, 4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12, 4, 0,
200 0, 4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12, 4, 0,
201 0, 4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12, 4, 0,
202 0, 4, 8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16, 8, 4, 0,
203 0, 4, 8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16, 8, 4, 0,
204 0, 4, 4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12, 4, 4, 0,
205 0, 4, 4, 8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12, 8, 4, 4, 0,
206 0, 0, 4, 8, 8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12, 8, 8, 4, 0, 0,
207 0, 0, 4, 4, 8, 8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12, 8, 8, 4, 4, 0, 0,
208 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
209 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
210 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
791e7b83
MN
211//error:0.000022
212};
213static const uint8_t obmc16[256]={
fa731ccd
MN
214 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
215 0, 4, 4, 8, 16, 20, 20, 24, 24, 20, 20, 16, 8, 4, 4, 0,
216 0, 4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16, 4, 0,
217 0, 8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24, 8, 0,
218 0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16, 0,
219 0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20, 0,
220 4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20, 4,
221 4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24, 4,
222 4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24, 4,
223 4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20, 4,
224 0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20, 0,
225 0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16, 0,
226 0, 8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24, 8, 0,
227 0, 4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16, 4, 0,
228 0, 4, 4, 8, 16, 20, 20, 24, 24, 20, 20, 16, 8, 4, 4, 0,
229 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
791e7b83
MN
230//error:0.000033
231};
232#elif 1 // 64*linear
233static const uint8_t obmc32[1024]={
561a18d3
RE
234 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
235 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
236 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
237 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
238 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
239 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
240 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
241 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
242 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
243 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
244 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
245 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
246 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
247 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
248 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
249 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
250 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
251 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
252 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
253 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
254 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
255 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
256 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
257 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
258 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
259 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
260 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
261 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
262 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
263 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
264 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
265 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
791e7b83
MN
266 //error:0.000020
267};
268static const uint8_t obmc16[256]={
561a18d3
RE
269 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
270 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
271 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
272 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
273 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
274 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
275 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
276 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
277 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
278 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
279 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
280 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
281 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
282 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
283 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
284 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
791e7b83
MN
285//error:0.000015
286};
287#else //64*cos
288static const uint8_t obmc32[1024]={
fa731ccd
MN
289 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
290 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
291 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
292 0, 0, 4, 4, 4, 8, 8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12, 8, 8, 4, 4, 4, 0, 0,
293 0, 0, 4, 4, 8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12, 8, 4, 4, 0, 0,
294 0, 0, 4, 8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12, 8, 4, 0, 0,
295 0, 4, 4, 8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16, 8, 4, 4, 0,
296 0, 4, 8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12, 8, 4, 0,
297 0, 4, 8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16, 8, 4, 0,
298 0, 4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12, 4, 0,
299 0, 4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12, 4, 0,
300 0, 4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12, 4, 0,
301 0, 4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12, 4, 0,
302 0, 4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12, 4, 0,
303 0, 4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16, 4, 0,
304 1, 4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16, 4, 1,
305 1, 4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16, 4, 1,
306 0, 4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16, 4, 0,
307 0, 4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12, 4, 0,
308 0, 4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12, 4, 0,
309 0, 4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12, 4, 0,
310 0, 4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12, 4, 0,
311 0, 4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12, 4, 0,
312 0, 4, 8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16, 8, 4, 0,
313 0, 4, 8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12, 8, 4, 0,
314 0, 4, 4, 8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16, 8, 4, 4, 0,
315 0, 0, 4, 8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12, 8, 4, 0, 0,
316 0, 0, 4, 4, 8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12, 8, 4, 4, 0, 0,
317 0, 0, 4, 4, 4, 8, 8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12, 8, 8, 4, 4, 4, 0, 0,
318 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
319 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
320 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
791e7b83
MN
321//error:0.000022
322};
323static const uint8_t obmc16[256]={
fa731ccd
MN
324 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
325 0, 0, 4, 8, 12, 16, 20, 20, 20, 20, 16, 12, 8, 4, 0, 0,
326 0, 4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12, 4, 0,
327 0, 8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24, 8, 0,
328 0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12, 0,
329 4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16, 4,
330 4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20, 4,
331 0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20, 0,
332 0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20, 0,
333 4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20, 4,
334 4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16, 4,
335 0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12, 0,
336 0, 8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24, 8, 0,
337 0, 4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12, 4, 0,
338 0, 0, 4, 8, 12, 16, 20, 20, 20, 20, 16, 12, 8, 4, 0, 0,
339 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
791e7b83
MN
340//error:0.000022
341};
96e2fbf2 342#endif /* 0 */
791e7b83 343
155ec6ed
MN
344//linear *64
/* 8x8 weight table stored row by row (64 entries).
 * Entry (r,c) equals 4*w[r]*w[c] with w = {1,3,5,7,7,5,3,1}, so the table is
 * symmetric both horizontally and vertically. It is slot 2 of obmc_tab. */
345static const uint8_t obmc8[64]={
561a18d3
RE
346 4, 12, 20, 28, 28, 20, 12, 4,
347 12, 36, 60, 84, 84, 60, 36, 12,
348 20, 60,100,140,140,100, 60, 20,
349 28, 84,140,196,196,140, 84, 28,
350 28, 84,140,196,196,140, 84, 28,
351 20, 60,100,140,140,100, 60, 20,
352 12, 36, 60, 84, 84, 60, 36, 12,
353 4, 12, 20, 28, 28, 20, 12, 4,
155ec6ed
MN
354//error:0.000000
355};
356
357//linear *64
/* 4x4 weight table stored row by row (16 entries).
 * Entry (r,c) equals 16*w[r]*w[c] with w = {1,3,3,1}, so the table is
 * symmetric both horizontally and vertically. It is slot 3 of obmc_tab. */
358static const uint8_t obmc4[16]={
561a18d3
RE
359 16, 48, 48, 16,
360 48,144,144, 48,
361 48,144,144, 48,
362 16, 48, 48, 16,
155ec6ed
MN
363//error:0.000000
364};
365
/* Weight tables ordered from the largest to the smallest window:
 * index 0 -> obmc32 (32x32), 1 -> obmc16 (16x16), 2 -> obmc8 (8x8), 3 -> obmc4 (4x4). */
366static const uint8_t *obmc_tab[4]={
367 obmc32, obmc16, obmc8, obmc4
368};
369
85fc0e75
LM
/* MAX_REF_FRAMES x MAX_REF_FRAMES table of ints, zero-initialised as a static object.
 * NOTE(review): presumably per-reference-pair motion-vector scale factors filled in
 * elsewhere in the file -- confirm against the code that writes it. */
370static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
371
155ec6ed
MN
/* Per-block record.
 * NOTE(review): mx/my look like the two motion-vector components and ref like an
 * index into the reference-frame list -- confirm against the users of BlockNode. */
372typedef struct BlockNode{
373 int16_t mx;
374 int16_t my;
8c36eaaa 375 uint8_t ref;
155ec6ed
MN
// One value per colour component (three in total).
376 uint8_t color[3];
// Bit flags; the values defined just below (BLOCK_INTRA, BLOCK_OPT) are the ones in use.
377 uint8_t type;
378//#define TYPE_SPLIT 1
379#define BLOCK_INTRA 1
51d6a3cf 380#define BLOCK_OPT 2
155ec6ed
MN
381//#define TYPE_NOCOLOR 4
382 uint8_t level; //FIXME merge into type?
383}BlockNode;
384
51d6a3cf
MN
/* Constant default block: colour {128,128,128}, mx = my = 0, reference 0,
 * no type flags and level 0.
 * NOTE(review): presumably used where a real neighbouring block is not available --
 * confirm against callers. */
385static const BlockNode null_block= { //FIXME add border maybe
386 .color= {128,128,128},
387 .mx= 0,
388 .my= 0,
8c36eaaa 389 .ref= 0,
51d6a3cf
MN
390 .type= 0,
391 .level= 0,
392};
393
155ec6ed
MN
// log2 of MB_SIZE; MB_SIZE therefore evaluates to 16.
394#define LOG2_MB_SIZE 4
395#define MB_SIZE (1<<LOG2_MB_SIZE)
// NOTE(review): its use is not visible in this part of the file -- check the users before changing it.
b538791b 396#define ENCODER_EXTRA_BITS 4
// Sizes the hcoeff / last_hcoeff arrays in Plane, which hold HTAPS_MAX/2 entries each.
61d6e445 397#define HTAPS_MAX 8
155ec6ed 398
a0d1931c
Y
/* Pair of a signed 16-bit x value and an unsigned 16-bit coeff value.
 * SubBand keeps a pointer to these in its x_coeff field.
 * NOTE(review): presumably a sparse (position, coefficient) entry -- confirm against users. */
399typedef struct x_and_coeff{
400 int16_t x;
538a3841 401 uint16_t coeff;
a0d1931c
Y
402} x_and_coeff;
403
791e7b83
MN
/* Description of one subband: its geometry, quantiser, sample buffers (DWTELEM and
 * IDWTELEM views), an optional link to a parent subband and its own bank of
 * range-coder context bytes. */
404typedef struct SubBand{
405 int level;
406 int stride;
407 int width;
408 int height;
409 int qlog; ///< log(qscale)/log[2^(1/6)]
410 DWTELEM *buf;
d593e329 411 IDWTELEM *ibuf;
a0d1931c
Y
412 int buf_x_offset;
413 int buf_y_offset;
414 int stride_line; ///< Stride measured in lines, not pixels.
415 x_and_coeff * x_coeff;
791e7b83
MN
416 struct SubBand *parent;
// 7 + 512 context sets of 32 bytes each (32 matches the state size used by put_symbol/get_symbol).
417 uint8_t state[/*7*2*/ 7 + 512][32];
418}SubBand;
419
/* Per-plane state: plane dimensions, up to MAX_DECOMPOSITIONS levels of four
 * subbands each, and a set of filter/motion-compensation parameters (htaps,
 * hcoeff, diag_mc, fast_mc).
 * NOTE(review): the last_* members mirror htaps/hcoeff/diag_mc and presumably hold
 * the previously used values -- confirm against the header coding. */
420typedef struct Plane{
421 int width;
422 int height;
423 SubBand band[MAX_DECOMPOSITIONS][4];
7d7f57d9
MN
424
425 int htaps;
61d6e445 426 int8_t hcoeff[HTAPS_MAX/2];
7d7f57d9
MN
427 int diag_mc;
428 int fast_mc;
429
430 int last_htaps;
61d6e445 431 int8_t last_hcoeff[HTAPS_MAX/2];
7d7f57d9 432 int last_diag_mc;
791e7b83
MN
433}Plane;
434
/* Codec-wide state: the owning AVCodecContext, the range coder and DSP helpers,
 * the picture set (new/input/current/reference/mc-only), header and block context
 * bytes, stream parameters, per-plane data, the block array, a motion-estimation
 * cache and the slice buffer.
 * NOTE(review): each last_* member pairs with the like-named member without the
 * prefix and presumably keeps its previous value -- confirm against the header code. */
435typedef struct SnowContext{
eafcac6a 436// MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
791e7b83
MN
437
438 AVCodecContext *avctx;
28869757 439 RangeCoder c;
791e7b83 440 DSPContext dsp;
51d6a3cf
MN
441 AVFrame new_picture;
442 AVFrame input_picture; ///< new_picture with the internal linesizes
791e7b83 443 AVFrame current_picture;
8c36eaaa 444 AVFrame last_picture[MAX_REF_FRAMES];
5be3a818 445 uint8_t *halfpel_plane[MAX_REF_FRAMES][4][4];
791e7b83
MN
446 AVFrame mconly_picture;
447// uint8_t q_context[16];
// Context bytes for the range coder; 32 matches the state size used by put_symbol/get_symbol.
448 uint8_t header_state[32];
155ec6ed 449 uint8_t block_state[128 + 32*128];
791e7b83 450 int keyframe;
19aa028d 451 int always_reset;
791e7b83
MN
452 int version;
453 int spatial_decomposition_type;
396a5e68 454 int last_spatial_decomposition_type;
791e7b83
MN
455 int temporal_decomposition_type;
456 int spatial_decomposition_count;
8db13728 457 int last_spatial_decomposition_count;
791e7b83 458 int temporal_decomposition_count;
8c36eaaa
LM
459 int max_ref_frames;
460 int ref_frames;
// One pointer per reference frame; each ref_mvs entry points at an array of int16_t[2] pairs.
461 int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
462 uint32_t *ref_scores[MAX_REF_FRAMES];
791e7b83 463 DWTELEM *spatial_dwt_buffer;
d593e329 464 IDWTELEM *spatial_idwt_buffer;
791e7b83
MN
465 int colorspace_type;
466 int chroma_h_shift;
467 int chroma_v_shift;
468 int spatial_scalability;
469 int qlog;
396a5e68 470 int last_qlog;
155ec6ed
MN
471 int lambda;
472 int lambda2;
4e64bead 473 int pass1_rc;
791e7b83 474 int mv_scale;
396a5e68 475 int last_mv_scale;
791e7b83 476 int qbias;
396a5e68 477 int last_qbias;
791e7b83 478#define QBIAS_SHIFT 3
155ec6ed
MN
479 int b_width;
480 int b_height;
481 int block_max_depth;
396a5e68 482 int last_block_max_depth;
791e7b83 483 Plane plane[MAX_PLANES];
155ec6ed 484 BlockNode *block;
51d6a3cf
MN
485#define ME_CACHE_SIZE 1024
486 int me_cache[ME_CACHE_SIZE];
487 int me_cache_generation;
a0d1931c 488 slice_buffer sb;
155ec6ed 489
eafcac6a 490 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
791e7b83
MN
491}SnowContext;
492
/* Four IDWTELEM line pointers (b0..b3) plus an integer y.
 * NOTE(review): presumably the rolling line window and current row of an
 * incremental inverse-DWT pass -- confirm against the compose functions. */
f9e6ebf7 493typedef struct {
d593e329
MN
494 IDWTELEM *b0;
495 IDWTELEM *b1;
496 IDWTELEM *b2;
497 IDWTELEM *b3;
f9e6ebf7
LM
498 int y;
499} dwt_compose_t;
500
a0d1931c
Y
/* Fast path for fetching a line: yields the pointer already stored in
 * (slice_buf)->line[line_num] when it is non-NULL and only calls
 * slice_buffer_load_line() otherwise.
 * Both arguments are expanded more than once, so they must not have side effects. */
501#define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
502//#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
503
51d6a3cf
MN
// Forward declaration; the definition is not in this part of the file.
504static void iterative_me(SnowContext *s);
505
d593e329 506static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM * base_buffer)
a0d1931c
Y
507{
508 int i;
115329f1 509
a0d1931c
Y
510 buf->base_buffer = base_buffer;
511 buf->line_count = line_count;
512 buf->line_width = line_width;
513 buf->data_count = max_allocated_lines;
d593e329
MN
514 buf->line = av_mallocz (sizeof(IDWTELEM *) * line_count);
515 buf->data_stack = av_malloc (sizeof(IDWTELEM *) * max_allocated_lines);
115329f1 516
a0d1931c
Y
517 for (i = 0; i < max_allocated_lines; i++)
518 {
d593e329 519 buf->data_stack[i] = av_malloc (sizeof(IDWTELEM) * line_width);
a0d1931c 520 }
115329f1 521
a0d1931c
Y
522 buf->data_stack_top = max_allocated_lines - 1;
523}
524
d593e329 525static IDWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
a0d1931c 526{
a0d1931c 527 int offset;
d593e329 528 IDWTELEM * buffer;
115329f1
DB
529
530// av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line);
531
a0d1931c
Y
532 assert(buf->data_stack_top >= 0);
533// assert(!buf->line[line]);
534 if (buf->line[line])
535 return buf->line[line];
115329f1 536
a0d1931c
Y
537 offset = buf->line_width * line;
538 buffer = buf->data_stack[buf->data_stack_top];
539 buf->data_stack_top--;
540 buf->line[line] = buffer;
115329f1 541
a0d1931c 542// av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
115329f1 543
a0d1931c
Y
544 return buffer;
545}
546
547static void slice_buffer_release(slice_buffer * buf, int line)
548{
a0d1931c 549 int offset;
d593e329 550 IDWTELEM * buffer;
a0d1931c
Y
551
552 assert(line >= 0 && line < buf->line_count);
553 assert(buf->line[line]);
554
555 offset = buf->line_width * line;
556 buffer = buf->line[line];
557 buf->data_stack_top++;
558 buf->data_stack[buf->data_stack_top] = buffer;
559 buf->line[line] = NULL;
115329f1 560
a0d1931c
Y
561// av_log(NULL, AV_LOG_DEBUG, "slice_buffer_release: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
562}
563
564static void slice_buffer_flush(slice_buffer * buf)
565{
566 int i;
567 for (i = 0; i < buf->line_count; i++)
568 {
569 if (buf->line[i])
570 {
571// av_log(NULL, AV_LOG_DEBUG, "slice_buffer_flush: line: %d \n", i);
572 slice_buffer_release(buf, i);
573 }
574 }
575}
576
577static void slice_buffer_destroy(slice_buffer * buf)
578{
579 int i;
580 slice_buffer_flush(buf);
115329f1 581
a0d1931c
Y
582 for (i = buf->data_count - 1; i >= 0; i--)
583 {
e7c8206e 584 av_freep(&buf->data_stack[i]);
a0d1931c 585 }
e7c8206e 586 av_freep(&buf->data_stack);
e7c8206e 587 av_freep(&buf->line);
a0d1931c
Y
588}
589
bb270c08 590#ifdef __sgi
2554db9b 591// Avoid a name clash on SGI IRIX
bb270c08 592#undef qexp
2554db9b 593#endif /* __sgi */
// Shift amount derived from FRAC_BITS (defined outside this part of the file); equals 15-FRAC_BITS.
034aff03 594#define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
// QROOT-entry byte table; it is not const, so presumably it is filled at initialisation -- confirm.
c97de57c 595static uint8_t qexp[QROOT];
791e7b83
MN
596
/**
 * Reflect v into the closed range [0, m].
 *
 * Values outside the range are folded back by repeated reflection at 0 and
 * at m, e.g. mirror(-1, 5) == 1, mirror(6, 5) == 4, mirror(11, 5) == 1.
 * Values already inside the range are returned unchanged.
 *
 * A range of m == 0 contains only the value 0, so 0 is returned for every v;
 * without this guard the reflection loop would never terminate for v != 0,
 * because v would just alternate between +v and -v.
 *
 * @param v value to fold
 * @param m upper bound of the range (inclusive)
 * @return the reflected value
 */
static inline int mirror(int v, int m){
    if(m == 0)
        return 0;

    // The unsigned compare treats negative v as "too large" as well.
    while((unsigned)v > (unsigned)m){
        v=-v;
        if(v<0) v+= 2*m;
    }
    return v;
}
604
28869757 605static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
791e7b83
MN
606 int i;
607
608 if(v){
c26abfa5 609 const int a= FFABS(v);
791e7b83
MN
610 const int e= av_log2(a);
611#if 1
115329f1 612 const int el= FFMIN(e, 10);
28869757 613 put_rac(c, state+0, 0);
791e7b83
MN
614
615 for(i=0; i<el; i++){
28869757 616 put_rac(c, state+1+i, 1); //1..10
791e7b83
MN
617 }
618 for(; i<e; i++){
28869757 619 put_rac(c, state+1+9, 1); //1..10
791e7b83 620 }
28869757 621 put_rac(c, state+1+FFMIN(i,9), 0);
791e7b83
MN
622
623 for(i=e-1; i>=el; i--){
28869757 624 put_rac(c, state+22+9, (a>>i)&1); //22..31
791e7b83
MN
625 }
626 for(; i>=0; i--){
28869757 627 put_rac(c, state+22+i, (a>>i)&1); //22..31
791e7b83
MN
628 }
629
630 if(is_signed)
28869757 631 put_rac(c, state+11 + el, v < 0); //11..21
791e7b83 632#else
115329f1 633
28869757 634 put_rac(c, state+0, 0);
791e7b83
MN
635 if(e<=9){
636 for(i=0; i<e; i++){
28869757 637 put_rac(c, state+1+i, 1); //1..10
791e7b83 638 }
28869757 639 put_rac(c, state+1+i, 0);
791e7b83
MN
640
641 for(i=e-1; i>=0; i--){
28869757 642 put_rac(c, state+22+i, (a>>i)&1); //22..31
791e7b83
MN
643 }
644
645 if(is_signed)
28869757 646 put_rac(c, state+11 + e, v < 0); //11..21
791e7b83
MN
647 }else{
648 for(i=0; i<e; i++){
28869757 649 put_rac(c, state+1+FFMIN(i,9), 1); //1..10
791e7b83 650 }
28869757 651 put_rac(c, state+1+FFMIN(i,9), 0);
791e7b83
MN
652
653 for(i=e-1; i>=0; i--){
28869757 654 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
791e7b83
MN
655 }
656
657 if(is_signed)
28869757 658 put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
791e7b83 659 }
96e2fbf2 660#endif /* 1 */
791e7b83 661 }else{
28869757 662 put_rac(c, state+0, 1);
791e7b83
MN
663 }
664}
665
28869757
MN
666static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
667 if(get_rac(c, state+0))
791e7b83
MN
668 return 0;
669 else{
7c2425d2
LM
670 int i, e, a;
671 e= 0;
28869757 672 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
7c2425d2 673 e++;
791e7b83 674 }
7c2425d2 675
791e7b83 676 a= 1;
7c2425d2 677 for(i=e-1; i>=0; i--){
28869757 678 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
791e7b83
MN
679 }
680
28869757 681 if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
791e7b83
MN
682 return -a;
683 else
684 return a;
685 }
686}
687
28869757 688static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
4f4e9633 689 int i;
0635cbfc 690 int r= log2>=0 ? 1<<log2 : 1;
4f4e9633
MN
691
692 assert(v>=0);
0635cbfc
MN
693 assert(log2>=-4);
694
695 while(v >= r){
28869757 696 put_rac(c, state+4+log2, 1);
0635cbfc 697 v -= r;
4f4e9633 698 log2++;
0635cbfc 699 if(log2>0) r+=r;
4f4e9633 700 }
28869757 701 put_rac(c, state+4+log2, 0);
115329f1 702
4f4e9633 703 for(i=log2-1; i>=0; i--){
28869757 704 put_rac(c, state+31-i, (v>>i)&1);
4f4e9633 705 }
4f4e9633
MN
706}
707
28869757 708static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
4f4e9633 709 int i;
0635cbfc 710 int r= log2>=0 ? 1<<log2 : 1;
4f4e9633
MN
711 int v=0;
712
0635cbfc
MN
713 assert(log2>=-4);
714
28869757 715 while(get_rac(c, state+4+log2)){
0635cbfc 716 v+= r;
4f4e9633 717 log2++;
0635cbfc 718 if(log2>0) r+=r;
4f4e9633 719 }
115329f1 720
4f4e9633 721 for(i=log2-1; i>=0; i--){
28869757 722 v+= get_rac(c, state+31-i)<<i;
4f4e9633
MN
723 }
724
725 return v;
726}
727
9d14ffbc
LB
728static av_always_inline void
729lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
730 int dst_step, int src_step, int ref_step,
731 int width, int mul, int add, int shift,
732 int highpass, int inverse){
791e7b83
MN
733 const int mirror_left= !highpass;
734 const int mirror_right= (width&1) ^ highpass;
735 const int w= (width>>1) - 1 + (highpass & width);
736 int i;
737
738#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
739 if(mirror_left){
740 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
741 dst += dst_step;
742 src += src_step;
743 }
115329f1 744
791e7b83 745 for(i=0; i<w; i++){
9d14ffbc
LB
746 dst[i*dst_step] =
747 LIFT(src[i*src_step],
748 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
749 inverse);
791e7b83 750 }
115329f1 751
791e7b83 752 if(mirror_right){
9d14ffbc
LB
753 dst[w*dst_step] =
754 LIFT(src[w*src_step],
755 ((mul*2*ref[w*ref_step]+add)>>shift),
756 inverse);
791e7b83
MN
757 }
758}
759
9d14ffbc
LB
760static av_always_inline void
761inv_lift(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
762 int dst_step, int src_step, int ref_step,
763 int width, int mul, int add, int shift,
764 int highpass, int inverse){
d593e329
MN
765 const int mirror_left= !highpass;
766 const int mirror_right= (width&1) ^ highpass;
767 const int w= (width>>1) - 1 + (highpass & width);
768 int i;
769
770#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
771 if(mirror_left){
772 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
773 dst += dst_step;
774 src += src_step;
775 }
776
777 for(i=0; i<w; i++){
9d14ffbc
LB
778 dst[i*dst_step] =
779 LIFT(src[i*src_step],
780 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
781 inverse);
d593e329
MN
782 }
783
784 if(mirror_right){
9d14ffbc
LB
785 dst[w*dst_step] =
786 LIFT(src[w*src_step],
787 ((mul*2*ref[w*ref_step]+add)>>shift),
788 inverse);
d593e329
MN
789 }
790}
791
059715a4 792#ifndef liftS
9d14ffbc
LB
793static av_always_inline void
794liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
795 int dst_step, int src_step, int ref_step,
796 int width, int mul, int add, int shift,
797 int highpass, int inverse){
f5a71928
MN
798 const int mirror_left= !highpass;
799 const int mirror_right= (width&1) ^ highpass;
800 const int w= (width>>1) - 1 + (highpass & width);
801 int i;
802
803 assert(shift == 4);
9d14ffbc
LB
804#define LIFTS(src, ref, inv) \
805 ((inv) ? \
806 (src) + (((ref) + 4*(src))>>shift): \
807 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
f5a71928
MN
808 if(mirror_left){
809 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
810 dst += dst_step;
811 src += src_step;
812 }
115329f1 813
f5a71928 814 for(i=0; i<w; i++){
9d14ffbc
LB
815 dst[i*dst_step] =
816 LIFTS(src[i*src_step],
817 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
818 inverse);
f5a71928 819 }
115329f1 820
f5a71928 821 if(mirror_right){
9d14ffbc
LB
822 dst[w*dst_step] =
823 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
f5a71928
MN
824 }
825}
9d14ffbc
LB
826static av_always_inline void
827inv_liftS(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
828 int dst_step, int src_step, int ref_step,
829 int width, int mul, int add, int shift,
830 int highpass, int inverse){
d593e329
MN
831 const int mirror_left= !highpass;
832 const int mirror_right= (width&1) ^ highpass;
833 const int w= (width>>1) - 1 + (highpass & width);
834 int i;
835
836 assert(shift == 4);
9d14ffbc
LB
837#define LIFTS(src, ref, inv) \
838 ((inv) ? \
839 (src) + (((ref) + 4*(src))>>shift): \
840 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
d593e329
MN
841 if(mirror_left){
842 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
843 dst += dst_step;
844 src += src_step;
845 }
846
847 for(i=0; i<w; i++){
9d14ffbc
LB
848 dst[i*dst_step] =
849 LIFTS(src[i*src_step],
850 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
851 inverse);
d593e329
MN
852 }
853
854 if(mirror_right){
9d14ffbc
LB
855 dst[w*dst_step] =
856 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
d593e329
MN
857 }
858}
059715a4 859#endif
f5a71928 860
aa25a462
RFI
861static void horizontal_decompose53i(DWTELEM *b, int width){
862 DWTELEM temp[width];
791e7b83 863 const int width2= width>>1;
62ab0b78 864 int x;
791e7b83
MN
865 const int w2= (width+1)>>1;
866
867 for(x=0; x<width2; x++){
868 temp[x ]= b[2*x ];
869 temp[x+w2]= b[2*x + 1];
870 }
871 if(width&1)
872 temp[x ]= b[2*x ];
873#if 0
62ab0b78
AJ
874 {
875 int A1,A2,A3,A4;
791e7b83
MN
876 A2= temp[1 ];
877 A4= temp[0 ];
878 A1= temp[0+width2];
879 A1 -= (A2 + A4)>>1;
880 A4 += (A1 + 1)>>1;
881 b[0+width2] = A1;
882 b[0 ] = A4;
883 for(x=1; x+1<width2; x+=2){
884 A3= temp[x+width2];
885 A4= temp[x+1 ];
886 A3 -= (A2 + A4)>>1;
887 A2 += (A1 + A3 + 2)>>2;
888 b[x+width2] = A3;
889 b[x ] = A2;
890
891 A1= temp[x+1+width2];
892 A2= temp[x+2 ];
893 A1 -= (A2 + A4)>>1;
894 A4 += (A1 + A3 + 2)>>2;
895 b[x+1+width2] = A1;
896 b[x+1 ] = A4;
897 }
898 A3= temp[width-1];
899 A3 -= A2;
900 A2 += (A1 + A3 + 2)>>2;
901 b[width -1] = A3;
902 b[width2-1] = A2;
62ab0b78 903 }
115329f1 904#else
791e7b83
MN
905 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
906 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0);
907#endif
908}
909
aa25a462 910static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 911 int i;
115329f1 912
791e7b83
MN
913 for(i=0; i<width; i++){
914 b1[i] -= (b0[i] + b2[i])>>1;
915 }
916}
917
aa25a462 918static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 919 int i;
115329f1 920
791e7b83
MN
921 for(i=0; i<width; i++){
922 b1[i] += (b0[i] + b2[i] + 2)>>2;
923 }
924}
925
aa25a462 926static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
39c61bbb 927 int y;
791e7b83
MN
928 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
929 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride;
115329f1 930
791e7b83
MN
931 for(y=-2; y<height; y+=2){
932 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
933 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
934
935{START_TIMER
13705b69
MN
936 if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
937 if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
791e7b83 938STOP_TIMER("horizontal_decompose53i")}
115329f1 939
791e7b83 940{START_TIMER
13705b69
MN
941 if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
942 if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
791e7b83 943STOP_TIMER("vertical_decompose53i*")}
115329f1 944
791e7b83
MN
945 b0=b2;
946 b1=b3;
947 }
948}
949
aa25a462
RFI
950static void horizontal_decompose97i(DWTELEM *b, int width){
951 DWTELEM temp[width];
791e7b83
MN
952 const int w2= (width+1)>>1;
953
ce611a27
MN
954 lift (temp+w2, b +1, b , 1, 2, 2, width, W_AM, W_AO, W_AS, 1, 1);
955 liftS(temp , b , temp+w2, 1, 2, 1, width, W_BM, W_BO, W_BS, 0, 0);
ff06e067 956 lift (b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
791e7b83
MN
957 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
958}
959
960
aa25a462 961static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 962 int i;
115329f1 963
791e7b83
MN
964 for(i=0; i<width; i++){
965 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
966 }
967}
968
aa25a462 969static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 970 int i;
115329f1 971
791e7b83 972 for(i=0; i<width; i++){
791e7b83 973 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
791e7b83
MN
974 }
975}
976
aa25a462 977static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 978 int i;
115329f1 979
791e7b83 980 for(i=0; i<width; i++){
f5a71928 981#ifdef liftS
791e7b83 982 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
f5a71928 983#else
ce611a27 984 b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + W_BO*5 + (5<<27)) / (5*16) - (1<<23);
f5a71928 985#endif
791e7b83
MN
986 }
987}
988
aa25a462 989static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 990 int i;
115329f1 991
791e7b83
MN
992 for(i=0; i<width; i++){
993 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
994 }
995}
996
aa25a462 997static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
39c61bbb 998 int y;
791e7b83
MN
999 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
1000 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride;
1001 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
1002 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
115329f1 1003
791e7b83
MN
1004 for(y=-4; y<height; y+=2){
1005 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1006 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1007
1008{START_TIMER
13705b69
MN
1009 if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
1010 if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
791e7b83
MN
1011if(width>400){
1012STOP_TIMER("horizontal_decompose97i")
1013}}
115329f1 1014
791e7b83 1015{START_TIMER
13705b69
MN
1016 if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
1017 if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
1018 if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
1019 if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
791e7b83
MN
1020
1021if(width>400){
1022STOP_TIMER("vertical_decompose97i")
1023}}
115329f1 1024
791e7b83
MN
1025 b0=b2;
1026 b1=b3;
1027 b2=b4;
1028 b3=b5;
1029 }
1030}
1031
aa25a462 1032void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
791e7b83 1033 int level;
115329f1 1034
46c281e8
MN
1035 for(level=0; level<decomposition_count; level++){
1036 switch(type){
d4b287ed
LM
1037 case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1038 case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
791e7b83
MN
1039 }
1040 }
1041}
1042
d593e329
MN
1043static void horizontal_compose53i(IDWTELEM *b, int width){
1044 IDWTELEM temp[width];
791e7b83
MN
1045 const int width2= width>>1;
1046 const int w2= (width+1)>>1;
62ab0b78 1047 int x;
791e7b83
MN
1048
1049#if 0
62ab0b78 1050 int A1,A2,A3,A4;
791e7b83
MN
1051 A2= temp[1 ];
1052 A4= temp[0 ];
1053 A1= temp[0+width2];
1054 A1 -= (A2 + A4)>>1;
1055 A4 += (A1 + 1)>>1;
1056 b[0+width2] = A1;
1057 b[0 ] = A4;
1058 for(x=1; x+1<width2; x+=2){
1059 A3= temp[x+width2];
1060 A4= temp[x+1 ];
1061 A3 -= (A2 + A4)>>1;
1062 A2 += (A1 + A3 + 2)>>2;
1063 b[x+width2] = A3;
1064 b[x ] = A2;
1065
1066 A1= temp[x+1+width2];
1067 A2= temp[x+2 ];
1068 A1 -= (A2 + A4)>>1;
1069 A4 += (A1 + A3 + 2)>>2;
1070 b[x+1+width2] = A1;
1071 b[x+1 ] = A4;
1072 }
1073 A3= temp[width-1];
1074 A3 -= A2;
1075 A2 += (A1 + A3 + 2)>>2;
1076 b[width -1] = A3;
1077 b[width2-1] = A2;
115329f1 1078#else
d593e329
MN
1079 inv_lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1);
1080 inv_lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
96e2fbf2 1081#endif /* 0 */
791e7b83
MN
1082 for(x=0; x<width2; x++){
1083 b[2*x ]= temp[x ];
1084 b[2*x + 1]= temp[x+w2];
1085 }
1086 if(width&1)
1087 b[2*x ]= temp[x ];
1088}
1089
d593e329 1090static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
791e7b83 1091 int i;
115329f1 1092
791e7b83
MN
1093 for(i=0; i<width; i++){
1094 b1[i] += (b0[i] + b2[i])>>1;
1095 }
1096}
1097
d593e329 1098static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
791e7b83 1099 int i;
115329f1 1100
791e7b83
MN
1101 for(i=0; i<width; i++){
1102 b1[i] -= (b0[i] + b2[i] + 2)>>2;
1103 }
1104}
1105
a0d1931c
Y
1106static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1107 cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1108 cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line);
1109 cs->y = -1;
1110}
1111
d593e329 1112static void spatial_compose53i_init(dwt_compose_t *cs, IDWTELEM *buffer, int height, int stride){
f9e6ebf7
LM
1113 cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1114 cs->b1 = buffer + mirror(-1 , height-1)*stride;
1115 cs->y = -1;
1116}
1117
a0d1931c
Y
1118static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1119 int y= cs->y;
115329f1 1120
d593e329
MN
1121 IDWTELEM *b0= cs->b0;
1122 IDWTELEM *b1= cs->b1;
1123 IDWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
1124 IDWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
a0d1931c
Y
1125
1126{START_TIMER
13705b69
MN
1127 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1128 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
a0d1931c
Y
1129STOP_TIMER("vertical_compose53i*")}
1130
1131{START_TIMER
13705b69
MN
1132 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1133 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
a0d1931c
Y
1134STOP_TIMER("horizontal_compose53i")}
1135
1136 cs->b0 = b2;
1137 cs->b1 = b3;
1138 cs->y += 2;
1139}
1140
d593e329 1141static void spatial_compose53i_dy(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride){
f9e6ebf7 1142 int y= cs->y;
d593e329
MN
1143 IDWTELEM *b0= cs->b0;
1144 IDWTELEM *b1= cs->b1;
1145 IDWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1146 IDWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
791e7b83
MN
1147
1148{START_TIMER
13705b69
MN
1149 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1150 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
791e7b83
MN
1151STOP_TIMER("vertical_compose53i*")}
1152
1153{START_TIMER
13705b69
MN
1154 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1155 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
791e7b83
MN
1156STOP_TIMER("horizontal_compose53i")}
1157
f9e6ebf7
LM
1158 cs->b0 = b2;
1159 cs->b1 = b3;
1160 cs->y += 2;
1161}
1162
d593e329 1163static void spatial_compose53i(IDWTELEM *buffer, int width, int height, int stride){
f9e6ebf7
LM
1164 dwt_compose_t cs;
1165 spatial_compose53i_init(&cs, buffer, height, stride);
1166 while(cs.y <= height)
1167 spatial_compose53i_dy(&cs, buffer, width, height, stride);
115329f1
DB
1168}
1169
791e7b83 1170
d593e329
MN
1171void ff_snow_horizontal_compose97i(IDWTELEM *b, int width){
1172 IDWTELEM temp[width];
791e7b83
MN
1173 const int w2= (width+1)>>1;
1174
d593e329 1175 inv_lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
ff06e067 1176 inv_lift (temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
d593e329
MN
1177 inv_liftS(b , temp , temp+w2, 2, 1, 1, width, W_BM, W_BO, W_BS, 0, 1);
1178 inv_lift (b+1 , temp+w2, b , 2, 1, 2, width, W_AM, W_AO, W_AS, 1, 0);
791e7b83
MN
1179}
1180
d593e329 1181static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
791e7b83 1182 int i;
115329f1 1183
791e7b83
MN
1184 for(i=0; i<width; i++){
1185 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1186 }
1187}
1188
d593e329 1189static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
791e7b83 1190 int i;
115329f1 1191
791e7b83 1192 for(i=0; i<width; i++){
791e7b83 1193 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
791e7b83
MN
1194 }
1195}
1196
d593e329 1197static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
791e7b83 1198 int i;
115329f1 1199
791e7b83 1200 for(i=0; i<width; i++){
f5a71928 1201#ifdef liftS
791e7b83 1202 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
f5a71928
MN
1203#else
1204 b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
1205#endif
791e7b83
MN
1206 }
1207}
1208
d593e329 1209static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
791e7b83 1210 int i;
115329f1 1211
791e7b83
MN
1212 for(i=0; i<width; i++){
1213 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1214 }
1215}
1216
d593e329 1217void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
565a45ac 1218 int i;
115329f1 1219
565a45ac 1220 for(i=0; i<width; i++){
565a45ac 1221 b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
565a45ac 1222 b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
f5a71928 1223#ifdef liftS
565a45ac 1224 b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
f5a71928
MN
1225#else
1226 b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
1227#endif
565a45ac
MN
1228 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1229 }
1230}
1231
a0d1931c
Y
1232static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1233 cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1234 cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line);
1235 cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1236 cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
1237 cs->y = -3;
1238}
1239
d593e329 1240static void spatial_compose97i_init(dwt_compose_t *cs, IDWTELEM *buffer, int height, int stride){
f9e6ebf7
LM
1241 cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1242 cs->b1 = buffer + mirror(-3 , height-1)*stride;
1243 cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1244 cs->b3 = buffer + mirror(-3+2, height-1)*stride;
1245 cs->y = -3;
1246}
791e7b83 1247
059715a4 1248static void spatial_compose97i_dy_buffered(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
a0d1931c 1249 int y = cs->y;
115329f1 1250
d593e329
MN
1251 IDWTELEM *b0= cs->b0;
1252 IDWTELEM *b1= cs->b1;
1253 IDWTELEM *b2= cs->b2;
1254 IDWTELEM *b3= cs->b3;
1255 IDWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
1256 IDWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
115329f1 1257
a0d1931c 1258{START_TIMER
565a45ac 1259 if(y>0 && y+4<height){
059715a4 1260 dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
565a45ac 1261 }else{
13705b69
MN
1262 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1263 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1264 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1265 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
565a45ac 1266 }
a0d1931c
Y
1267if(width>400){
1268STOP_TIMER("vertical_compose97i")}}
a0d1931c
Y
1269
1270{START_TIMER
059715a4
RE
1271 if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width);
1272 if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width);
3b6ab26c 1273if(width>400 && y+0<(unsigned)height){
a0d1931c
Y
1274STOP_TIMER("horizontal_compose97i")}}
1275
1276 cs->b0=b2;
1277 cs->b1=b3;
1278 cs->b2=b4;
1279 cs->b3=b5;
1280 cs->y += 2;
1281}
1282
d593e329 1283static void spatial_compose97i_dy(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride){
f9e6ebf7 1284 int y = cs->y;
d593e329
MN
1285 IDWTELEM *b0= cs->b0;
1286 IDWTELEM *b1= cs->b1;
1287 IDWTELEM *b2= cs->b2;
1288 IDWTELEM *b3= cs->b3;
1289 IDWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1290 IDWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
791e7b83 1291
791e7b83 1292{START_TIMER
13705b69
MN
1293 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1294 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1295 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1296 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
791e7b83
MN
1297if(width>400){
1298STOP_TIMER("vertical_compose97i")}}
1299
1300{START_TIMER
059715a4
RE
1301 if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width);
1302 if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width);
791e7b83
MN
1303if(width>400 && b0 <= b2){
1304STOP_TIMER("horizontal_compose97i")}}
f9e6ebf7
LM
1305
1306 cs->b0=b2;
1307 cs->b1=b3;
1308 cs->b2=b4;
1309 cs->b3=b5;
1310 cs->y += 2;
1311}
1312
d593e329 1313static void spatial_compose97i(IDWTELEM *buffer, int width, int height, int stride){
f9e6ebf7
LM
1314 dwt_compose_t cs;
1315 spatial_compose97i_init(&cs, buffer, height, stride);
1316 while(cs.y <= height)
1317 spatial_compose97i_dy(&cs, buffer, width, height, stride);
1318}
1319
ceaf1909 1320static void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
a0d1931c
Y
1321 int level;
1322 for(level=decomposition_count-1; level>=0; level--){
1323 switch(type){
d4b287ed
LM
1324 case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1325 case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
a0d1931c
Y
1326 }
1327 }
1328}
1329
d593e329 1330static void ff_spatial_idwt_init(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
f9e6ebf7
LM
1331 int level;
1332 for(level=decomposition_count-1; level>=0; level--){
1333 switch(type){
d4b287ed
LM
1334 case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1335 case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
f9e6ebf7 1336 }
791e7b83
MN
1337 }
1338}
1339
d593e329 1340static void ff_spatial_idwt_slice(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
f9e6ebf7 1341 const int support = type==1 ? 3 : 5;
791e7b83 1342 int level;
f9e6ebf7 1343 if(type==2) return;
791e7b83 1344
46c281e8 1345 for(level=decomposition_count-1; level>=0; level--){
f9e6ebf7
LM
1346 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1347 switch(type){
d4b287ed 1348 case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
f9e6ebf7 1349 break;
d4b287ed 1350 case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
f9e6ebf7 1351 break;
f9e6ebf7 1352 }
791e7b83
MN
1353 }
1354 }
1355}
1356
059715a4 1357static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
a0d1931c
Y
1358 const int support = type==1 ? 3 : 5;
1359 int level;
1360 if(type==2) return;
1361
1362 for(level=decomposition_count-1; level>=0; level--){
1363 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1364 switch(type){
d4b287ed 1365 case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
a0d1931c 1366 break;
d4b287ed 1367 case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
a0d1931c 1368 break;
a0d1931c
Y
1369 }
1370 }
1371 }
1372}
1373
d593e329 1374static void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
f9e6ebf7
LM
1375 dwt_compose_t cs[MAX_DECOMPOSITIONS];
1376 int y;
1377 ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1378 for(y=0; y<height; y+=4)
1379 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
f9e6ebf7
LM
1380}
1381
d593e329 1382static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
4f4e9633
MN
1383 const int w= b->width;
1384 const int h= b->height;
1385 int x, y;
1386
791e7b83 1387 if(1){
791e7b83 1388 int run=0;
a8d73e56 1389 int runs[w*h];
791e7b83 1390 int run_index=0;
b44985ba 1391 int max_index;
115329f1 1392
791e7b83
MN
1393 for(y=0; y<h; y++){
1394 for(x=0; x<w; x++){
78486403 1395 int v, p=0;
6b2f6646 1396 int /*ll=0, */l=0, lt=0, t=0, rt=0;
a8d73e56 1397 v= src[x + y*stride];
791e7b83
MN
1398
1399 if(y){
a8d73e56 1400 t= src[x + (y-1)*stride];
791e7b83 1401 if(x){
a8d73e56 1402 lt= src[x - 1 + (y-1)*stride];
791e7b83
MN
1403 }
1404 if(x + 1 < w){
a8d73e56 1405 rt= src[x + 1 + (y-1)*stride];
791e7b83
MN
1406 }
1407 }
1408 if(x){
a8d73e56 1409 l= src[x - 1 + y*stride];
6b2f6646
MN
1410 /*if(x > 1){
1411 if(orientation==1) ll= src[y + (x-2)*stride];
1412 else ll= src[x - 2 + y*stride];
791e7b83
MN
1413 }*/
1414 }
78486403 1415 if(parent){
a8d73e56
MN
1416 int px= x>>1;
1417 int py= y>>1;
115329f1 1418 if(px<b->parent->width && py<b->parent->height)
78486403
MN
1419 p= parent[px + py*2*stride];
1420 }
1421 if(!(/*ll|*/l|lt|t|rt|p)){
791e7b83
MN
1422 if(v){
1423 runs[run_index++]= run;
1424 run=0;
1425 }else{
1426 run++;
1427 }
1428 }
1429 }
1430 }
b44985ba 1431 max_index= run_index;
791e7b83
MN
1432 runs[run_index++]= run;
1433 run_index=0;
1434 run= runs[run_index++];
1435
b44985ba
MN
1436 put_symbol2(&s->c, b->state[30], max_index, 0);
1437 if(run_index <= max_index)
1438 put_symbol2(&s->c, b->state[1], run, 3);
115329f1 1439
791e7b83 1440 for(y=0; y<h; y++){
d06c75a8 1441 if(s->c.bytestream_end - s->c.bytestream < w*40){
0ecca7a4
MN
1442 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1443 return -1;
1444 }
791e7b83 1445 for(x=0; x<w; x++){
78486403 1446 int v, p=0;
6b2f6646 1447 int /*ll=0, */l=0, lt=0, t=0, rt=0;
a8d73e56 1448 v= src[x + y*stride];
791e7b83
MN
1449
1450 if(y){
a8d73e56 1451 t= src[x + (y-1)*stride];
791e7b83 1452 if(x){
a8d73e56 1453 lt= src[x - 1 + (y-1)*stride];
791e7b83
MN
1454 }
1455 if(x + 1 < w){
a8d73e56 1456 rt= src[x + 1 + (y-1)*stride];
791e7b83
MN
1457 }
1458 }
1459 if(x){
a8d73e56 1460 l= src[x - 1 + y*stride];
6b2f6646
MN
1461 /*if(x > 1){
1462 if(orientation==1) ll= src[y + (x-2)*stride];
1463 else ll= src[x - 2 + y*stride];
791e7b83
MN
1464 }*/
1465 }
78486403 1466 if(parent){
a8d73e56
MN
1467 int px= x>>1;
1468 int py= y>>1;
115329f1 1469 if(px<b->parent->width && py<b->parent->height)
78486403
MN
1470 p= parent[px + py*2*stride];
1471 }
1472 if(/*ll|*/l|lt|t|rt|p){
c26abfa5 1473 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
6b2f6646 1474
28869757 1475 put_rac(&s->c, &b->state[0][context], !!v);
791e7b83
MN
1476 }else{
1477 if(!run){
1478 run= runs[run_index++];
4f4e9633 1479
b44985ba
MN
1480 if(run_index <= max_index)
1481 put_symbol2(&s->c, b->state[1], run, 3);
791e7b83
MN
1482 assert(v);
1483 }else{
1484 run--;
1485 assert(!v);
1486 }
1487 }
1488 if(v){
c26abfa5
DB
1489 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1490 int l2= 2*FFABS(l) + (l<0);
1491 int t2= 2*FFABS(t) + (t<0);
6b2f6646 1492
c26abfa5 1493 put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
538a3841 1494 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
791e7b83
MN
1495 }
1496 }
1497 }
791e7b83 1498 }
0ecca7a4 1499 return 0;
791e7b83
MN
1500}
1501
d593e329 1502static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
4f4e9633
MN
1503// encode_subband_qtree(s, b, src, parent, stride, orientation);
1504// encode_subband_z0run(s, b, src, parent, stride, orientation);
0ecca7a4 1505 return encode_subband_c0run(s, b, src, parent, stride, orientation);
4f4e9633
MN
1506// encode_subband_dzr(s, b, src, parent, stride, orientation);
1507}
1508
a0d1931c 1509static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
791e7b83
MN
1510 const int w= b->width;
1511 const int h= b->height;
1512 int x,y;
115329f1 1513
791e7b83 1514 if(1){
b44985ba 1515 int run, runs;
cbb1d2b1
MN
1516 x_and_coeff *xc= b->x_coeff;
1517 x_and_coeff *prev_xc= NULL;
1518 x_and_coeff *prev2_xc= xc;
1519 x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
1520 x_and_coeff *prev_parent_xc= parent_xc;
791e7b83 1521
b44985ba
MN
1522 runs= get_symbol2(&s->c, b->state[30], 0);
1523 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1524 else run= INT_MAX;
1525
791e7b83 1526 for(y=0; y<h; y++){
0cea8a03
MN
1527 int v=0;
1528 int lt=0, t=0, rt=0;
1529
cbb1d2b1
MN
1530 if(y && prev_xc->x == 0){
1531 rt= prev_xc->coeff;
0cea8a03 1532 }
791e7b83 1533 for(x=0; x<w; x++){
0cea8a03
MN
1534 int p=0;
1535 const int l= v;
115329f1 1536
0cea8a03 1537 lt= t; t= rt;
791e7b83 1538
ff765159 1539 if(y){
cbb1d2b1
MN
1540 if(prev_xc->x <= x)
1541 prev_xc++;
1542 if(prev_xc->x == x + 1)
1543 rt= prev_xc->coeff;
ff765159
MN
1544 else
1545 rt=0;
1546 }
cbb1d2b1
MN
1547 if(parent_xc){
1548 if(x>>1 > parent_xc->x){
1549 parent_xc++;
7b49c309 1550 }
cbb1d2b1
MN
1551 if(x>>1 == parent_xc->x){
1552 p= parent_xc->coeff;
ff765159 1553 }
78486403
MN
1554 }
1555 if(/*ll|*/l|lt|t|rt|p){
c26abfa5 1556 int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
6b2f6646 1557
28869757 1558 v=get_rac(&s->c, &b->state[0][context]);
3c096ac7
MN
1559 if(v){
1560 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
1561 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
115329f1 1562
cbb1d2b1
MN
1563 xc->x=x;
1564 (xc++)->coeff= v;
3c096ac7 1565 }
791e7b83
MN
1566 }else{
1567 if(!run){
b44985ba
MN
1568 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1569 else run= INT_MAX;
3c096ac7
MN
1570 v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
1571 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
115329f1 1572
cbb1d2b1
MN
1573 xc->x=x;
1574 (xc++)->coeff= v;
791e7b83 1575 }else{
99cd59e5 1576 int max_run;
791e7b83
MN
1577 run--;
1578 v=0;
3c1adccd 1579
cbb1d2b1 1580 if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
99cd59e5 1581 else max_run= FFMIN(run, w-x-1);
cbb1d2b1
MN
1582 if(parent_xc)
1583 max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
99cd59e5
MN
1584 x+= max_run;
1585 run-= max_run;
791e7b83
MN
1586 }
1587 }
7b49c309 1588 }
cbb1d2b1
MN
1589 (xc++)->x= w+1; //end marker
1590 prev_xc= prev2_xc;
1591 prev2_xc= xc;
115329f1 1592
cbb1d2b1 1593 if(parent_xc){
7b49c309 1594 if(y&1){
cbb1d2b1
MN
1595 while(parent_xc->x != parent->width+1)
1596 parent_xc++;
1597 parent_xc++;
1598 prev_parent_xc= parent_xc;
7b49c309 1599 }else{
cbb1d2b1 1600 parent_xc= prev_parent_xc;
791e7b83
MN
1601 }
1602 }
1603 }
a0d1931c 1604
cbb1d2b1 1605 (xc++)->x= w+1; //end marker
a0d1931c
Y
1606 }
1607}
1608
1609static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1610 const int w= b->width;
62ab0b78 1611 int y;
f66e4f5f 1612 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
c97de57c 1613 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
a0d1931c
Y
1614 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1615 int new_index = 0;
115329f1 1616
a0d1931c
Y
1617 START_TIMER
1618
d593e329 1619 if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){
a0d1931c
Y
1620 qadd= 0;
1621 qmul= 1<<QEXPSHIFT;
1622 }
1623
1624 /* If we are on the second or later slice, restore our index. */
1625 if (start_y != 0)
1626 new_index = save_state[0];
1627
115329f1 1628
a0d1931c
Y
1629 for(y=start_y; y<h; y++){
1630 int x = 0;
1631 int v;
d593e329
MN
1632 IDWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1633 memset(line, 0, b->width*sizeof(IDWTELEM));
a0d1931c
Y
1634 v = b->x_coeff[new_index].coeff;
1635 x = b->x_coeff[new_index++].x;
1636 while(x < w)
1637 {
538a3841
MN
1638 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1639 register int u= -(v&1);
1640 line[x] = (t^u) - u;
1641
a0d1931c
Y
1642 v = b->x_coeff[new_index].coeff;
1643 x = b->x_coeff[new_index++].x;
1644 }
791e7b83 1645 }
a0d1931c
Y
1646 if(w > 200 && start_y != 0/*level+1 == s->spatial_decomposition_count*/){
1647 STOP_TIMER("decode_subband")
1648 }
115329f1 1649
a0d1931c
Y
1650 /* Save our variables for the next slice. */
1651 save_state[0] = new_index;
115329f1 1652
a0d1931c 1653 return;
791e7b83
MN
1654}
1655
396a5e68 1656static void reset_contexts(SnowContext *s){ //FIXME better initial contexts
791e7b83
MN
1657 int plane_index, level, orientation;
1658
19aa028d 1659 for(plane_index=0; plane_index<3; plane_index++){
4f90f33a 1660 for(level=0; level<MAX_DECOMPOSITIONS; level++){
791e7b83 1661 for(orientation=level ? 1:0; orientation<4; orientation++){
28869757 1662 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
791e7b83
MN
1663 }
1664 }
1665 }
28869757
MN
1666 memset(s->header_state, MID_STATE, sizeof(s->header_state));
1667 memset(s->block_state, MID_STATE, sizeof(s->block_state));
155ec6ed
MN
1668}
1669
1670static int alloc_blocks(SnowContext *s){
1671 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1672 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
115329f1 1673
155ec6ed
MN
1674 s->b_width = w;
1675 s->b_height= h;
115329f1 1676
155ec6ed
MN
1677 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
1678 return 0;
1679}
1680
28869757
MN
1681static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1682 uint8_t *bytestream= d->bytestream;
1683 uint8_t *bytestream_start= d->bytestream_start;
155ec6ed 1684 *d= *s;
28869757
MN
1685 d->bytestream= bytestream;
1686 d->bytestream_start= bytestream_start;
155ec6ed
MN
1687}
1688
//near copy & paste from dsputil, FIXME
/**
 * Sum the pixels of a w x w square.
 *
 * @param pix       top-left pixel of the square
 * @param line_size distance in bytes between the starts of two rows
 * @param w         width and height of the square
 * @return sum of all w*w pixel values
 */
static int pix_sum(uint8_t * pix, int line_size, int w)
{
    int total = 0;
    int row, col;

    for (row = 0; row < w; row++) {
        const uint8_t *p = pix + row * line_size;

        for (col = 0; col < w; col++)
            total += p[col];
    }
    return total;
}
1704
1705//near copy & paste from dsputil, FIXME
1706static int pix_norm1(uint8_t * pix, int line_size, int w)
1707{
1708 int s, i, j;
1d503957 1709 uint32_t *sq = ff_squareTbl + 256;
155ec6ed
MN
1710
1711 s = 0;
1712 for (i = 0; i < w; i++) {
1713 for (j = 0; j < w; j ++) {
1714 s += sq[pix[0]];
1715 pix ++;
1716 }
1717 pix += line_size - w;
1718 }
1719 return s;
1720}
1721
8c36eaaa 1722static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
155ec6ed
MN
1723 const int w= s->b_width << s->block_max_depth;
1724 const int rem_depth= s->block_max_depth - level;
1725 const int index= (x + y*w) << rem_depth;
1726 const int block_w= 1<<rem_depth;
1727 BlockNode block;
1728 int i,j;
115329f1 1729
155ec6ed
MN
1730 block.color[0]= l;
1731 block.color[1]= cb;
1732 block.color[2]= cr;
1733 block.mx= mx;
1734 block.my= my;
8c36eaaa 1735 block.ref= ref;
155ec6ed
MN
1736 block.type= type;
1737 block.level= level;
1738
1739 for(j=0; j<block_w; j++){
1740 for(i=0; i<block_w; i++){
1741 s->block[index + i + j*w]= block;
1742 }
1743 }
1744}
1745
1746static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
1747 const int offset[3]= {
1748 y*c-> stride + x,
1749 ((y*c->uvstride + x)>>1),
1750 ((y*c->uvstride + x)>>1),
1751 };
1752 int i;
1753 for(i=0; i<3; i++){
1754 c->src[0][i]= src [i];
1755 c->ref[0][i]= ref [i] + offset[i];
1756 }
1757 assert(!ref_index);
1758}
1759
85fc0e75 1760static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
aadcc5ce 1761 const BlockNode *left, const BlockNode *top, const BlockNode *tr){
85fc0e75
LM
1762 if(s->ref_frames == 1){
1763 *mx = mid_pred(left->mx, top->mx, tr->mx);
1764 *my = mid_pred(left->my, top->my, tr->my);
1765 }else{
1766 const int *scale = scale_mv_ref[ref];
6884c36c
PI
1767 *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8,
1768 (top ->mx * scale[top ->ref] + 128) >>8,
1769 (tr ->mx * scale[tr ->ref] + 128) >>8);
1770 *my = mid_pred((left->my * scale[left->ref] + 128) >>8,
1771 (top ->my * scale[top ->ref] + 128) >>8,
1772 (tr ->my * scale[tr ->ref] + 128) >>8);
85fc0e75
LM
1773 }
1774}
1775
155ec6ed
MN
//FIXME copy&paste
/* Named rows of a local predictor array called P (int P[N][2], [0]=x, [1]=y). */
#define P_LEFT      P[1]
#define P_TOP       P[2]
#define P_TOPRIGHT  P[3]
#define P_MEDIAN    P[4]
#define P_MV1       P[9]
#define FLAG_QPEL   1 //must be 1
1783
1784static int encode_q_branch(SnowContext *s, int level, int x, int y){
1785 uint8_t p_buffer[1024];
1786 uint8_t i_buffer[1024];
1787 uint8_t p_state[sizeof(s->block_state)];
1788 uint8_t i_state[sizeof(s->block_state)];
28869757
MN
1789 RangeCoder pc, ic;
1790 uint8_t *pbbak= s->c.bytestream;
1791 uint8_t *pbbak_start= s->c.bytestream_start;
1e6b5700 1792 int score, score2, iscore, i_len, p_len, block_s, sum, base_bits;
155ec6ed
MN
1793 const int w= s->b_width << s->block_max_depth;
1794 const int h= s->b_height << s->block_max_depth;
1795 const int rem_depth= s->block_max_depth - level;
1796 const int index= (x + y*w) << rem_depth;
1797 const int block_w= 1<<(LOG2_MB_SIZE - level);
155ec6ed
MN
1798 int trx= (x+1)<<rem_depth;
1799 int try= (y+1)<<rem_depth;
aadcc5ce
PI
1800 const BlockNode *left = x ? &s->block[index-1] : &null_block;
1801 const BlockNode *top = y ? &s->block[index-w] : &null_block;
1802 const BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
1803 const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
1804 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
1805 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
155ec6ed
MN
1806 int pl = left->color[0];
1807 int pcb= left->color[1];
1808 int pcr= left->color[2];
85fc0e75 1809 int pmx, pmy;
155ec6ed 1810 int mx=0, my=0;
51d6a3cf 1811 int l,cr,cb;
155ec6ed
MN
1812 const int stride= s->current_picture.linesize[0];
1813 const int uvstride= s->current_picture.linesize[1];
51d6a3cf
MN
1814 uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w,
1815 s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
1816 s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
155ec6ed
MN
1817 int P[10][2];
1818 int16_t last_mv[3][2];
1819 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
1820 const int shift= 1+qpel;
1821 MotionEstContext *c= &s->m.me;
8c36eaaa 1822 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
c26abfa5
DB
1823 int mx_context= av_log2(2*FFABS(left->mx - top->mx));
1824 int my_context= av_log2(2*FFABS(left->my - top->my));
155ec6ed 1825 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
8c36eaaa 1826 int ref, best_ref, ref_score, ref_mx, ref_my;
155ec6ed
MN
1827
1828 assert(sizeof(s->block_state) >= 256);
1829 if(s->keyframe){
85fc0e75 1830 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
155ec6ed
MN
1831 return 0;
1832 }
1833
155ec6ed
MN
1834// clip predictors / edge ?
1835
1836 P_LEFT[0]= left->mx;
1837 P_LEFT[1]= left->my;
1838 P_TOP [0]= top->mx;
1839 P_TOP [1]= top->my;
1840 P_TOPRIGHT[0]= tr->mx;
1841 P_TOPRIGHT[1]= tr->my;
115329f1 1842
155ec6ed
MN
1843 last_mv[0][0]= s->block[index].mx;
1844 last_mv[0][1]= s->block[index].my;
1845 last_mv[1][0]= right->mx;
1846 last_mv[1][1]= right->my;
1847 last_mv[2][0]= bottom->mx;
1848 last_mv[2][1]= bottom->my;
115329f1 1849
155ec6ed 1850 s->m.mb_stride=2;
115329f1 1851 s->m.mb_x=
155ec6ed 1852 s->m.mb_y= 0;
e2158da8 1853 c->skip= 0;
155ec6ed 1854
e2158da8
PI
1855 assert(c-> stride == stride);
1856 assert(c->uvstride == uvstride);
115329f1 1857
155ec6ed
MN
1858 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
1859 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
1860 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
1861 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
115329f1 1862
ff158dc9
MN
1863 c->xmin = - x*block_w - 16+2;
1864 c->ymin = - y*block_w - 16+2;
1865 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
1866 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
155ec6ed
MN
1867
1868 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
115329f1 1869 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
155ec6ed
MN
1870 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
1871 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
1872 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
1873 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
1874 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
1875
1876 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
1877 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
1878
1879 if (!y) {
1880 c->pred_x= P_LEFT[0];
1881 c->pred_y= P_LEFT[1];
1882 } else {
1883 c->pred_x = P_MEDIAN[0];
1884 c->pred_y = P_MEDIAN[1];
1885 }
1886
8c36eaaa
LM
1887 score= INT_MAX;
1888 best_ref= 0;
1889 for(ref=0; ref<s->ref_frames; ref++){
1890 init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);
1891
1892 ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
1893 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
155ec6ed 1894
8c36eaaa
LM
1895 assert(ref_mx >= c->xmin);
1896 assert(ref_mx <= c->xmax);
1897 assert(ref_my >= c->ymin);
1898 assert(ref_my <= c->ymax);
115329f1 1899
e2158da8 1900 ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
8c36eaaa
LM
1901 ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
1902 ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
1903 if(s->ref_mvs[ref]){
1904 s->ref_mvs[ref][index][0]= ref_mx;
1905 s->ref_mvs[ref][index][1]= ref_my;
1906 s->ref_scores[ref][index]= ref_score;
1907 }
1908 if(score > ref_score){
1909 score= ref_score;
1910 best_ref= ref;
1911 mx= ref_mx;
1912 my= ref_my;
1913 }
1914 }
755bfeab 1915 //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2
115329f1 1916
155ec6ed 1917 // subpel search
61d49d12 1918 base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start);
155ec6ed 1919 pc= s->c;
28869757
MN
1920 pc.bytestream_start=
1921 pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
155ec6ed
MN
1922 memcpy(p_state, s->block_state, sizeof(s->block_state));
1923
1924 if(level!=s->block_max_depth)
28869757
MN
1925 put_rac(&pc, &p_state[4 + s_context], 1);
1926 put_rac(&pc, &p_state[1 + left->type + top->type], 0);
8c36eaaa
LM
1927 if(s->ref_frames > 1)
1928 put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
85fc0e75 1929 pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
8c36eaaa
LM
1930 put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
1931 put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
28869757 1932 p_len= pc.bytestream - pc.bytestream_start;
1e6b5700 1933 score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT;
155ec6ed
MN
1934
1935 block_s= block_w*block_w;
51d6a3cf 1936 sum = pix_sum(current_data[0], stride, block_w);
155ec6ed 1937 l= (sum + block_s/2)/block_s;
51d6a3cf 1938 iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
115329f1 1939
155ec6ed 1940 block_s= block_w*block_w>>2;
51d6a3cf 1941 sum = pix_sum(current_data[1], uvstride, block_w>>1);
155ec6ed
MN
1942 cb= (sum + block_s/2)/block_s;
1943// iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
51d6a3cf 1944 sum = pix_sum(current_data[2], uvstride, block_w>>1);
155ec6ed
MN
1945 cr= (sum + block_s/2)/block_s;
1946// iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
1947
1948 ic= s->c;
28869757
MN
1949 ic.bytestream_start=
1950 ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
155ec6ed
MN
1951 memcpy(i_state, s->block_state, sizeof(s->block_state));
1952 if(level!=s->block_max_depth)
28869757
MN
1953 put_rac(&ic, &i_state[4 + s_context], 1);
1954 put_rac(&ic, &i_state[1 + left->type + top->type], 1);
155ec6ed
MN
1955 put_symbol(&ic, &i_state[32], l-pl , 1);
1956 put_symbol(&ic, &i_state[64], cb-pcb, 1);
1957 put_symbol(&ic, &i_state[96], cr-pcr, 1);
28869757 1958 i_len= ic.bytestream - ic.bytestream_start;
1e6b5700 1959 iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT;
155ec6ed
MN
1960
1961// assert(score==256*256*256*64-1);
1962 assert(iscore < 255*255*256 + s->lambda2*10);
1963 assert(iscore >= 0);
1964 assert(l>=0 && l<=255);
1965 assert(pl>=0 && pl<=255);
1966
1967 if(level==0){
1968 int varc= iscore >> 8;
1969 int vard= score >> 8;
1970 if (vard <= 64 || vard < varc)
1971 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
1972 else
1973 c->scene_change_score+= s->m.qscale;
1974 }
115329f1 1975
155ec6ed 1976 if(level!=s->block_max_depth){
28869757 1977 put_rac(&s->c, &s->block_state[4 + s_context], 0);
155ec6ed
MN
1978 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
1979 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
1980 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
1981 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
1982 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
115329f1 1983
155ec6ed
MN
1984 if(score2 < score && score2 < iscore)
1985 return score2;
1986 }
115329f1 1987
155ec6ed 1988 if(iscore < score){
85fc0e75 1989 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
28869757 1990 memcpy(pbbak, i_buffer, i_len);
155ec6ed 1991 s->c= ic;
28869757
MN
1992 s->c.bytestream_start= pbbak_start;
1993 s->c.bytestream= pbbak + i_len;
8c36eaaa 1994 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
155ec6ed
MN
1995 memcpy(s->block_state, i_state, sizeof(s->block_state));
1996 return iscore;
1997 }else{
28869757 1998 memcpy(pbbak, p_buffer, p_len);
155ec6ed 1999 s->c= pc;
28869757
MN
2000 s->c.bytestream_start= pbbak_start;
2001 s->c.bytestream= pbbak + p_len;
8c36eaaa 2002 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
155ec6ed
MN
2003 memcpy(s->block_state, p_state, sizeof(s->block_state));
2004 return score;
2005 }
2006}
2007
849f1035 2008static av_always_inline int same_block(BlockNode *a, BlockNode *b){
51d6a3cf
MN
2009 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
2010 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
2011 }else{
8c36eaaa 2012 return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
51d6a3cf
MN
2013 }
2014}
2015
2016static void encode_q_branch2(SnowContext *s, int level, int x, int y){
2017 const int w= s->b_width << s->block_max_depth;
2018 const int rem_depth= s->block_max_depth - level;
2019 const int index= (x + y*w) << rem_depth;
2020 int trx= (x+1)<<rem_depth;
2021 BlockNode *b= &s->block[index];
aadcc5ce
PI
2022 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2023 const BlockNode *top = y ? &s->block[index-w] : &null_block;
2024 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2025 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
51d6a3cf
MN
2026 int pl = left->color[0];
2027 int pcb= left->color[1];
2028 int pcr= left->color[2];
85fc0e75 2029 int pmx, pmy;
8c36eaaa 2030 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
c26abfa5
DB
2031 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref;
2032 int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref;
51d6a3cf
MN
2033 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2034
2035 if(s->keyframe){
85fc0e75 2036 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
51d6a3cf
MN
2037 return;
2038 }
2039
2040 if(level!=s->block_max_depth){
2041 if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
d6f41eed
MN
2042 put_rac(&s->c, &s->block_state[4 + s_context], 1);
2043 }else{
51d6a3cf
MN
2044 put_rac(&s->c, &s->block_state[4 + s_context], 0);
2045 encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
2046 encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
2047 encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
2048 encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
2049 return;
51d6a3cf
MN
2050 }
2051 }
2052 if(b->type & BLOCK_INTRA){
85fc0e75 2053 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
51d6a3cf
MN
2054 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
2055 put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
2056 put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
2057 put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
8c36eaaa 2058 set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
51d6a3cf 2059 }else{
85fc0e75 2060 pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
51d6a3cf 2061 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
8c36eaaa
LM
2062 if(s->ref_frames > 1)
2063 put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
51d6a3cf
MN
2064 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
2065 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
8c36eaaa 2066 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
51d6a3cf
MN
2067 }
2068}
2069
155ec6ed
MN
2070static void decode_q_branch(SnowContext *s, int level, int x, int y){
2071 const int w= s->b_width << s->block_max_depth;
155ec6ed
MN
2072 const int rem_depth= s->block_max_depth - level;
2073 const int index= (x + y*w) << rem_depth;
155ec6ed 2074 int trx= (x+1)<<rem_depth;
aadcc5ce
PI
2075 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2076 const BlockNode *top = y ? &s->block[index-w] : &null_block;
2077 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2078 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
155ec6ed 2079 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
115329f1 2080
155ec6ed 2081 if(s->keyframe){
8c36eaaa 2082 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
155ec6ed
MN
2083 return;
2084 }
2085
28869757 2086 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
1e90b34f 2087 int type, mx, my;
155ec6ed
MN
2088 int l = left->color[0];
2089 int cb= left->color[1];
2090 int cr= left->color[2];
8c36eaaa
LM
2091 int ref = 0;
2092 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
c26abfa5
DB
2093 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
2094 int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
115329f1 2095
28869757 2096 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
155ec6ed
MN
2097
2098 if(type){
85fc0e75 2099 pred_mv(s, &mx, &my, 0, left, top, tr);
155ec6ed
MN
2100 l += get_symbol(&s->c, &s->block_state[32], 1);
2101 cb+= get_symbol(&s->c, &s->block_state[64], 1);
2102 cr+= get_symbol(&s->c, &s->block_state[96], 1);
2103 }else{
8c36eaaa
LM
2104 if(s->ref_frames > 1)
2105 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
85fc0e75 2106 pred_mv(s, &mx, &my, ref, left, top, tr);
8c36eaaa
LM
2107 mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
2108 my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
155ec6ed 2109 }
8c36eaaa 2110 set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
155ec6ed
MN
2111 }else{
2112 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
2113 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
2114 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
2115 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
2116 }
2117}
2118
74e6a8aa 2119static void encode_blocks(SnowContext *s, int search){
155ec6ed
MN
2120 int x, y;
2121 int w= s->b_width;
2122 int h= s->b_height;
2123
74e6a8aa 2124 if(s->avctx->me_method == ME_ITER && !s->keyframe && search)
51d6a3cf
MN
2125 iterative_me(s);
2126
155ec6ed 2127 for(y=0; y<h; y++){
d06c75a8 2128 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
0ecca7a4
MN
2129 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2130 return;
2131 }
155ec6ed 2132 for(x=0; x<w; x++){
74e6a8aa 2133 if(s->avctx->me_method == ME_ITER || !search)
51d6a3cf
MN
2134 encode_q_branch2(s, 0, x, y);
2135 else
2136 encode_q_branch (s, 0, x, y);
155ec6ed
MN
2137 }
2138 }
2139}
2140
2141static void decode_blocks(SnowContext *s){
2142 int x, y;
2143 int w= s->b_width;
2144 int h= s->b_height;
2145
2146 for(y=0; y<h; y++){
2147 for(x=0; x<w; x++){
2148 decode_q_branch(s, 0, x, y);
2149 }
2150 }
791e7b83
MN
2151}
2152
7d7f57d9 2153static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
a68ca08e
MN
2154 const static uint8_t weight[64]={
2155 8,7,6,5,4,3,2,1,
2156 7,7,0,0,0,0,0,1,
2157 6,0,6,0,0,0,2,0,
2158 5,0,0,5,0,3,0,0,
2159 4,0,0,0,4,0,0,0,
2160 3,0,0,5,0,3,0,0,
2161 2,0,6,0,0,0,2,0,
2162 1,7,0,0,0,0,0,1,
2163 };
2164
2165 const static uint8_t brane[256]={
2166 0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
2167 0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
2168 0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
2169 0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
2170 0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
2171 0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
2172 0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
2173 0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
2174 0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
2175 0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
2176 0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
2177 0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
2178 0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
2179 0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
2180 0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
2181 0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
2182 };
2183
2184 const static uint8_t needs[16]={
2185 0,1,0,0,
2186 2,4,2,0,
2187 0,1,0,0,
2188 15
2189 };
2190
2191 int x, y, b, r, l;
61d6e445
MN
2192 int16_t tmpIt [64*(32+HTAPS_MAX)];
2193 uint8_t tmp2t[3][stride*(32+HTAPS_MAX)];
a68ca08e
MN
2194 int16_t *tmpI= tmpIt;
2195 uint8_t *tmp2= tmp2t[0];
2196 uint8_t *hpel[11];
3924dac4 2197START_TIMER
a68ca08e
MN
2198 assert(dx<16 && dy<16);
2199 r= brane[dx + 16*dy]&15;
2200 l= brane[dx + 16*dy]>>4;
2201
2202 b= needs[l] | needs[r];
7d7f57d9
MN
2203 if(p && !p->diag_mc)
2204 b= 15;
a68ca08e
MN
2205
2206 if(b&5){
61d6e445 2207 for(y=0; y < b_h+HTAPS_MAX-1; y++){
65dc0f53 2208 for(x=0; x < b_w; x++){
61d6e445
MN
2209 int a_1=src[x + HTAPS_MAX/2-4];
2210 int a0= src[x + HTAPS_MAX/2-3];
2211 int a1= src[x + HTAPS_MAX/2-2];
2212 int a2= src[x + HTAPS_MAX/2-1];
2213 int a3= src[x + HTAPS_MAX/2+0];
2214 int a4= src[x + HTAPS_MAX/2+1];
2215 int a5= src[x + HTAPS_MAX/2+2];
2216 int a6= src[x + HTAPS_MAX/2+3];
7d7f57d9
MN
2217 int am=0;
2218 if(!p || p->fast_mc){
2219 am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2220 tmpI[x]= am;
2221 am= (am+16)>>5;
2222 }else{
2223 am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
2224 tmpI[x]= am;
2225 am= (am+32)>>6;
2226 }
791e7b83 2227
65dc0f53
MN
2228 if(am&(~255)) am= ~(am>>31);
2229 tmp2[x]= am;
2230 }
2231 tmpI+= 64;
2232 tmp2+= stride;
2233 src += stride;
791e7b83 2234 }
65dc0f53 2235 src -= stride*y;
a68ca08e 2236 }
61d6e445 2237 src += HTAPS_MAX/2 - 1;
a68ca08e 2238 tmp2= tmp2t[1];
115329f1 2239
a68ca08e 2240 if(b&2){
65dc0f53
MN
2241 for(y=0; y < b_h; y++){
2242 for(x=0; x < b_w+1; x++){
61d6e445
MN
2243 int a_1=src[x + (HTAPS_MAX/2-4)*stride];
2244 int a0= src[x + (HTAPS_MAX/2-3)*stride];
2245 int a1= src[x + (HTAPS_MAX/2-2)*stride];
2246 int a2= src[x + (HTAPS_MAX/2-1)*stride];
2247 int a3= src[x + (HTAPS_MAX/2+0)*stride];
2248 int a4= src[x + (HTAPS_MAX/2+1)*stride];
2249 int a5= src[x + (HTAPS_MAX/2+2)*stride];
2250 int a6= src[x + (HTAPS_MAX/2+3)*stride];
7d7f57d9
MN
2251 int am=0;
2252 if(!p || p->fast_mc)
2253 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
2254 else
2255 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
791e7b83 2256
65dc0f53
MN
2257 if(am&(~255)) am= ~(am>>31);
2258 tmp2[x]= am;
2259 }
2260 src += stride;
2261 tmp2+= stride;
a68ca08e 2262 }
65dc0f53 2263 src -= stride*y;
a68ca08e 2264 }
61d6e445 2265 src += stride*(HTAPS_MAX/2 - 1);
a68ca08e
MN
2266 tmp2= tmp2t[2];
2267 tmpI= tmpIt;
2268 if(b&4){
2269 for(y=0; y < b_h; y++){
2270 for(x=0; x < b_w; x++){
61d6e445
MN
2271 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
2272 int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
2273 int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
2274 int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
2275 int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
2276 int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
2277 int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
2278 int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
7d7f57d9
MN
2279 int am=0;
2280 if(!p || p->fast_mc)
2281 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
2282 else
2283 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
a68ca08e
MN
2284 if(am&(~255)) am= ~(am>>31);
2285 tmp2[x]= am;
2286 }
2287 tmpI+= 64;
2288 tmp2+= stride;
2289 }
2290 }
115329f1 2291
a68ca08e 2292 hpel[ 0]= src;
61d6e445 2293 hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1);
a68ca08e
MN
2294 hpel[ 2]= src + 1;
2295
2296 hpel[ 4]= tmp2t[1];
2297 hpel[ 5]= tmp2t[2];
2298 hpel[ 6]= tmp2t[1] + 1;
2299
2300 hpel[ 8]= src + stride;
2301 hpel[ 9]= hpel[1] + stride;
2302 hpel[10]= hpel[8] + 1;
2303
2304 if(b==15){
2305 uint8_t *src1= hpel[dx/8 + dy/8*4 ];
2306 uint8_t *src2= hpel[dx/8 + dy/8*4+1];
2307 uint8_t *src3= hpel[dx/8 + dy/8*4+4];
2308 uint8_t *src4= hpel[dx/8 + dy/8*4+5];
2309 dx&=7;
2310 dy&=7;
2311 for(y=0; y < b_h; y++){
2312 for(x=0; x < b_w; x++){
2313 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
2314 (8-dx)* dy *src3[x] + dx* dy *src4[x]+32)>>6;
2315 }
2316 src1+=stride;
2317 src2+=stride;
2318 src3+=stride;
2319 src4+=stride;
2320 dst +=stride;
2321 }
2322 }else{
2323 uint8_t *src1= hpel[l];
2324 uint8_t *src2= hpel[r];
2325 int a= weight[((dx&7) + (8*(dy&7)))];
2326 int b= 8-a;
2327 for(y=0; y < b_h; y++){
2328 for(x=0; x < b_w; x++){
2329 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
2330 }
2331 src1+=stride;
2332 src2+=stride;
2333 dst +=stride;
791e7b83
MN
2334 }
2335 }
3924dac4 2336STOP_TIMER("mc_block")
791e7b83
MN
2337}
2338
791e7b83 2339#define mca(dx,dy,b_w)\
bad700e3 2340static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
61d6e445 2341 uint8_t tmp[stride*(b_w+HTAPS_MAX-1)];\
791e7b83 2342 assert(h==b_w);\
61d6e445 2343 mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, tmp, stride, b_w, b_w, dx, dy);\
791e7b83
MN
2344}
2345
2346mca( 0, 0,16)
2347mca( 8, 0,16)
2348mca( 0, 8,16)
2349mca( 8, 8,16)
d92b5807
MN
2350mca( 0, 0,8)
2351mca( 8, 0,8)
2352mca( 0, 8,8)
2353mca( 8, 8,8)
791e7b83 2354
8c36eaaa 2355static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
51d6a3cf 2356 if(block->type & BLOCK_INTRA){
ff158dc9 2357 int x, y;
2692ceab
MN
2358 const int color = block->color[plane_index];
2359 const int color4= color*0x01010101;
1015631b
LM
2360 if(b_w==32){
2361 for(y=0; y < b_h; y++){
2362 *(uint32_t*)&dst[0 + y*stride]= color4;
2363 *(uint32_t*)&dst[4 + y*stride]= color4;
2364 *(uint32_t*)&dst[8 + y*stride]= color4;
2365 *(uint32_t*)&dst[12+ y*stride]= color4;
2366 *(uint32_t*)&dst[16+ y*stride]= color4;
2367 *(uint32_t*)&dst[20+ y*stride]= color4;
2368 *(uint32_t*)&dst[24+ y*stride]= color4;
2369 *(uint32_t*)&dst[28+ y*stride]= color4;
2370 }
2371 }else if(b_w==16){
2692ceab
MN
2372 for(y=0; y < b_h; y++){
2373 *(uint32_t*)&dst[0 + y*stride]= color4;
2374 *(uint32_t*)&dst[4 + y*stride]= color4;
2375 *(uint32_t*)&dst[8 + y*stride]= color4;
2376 *(uint32_t*)&dst[12+ y*stride]= color4;
2377 }
2378 }else if(b_w==8){
2379 for(y=0; y < b_h; y++){
2380 *(uint32_t*)&dst[0 + y*stride]= color4;
2381 *(uint32_t*)&dst[4 + y*stride]= color4;
2382 }
2383 }else if(b_w==4){
2384 for(y=0; y < b_h; y++){
2385 *(uint32_t*)&dst[0 + y*stride]= color4;
2386 }
2387 }else{
2388 for(y=0; y < b_h; y++){
2389 for(x=0; x < b_w; x++){
2390 dst[x + y*stride]= color;
2391 }
ff158dc9
MN
2392 }
2393 }
2394 }else{
8c36eaaa 2395 uint8_t *src= s->last_picture[block->ref].data[plane_index];
ff158dc9
MN
2396 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
2397 int mx= block->mx*scale;
2398 int my= block->my*scale;
ec697587
MN
2399 const int dx= mx&15;
2400 const int dy= my&15;
80e44bc3 2401 const int tab_index= 3 - (b_w>>2) + (b_w>>4);
61d6e445
MN
2402 sx += (mx>>4) - (HTAPS_MAX/2-1);
2403 sy += (my>>4) - (HTAPS_MAX/2-1);
ff158dc9 2404 src += sx + sy*stride;
61d6e445
MN
2405 if( (unsigned)sx >= w - b_w - (HTAPS_MAX-2)
2406 || (unsigned)sy >= h - b_h - (HTAPS_MAX-2)){
2407 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h);
ff158dc9
MN
2408 src= tmp + MB_SIZE;
2409 }
87f20c2f
MN
2410// assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
2411// assert(!(b_w&(b_w-1)));
2692ceab 2412 assert(b_w>1 && b_h>1);
1015631b 2413 assert(tab_index>=0 && tab_index<4 || b_w==32);
7d7f57d9
MN
2414 if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc )
2415 mc_block(&s->plane[plane_index], dst, src, tmp, stride, b_w, b_h, dx, dy);
1015631b
LM
2416 else if(b_w==32){
2417 int y;
2418 for(y=0; y<b_h; y+=16){
7d7f57d9
MN
2419 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
2420 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
1015631b
LM
2421 }
2422 }else if(b_w==b_h)
7d7f57d9 2423 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
2692ceab 2424 else if(b_w==2*b_h){
7d7f57d9
MN
2425 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 3 + 3*stride,stride);
2426 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
2692ceab
MN
2427 }else{
2428 assert(2*b_w==b_h);
7d7f57d9
MN
2429 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 3 + 3*stride ,stride);
2430 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
2692ceab 2431 }
ff158dc9
MN
2432 }
2433}
2434
9dd6c804 2435void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
059715a4
RE
2436 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
2437 int y, x;
d593e329 2438 IDWTELEM * dst;
059715a4 2439 for(y=0; y<b_h; y++){
19032450 2440 //FIXME ugly misuse of obmc_stride
9dd6c804
PI
2441 const uint8_t *obmc1= obmc + y*obmc_stride;
2442 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2443 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2444 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
059715a4
RE
2445 dst = slice_buffer_get_line(sb, src_y + y);
2446 for(x=0; x<b_w; x++){
2447 int v= obmc1[x] * block[3][x + y*src_stride]
2448 +obmc2[x] * block[2][x + y*src_stride]
2449 +obmc3[x] * block[1][x + y*src_stride]
2450 +obmc4[x] * block[0][x + y*src_stride];
2451
2452 v <<= 8 - LOG2_OBMC_MAX;
2453 if(FRAC_BITS != 8){
059715a4
RE
2454 v >>= 8 - FRAC_BITS;
2455 }
2456 if(add){
2457 v += dst[x + src_x];
2458 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2459 if(v&(~255)) v= ~(v>>31);
2460 dst8[x + y*src_stride] = v;
2461 }else{
2462 dst[x + src_x] -= v;
2463 }
2464 }
2465 }
2466}
2467
ff158dc9 2468//FIXME name clenup (b_w, block_w, b_width stuff)
d593e329 2469static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
a0d1931c
Y
2470 const int b_width = s->b_width << s->block_max_depth;
2471 const int b_height= s->b_height << s->block_max_depth;
2472 const int b_stride= b_width;
2473 BlockNode *lt= &s->block[b_x + b_y*b_stride];
2474 BlockNode *rt= lt+1;
2475 BlockNode *lb= lt+b_stride;
2476 BlockNode *rb= lb+1;
115329f1 2477 uint8_t *block[4];
cc884a35
MN
2478 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
2479 uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
2480 uint8_t *ptmp;
a0d1931c
Y
2481 int x,y;
2482
2483 if(b_x<0){
2484 lt= rt;
2485 lb= rb;
2486 }else if(b_x + 1 >= b_width){
2487 rt= lt;
2488 rb= lb;
2489 }
2490 if(b_y<0){
2491 lt= lb;
2492 rt= rb;
2493 }else if(b_y + 1 >= b_height){
2494 lb= lt;
2495 rb= rt;
2496 }
115329f1 2497
a0d1931c
Y
2498 if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
2499 obmc -= src_x;
2500 b_w += src_x;
f7e89c73 2501 if(!sliced && !offset_dst)
1015631b 2502 dst -= src_x;
ff158dc9
MN
2503 src_x=0;
2504 }else if(src_x + b_w > w){
2505 b_w = w - src_x;
2506 }
2507 if(src_y<0){
2508 obmc -= src_y*obmc_stride;
2509 b_h += src_y;
f7e89c73 2510 if(!sliced && !offset_dst)
1015631b 2511 dst -= src_y*dst_stride;
ff158dc9
MN
2512 src_y=0;
2513 }else if(src_y + b_h> h){
2514 b_h = h - src_y;
791e7b83 2515 }
115329f1 2516
ff158dc9 2517 if(b_w<=0 || b_h<=0) return;
155ec6ed 2518
cc884a35 2519assert(src_stride > 2*MB_SIZE + 5);
f7e89c73 2520 if(!sliced && offset_dst)
1015631b 2521 dst += src_x + src_y*dst_stride;
715a97f0 2522 dst8+= src_x + src_y*src_stride;
ff158dc9
MN
2523// src += src_x + src_y*src_stride;
2524
cc884a35
MN
2525 ptmp= tmp + 3*tmp_step;
2526 block[0]= ptmp;
2527 ptmp+=tmp_step;
8c36eaaa 2528 pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
ff158dc9
MN
2529
2530 if(same_block(lt, rt)){
2531 block[1]= block[0];
791e7b83 2532 }else{
cc884a35
MN
2533 block[1]= ptmp;
2534 ptmp+=tmp_step;
8c36eaaa 2535 pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
ff158dc9 2536 }
115329f1 2537
ff158dc9
MN
2538 if(same_block(lt, lb)){
2539 block[2]= block[0];
2540 }else if(same_block(rt, lb)){
2541 block[2]= block[1];
2542 }else{
cc884a35
MN
2543 block[2]= ptmp;
2544 ptmp+=tmp_step;
8c36eaaa 2545 pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
ff158dc9 2546 }
791e7b83 2547
ff158dc9
MN
2548 if(same_block(lt, rb) ){
2549 block[3]= block[0];
2550 }else if(same_block(rt, rb)){
2551 block[3]= block[1];
2552 }else if(same_block(lb, rb)){
2553 block[3]= block[2];
2554 }else{
cc884a35 2555 block[3]= ptmp;
8c36eaaa 2556 pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
ff158dc9
MN
2557 }
2558#if 0
2559 for(y=0; y<b_h; y++){
2560 for(x=0; x<b_w; x++){
2561 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2562 if(add) dst[x + y*dst_stride] += v;
2563 else dst[x + y*dst_stride] -= v;
2564 }
2565 }
2566 for(y=0; y<b_h; y++){
2567 uint8_t *obmc2= obmc + (obmc_stride>>1);
2568 for(x=0; x<b_w; x++){
2569 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2570 if(add) dst[x + y*dst_stride] += v;
2571 else dst[x + y*dst_stride] -= v;
2572 }
2573 }
2574 for(y=0; y<b_h; y++){
2575 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2576 for(x=0; x<b_w; x++){
2577 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2578 if(add) dst[x + y*dst_stride] += v;
2579 else dst[x + y*dst_stride] -= v;
2580 }
2581 }
2582 for(y=0; y<b_h; y++){
2583 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2584 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2585 for(x=0; x<b_w; x++){
2586 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2587 if(add) dst[x + y*dst_stride] += v;
2588 else dst[x + y*dst_stride] -= v;
2589 }
2590 }
2591#else
f7e89c73
LM
2592 if(sliced){
2593 START_TIMER
2594
2595 s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
2596 STOP_TIMER("inner_add_yblock")
2597 }else
ff158dc9 2598 for(y=0; y<b_h; y++){
19032450 2599 //FIXME ugly misuse of obmc_stride
9dd6c804
PI
2600 const uint8_t *obmc1= obmc + y*obmc_stride;
2601 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2602 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2603 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
ff158dc9
MN
2604 for(x=0; x<b_w; x++){
2605 int v= obmc1[x] * block[3][x + y*src_stride]
2606 +obmc2[x] * block[2][x + y*src_stride]
2607 +obmc3[x] * block[1][x + y*src_stride]
2608 +obmc4[x] * block[0][x + y*src_stride];
115329f1 2609
715a97f0 2610 v <<= 8 - LOG2_OBMC_MAX;
034aff03 2611 if(FRAC_BITS != 8){
034aff03
MN
2612 v >>= 8 - FRAC_BITS;
2613 }
715a97f0
MN
2614 if(add){
2615 v += dst[x + y*dst_stride];
2616 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2617 if(v&(~255)) v= ~(v>>31);
2618 dst8[x + y*src_stride] = v;
2619 }else{
2620 dst[x + y*dst_stride] -= v;
2621 }
791e7b83
MN
2622 }
2623 }
96e2fbf2 2624#endif /* 0 */
791e7b83
MN
2625}
2626
d593e329 2627static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){
a0d1931c
Y
2628 Plane *p= &s->plane[plane_index];
2629 const int mb_w= s->b_width << s->block_max_depth;
2630 const int mb_h= s->b_height << s->block_max_depth;
2631 int x, y, mb_x;
2632 int block_size = MB_SIZE >> s->block_max_depth;
2633 int block_w = plane_index ? block_size/2 : block_size;
2634 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2635 int obmc_stride= plane_index ? block_size : 2*block_size;
2636 int ref_stride= s->current_picture.linesize[plane_index];
a0d1931c
Y
2637 uint8_t *dst8= s->current_picture.data[plane_index];
2638 int w= p->width;
2639 int h= p->height;
2640 START_TIMER
115329f1 2641
a0d1931c
Y
2642 if(s->keyframe || (s->avctx->debug&512)){
2643 if(mb_y==mb_h)
2644 return;
2645
2646 if(add){
86e59cc0 2647 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
a0d1931c
Y
2648 {
2649// DWTELEM * line = slice_buffer_get_line(sb, y);
d593e329 2650 IDWTELEM * line = sb->line[y];
a0d1931c
Y
2651 for(x=0; x<w; x++)
2652 {
2653// int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2654 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2655 v >>= FRAC_BITS;
2656 if(v&(~255)) v= ~(v>>31);
2657 dst8[x + y*ref_stride]= v;
2658 }
2659 }
2660 }else{
86e59cc0 2661 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
a0d1931c
Y
2662 {
2663// DWTELEM * line = slice_buffer_get_line(sb, y);
d593e329 2664 IDWTELEM * line = sb->line[y];
a0d1931c
Y
2665 for(x=0; x<w; x++)
2666 {
2667 line[x] -= 128 << FRAC_BITS;
2668// buf[x + y*w]-= 128<<FRAC_BITS;
2669 }
2670 }
2671 }
2672
2673 return;
2674 }
115329f1 2675
a0d1931c
Y
2676 for(mb_x=0; mb_x<=mb_w; mb_x++){
2677 START_TIMER
2678
f7e89c73 2679 add_yblock(s, 1, sb, old_buffer, dst8, obmc,
a0d1931c
Y
2680 block_w*mb_x - block_w/2,
2681 block_w*mb_y - block_w/2,
2682 block_w, block_w,
2683 w, h,
2684 w, ref_stride, obmc_stride,
2685 mb_x - 1, mb_y - 1,
f7e89c73 2686 add, 0, plane_index);
115329f1 2687
a0d1931c
Y
2688 STOP_TIMER("add_yblock")
2689 }
115329f1 2690
a0d1931c
Y
2691 STOP_TIMER("predict_slice")
2692}
2693
d593e329 2694static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){
791e7b83 2695 Plane *p= &s->plane[plane_index];
155ec6ed
MN
2696 const int mb_w= s->b_width << s->block_max_depth;
2697 const int mb_h= s->b_height << s->block_max_depth;
f9e6ebf7 2698 int x, y, mb_x;
155ec6ed
MN
2699 int block_size = MB_SIZE >> s->block_max_depth;
2700 int block_w = plane_index ? block_size/2 : block_size;
ff158dc9 2701 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
51d6a3cf 2702 const int obmc_stride= plane_index ? block_size : 2*block_size;
715a97f0 2703 int ref_stride= s->current_picture.linesize[plane_index];
715a97f0 2704 uint8_t *dst8= s->current_picture.data[plane_index];
791e7b83
MN
2705 int w= p->width;
2706 int h= p->height;
fff6d4ea 2707 START_TIMER
115329f1 2708
ff158dc9 2709 if(s->keyframe || (s->avctx->debug&512)){
f9e6ebf7
LM
2710 if(mb_y==mb_h)
2711 return;
2712
715a97f0 2713 if(add){
86e59cc0 2714 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
715a97f0
MN
2715 for(x=0; x<w; x++){
2716 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2717 v >>= FRAC_BITS;
2718 if(v&(~255)) v= ~(v>>31);
2719 dst8[x + y*ref_stride]= v;
2720 }
2721 }
2722 }else{
86e59cc0 2723 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
715a97f0
MN
2724 for(x=0; x<w; x++){
2725 buf[x + y*w]-= 128<<FRAC_BITS;
2726 }
ff158dc9 2727 }
791e7b83 2728 }
ff158dc9
MN
2729
2730 return;
791e7b83 2731 }
115329f1 2732
ff158dc9 2733 for(mb_x=0; mb_x<=mb_w; mb_x++){
fff6d4ea 2734 START_TIMER
ff158dc9 2735
f7e89c73 2736 add_yblock(s, 0, NULL, buf, dst8, obmc,
ff158dc9 2737 block_w*mb_x - block_w/2,
791e7b83 2738 block_w*mb_y - block_w/2,
ff158dc9 2739 block_w, block_w,
791e7b83 2740 w, h,
ff158dc9
MN
2741 w, ref_stride, obmc_stride,
2742 mb_x - 1, mb_y - 1,
1015631b 2743 add, 1, plane_index);
115329f1 2744
ff158dc9 2745 STOP_TIMER("add_yblock")
791e7b83 2746 }
115329f1 2747
f9e6ebf7
LM
2748 STOP_TIMER("predict_slice")
2749}
2750
d593e329 2751static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){
f9e6ebf7
LM
2752 const int mb_h= s->b_height << s->block_max_depth;
2753 int mb_y;
2754 for(mb_y=0; mb_y<=mb_h; mb_y++)
2755 predict_slice(s, buf, plane_index, add, mb_y);
791e7b83
MN
2756}
2757
51d6a3cf
MN
2758static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
2759 int i, x2, y2;
2760 Plane *p= &s->plane[plane_index];
2761 const int block_size = MB_SIZE >> s->block_max_depth;
2762 const int block_w = plane_index ? block_size/2 : block_size;
2763 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2764 const int obmc_stride= plane_index ? block_size : 2*block_size;
2765 const int ref_stride= s->current_picture.linesize[plane_index];
51d6a3cf 2766 uint8_t *src= s-> input_picture.data[plane_index];
d593e329 2767 IDWTELEM *dst= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; //FIXME change to unsigned
51d6a3cf
MN
2768 const int b_stride = s->b_width << s->block_max_depth;
2769 const int w= p->width;
2770 const int h= p->height;
2771 int index= mb_x + mb_y*b_stride;
2772 BlockNode *b= &s->block[index];
2773 BlockNode backup= *b;
2774 int ab=0;
2775 int aa=0;
2776
2777 b->type|= BLOCK_INTRA;
2778 b->color[plane_index]= 0;
d593e329 2779 memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM));
51d6a3cf
MN
2780
2781 for(i=0; i<4; i++){
2782 int mb_x2= mb_x + (i &1) - 1;
2783 int mb_y2= mb_y + (i>>1) - 1;
2784 int x= block_w*mb_x2 + block_w/2;
2785 int y= block_w*mb_y2 + block_w/2;
2786
f7e89c73 2787 add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
1015631b 2788 x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
51d6a3cf
MN
2789
2790 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
2791 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
2792 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
2793 int obmc_v= obmc[index];
1015631b 2794 int d;
51d6a3cf
MN
2795 if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
2796 if(x<0) obmc_v += obmc[index + block_w];
2797 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
2798 if(x+block_w>w) obmc_v += obmc[index - block_w];
2799 //FIXME precalc this or simplify it somehow else
2800
1015631b
LM
2801 d = -dst[index] + (1<<(FRAC_BITS-1));
2802 dst[index] = d;
2803 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
51d6a3cf
MN
2804 aa += obmc_v * obmc_v; //FIXME precalclate this
2805 }
2806 }
2807 }
2808 *b= backup;
2809
755bfeab 2810 return av_clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we should not need clipping
51d6a3cf
MN
2811}
2812
b104969f
LM
2813static inline int get_block_bits(SnowContext *s, int x, int y, int w){
2814 const int b_stride = s->b_width << s->block_max_depth;
2815 const int b_height = s->b_height<< s->block_max_depth;
2816 int index= x + y*b_stride;
aadcc5ce
PI
2817 const BlockNode *b = &s->block[index];
2818 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2819 const BlockNode *top = y ? &s->block[index-b_stride] : &null_block;
2820 const BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left;
2821 const BlockNode *tr = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
b104969f 2822 int dmx, dmy;
c26abfa5
DB
2823// int mx_context= av_log2(2*FFABS(left->mx - top->mx));
2824// int my_context= av_log2(2*FFABS(left->my - top->my));
b104969f
LM
2825
2826 if(x<0 || x>=b_stride || y>=b_height)
2827 return 0;
b104969f
LM
2828/*
28291 0 0
283001X 1-2 1
2831001XX 3-6 2-3
28320001XXX 7-14 4-7
283300001XXXX 15-30 8-15
2834*/
2835//FIXME try accurate rate
2836//FIXME intra and inter predictors if surrounding blocks arent the same type
2837 if(b->type & BLOCK_INTRA){
c26abfa5
DB
2838 return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0]))
2839 + av_log2(2*FFABS(left->color[1] - b->color[1]))
2840 + av_log2(2*FFABS(left->color[2] - b->color[2])));
85fc0e75
LM
2841 }else{
2842 pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
2843 dmx-= b->mx;
2844 dmy-= b->my;
c26abfa5
DB
2845 return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda
2846 + av_log2(2*FFABS(dmy))
8c36eaaa 2847 + av_log2(2*b->ref));
85fc0e75 2848 }
b104969f
LM
2849}
2850
1015631b 2851static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
51d6a3cf
MN
2852 Plane *p= &s->plane[plane_index];
2853 const int block_size = MB_SIZE >> s->block_max_depth;
2854 const int block_w = plane_index ? block_size/2 : block_size;
51d6a3cf
MN
2855 const int obmc_stride= plane_index ? block_size : 2*block_size;
2856 const int ref_stride= s->current_picture.linesize[plane_index];
51d6a3cf 2857 uint8_t *dst= s->current_picture.data[plane_index];
1015631b 2858 uint8_t *src= s-> input_picture.data[plane_index];
d593e329 2859 IDWTELEM *pred= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
1015631b 2860 uint8_t cur[ref_stride*2*MB_SIZE]; //FIXME alignment
61d6e445 2861 uint8_t tmp[ref_stride*(2*MB_SIZE+HTAPS_MAX-1)];
51d6a3cf
MN
2862 const int b_stride = s->b_width << s->block_max_depth;
2863 const int b_height = s->b_height<< s->block_max_depth;
2864 const int w= p->width;
2865 const int h= p->height;
1015631b 2866 int distortion;
51d6a3cf
MN
2867 int rate= 0;
2868 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
1015631b
LM
2869 int sx= block_w*mb_x - block_w/2;
2870 int sy= block_w*mb_y - block_w/2;
561a18d3
RE
2871 int x0= FFMAX(0,-sx);
2872 int y0= FFMAX(0,-sy);
2873 int x1= FFMIN(block_w*2, w-sx);
2874 int y1= FFMIN(block_w*2, h-sy);
1015631b
LM
2875 int i,x,y;
2876
8c36eaaa 2877 pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
1015631b
LM
2878
2879 for(y=y0; y<y1; y++){
2880 const uint8_t *obmc1= obmc_edged + y*obmc_stride;
d593e329 2881 const IDWTELEM *pred1 = pred + y*obmc_stride;
1015631b
LM
2882 uint8_t *cur1 = cur + y*ref_stride;
2883 uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
2884 for(x=x0; x<x1; x++){
d593e329 2885#if FRAC_BITS >= LOG2_OBMC_MAX
1015631b 2886 int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
d593e329
MN
2887#else
2888 int v = (cur1[x] * obmc1[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS);
2889#endif
1015631b
LM
2890 v = (v + pred1[x]) >> FRAC_BITS;
2891 if(v&(~255)) v= ~(v>>31);
2892 dst1[x] = v;
51d6a3cf 2893 }
1015631b 2894 }
51d6a3cf 2895
561a18d3
RE
2896 /* copy the regions where obmc[] = (uint8_t)256 */
2897 if(LOG2_OBMC_MAX == 8
2898 && (mb_x == 0 || mb_x == b_stride-1)
2899 && (mb_y == 0 || mb_y == b_height-1)){
2900 if(mb_x == 0)
2901 x1 = block_w;
2902 else
2903 x0 = block_w;
2904 if(mb_y == 0)
2905 y1 = block_w;
2906 else
2907 y0 = block_w;
2908 for(y=y0; y<y1; y++)
2909 memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
2910 }
2911
1015631b 2912 if(block_w==16){
871371a7
LM
2913 /* FIXME rearrange dsputil to fit 32x32 cmp functions */
2914 /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
2915 /* FIXME cmps overlap but don't cover the wavelet's whole support,
2916 * so improving the score of one block is not strictly guaranteed to
2917 * improve the score of the whole frame, so iterative motion est
2918 * doesn't always converge. */
2919 if(s->avctx->me_cmp == FF_CMP_W97)
486497e0 2920 distortion = w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
871371a7 2921 else if(s->avctx->me_cmp == FF_CMP_W53)
486497e0 2922 distortion = w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
871371a7
LM
2923 else{
2924 distortion = 0;
2925 for(i=0; i<4; i++){
2926 int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
2927 distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
2928 }
1015631b
LM
2929 }
2930 }else{
2931 assert(block_w==8);
2932 distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
51d6a3cf
MN
2933 }
2934
2935 if(plane_index==0){
2936 for(i=0; i<4; i++){
2937/* ..RRr
2938 * .RXx.
2939 * rxx..
2940 */
b104969f
LM
2941 rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
2942 }
48d1b9a1
LM
2943 if(mb_x == b_stride-2)
2944 rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
b104969f
LM
2945 }
2946 return distortion + rate*penalty_factor;
2947}
2948
2949static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
2950 int i, y2;
2951 Plane *p= &s->plane[plane_index];
2952 const int block_size = MB_SIZE >> s->block_max_depth;
2953 const int block_w = plane_index ? block_size/2 : block_size;
2954 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2955 const int obmc_stride= plane_index ? block_size : 2*block_size;
2956 const int ref_stride= s->current_picture.linesize[plane_index];
b104969f
LM
2957 uint8_t *dst= s->current_picture.data[plane_index];
2958 uint8_t *src= s-> input_picture.data[plane_index];
d593e329 2959 static const IDWTELEM zero_dst[4096]; //FIXME
b104969f 2960 const int b_stride = s->b_width << s->block_max_depth;
b104969f
LM
2961 const int w= p->width;
2962 const int h= p->height;
2963 int distortion= 0;
2964 int rate= 0;
2965 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
2966
2967 for(i=0; i<9; i++){
2968 int mb_x2= mb_x + (i%3) - 1;
2969 int mb_y2= mb_y + (i/3) - 1;
2970 int x= block_w*mb_x2 + block_w/2;
2971 int y= block_w*mb_y2 + block_w/2;
2972
f7e89c73 2973 add_yblock(s, 0, NULL, zero_dst, dst, obmc,
b104969f
LM
2974 x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
2975
2976 //FIXME find a cleaner/simpler way to skip the outside stuff
2977 for(y2= y; y2<0; y2++)
2978 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2979 for(y2= h; y2<y+block_w; y2++)
2980 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2981 if(x<0){
2982 for(y2= y; y2<y+block_w; y2++)
2983 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
51d6a3cf 2984 }
b104969f
LM
2985 if(x+block_w > w){
2986 for(y2= y; y2<y+block_w; y2++)
2987 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
2988 }
2989
2990 assert(block_w== 8 || block_w==16);
2991 distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
51d6a3cf
MN
2992 }
2993
b104969f
LM
2994 if(plane_index==0){
2995 BlockNode *b= &s->block[mb_x+mb_y*b_stride];
2996 int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
2997
2998/* ..RRRr
2999 * .RXXx.
3000 * .RXXx.
3001 * rxxx.
3002 */
3003 if(merged)
3004 rate = get_block_bits(s, mb_x, mb_y, 2);
3005 for(i=merged?4:0; i<9; i++){
3006 static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
3007 rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
3008 }
3009 }
51d6a3cf
MN
3010 return distortion + rate*penalty_factor;
3011}
3012
849f1035 3013static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
51d6a3cf
MN
3014 const int b_stride= s->b_width << s->block_max_depth;
3015 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
3016 BlockNode backup= *block;
3017 int rd, index, value;
3018
3019 assert(mb_x>=0 && mb_y>=0);
735f9f34 3020 assert(mb_x<b_stride);
51d6a3cf
MN
3021
3022 if(intra){
3023 block->color[0] = p[0];
3024 block->color[1] = p[1];
3025 block->color[2] = p[2];
3026 block->type |= BLOCK_INTRA;
3027 }else{
3028 index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
8c36eaaa 3029 value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12);
51d6a3cf
MN
3030 if(s->me_cache[index] == value)
3031 return 0;
3032 s->me_cache[index]= value;
3033
3034 block->mx= p[0];
3035 block->my= p[1];
3036 block->type &= ~BLOCK_INTRA;
3037 }
3038
1015631b 3039 rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
51d6a3cf
MN
3040
3041//FIXME chroma
3042 if(rd < *best_rd){
3043 *best_rd= rd;
3044 return 1;
3045 }else{
3046 *block= backup;
3047 return 0;
3048 }
3049}
3050
52137f2f 3051/* special case for int[2] args we discard afterward, fixes compilation prob with gcc 2.95 */
849f1035 3052static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
52137f2f 3053 int p[2] = {p0, p1};
fc8c4992 3054 return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
52137f2f
FR
3055}
3056
849f1035 3057static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
b104969f
LM
3058 const int b_stride= s->b_width << s->block_max_depth;
3059 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
3060 BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
3061 int rd, index, value;
3062
3063 assert(mb_x>=0 && mb_y>=0);
3064 assert(mb_x<b_stride);
3065 assert(((mb_x|mb_y)&1) == 0);
3066
3067 index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
8c36eaaa 3068 value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12);
b104969f
LM
3069 if(s->me_cache[index] == value)
3070 return 0;
3071 s->me_cache[index]= value;
3072
3073 block->mx= p0;
3074 block->my= p1;
8c36eaaa 3075 block->ref= ref;
b104969f
LM
3076 block->type &= ~BLOCK_INTRA;
3077 block[1]= block[b_stride]= block[b_stride+1]= *block;
3078
3079 rd= get_4block_rd(s, mb_x, mb_y, 0);
3080
3081//FIXME chroma
3082 if(rd < *best_rd){
3083 *best_rd= rd;
3084 return 1;
3085 }else{
3086 block[0]= backup[0];
3087 block[1]= backup[1];
3088 block[b_stride]= backup[2];
3089 block[b_stride+1]= backup[3];
3090 return 0;
3091 }
3092}
3093
51d6a3cf
MN
3094static void iterative_me(SnowContext *s){
3095 int pass, mb_x, mb_y;
3096 const int b_width = s->b_width << s->block_max_depth;
3097 const int b_height= s->b_height << s->block_max_depth;
3098 const int b_stride= b_width;
3099 int color[3];
3100
8f8ae495
LM
3101 {
3102 RangeCoder r = s->c;
3103 uint8_t state[sizeof(s->block_state)];
3104 memcpy(state, s->block_state, sizeof(s->block_state));
3105 for(mb_y= 0; mb_y<s->b_height; mb_y++)
3106 for(mb_x= 0; mb_x<s->b_width; mb_x++)
3107 encode_q_branch(s, 0, mb_x, mb_y);
3108 s->c = r;
3109 memcpy(s->block_state, state, sizeof(s->block_state));
3110 }
3111
871371a7 3112 for(pass=0; pass<25; pass++){
51d6a3cf
MN
3113 int change= 0;
3114
3115 for(mb_y= 0; mb_y<b_height; mb_y++){
3116 for(mb_x= 0; mb_x<b_width; mb_x++){
8c36eaaa
LM
3117 int dia_change, i, j, ref;
3118 int best_rd= INT_MAX, ref_rd;
3119 BlockNode backup, ref_b;
51d6a3cf
MN
3120 const int index= mb_x + mb_y * b_stride;
3121 BlockNode *block= &s->block[index];
7f21a9a7
LM
3122 BlockNode *tb = mb_y ? &s->block[index-b_stride ] : NULL;
3123 BlockNode *lb = mb_x ? &s->block[index -1] : NULL;
3124 BlockNode *rb = mb_x+1<b_width ? &s->block[index +1] : NULL;
3125 BlockNode *bb = mb_y+1<b_height ? &s->block[index+b_stride ] : NULL;
3126 BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : NULL;
3127 BlockNode *trb= mb_x+1<b_width && mb_y ? &s->block[index-b_stride+1] : NULL;
3128 BlockNode *blb= mb_x && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
3129 BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
1015631b
LM
3130 const int b_w= (MB_SIZE >> s->block_max_depth);
3131 uint8_t obmc_edged[b_w*2][b_w*2];
51d6a3cf
MN
3132
3133 if(pass && (block->type & BLOCK_OPT))
3134 continue;
3135 block->type |= BLOCK_OPT;
3136
3137 backup= *block;
3138
3139 if(!s->me_cache_generation)
3140 memset(s->me_cache, 0, sizeof(s->me_cache));
3141 s->me_cache_generation += 1<<22;
3142
1015631b
LM
3143 //FIXME precalc
3144 {
3145 int x, y;
3146 memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
3147 if(mb_x==0)
3148 for(y=0; y<b_w*2; y++)
3149 memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
3150 if(mb_x==b_stride-1)
3151 for(y=0; y<b_w*2; y++)
3152 memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
3153 if(mb_y==0){
3154 for(x=0; x<b_w*2; x++)
3155 obmc_edged[0][x] += obmc_edged[b_w-1][x];
3156 for(y=1; y<b_w; y++)
3157 memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
3158 }
3159 if(mb_y==b_height-1){
3160 for(x=0; x<b_w*2; x++)
3161 obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
3162 for(y=b_w; y<b_w*2-1; y++)
3163 memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
3164 }
3165 }
3166
3167 //skip stuff outside the picture
3168 if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1)
3169 {
3170 uint8_t *src= s-> input_picture.data[0];
3171 uint8_t *dst= s->current_picture.data[0];
3172 const int stride= s->current_picture.linesize[0];
3173 const int block_w= MB_SIZE >> s->block_max_depth;
3174 const int sx= block_w*mb_x - block_w/2;
3175 const int sy= block_w*mb_y - block_w/2;
3176 const int w= s->plane[0].width;
3177 const int h= s->plane[0].height;
3178 int y;
3179
3180 for(y=sy; y<0; y++)
3181 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3182 for(y=h; y<sy+block_w*2; y++)
3183 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3184 if(sx<0){
3185 for(y=sy; y<sy+block_w*2; y++)
3186 memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
3187 }
3188 if(sx+block_w*2 > w){
3189 for(y=sy; y<sy+block_w*2; y++)
3190 memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
3191 }
3192 }
3193
3194 // intra(black) = neighbors' contribution to the current block
3195 for(i=0; i<3; i++)
3196 color[i]= get_dc(s, mb_x, mb_y, i);
3197
755bfeab 3198 // get previous score (cannot be cached due to OBMC)
48d1b9a1
LM
3199 if(pass > 0 && (block->type&BLOCK_INTRA)){
3200 int color0[3]= {block->color[0], block->color[1], block->color[2]};
3201 check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
3202 }else
fc8c4992 3203 check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);
48d1b9a1 3204
8c36eaaa
LM
3205 ref_b= *block;
3206 ref_rd= best_rd;
3207 for(ref=0; ref < s->ref_frames; ref++){
3208 int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
3209 if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
3210 continue;
3211 block->ref= ref;
3212 best_rd= INT_MAX;
3213
3214 check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd);
3215 check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
7f21a9a7 3216 if(tb)
8c36eaaa 3217 check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd);
7f21a9a7 3218 if(lb)
8c36eaaa 3219 check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd);
7f21a9a7 3220 if(rb)
8c36eaaa 3221 check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd);
7f21a9a7 3222 if(bb)
8c36eaaa
LM
3223 check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd);
3224
3225 /* fullpel ME */
3226 //FIXME avoid subpel interpol / round to nearest integer
3227 do{
3228 dia_change=0;
3229 for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
3230 for(j=0; j<i; j++){
3231 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3232 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3233 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3234 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3235 }
51d6a3cf 3236 }
8c36eaaa
LM
3237 }while(dia_change);
3238 /* subpel ME */
3239 do{
3240 static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
3241 dia_change=0;
3242 for(i=0; i<8; i++)
3243 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
3244 }while(dia_change);
3245 //FIXME or try the standard 2 pass qpel or similar
3246
3247 mvr[0][0]= block->mx;
3248 mvr[0][1]= block->my;
3249 if(ref_rd > best_rd){
3250 ref_rd= best_rd;
3251 ref_b= *block;
51d6a3cf 3252 }
8c36eaaa
LM
3253 }
3254 best_rd= ref_rd;
3255 *block= ref_b;
13705b69 3256#if 1
1015631b 3257 check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
51d6a3cf 3258 //FIXME RD style color selection
13705b69 3259#endif
51d6a3cf 3260 if(!same_block(block, &backup)){
7f21a9a7
LM
3261 if(tb ) tb ->type &= ~BLOCK_OPT;
3262 if(lb ) lb ->type &= ~BLOCK_OPT;
3263 if(rb ) rb ->type &= ~BLOCK_OPT;
3264 if(bb ) bb ->type &= ~BLOCK_OPT;
3265 if(tlb) tlb->type &= ~BLOCK_OPT;
3266 if(trb) trb->type &= ~BLOCK_OPT;
3267 if(blb) blb->type &= ~BLOCK_OPT;
3268 if(brb) brb->type &= ~BLOCK_OPT;
51d6a3cf
MN
3269 change ++;
3270 }
3271 }
3272 }
3273 av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
3274 if(!change)
3275 break;
3276 }
b104969f
LM
3277
3278 if(s->block_max_depth == 1){
3279 int change= 0;
3280 for(mb_y= 0; mb_y<b_height; mb_y+=2){
3281 for(mb_x= 0; mb_x<b_width; mb_x+=2){
7f21a9a7 3282 int i;
b104969f
LM
3283 int best_rd, init_rd;
3284 const int index= mb_x + mb_y * b_stride;
3285 BlockNode *b[4];
3286
3287 b[0]= &s->block[index];
3288 b[1]= b[0]+1;
3289 b[2]= b[0]+b_stride;
3290 b[3]= b[2]+1;
3291 if(same_block(b[0], b[1]) &&
3292 same_block(b[0], b[2]) &&
3293 same_block(b[0], b[3]))
3294 continue;
3295
3296 if(!s->me_cache_generation)
3297 memset(s->me_cache, 0, sizeof(s->me_cache));
3298 s->me_cache_generation += 1<<22;
3299
3300 init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
3301
8c36eaaa 3302 //FIXME more multiref search?
b104969f
LM
3303 check_4block_inter(s, mb_x, mb_y,
3304 (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
8c36eaaa 3305 (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);
b104969f
LM
3306
3307 for(i=0; i<4; i++)
3308 if(!(b[i]->type&BLOCK_INTRA))
8c36eaaa 3309 check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);
b104969f
LM
3310
3311 if(init_rd != best_rd)
3312 change++;