Make sure the next used frame is released so get_buffer() won't fail.
[libav.git] / libavcodec / snow.c
CommitLineData
791e7b83
MN
1/*
2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3 *
b78e7197
DB
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
791e7b83
MN
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
b78e7197 9 * version 2.1 of the License, or (at your option) any later version.
791e7b83 10 *
b78e7197 11 * FFmpeg is distributed in the hope that it will be useful,
791e7b83
MN
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
b78e7197 17 * License along with FFmpeg; if not, write to the Free Software
5509bffa 18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
791e7b83
MN
19 */
20
21#include "avcodec.h"
791e7b83 22#include "dsputil.h"
059715a4 23#include "snow.h"
28869757
MN
24
25#include "rangecoder.h"
199436b9 26#include "mathops.h"
791e7b83
MN
27
28#include "mpegvideo.h"
29
30#undef NDEBUG
31#include <assert.h>
32
791e7b83
MN
33static const int8_t quant3[256]={
34 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
35 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
36 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
37 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
43-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
44-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
45-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
46-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
47-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
48-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
49-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
50};
51static const int8_t quant3b[256]={
52 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
53 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
54 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
55 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
57 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
60-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
61-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
62-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
63-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
64-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
65-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
66-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
68};
538a3841
MN
69static const int8_t quant3bA[256]={
70 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
71 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
72 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
73 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
74 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
75 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
76 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
77 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
78 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
79 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
80 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
81 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
82 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
83 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
84 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
85 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
86};
791e7b83
MN
87static const int8_t quant5[256]={
88 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
89 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
90 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
91 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
92 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
93 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
94 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
95 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
96-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
97-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
98-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
99-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
100-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
101-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
102-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
103-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
104};
105static const int8_t quant7[256]={
106 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
107 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
108 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
109 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
110 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
111 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
112 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
113 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
114-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
115-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
116-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
117-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
118-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
119-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
120-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
121-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
122};
123static const int8_t quant9[256]={
124 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
125 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
126 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
127 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
128 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
129 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
130 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
131 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
132-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
133-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
134-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
135-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
136-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
137-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
138-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
139-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
140};
141static const int8_t quant11[256]={
142 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
143 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
144 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
145 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
146 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
147 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
148 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
149 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
150-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
151-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
152-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
153-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
154-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
155-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
156-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
157-4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
158};
159static const int8_t quant13[256]={
160 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
161 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
162 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
163 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
164 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
165 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
166 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
167 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
168-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
169-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
170-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
171-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
172-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
173-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
174-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
175-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
176};
177
791e7b83
MN
178#if 0 //64*cubic
179static const uint8_t obmc32[1024]={
fa731ccd
MN
180 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
181 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
182 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
183 0, 0, 4, 4, 8, 8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12, 8, 8, 4, 4, 0, 0,
184 0, 0, 4, 8, 8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12, 8, 8, 4, 0, 0,
185 0, 4, 4, 8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12, 8, 4, 4, 0,
186 0, 4, 4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12, 4, 4, 0,
187 0, 4, 8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16, 8, 4, 0,
188 0, 4, 8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16, 8, 4, 0,
189 0, 4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12, 4, 0,
190 0, 4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12, 4, 0,
191 0, 4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12, 4, 0,
192 0, 4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16, 4, 0,
193 0, 8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16, 8, 0,
194 0, 4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16, 4, 0,
195 1, 8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16, 8, 1,
196 1, 8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16, 8, 1,
197 0, 4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16, 4, 0,
198 0, 8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16, 8, 0,
199 0, 4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16, 4, 0,
200 0, 4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12, 4, 0,
201 0, 4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12, 4, 0,
202 0, 4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12, 4, 0,
203 0, 4, 8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16, 8, 4, 0,
204 0, 4, 8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16, 8, 4, 0,
205 0, 4, 4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12, 4, 4, 0,
206 0, 4, 4, 8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12, 8, 4, 4, 0,
207 0, 0, 4, 8, 8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12, 8, 8, 4, 0, 0,
208 0, 0, 4, 4, 8, 8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12, 8, 8, 4, 4, 0, 0,
209 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
210 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
211 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
791e7b83
MN
212//error:0.000022
213};
214static const uint8_t obmc16[256]={
fa731ccd
MN
215 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
216 0, 4, 4, 8, 16, 20, 20, 24, 24, 20, 20, 16, 8, 4, 4, 0,
217 0, 4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16, 4, 0,
218 0, 8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24, 8, 0,
219 0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16, 0,
220 0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20, 0,
221 4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20, 4,
222 4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24, 4,
223 4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24, 4,
224 4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20, 4,
225 0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20, 0,
226 0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16, 0,
227 0, 8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24, 8, 0,
228 0, 4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16, 4, 0,
229 0, 4, 4, 8, 16, 20, 20, 24, 24, 20, 20, 16, 8, 4, 4, 0,
230 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
791e7b83
MN
231//error:0.000033
232};
233#elif 1 // 64*linear
234static const uint8_t obmc32[1024]={
561a18d3
RE
235 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
236 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
237 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
238 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
239 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
240 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
241 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
242 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
243 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
244 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
245 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
246 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
247 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
248 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
249 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
250 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
251 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
252 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
253 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
254 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
255 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
256 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
257 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
258 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
259 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
260 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
261 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
262 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
263 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
264 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
265 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
266 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
791e7b83
MN
267 //error:0.000020
268};
269static const uint8_t obmc16[256]={
561a18d3
RE
270 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
271 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
272 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
273 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
274 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
275 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
276 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
277 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
278 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
279 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
280 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
281 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
282 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
283 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
284 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
285 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
791e7b83
MN
286//error:0.000015
287};
288#else //64*cos
289static const uint8_t obmc32[1024]={
fa731ccd
MN
290 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
291 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
292 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
293 0, 0, 4, 4, 4, 8, 8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12, 8, 8, 4, 4, 4, 0, 0,
294 0, 0, 4, 4, 8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12, 8, 4, 4, 0, 0,
295 0, 0, 4, 8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12, 8, 4, 0, 0,
296 0, 4, 4, 8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16, 8, 4, 4, 0,
297 0, 4, 8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12, 8, 4, 0,
298 0, 4, 8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16, 8, 4, 0,
299 0, 4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12, 4, 0,
300 0, 4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12, 4, 0,
301 0, 4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12, 4, 0,
302 0, 4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12, 4, 0,
303 0, 4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12, 4, 0,
304 0, 4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16, 4, 0,
305 1, 4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16, 4, 1,
306 1, 4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16, 4, 1,
307 0, 4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16, 4, 0,
308 0, 4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12, 4, 0,
309 0, 4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12, 4, 0,
310 0, 4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12, 4, 0,
311 0, 4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12, 4, 0,
312 0, 4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12, 4, 0,
313 0, 4, 8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16, 8, 4, 0,
314 0, 4, 8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12, 8, 4, 0,
315 0, 4, 4, 8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16, 8, 4, 4, 0,
316 0, 0, 4, 8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12, 8, 4, 0, 0,
317 0, 0, 4, 4, 8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12, 8, 4, 4, 0, 0,
318 0, 0, 4, 4, 4, 8, 8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12, 8, 8, 4, 4, 4, 0, 0,
319 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
320 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
321 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
791e7b83
MN
322//error:0.000022
323};
324static const uint8_t obmc16[256]={
fa731ccd
MN
325 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
326 0, 0, 4, 8, 12, 16, 20, 20, 20, 20, 16, 12, 8, 4, 0, 0,
327 0, 4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12, 4, 0,
328 0, 8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24, 8, 0,
329 0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12, 0,
330 4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16, 4,
331 4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20, 4,
332 0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20, 0,
333 0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20, 0,
334 4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20, 4,
335 4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16, 4,
336 0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12, 0,
337 0, 8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24, 8, 0,
338 0, 4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12, 4, 0,
339 0, 0, 4, 8, 12, 16, 20, 20, 20, 20, 16, 12, 8, 4, 0, 0,
340 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
791e7b83
MN
341//error:0.000022
342};
96e2fbf2 343#endif /* 0 */
791e7b83 344
155ec6ed
MN
345//linear *64
346static const uint8_t obmc8[64]={
561a18d3
RE
347 4, 12, 20, 28, 28, 20, 12, 4,
348 12, 36, 60, 84, 84, 60, 36, 12,
349 20, 60,100,140,140,100, 60, 20,
350 28, 84,140,196,196,140, 84, 28,
351 28, 84,140,196,196,140, 84, 28,
352 20, 60,100,140,140,100, 60, 20,
353 12, 36, 60, 84, 84, 60, 36, 12,
354 4, 12, 20, 28, 28, 20, 12, 4,
155ec6ed
MN
355//error:0.000000
356};
357
358//linear *64
359static const uint8_t obmc4[16]={
561a18d3
RE
360 16, 48, 48, 16,
361 48,144,144, 48,
362 48,144,144, 48,
363 16, 48, 48, 16,
155ec6ed
MN
364//error:0.000000
365};
366
cf2baeb3 367static const uint8_t * const obmc_tab[4]={
155ec6ed
MN
368 obmc32, obmc16, obmc8, obmc4
369};
370
85fc0e75
LM
371static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
372
155ec6ed
MN
373typedef struct BlockNode{
374 int16_t mx;
375 int16_t my;
8c36eaaa 376 uint8_t ref;
155ec6ed
MN
377 uint8_t color[3];
378 uint8_t type;
379//#define TYPE_SPLIT 1
380#define BLOCK_INTRA 1
51d6a3cf 381#define BLOCK_OPT 2
155ec6ed
MN
382//#define TYPE_NOCOLOR 4
383 uint8_t level; //FIXME merge into type?
384}BlockNode;
385
51d6a3cf
MN
386static const BlockNode null_block= { //FIXME add border maybe
387 .color= {128,128,128},
388 .mx= 0,
389 .my= 0,
8c36eaaa 390 .ref= 0,
51d6a3cf
MN
391 .type= 0,
392 .level= 0,
393};
394
155ec6ed
MN
395#define LOG2_MB_SIZE 4
396#define MB_SIZE (1<<LOG2_MB_SIZE)
b538791b 397#define ENCODER_EXTRA_BITS 4
61d6e445 398#define HTAPS_MAX 8
155ec6ed 399
a0d1931c
Y
400typedef struct x_and_coeff{
401 int16_t x;
538a3841 402 uint16_t coeff;
a0d1931c
Y
403} x_and_coeff;
404
791e7b83
MN
405typedef struct SubBand{
406 int level;
407 int stride;
408 int width;
409 int height;
e6464f8b 410 int qlog; ///< log(qscale)/log[2^(1/6)]
791e7b83 411 DWTELEM *buf;
d593e329 412 IDWTELEM *ibuf;
a0d1931c
Y
413 int buf_x_offset;
414 int buf_y_offset;
415 int stride_line; ///< Stride measured in lines, not pixels.
416 x_and_coeff * x_coeff;
791e7b83
MN
417 struct SubBand *parent;
418 uint8_t state[/*7*2*/ 7 + 512][32];
419}SubBand;
420
421typedef struct Plane{
422 int width;
423 int height;
424 SubBand band[MAX_DECOMPOSITIONS][4];
7d7f57d9
MN
425
426 int htaps;
61d6e445 427 int8_t hcoeff[HTAPS_MAX/2];
7d7f57d9
MN
428 int diag_mc;
429 int fast_mc;
430
431 int last_htaps;
61d6e445 432 int8_t last_hcoeff[HTAPS_MAX/2];
7d7f57d9 433 int last_diag_mc;
791e7b83
MN
434}Plane;
435
436typedef struct SnowContext{
e6464f8b 437// MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX)
791e7b83
MN
438
439 AVCodecContext *avctx;
28869757 440 RangeCoder c;
791e7b83 441 DSPContext dsp;
51d6a3cf
MN
442 AVFrame new_picture;
443 AVFrame input_picture; ///< new_picture with the internal linesizes
791e7b83 444 AVFrame current_picture;
8c36eaaa 445 AVFrame last_picture[MAX_REF_FRAMES];
5be3a818 446 uint8_t *halfpel_plane[MAX_REF_FRAMES][4][4];
791e7b83
MN
447 AVFrame mconly_picture;
448// uint8_t q_context[16];
449 uint8_t header_state[32];
155ec6ed 450 uint8_t block_state[128 + 32*128];
791e7b83 451 int keyframe;
19aa028d 452 int always_reset;
791e7b83
MN
453 int version;
454 int spatial_decomposition_type;
396a5e68 455 int last_spatial_decomposition_type;
791e7b83
MN
456 int temporal_decomposition_type;
457 int spatial_decomposition_count;
8db13728 458 int last_spatial_decomposition_count;
791e7b83 459 int temporal_decomposition_count;
8c36eaaa
LM
460 int max_ref_frames;
461 int ref_frames;
462 int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
463 uint32_t *ref_scores[MAX_REF_FRAMES];
791e7b83 464 DWTELEM *spatial_dwt_buffer;
d593e329 465 IDWTELEM *spatial_idwt_buffer;
791e7b83
MN
466 int colorspace_type;
467 int chroma_h_shift;
468 int chroma_v_shift;
469 int spatial_scalability;
470 int qlog;
396a5e68 471 int last_qlog;
155ec6ed
MN
472 int lambda;
473 int lambda2;
4e64bead 474 int pass1_rc;
791e7b83 475 int mv_scale;
396a5e68 476 int last_mv_scale;
791e7b83 477 int qbias;
396a5e68 478 int last_qbias;
791e7b83 479#define QBIAS_SHIFT 3
155ec6ed
MN
480 int b_width;
481 int b_height;
482 int block_max_depth;
396a5e68 483 int last_block_max_depth;
791e7b83 484 Plane plane[MAX_PLANES];
155ec6ed 485 BlockNode *block;
51d6a3cf
MN
486#define ME_CACHE_SIZE 1024
487 int me_cache[ME_CACHE_SIZE];
488 int me_cache_generation;
a0d1931c 489 slice_buffer sb;
155ec6ed 490
e6464f8b 491 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX)
bd2b6b33
MR
492
493 uint8_t *scratchbuf;
791e7b83
MN
494}SnowContext;
495
f9e6ebf7 496typedef struct {
d593e329
MN
497 IDWTELEM *b0;
498 IDWTELEM *b1;
499 IDWTELEM *b2;
500 IDWTELEM *b3;
f9e6ebf7 501 int y;
fe5c7e58 502} DWTCompose;
f9e6ebf7 503
a0d1931c
Y
504#define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
505//#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
506
51d6a3cf
MN
507static void iterative_me(SnowContext *s);
508
d593e329 509static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM * base_buffer)
a0d1931c
Y
510{
511 int i;
115329f1 512
a0d1931c
Y
513 buf->base_buffer = base_buffer;
514 buf->line_count = line_count;
515 buf->line_width = line_width;
516 buf->data_count = max_allocated_lines;
d593e329
MN
517 buf->line = av_mallocz (sizeof(IDWTELEM *) * line_count);
518 buf->data_stack = av_malloc (sizeof(IDWTELEM *) * max_allocated_lines);
115329f1 519
ef3dfbd4 520 for(i = 0; i < max_allocated_lines; i++){
d593e329 521 buf->data_stack[i] = av_malloc (sizeof(IDWTELEM) * line_width);
a0d1931c 522 }
115329f1 523
a0d1931c
Y
524 buf->data_stack_top = max_allocated_lines - 1;
525}
526
d593e329 527static IDWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
a0d1931c 528{
a0d1931c 529 int offset;
d593e329 530 IDWTELEM * buffer;
115329f1 531
a0d1931c
Y
532 assert(buf->data_stack_top >= 0);
533// assert(!buf->line[line]);
534 if (buf->line[line])
535 return buf->line[line];
115329f1 536
a0d1931c
Y
537 offset = buf->line_width * line;
538 buffer = buf->data_stack[buf->data_stack_top];
539 buf->data_stack_top--;
540 buf->line[line] = buffer;
115329f1 541
a0d1931c
Y
542 return buffer;
543}
544
545static void slice_buffer_release(slice_buffer * buf, int line)
546{
a0d1931c 547 int offset;
d593e329 548 IDWTELEM * buffer;
a0d1931c
Y
549
550 assert(line >= 0 && line < buf->line_count);
551 assert(buf->line[line]);
552
553 offset = buf->line_width * line;
554 buffer = buf->line[line];
555 buf->data_stack_top++;
556 buf->data_stack[buf->data_stack_top] = buffer;
557 buf->line[line] = NULL;
a0d1931c
Y
558}
559
560static void slice_buffer_flush(slice_buffer * buf)
561{
562 int i;
ef3dfbd4 563 for(i = 0; i < buf->line_count; i++){
a0d1931c 564 if (buf->line[i])
a0d1931c 565 slice_buffer_release(buf, i);
a0d1931c
Y
566 }
567}
568
569static void slice_buffer_destroy(slice_buffer * buf)
570{
571 int i;
572 slice_buffer_flush(buf);
115329f1 573
ef3dfbd4 574 for(i = buf->data_count - 1; i >= 0; i--){
e7c8206e 575 av_freep(&buf->data_stack[i]);
a0d1931c 576 }
e7c8206e 577 av_freep(&buf->data_stack);
e7c8206e 578 av_freep(&buf->line);
a0d1931c
Y
579}
580
bb270c08 581#ifdef __sgi
2554db9b 582// Avoid a name clash on SGI IRIX
bb270c08 583#undef qexp
2554db9b 584#endif
034aff03 585#define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
c97de57c 586static uint8_t qexp[QROOT];
791e7b83
MN
587
/**
 * Fold an index into the range [0, m] by reflecting it at both ends.
 *
 * Values already inside the range are returned unchanged; values outside are
 * mirrored at 0 and at m until they land inside, e.g. with m=3 the inputs
 * -1, 4 and 5 give 1, 2 and 1.
 *
 * @param v index to fold
 * @param m largest valid index; for m==0 the only valid index, 0, is returned
 * @return the reflected index
 */
static inline int mirror(int v, int m){
    // With m==0 the reflection below only flips the sign of v and would
    // never terminate for v!=0.
    if(!m)
        return 0;

    while((unsigned)v > (unsigned)m){
        v=-v;
        if(v<0) v+= 2*m;
    }
    return v;
}
595
28869757 596static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
791e7b83
MN
597 int i;
598
599 if(v){
c26abfa5 600 const int a= FFABS(v);
791e7b83
MN
601 const int e= av_log2(a);
602#if 1
115329f1 603 const int el= FFMIN(e, 10);
28869757 604 put_rac(c, state+0, 0);
791e7b83
MN
605
606 for(i=0; i<el; i++){
28869757 607 put_rac(c, state+1+i, 1); //1..10
791e7b83
MN
608 }
609 for(; i<e; i++){
28869757 610 put_rac(c, state+1+9, 1); //1..10
791e7b83 611 }
28869757 612 put_rac(c, state+1+FFMIN(i,9), 0);
791e7b83
MN
613
614 for(i=e-1; i>=el; i--){
28869757 615 put_rac(c, state+22+9, (a>>i)&1); //22..31
791e7b83
MN
616 }
617 for(; i>=0; i--){
28869757 618 put_rac(c, state+22+i, (a>>i)&1); //22..31
791e7b83
MN
619 }
620
621 if(is_signed)
28869757 622 put_rac(c, state+11 + el, v < 0); //11..21
791e7b83 623#else
115329f1 624
28869757 625 put_rac(c, state+0, 0);
791e7b83
MN
626 if(e<=9){
627 for(i=0; i<e; i++){
28869757 628 put_rac(c, state+1+i, 1); //1..10
791e7b83 629 }
28869757 630 put_rac(c, state+1+i, 0);
791e7b83
MN
631
632 for(i=e-1; i>=0; i--){
28869757 633 put_rac(c, state+22+i, (a>>i)&1); //22..31
791e7b83
MN
634 }
635
636 if(is_signed)
28869757 637 put_rac(c, state+11 + e, v < 0); //11..21
791e7b83
MN
638 }else{
639 for(i=0; i<e; i++){
28869757 640 put_rac(c, state+1+FFMIN(i,9), 1); //1..10
791e7b83 641 }
28869757 642 put_rac(c, state+1+FFMIN(i,9), 0);
791e7b83
MN
643
644 for(i=e-1; i>=0; i--){
28869757 645 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
791e7b83
MN
646 }
647
648 if(is_signed)
28869757 649 put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
791e7b83 650 }
96e2fbf2 651#endif /* 1 */
791e7b83 652 }else{
28869757 653 put_rac(c, state+0, 1);
791e7b83
MN
654 }
655}
656
28869757
MN
657static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
658 if(get_rac(c, state+0))
791e7b83
MN
659 return 0;
660 else{
7c2425d2
LM
661 int i, e, a;
662 e= 0;
28869757 663 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
7c2425d2 664 e++;
791e7b83 665 }
7c2425d2 666
791e7b83 667 a= 1;
7c2425d2 668 for(i=e-1; i>=0; i--){
28869757 669 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
791e7b83
MN
670 }
671
28869757 672 if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
791e7b83
MN
673 return -a;
674 else
675 return a;
676 }
677}
678
28869757 679static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
4f4e9633 680 int i;
0635cbfc 681 int r= log2>=0 ? 1<<log2 : 1;
4f4e9633
MN
682
683 assert(v>=0);
0635cbfc
MN
684 assert(log2>=-4);
685
686 while(v >= r){
28869757 687 put_rac(c, state+4+log2, 1);
0635cbfc 688 v -= r;
4f4e9633 689 log2++;
0635cbfc 690 if(log2>0) r+=r;
4f4e9633 691 }
28869757 692 put_rac(c, state+4+log2, 0);
115329f1 693
4f4e9633 694 for(i=log2-1; i>=0; i--){
28869757 695 put_rac(c, state+31-i, (v>>i)&1);
4f4e9633 696 }
4f4e9633
MN
697}
698
28869757 699static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
4f4e9633 700 int i;
0635cbfc 701 int r= log2>=0 ? 1<<log2 : 1;
4f4e9633
MN
702 int v=0;
703
0635cbfc
MN
704 assert(log2>=-4);
705
28869757 706 while(get_rac(c, state+4+log2)){
0635cbfc 707 v+= r;
4f4e9633 708 log2++;
0635cbfc 709 if(log2>0) r+=r;
4f4e9633 710 }
115329f1 711
4f4e9633 712 for(i=log2-1; i>=0; i--){
28869757 713 v+= get_rac(c, state+31-i)<<i;
4f4e9633
MN
714 }
715
716 return v;
717}
718
9d14ffbc
LB
719static av_always_inline void
720lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
721 int dst_step, int src_step, int ref_step,
722 int width, int mul, int add, int shift,
723 int highpass, int inverse){
791e7b83
MN
724 const int mirror_left= !highpass;
725 const int mirror_right= (width&1) ^ highpass;
726 const int w= (width>>1) - 1 + (highpass & width);
727 int i;
728
729#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
730 if(mirror_left){
731 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
732 dst += dst_step;
733 src += src_step;
734 }
115329f1 735
791e7b83 736 for(i=0; i<w; i++){
9d14ffbc
LB
737 dst[i*dst_step] =
738 LIFT(src[i*src_step],
739 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
740 inverse);
791e7b83 741 }
115329f1 742
791e7b83 743 if(mirror_right){
9d14ffbc
LB
744 dst[w*dst_step] =
745 LIFT(src[w*src_step],
746 ((mul*2*ref[w*ref_step]+add)>>shift),
747 inverse);
791e7b83
MN
748 }
749}
750
9d14ffbc
LB
751static av_always_inline void
752inv_lift(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
753 int dst_step, int src_step, int ref_step,
754 int width, int mul, int add, int shift,
755 int highpass, int inverse){
d593e329
MN
756 const int mirror_left= !highpass;
757 const int mirror_right= (width&1) ^ highpass;
758 const int w= (width>>1) - 1 + (highpass & width);
759 int i;
760
761#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
762 if(mirror_left){
763 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
764 dst += dst_step;
765 src += src_step;
766 }
767
768 for(i=0; i<w; i++){
9d14ffbc
LB
769 dst[i*dst_step] =
770 LIFT(src[i*src_step],
771 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
772 inverse);
d593e329
MN
773 }
774
775 if(mirror_right){
9d14ffbc
LB
776 dst[w*dst_step] =
777 LIFT(src[w*src_step],
778 ((mul*2*ref[w*ref_step]+add)>>shift),
779 inverse);
d593e329
MN
780 }
781}
782
059715a4 783#ifndef liftS
9d14ffbc
LB
784static av_always_inline void
785liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
786 int dst_step, int src_step, int ref_step,
787 int width, int mul, int add, int shift,
788 int highpass, int inverse){
f5a71928
MN
789 const int mirror_left= !highpass;
790 const int mirror_right= (width&1) ^ highpass;
791 const int w= (width>>1) - 1 + (highpass & width);
792 int i;
793
794 assert(shift == 4);
9d14ffbc
LB
795#define LIFTS(src, ref, inv) \
796 ((inv) ? \
797 (src) + (((ref) + 4*(src))>>shift): \
798 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
f5a71928
MN
799 if(mirror_left){
800 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
801 dst += dst_step;
802 src += src_step;
803 }
115329f1 804
f5a71928 805 for(i=0; i<w; i++){
9d14ffbc
LB
806 dst[i*dst_step] =
807 LIFTS(src[i*src_step],
808 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
809 inverse);
f5a71928 810 }
115329f1 811
f5a71928 812 if(mirror_right){
9d14ffbc
LB
813 dst[w*dst_step] =
814 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
f5a71928
MN
815 }
816}
9d14ffbc
LB
817static av_always_inline void
818inv_liftS(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
819 int dst_step, int src_step, int ref_step,
820 int width, int mul, int add, int shift,
821 int highpass, int inverse){
d593e329
MN
822 const int mirror_left= !highpass;
823 const int mirror_right= (width&1) ^ highpass;
824 const int w= (width>>1) - 1 + (highpass & width);
825 int i;
826
827 assert(shift == 4);
9d14ffbc
LB
828#define LIFTS(src, ref, inv) \
829 ((inv) ? \
830 (src) + (((ref) + 4*(src))>>shift): \
831 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
d593e329
MN
832 if(mirror_left){
833 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
834 dst += dst_step;
835 src += src_step;
836 }
837
838 for(i=0; i<w; i++){
9d14ffbc
LB
839 dst[i*dst_step] =
840 LIFTS(src[i*src_step],
841 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
842 inverse);
d593e329
MN
843 }
844
845 if(mirror_right){
9d14ffbc
LB
846 dst[w*dst_step] =
847 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
d593e329
MN
848 }
849}
301df480 850#endif /* ! liftS */
f5a71928 851
aa25a462
RFI
852static void horizontal_decompose53i(DWTELEM *b, int width){
853 DWTELEM temp[width];
791e7b83 854 const int width2= width>>1;
62ab0b78 855 int x;
791e7b83
MN
856 const int w2= (width+1)>>1;
857
858 for(x=0; x<width2; x++){
859 temp[x ]= b[2*x ];
860 temp[x+w2]= b[2*x + 1];
861 }
862 if(width&1)
863 temp[x ]= b[2*x ];
864#if 0
62ab0b78
AJ
865 {
866 int A1,A2,A3,A4;
791e7b83
MN
867 A2= temp[1 ];
868 A4= temp[0 ];
869 A1= temp[0+width2];
870 A1 -= (A2 + A4)>>1;
871 A4 += (A1 + 1)>>1;
872 b[0+width2] = A1;
873 b[0 ] = A4;
874 for(x=1; x+1<width2; x+=2){
875 A3= temp[x+width2];
876 A4= temp[x+1 ];
877 A3 -= (A2 + A4)>>1;
878 A2 += (A1 + A3 + 2)>>2;
879 b[x+width2] = A3;
880 b[x ] = A2;
881
882 A1= temp[x+1+width2];
883 A2= temp[x+2 ];
884 A1 -= (A2 + A4)>>1;
885 A4 += (A1 + A3 + 2)>>2;
886 b[x+1+width2] = A1;
887 b[x+1 ] = A4;
888 }
889 A3= temp[width-1];
890 A3 -= A2;
891 A2 += (A1 + A3 + 2)>>2;
892 b[width -1] = A3;
893 b[width2-1] = A2;
62ab0b78 894 }
115329f1 895#else
791e7b83
MN
896 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
897 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0);
301df480 898#endif /* 0 */
791e7b83
MN
899}
900
aa25a462 901static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 902 int i;
115329f1 903
791e7b83
MN
904 for(i=0; i<width; i++){
905 b1[i] -= (b0[i] + b2[i])>>1;
906 }
907}
908
aa25a462 909static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 910 int i;
115329f1 911
791e7b83
MN
912 for(i=0; i<width; i++){
913 b1[i] += (b0[i] + b2[i] + 2)>>2;
914 }
915}
916
aa25a462 917static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
39c61bbb 918 int y;
791e7b83
MN
919 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
920 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride;
115329f1 921
791e7b83
MN
922 for(y=-2; y<height; y+=2){
923 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
924 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
925
13705b69
MN
926 if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
927 if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
115329f1 928
13705b69
MN
929 if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
930 if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
115329f1 931
791e7b83
MN
932 b0=b2;
933 b1=b3;
934 }
935}
936
aa25a462
RFI
937static void horizontal_decompose97i(DWTELEM *b, int width){
938 DWTELEM temp[width];
791e7b83
MN
939 const int w2= (width+1)>>1;
940
ce611a27
MN
941 lift (temp+w2, b +1, b , 1, 2, 2, width, W_AM, W_AO, W_AS, 1, 1);
942 liftS(temp , b , temp+w2, 1, 2, 1, width, W_BM, W_BO, W_BS, 0, 0);
ff06e067 943 lift (b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
791e7b83
MN
944 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
945}
946
947
aa25a462 948static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 949 int i;
115329f1 950
791e7b83
MN
951 for(i=0; i<width; i++){
952 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
953 }
954}
955
aa25a462 956static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 957 int i;
115329f1 958
791e7b83 959 for(i=0; i<width; i++){
791e7b83 960 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
791e7b83
MN
961 }
962}
963
aa25a462 964static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 965 int i;
115329f1 966
791e7b83 967 for(i=0; i<width; i++){
f5a71928 968#ifdef liftS
791e7b83 969 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
f5a71928 970#else
ce611a27 971 b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + W_BO*5 + (5<<27)) / (5*16) - (1<<23);
f5a71928 972#endif
791e7b83
MN
973 }
974}
975
aa25a462 976static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 977 int i;
115329f1 978
791e7b83
MN
979 for(i=0; i<width; i++){
980 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
981 }
982}
983
aa25a462 984static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
39c61bbb 985 int y;
791e7b83
MN
986 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
987 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride;
988 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
989 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
115329f1 990
791e7b83
MN
991 for(y=-4; y<height; y+=2){
992 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
993 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
994
13705b69
MN
995 if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
996 if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
115329f1 997
13705b69
MN
998 if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
999 if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
1000 if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
1001 if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
791e7b83 1002
791e7b83
MN
1003 b0=b2;
1004 b1=b3;
1005 b2=b4;
1006 b3=b5;
1007 }
1008}
1009
aa25a462 1010void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
791e7b83 1011 int level;
115329f1 1012
46c281e8
MN
1013 for(level=0; level<decomposition_count; level++){
1014 switch(type){
d4b287ed
LM
1015 case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1016 case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
791e7b83
MN
1017 }
1018 }
1019}
1020
d593e329
MN
1021static void horizontal_compose53i(IDWTELEM *b, int width){
1022 IDWTELEM temp[width];
791e7b83
MN
1023 const int width2= width>>1;
1024 const int w2= (width+1)>>1;
62ab0b78 1025 int x;
791e7b83
MN
1026
1027#if 0
62ab0b78 1028 int A1,A2,A3,A4;
791e7b83
MN
1029 A2= temp[1 ];
1030 A4= temp[0 ];
1031 A1= temp[0+width2];
1032 A1 -= (A2 + A4)>>1;
1033 A4 += (A1 + 1)>>1;
1034 b[0+width2] = A1;
1035 b[0 ] = A4;
1036 for(x=1; x+1<width2; x+=2){
1037 A3= temp[x+width2];
1038 A4= temp[x+1 ];
1039 A3 -= (A2 + A4)>>1;
1040 A2 += (A1 + A3 + 2)>>2;
1041 b[x+width2] = A3;
1042 b[x ] = A2;
1043
1044 A1= temp[x+1+width2];
1045 A2= temp[x+2 ];
1046 A1 -= (A2 + A4)>>1;
1047 A4 += (A1 + A3 + 2)>>2;
1048 b[x+1+width2] = A1;
1049 b[x+1 ] = A4;
1050 }
1051 A3= temp[width-1];
1052 A3 -= A2;
1053 A2 += (A1 + A3 + 2)>>2;
1054 b[width -1] = A3;
1055 b[width2-1] = A2;
115329f1 1056#else
d593e329
MN
1057 inv_lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1);
1058 inv_lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
96e2fbf2 1059#endif /* 0 */
791e7b83
MN
1060 for(x=0; x<width2; x++){
1061 b[2*x ]= temp[x ];
1062 b[2*x + 1]= temp[x+w2];
1063 }
1064 if(width&1)
1065 b[2*x ]= temp[x ];
1066}
1067
d593e329 1068static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
791e7b83 1069 int i;
115329f1 1070
791e7b83
MN
1071 for(i=0; i<width; i++){
1072 b1[i] += (b0[i] + b2[i])>>1;
1073 }
1074}
1075
d593e329 1076static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
791e7b83 1077 int i;
115329f1 1078
791e7b83
MN
1079 for(i=0; i<width; i++){
1080 b1[i] -= (b0[i] + b2[i] + 2)>>2;
1081 }
1082}
1083
fe5c7e58 1084static void spatial_compose53i_buffered_init(DWTCompose *cs, slice_buffer * sb, int height, int stride_line){
a0d1931c
Y
1085 cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1086 cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line);
1087 cs->y = -1;
1088}
1089
fe5c7e58 1090static void spatial_compose53i_init(DWTCompose *cs, IDWTELEM *buffer, int height, int stride){
f9e6ebf7
LM
1091 cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1092 cs->b1 = buffer + mirror(-1 , height-1)*stride;
1093 cs->y = -1;
1094}
1095
fe5c7e58 1096static void spatial_compose53i_dy_buffered(DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line){
a0d1931c 1097 int y= cs->y;
115329f1 1098
d593e329
MN
1099 IDWTELEM *b0= cs->b0;
1100 IDWTELEM *b1= cs->b1;
1101 IDWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
1102 IDWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
a0d1931c 1103
13705b69
MN
1104 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1105 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
a0d1931c 1106
13705b69
MN
1107 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1108 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
a0d1931c
Y
1109
1110 cs->b0 = b2;
1111 cs->b1 = b3;
1112 cs->y += 2;
1113}
1114
fe5c7e58 1115static void spatial_compose53i_dy(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride){
f9e6ebf7 1116 int y= cs->y;
d593e329
MN
1117 IDWTELEM *b0= cs->b0;
1118 IDWTELEM *b1= cs->b1;
1119 IDWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1120 IDWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
791e7b83 1121
13705b69
MN
1122 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1123 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
791e7b83 1124
13705b69
MN
1125 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1126 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
791e7b83 1127
f9e6ebf7
LM
1128 cs->b0 = b2;
1129 cs->b1 = b3;
1130 cs->y += 2;
1131}
1132
1918057c 1133static void av_unused spatial_compose53i(IDWTELEM *buffer, int width, int height, int stride){
fe5c7e58 1134 DWTCompose cs;
9a3bb2b8
MN
1135 spatial_compose53i_init(&cs, buffer, height, stride);
1136 while(cs.y <= height)
1137 spatial_compose53i_dy(&cs, buffer, width, height, stride);
1138}
1139
791e7b83 1140
d593e329
MN
1141void ff_snow_horizontal_compose97i(IDWTELEM *b, int width){
1142 IDWTELEM temp[width];
791e7b83
MN
1143 const int w2= (width+1)>>1;
1144
d593e329 1145 inv_lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
ff06e067 1146 inv_lift (temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
d593e329
MN
1147 inv_liftS(b , temp , temp+w2, 2, 1, 1, width, W_BM, W_BO, W_BS, 0, 1);
1148 inv_lift (b+1 , temp+w2, b , 2, 1, 2, width, W_AM, W_AO, W_AS, 1, 0);
791e7b83
MN
1149}
1150
d593e329 1151static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
791e7b83 1152 int i;
115329f1 1153
791e7b83
MN
1154 for(i=0; i<width; i++){
1155 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1156 }
1157}
1158
d593e329 1159static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
791e7b83 1160 int i;
115329f1 1161
791e7b83 1162 for(i=0; i<width; i++){
791e7b83 1163 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
791e7b83
MN
1164 }
1165}
1166
d593e329 1167static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
791e7b83 1168 int i;
115329f1 1169
791e7b83 1170 for(i=0; i<width; i++){
f5a71928 1171#ifdef liftS
791e7b83 1172 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
f5a71928
MN
1173#else
1174 b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
1175#endif
791e7b83
MN
1176 }
1177}
1178
d593e329 1179static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
791e7b83 1180 int i;
115329f1 1181
791e7b83
MN
1182 for(i=0; i<width; i++){
1183 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1184 }
1185}
1186
d593e329 1187void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
565a45ac 1188 int i;
115329f1 1189
565a45ac 1190 for(i=0; i<width; i++){
565a45ac 1191 b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
565a45ac 1192 b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
f5a71928 1193#ifdef liftS
565a45ac 1194 b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
f5a71928
MN
1195#else
1196 b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
1197#endif
565a45ac
MN
1198 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1199 }
1200}
1201
fe5c7e58 1202static void spatial_compose97i_buffered_init(DWTCompose *cs, slice_buffer * sb, int height, int stride_line){
a0d1931c
Y
1203 cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1204 cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line);
1205 cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1206 cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
1207 cs->y = -3;
1208}
1209
fe5c7e58 1210static void spatial_compose97i_init(DWTCompose *cs, IDWTELEM *buffer, int height, int stride){
f9e6ebf7
LM
1211 cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1212 cs->b1 = buffer + mirror(-3 , height-1)*stride;
1213 cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1214 cs->b3 = buffer + mirror(-3+2, height-1)*stride;
1215 cs->y = -3;
1216}
791e7b83 1217
fe5c7e58 1218static void spatial_compose97i_dy_buffered(DSPContext *dsp, DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line){
a0d1931c 1219 int y = cs->y;
115329f1 1220
d593e329
MN
1221 IDWTELEM *b0= cs->b0;
1222 IDWTELEM *b1= cs->b1;
1223 IDWTELEM *b2= cs->b2;
1224 IDWTELEM *b3= cs->b3;
1225 IDWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
1226 IDWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
115329f1 1227
565a45ac 1228 if(y>0 && y+4<height){
059715a4 1229 dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
565a45ac 1230 }else{
13705b69
MN
1231 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1232 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1233 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1234 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
565a45ac 1235 }
a0d1931c 1236
059715a4
RE
1237 if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width);
1238 if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width);
a0d1931c
Y
1239
1240 cs->b0=b2;
1241 cs->b1=b3;
1242 cs->b2=b4;
1243 cs->b3=b5;
1244 cs->y += 2;
1245}
1246
fe5c7e58 1247static void spatial_compose97i_dy(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride){
f9e6ebf7 1248 int y = cs->y;
d593e329
MN
1249 IDWTELEM *b0= cs->b0;
1250 IDWTELEM *b1= cs->b1;
1251 IDWTELEM *b2= cs->b2;
1252 IDWTELEM *b3= cs->b3;
1253 IDWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1254 IDWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
791e7b83 1255
13705b69
MN
1256 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1257 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1258 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1259 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
791e7b83 1260
059715a4
RE
1261 if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width);
1262 if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width);
f9e6ebf7
LM
1263
1264 cs->b0=b2;
1265 cs->b1=b3;
1266 cs->b2=b4;
1267 cs->b3=b5;
1268 cs->y += 2;
1269}
1270
1918057c 1271static void av_unused spatial_compose97i(IDWTELEM *buffer, int width, int height, int stride){
fe5c7e58 1272 DWTCompose cs;
9a3bb2b8
MN
1273 spatial_compose97i_init(&cs, buffer, height, stride);
1274 while(cs.y <= height)
1275 spatial_compose97i_dy(&cs, buffer, width, height, stride);
1276}
1277
fe5c7e58 1278static void ff_spatial_idwt_buffered_init(DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
a0d1931c
Y
1279 int level;
1280 for(level=decomposition_count-1; level>=0; level--){
1281 switch(type){
d4b287ed
LM
1282 case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1283 case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
a0d1931c
Y
1284 }
1285 }
1286}
1287
fe5c7e58 1288static void ff_spatial_idwt_init(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
f9e6ebf7
LM
1289 int level;
1290 for(level=decomposition_count-1; level>=0; level--){
1291 switch(type){
d4b287ed
LM
1292 case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1293 case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
f9e6ebf7 1294 }
791e7b83
MN
1295 }
1296}
1297
fe5c7e58 1298static void ff_spatial_idwt_slice(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
f9e6ebf7 1299 const int support = type==1 ? 3 : 5;
791e7b83 1300 int level;
f9e6ebf7 1301 if(type==2) return;
791e7b83 1302
46c281e8 1303 for(level=decomposition_count-1; level>=0; level--){
f9e6ebf7
LM
1304 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1305 switch(type){
d4b287ed 1306 case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
94ae6788 1307 break;
d4b287ed 1308 case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
94ae6788 1309 break;
f9e6ebf7 1310 }
791e7b83
MN
1311 }
1312 }
1313}
1314
fe5c7e58 1315static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, DWTCompose *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
a0d1931c
Y
1316 const int support = type==1 ? 3 : 5;
1317 int level;
1318 if(type==2) return;
1319
1320 for(level=decomposition_count-1; level>=0; level--){
1321 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1322 switch(type){
d4b287ed 1323 case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
94ae6788 1324 break;
d4b287ed 1325 case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
94ae6788 1326 break;
a0d1931c
Y
1327 }
1328 }
1329 }
1330}
1331
d593e329 1332static void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
fe5c7e58 1333 DWTCompose cs[MAX_DECOMPOSITIONS];
f9e6ebf7
LM
1334 int y;
1335 ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1336 for(y=0; y<height; y+=4)
1337 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
f9e6ebf7
LM
1338}
1339
d593e329 1340static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
4f4e9633
MN
1341 const int w= b->width;
1342 const int h= b->height;
1343 int x, y;
1344
791e7b83 1345 if(1){
791e7b83 1346 int run=0;
a8d73e56 1347 int runs[w*h];
791e7b83 1348 int run_index=0;
b44985ba 1349 int max_index;
115329f1 1350
791e7b83
MN
1351 for(y=0; y<h; y++){
1352 for(x=0; x<w; x++){
78486403 1353 int v, p=0;
6b2f6646 1354 int /*ll=0, */l=0, lt=0, t=0, rt=0;
a8d73e56 1355 v= src[x + y*stride];
791e7b83
MN
1356
1357 if(y){
a8d73e56 1358 t= src[x + (y-1)*stride];
791e7b83 1359 if(x){
a8d73e56 1360 lt= src[x - 1 + (y-1)*stride];
791e7b83
MN
1361 }
1362 if(x + 1 < w){
a8d73e56 1363 rt= src[x + 1 + (y-1)*stride];
791e7b83
MN
1364 }
1365 }
1366 if(x){
a8d73e56 1367 l= src[x - 1 + y*stride];
6b2f6646
MN
1368 /*if(x > 1){
1369 if(orientation==1) ll= src[y + (x-2)*stride];
1370 else ll= src[x - 2 + y*stride];
791e7b83
MN
1371 }*/
1372 }
78486403 1373 if(parent){
a8d73e56
MN
1374 int px= x>>1;
1375 int py= y>>1;
115329f1 1376 if(px<b->parent->width && py<b->parent->height)
78486403
MN
1377 p= parent[px + py*2*stride];
1378 }
1379 if(!(/*ll|*/l|lt|t|rt|p)){
791e7b83
MN
1380 if(v){
1381 runs[run_index++]= run;
1382 run=0;
1383 }else{
1384 run++;
1385 }
1386 }
1387 }
1388 }
b44985ba 1389 max_index= run_index;
791e7b83
MN
1390 runs[run_index++]= run;
1391 run_index=0;
1392 run= runs[run_index++];
1393
b44985ba
MN
1394 put_symbol2(&s->c, b->state[30], max_index, 0);
1395 if(run_index <= max_index)
1396 put_symbol2(&s->c, b->state[1], run, 3);
115329f1 1397
791e7b83 1398 for(y=0; y<h; y++){
d06c75a8 1399 if(s->c.bytestream_end - s->c.bytestream < w*40){
0ecca7a4
MN
1400 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1401 return -1;
1402 }
791e7b83 1403 for(x=0; x<w; x++){
78486403 1404 int v, p=0;
6b2f6646 1405 int /*ll=0, */l=0, lt=0, t=0, rt=0;
a8d73e56 1406 v= src[x + y*stride];
791e7b83
MN
1407
1408 if(y){
a8d73e56 1409 t= src[x + (y-1)*stride];
791e7b83 1410 if(x){
a8d73e56 1411 lt= src[x - 1 + (y-1)*stride];
791e7b83
MN
1412 }
1413 if(x + 1 < w){
a8d73e56 1414 rt= src[x + 1 + (y-1)*stride];
791e7b83
MN
1415 }
1416 }
1417 if(x){
a8d73e56 1418 l= src[x - 1 + y*stride];
6b2f6646
MN
1419 /*if(x > 1){
1420 if(orientation==1) ll= src[y + (x-2)*stride];
1421 else ll= src[x - 2 + y*stride];
791e7b83
MN
1422 }*/
1423 }
78486403 1424 if(parent){
a8d73e56
MN
1425 int px= x>>1;
1426 int py= y>>1;
115329f1 1427 if(px<b->parent->width && py<b->parent->height)
78486403
MN
1428 p= parent[px + py*2*stride];
1429 }
1430 if(/*ll|*/l|lt|t|rt|p){
c26abfa5 1431 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
6b2f6646 1432
28869757 1433 put_rac(&s->c, &b->state[0][context], !!v);
791e7b83
MN
1434 }else{
1435 if(!run){
1436 run= runs[run_index++];
4f4e9633 1437
b44985ba
MN
1438 if(run_index <= max_index)
1439 put_symbol2(&s->c, b->state[1], run, 3);
791e7b83
MN
1440 assert(v);
1441 }else{
1442 run--;
1443 assert(!v);
1444 }
1445 }
1446 if(v){
c26abfa5
DB
1447 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1448 int l2= 2*FFABS(l) + (l<0);
1449 int t2= 2*FFABS(t) + (t<0);
6b2f6646 1450
c26abfa5 1451 put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
538a3841 1452 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
791e7b83
MN
1453 }
1454 }
1455 }
791e7b83 1456 }
0ecca7a4 1457 return 0;
791e7b83
MN
1458}
1459
d593e329 1460static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
4f4e9633
MN
1461// encode_subband_qtree(s, b, src, parent, stride, orientation);
1462// encode_subband_z0run(s, b, src, parent, stride, orientation);
0ecca7a4 1463 return encode_subband_c0run(s, b, src, parent, stride, orientation);
4f4e9633
MN
1464// encode_subband_dzr(s, b, src, parent, stride, orientation);
1465}
1466
a0d1931c 1467static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
791e7b83
MN
1468 const int w= b->width;
1469 const int h= b->height;
1470 int x,y;
115329f1 1471
791e7b83 1472 if(1){
b44985ba 1473 int run, runs;
cbb1d2b1
MN
1474 x_and_coeff *xc= b->x_coeff;
1475 x_and_coeff *prev_xc= NULL;
1476 x_and_coeff *prev2_xc= xc;
1477 x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
1478 x_and_coeff *prev_parent_xc= parent_xc;
791e7b83 1479
b44985ba
MN
1480 runs= get_symbol2(&s->c, b->state[30], 0);
1481 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1482 else run= INT_MAX;
1483
791e7b83 1484 for(y=0; y<h; y++){
0cea8a03
MN
1485 int v=0;
1486 int lt=0, t=0, rt=0;
1487
cbb1d2b1
MN
1488 if(y && prev_xc->x == 0){
1489 rt= prev_xc->coeff;
0cea8a03 1490 }
791e7b83 1491 for(x=0; x<w; x++){
0cea8a03
MN
1492 int p=0;
1493 const int l= v;
115329f1 1494
0cea8a03 1495 lt= t; t= rt;
791e7b83 1496
ff765159 1497 if(y){
cbb1d2b1
MN
1498 if(prev_xc->x <= x)
1499 prev_xc++;
1500 if(prev_xc->x == x + 1)
1501 rt= prev_xc->coeff;
ff765159
MN
1502 else
1503 rt=0;
1504 }
cbb1d2b1
MN
1505 if(parent_xc){
1506 if(x>>1 > parent_xc->x){
1507 parent_xc++;
7b49c309 1508 }
cbb1d2b1
MN
1509 if(x>>1 == parent_xc->x){
1510 p= parent_xc->coeff;
ff765159 1511 }
78486403
MN
1512 }
1513 if(/*ll|*/l|lt|t|rt|p){
c26abfa5 1514 int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
6b2f6646 1515
28869757 1516 v=get_rac(&s->c, &b->state[0][context]);
3c096ac7
MN
1517 if(v){
1518 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
1519 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
115329f1 1520
cbb1d2b1
MN
1521 xc->x=x;
1522 (xc++)->coeff= v;
3c096ac7 1523 }
791e7b83
MN
1524 }else{
1525 if(!run){
b44985ba
MN
1526 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1527 else run= INT_MAX;
3c096ac7
MN
1528 v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
1529 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
115329f1 1530
cbb1d2b1
MN
1531 xc->x=x;
1532 (xc++)->coeff= v;
791e7b83 1533 }else{
99cd59e5 1534 int max_run;
791e7b83
MN
1535 run--;
1536 v=0;
3c1adccd 1537
cbb1d2b1 1538 if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
99cd59e5 1539 else max_run= FFMIN(run, w-x-1);
cbb1d2b1
MN
1540 if(parent_xc)
1541 max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
99cd59e5
MN
1542 x+= max_run;
1543 run-= max_run;
791e7b83
MN
1544 }
1545 }
7b49c309 1546 }
cbb1d2b1
MN
1547 (xc++)->x= w+1; //end marker
1548 prev_xc= prev2_xc;
1549 prev2_xc= xc;
115329f1 1550
cbb1d2b1 1551 if(parent_xc){
7b49c309 1552 if(y&1){
cbb1d2b1
MN
1553 while(parent_xc->x != parent->width+1)
1554 parent_xc++;
1555 parent_xc++;
1556 prev_parent_xc= parent_xc;
7b49c309 1557 }else{
cbb1d2b1 1558 parent_xc= prev_parent_xc;
791e7b83
MN
1559 }
1560 }
1561 }
a0d1931c 1562
cbb1d2b1 1563 (xc++)->x= w+1; //end marker
a0d1931c
Y
1564 }
1565}
1566
1567static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1568 const int w= b->width;
62ab0b78 1569 int y;
f66e4f5f 1570 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
c97de57c 1571 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
a0d1931c
Y
1572 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1573 int new_index = 0;
115329f1 1574
d593e329 1575 if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){
a0d1931c
Y
1576 qadd= 0;
1577 qmul= 1<<QEXPSHIFT;
1578 }
1579
1580 /* If we are on the second or later slice, restore our index. */
1581 if (start_y != 0)
1582 new_index = save_state[0];
1583
115329f1 1584
a0d1931c
Y
1585 for(y=start_y; y<h; y++){
1586 int x = 0;
1587 int v;
d593e329
MN
1588 IDWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1589 memset(line, 0, b->width*sizeof(IDWTELEM));
a0d1931c
Y
1590 v = b->x_coeff[new_index].coeff;
1591 x = b->x_coeff[new_index++].x;
ef3dfbd4 1592 while(x < w){
538a3841
MN
1593 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1594 register int u= -(v&1);
1595 line[x] = (t^u) - u;
1596
a0d1931c
Y
1597 v = b->x_coeff[new_index].coeff;
1598 x = b->x_coeff[new_index++].x;
1599 }
791e7b83 1600 }
115329f1 1601
a0d1931c
Y
1602 /* Save our variables for the next slice. */
1603 save_state[0] = new_index;
115329f1 1604
a0d1931c 1605 return;
791e7b83
MN
1606}
1607
396a5e68 1608static void reset_contexts(SnowContext *s){ //FIXME better initial contexts
791e7b83
MN
1609 int plane_index, level, orientation;
1610
19aa028d 1611 for(plane_index=0; plane_index<3; plane_index++){
4f90f33a 1612 for(level=0; level<MAX_DECOMPOSITIONS; level++){
791e7b83 1613 for(orientation=level ? 1:0; orientation<4; orientation++){
28869757 1614 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
791e7b83
MN
1615 }
1616 }
1617 }
28869757
MN
1618 memset(s->header_state, MID_STATE, sizeof(s->header_state));
1619 memset(s->block_state, MID_STATE, sizeof(s->block_state));
155ec6ed
MN
1620}
1621
1622static int alloc_blocks(SnowContext *s){
1623 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1624 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
115329f1 1625
155ec6ed
MN
1626 s->b_width = w;
1627 s->b_height= h;
115329f1 1628
dc7f45a0 1629 av_free(s->block);
155ec6ed
MN
1630 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
1631 return 0;
1632}
1633
28869757
MN
1634static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1635 uint8_t *bytestream= d->bytestream;
1636 uint8_t *bytestream_start= d->bytestream_start;
155ec6ed 1637 *d= *s;
28869757
MN
1638 d->bytestream= bytestream;
1639 d->bytestream_start= bytestream_start;
155ec6ed
MN
1640}
1641
//near copy & paste from dsputil, FIXME
/**
 * Sum all samples of the w x w square whose top-left sample is pix[0].
 *
 * @param line_size distance in bytes between the starts of consecutive rows
 */
static int pix_sum(uint8_t * pix, int line_size, int w)
{
    int total = 0;
    int row, col;

    for (row = 0; row < w; row++) {
        const uint8_t *p = pix + row*line_size;

        for (col = 0; col < w; col++)
            total += p[col];
    }
    return total;
}
1657
1658//near copy & paste from dsputil, FIXME
1659static int pix_norm1(uint8_t * pix, int line_size, int w)
1660{
1661 int s, i, j;
1d503957 1662 uint32_t *sq = ff_squareTbl + 256;
155ec6ed
MN
1663
1664 s = 0;
1665 for (i = 0; i < w; i++) {
1666 for (j = 0; j < w; j ++) {
1667 s += sq[pix[0]];
1668 pix ++;
1669 }
1670 pix += line_size - w;
1671 }
1672 return s;
1673}
1674
8c36eaaa 1675static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
155ec6ed
MN
1676 const int w= s->b_width << s->block_max_depth;
1677 const int rem_depth= s->block_max_depth - level;
1678 const int index= (x + y*w) << rem_depth;
1679 const int block_w= 1<<rem_depth;
1680 BlockNode block;
1681 int i,j;
115329f1 1682
155ec6ed
MN
1683 block.color[0]= l;
1684 block.color[1]= cb;
1685 block.color[2]= cr;
1686 block.mx= mx;
1687 block.my= my;
8c36eaaa 1688 block.ref= ref;
155ec6ed
MN
1689 block.type= type;
1690 block.level= level;
1691
1692 for(j=0; j<block_w; j++){
1693 for(i=0; i<block_w; i++){
1694 s->block[index + i + j*w]= block;
1695 }
1696 }
1697}
1698
1699static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
1700 const int offset[3]= {
1701 y*c-> stride + x,
1702 ((y*c->uvstride + x)>>1),
1703 ((y*c->uvstride + x)>>1),
1704 };
1705 int i;
1706 for(i=0; i<3; i++){
1707 c->src[0][i]= src [i];
1708 c->ref[0][i]= ref [i] + offset[i];
1709 }
1710 assert(!ref_index);
1711}
1712
85fc0e75 1713static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
aadcc5ce 1714 const BlockNode *left, const BlockNode *top, const BlockNode *tr){
85fc0e75
LM
1715 if(s->ref_frames == 1){
1716 *mx = mid_pred(left->mx, top->mx, tr->mx);
1717 *my = mid_pred(left->my, top->my, tr->my);
1718 }else{
1719 const int *scale = scale_mv_ref[ref];
6884c36c
PI
1720 *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8,
1721 (top ->mx * scale[top ->ref] + 128) >>8,
1722 (tr ->mx * scale[tr ->ref] + 128) >>8);
1723 *my = mid_pred((left->my * scale[left->ref] + 128) >>8,
1724 (top ->my * scale[top ->ref] + 128) >>8,
1725 (tr ->my * scale[tr ->ref] + 128) >>8);
85fc0e75
LM
1726 }
1727}
1728
155ec6ed
MN
/* Named rows of the local `int P[10][2]` predictor array built in
 * encode_q_branch(): P_LEFT, P_TOP and P_TOPRIGHT are filled from the
 * neighbouring blocks' motion vectors and P_MEDIAN with their component-wise
 * median; the whole array is handed to ff_epzs_motion_search(). */
1729//FIXME copy&paste
1730#define P_LEFT P[1]
1731#define P_TOP P[2]
1732#define P_TOPRIGHT P[3]
1733#define P_MEDIAN P[4]
1734#define P_MV1 P[9]
1735#define FLAG_QPEL 1 //must be 1
1736
/**
 * Choose and write the coding of the block at position (x,y) of quadtree
 * level `level`, searching motion vectors on the fly.
 *
 * Three alternatives are costed: an inter block (best motion vector over all
 * reference frames), an intra block (mean colour of the source block) and,
 * unless level == block_max_depth, a split into four children handled by
 * recursion. The inter and intra candidates are trial-encoded into the
 * scratch coders pc/ic (copies of s->c writing to p_buffer/i_buffer); the
 * winner's bytes and coder state are copied back over the output position
 * saved on entry.
 *
 * @return the score of the chosen alternative; 0 for keyframes, where the
 *         block is only marked intra and nothing is written
 */
1737static int encode_q_branch(SnowContext *s, int level, int x, int y){
1738 uint8_t p_buffer[1024];
1739 uint8_t i_buffer[1024];
1740 uint8_t p_state[sizeof(s->block_state)];
1741 uint8_t i_state[sizeof(s->block_state)];
28869757
MN
1742 RangeCoder pc, ic;
/* output position on entry; the chosen candidate is copied back to here */
1743 uint8_t *pbbak= s->c.bytestream;
1744 uint8_t *pbbak_start= s->c.bytestream_start;
1e6b5700 1745 int score, score2, iscore, i_len, p_len, block_s, sum, base_bits;
155ec6ed
MN
1746 const int w= s->b_width << s->block_max_depth;
1747 const int h= s->b_height << s->block_max_depth;
1748 const int rem_depth= s->block_max_depth - level;
1749 const int index= (x + y*w) << rem_depth;
1750 const int block_w= 1<<(LOG2_MB_SIZE - level);
155ec6ed
MN
1751 int trx= (x+1)<<rem_depth;
1752 int try= (y+1)<<rem_depth;
aadcc5ce
PI
/* neighbours outside the frame are replaced by null_block (or by left/tl) */
1753 const BlockNode *left = x ? &s->block[index-1] : &null_block;
1754 const BlockNode *top = y ? &s->block[index-w] : &null_block;
1755 const BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
1756 const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
1757 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
1758 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
155ec6ed
MN
/* colour predictors: the left neighbour's colours */
1759 int pl = left->color[0];
1760 int pcb= left->color[1];
1761 int pcr= left->color[2];
85fc0e75 1762 int pmx, pmy;
155ec6ed 1763 int mx=0, my=0;
51d6a3cf 1764 int l,cr,cb;
155ec6ed
MN
1765 const int stride= s->current_picture.linesize[0];
1766 const int uvstride= s->current_picture.linesize[1];
51d6a3cf
MN
1767 uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w,
1768 s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
1769 s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
155ec6ed
MN
1770 int P[10][2];
1771 int16_t last_mv[3][2];
1772 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
1773 const int shift= 1+qpel;
1774 MotionEstContext *c= &s->m.me;
/* range coder context selectors derived from the neighbours */
8c36eaaa 1775 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
c26abfa5
DB
1776 int mx_context= av_log2(2*FFABS(left->mx - top->mx));
1777 int my_context= av_log2(2*FFABS(left->my - top->my));
155ec6ed 1778 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
8c36eaaa 1779 int ref, best_ref, ref_score, ref_mx, ref_my;
155ec6ed
MN
1780
1781 assert(sizeof(s->block_state) >= 256);
/* keyframe: mark the block intra with the predicted colours, write nothing */
1782 if(s->keyframe){
85fc0e75 1783 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
155ec6ed
MN
1784 return 0;
1785 }
1786
155ec6ed
MN
1787// clip predictors / edge ?
1788
1789 P_LEFT[0]= left->mx;
1790 P_LEFT[1]= left->my;
1791 P_TOP [0]= top->mx;
1792 P_TOP [1]= top->my;
1793 P_TOPRIGHT[0]= tr->mx;
1794 P_TOPRIGHT[1]= tr->my;
115329f1 1795
155ec6ed
MN
/* extra search candidates: what s->block currently holds for this, the right and the bottom block */
1796 last_mv[0][0]= s->block[index].mx;
1797 last_mv[0][1]= s->block[index].my;
1798 last_mv[1][0]= right->mx;
1799 last_mv[1][1]= right->my;
1800 last_mv[2][0]= bottom->mx;
1801 last_mv[2][1]= bottom->my;
115329f1 1802
155ec6ed 1803 s->m.mb_stride=2;
115329f1 1804 s->m.mb_x=
155ec6ed 1805 s->m.mb_y= 0;
e2158da8 1806 c->skip= 0;
155ec6ed 1807
e2158da8
PI
1808 assert(c-> stride == stride);
1809 assert(c->uvstride == uvstride);
115329f1 1810
155ec6ed
MN
1811 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
1812 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
1813 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
1814 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
115329f1 1815
ff158dc9
MN
/* search window relative to this block: the frame area plus a 14 pixel border */
1816 c->xmin = - x*block_w - 16+2;
1817 c->ymin = - y*block_w - 16+2;
1818 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
1819 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
155ec6ed
MN
1820
1821 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
115329f1 1822 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
155ec6ed
MN
1823 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
1824 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
1825 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
1826 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
1827 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
1828
1829 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
1830 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
1831
1832 if (!y) {
1833 c->pred_x= P_LEFT[0];
1834 c->pred_y= P_LEFT[1];
1835 } else {
1836 c->pred_x = P_MEDIAN[0];
1837 c->pred_y = P_MEDIAN[1];
1838 }
1839
8c36eaaa
LM
/* motion search in every reference frame; the lowest score wins (ties keep the lower index) */
1840 score= INT_MAX;
1841 best_ref= 0;
1842 for(ref=0; ref<s->ref_frames; ref++){
1843 init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);
1844
1845 ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
1846 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
155ec6ed 1847
8c36eaaa
LM
1848 assert(ref_mx >= c->xmin);
1849 assert(ref_mx <= c->xmax);
1850 assert(ref_my >= c->ymin);
1851 assert(ref_my <= c->ymax);
115329f1 1852
e2158da8 1853 ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
8c36eaaa
LM
1854 ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
1855 ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
/* per-reference results are recorded when the ref_mvs table for this frame exists */
1856 if(s->ref_mvs[ref]){
1857 s->ref_mvs[ref][index][0]= ref_mx;
1858 s->ref_mvs[ref][index][1]= ref_my;
1859 s->ref_scores[ref][index]= ref_score;
1860 }
1861 if(score > ref_score){
1862 score= ref_score;
1863 best_ref= ref;
1864 mx= ref_mx;
1865 my= ref_my;
1866 }
1867 }
755bfeab 1868 //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2
115329f1 1869
155ec6ed 1870 // subpel search
/* trial-encode the inter candidate into p_buffer with a private copy of coder and states */
61d49d12 1871 base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start);
155ec6ed 1872 pc= s->c;
28869757
MN
1873 pc.bytestream_start=
1874 pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
155ec6ed
MN
1875 memcpy(p_state, s->block_state, sizeof(s->block_state));
1876
1877 if(level!=s->block_max_depth)
28869757
MN
1878 put_rac(&pc, &p_state[4 + s_context], 1);
1879 put_rac(&pc, &p_state[1 + left->type + top->type], 0);
8c36eaaa
LM
1880 if(s->ref_frames > 1)
1881 put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
85fc0e75 1882 pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
8c36eaaa
LM
1883 put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
1884 put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
28869757 1885 p_len= pc.bytestream - pc.bytestream_start;
1e6b5700 1886 score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT;
155ec6ed
MN
1887
/* intra candidate: rounded mean per plane; iscore is built from the luma plane only (chroma terms are commented out) */
1888 block_s= block_w*block_w;
51d6a3cf 1889 sum = pix_sum(current_data[0], stride, block_w);
155ec6ed 1890 l= (sum + block_s/2)/block_s;
51d6a3cf 1891 iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
115329f1 1892
155ec6ed 1893 block_s= block_w*block_w>>2;
51d6a3cf 1894 sum = pix_sum(current_data[1], uvstride, block_w>>1);
155ec6ed
MN
1895 cb= (sum + block_s/2)/block_s;
1896// iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
51d6a3cf 1897 sum = pix_sum(current_data[2], uvstride, block_w>>1);
155ec6ed
MN
1898 cr= (sum + block_s/2)/block_s;
1899// iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
1900
/* trial-encode the intra candidate into i_buffer, again from the untouched s->c */
1901 ic= s->c;
28869757
MN
1902 ic.bytestream_start=
1903 ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
155ec6ed
MN
1904 memcpy(i_state, s->block_state, sizeof(s->block_state));
1905 if(level!=s->block_max_depth)
28869757
MN
1906 put_rac(&ic, &i_state[4 + s_context], 1);
1907 put_rac(&ic, &i_state[1 + left->type + top->type], 1);
155ec6ed
MN
1908 put_symbol(&ic, &i_state[32], l-pl , 1);
1909 put_symbol(&ic, &i_state[64], cb-pcb, 1);
1910 put_symbol(&ic, &i_state[96], cr-pcr, 1);
28869757 1911 i_len= ic.bytestream - ic.bytestream_start;
1e6b5700 1912 iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT;
155ec6ed
MN
1913
1914// assert(score==256*256*256*64-1);
1915 assert(iscore < 255*255*256 + s->lambda2*10);
1916 assert(iscore >= 0);
1917 assert(l>=0 && l<=255);
1918 assert(pl>=0 && pl<=255);
1919
/* top-level blocks also feed the scene change score */
1920 if(level==0){
1921 int varc= iscore >> 8;
1922 int vard= score >> 8;
1923 if (vard <= 64 || vard < varc)
1924 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
1925 else
1926 c->scene_change_score+= s->m.qscale;
1927 }
115329f1 1928
/* split candidate: written straight into s->c; if it does not win, the
 * branches below restore coder, states and output bytes from the pc/ic
 * snapshots and the set_blocks() call overwrites what the children stored */
155ec6ed 1929 if(level!=s->block_max_depth){
28869757 1930 put_rac(&s->c, &s->block_state[4 + s_context], 0);
155ec6ed
MN
1931 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
1932 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
1933 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
1934 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
1935 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
115329f1 1936
155ec6ed
MN
1937 if(score2 < score && score2 < iscore)
1938 return score2;
1939 }
115329f1 1940
/* commit the cheaper unsplit candidate; intra blocks store the predicted vector */
155ec6ed 1941 if(iscore < score){
85fc0e75 1942 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
28869757 1943 memcpy(pbbak, i_buffer, i_len);
155ec6ed 1944 s->c= ic;
28869757
MN
1945 s->c.bytestream_start= pbbak_start;
1946 s->c.bytestream= pbbak + i_len;
8c36eaaa 1947 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
155ec6ed
MN
1948 memcpy(s->block_state, i_state, sizeof(s->block_state));
1949 return iscore;
1950 }else{
28869757 1951 memcpy(pbbak, p_buffer, p_len);
155ec6ed 1952 s->c= pc;
28869757
MN
1953 s->c.bytestream_start= pbbak_start;
1954 s->c.bytestream= pbbak + p_len;
8c36eaaa 1955 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
155ec6ed
MN
1956 memcpy(s->block_state, p_state, sizeof(s->block_state));
1957 return score;
1958 }
1959}
1960
849f1035 1961static av_always_inline int same_block(BlockNode *a, BlockNode *b){
51d6a3cf
MN
1962 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
1963 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
1964 }else{
8c36eaaa 1965 return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
51d6a3cf
MN
1966 }
1967}
1968
1969static void encode_q_branch2(SnowContext *s, int level, int x, int y){
1970 const int w= s->b_width << s->block_max_depth;
1971 const int rem_depth= s->block_max_depth - level;
1972 const int index= (x + y*w) << rem_depth;
1973 int trx= (x+1)<<rem_depth;
1974 BlockNode *b= &s->block[index];
aadcc5ce
PI
1975 const BlockNode *left = x ? &s->block[index-1] : &null_block;
1976 const BlockNode *top = y ? &s->block[index-w] : &null_block;
1977 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
1978 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
51d6a3cf
MN
1979 int pl = left->color[0];
1980 int pcb= left->color[1];
1981 int pcr= left->color[2];
85fc0e75 1982 int pmx, pmy;
8c36eaaa 1983 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
c26abfa5
DB
1984 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref;
1985 int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref;
51d6a3cf
MN
1986 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1987
1988 if(s->keyframe){
85fc0e75 1989 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
51d6a3cf
MN
1990 return;
1991 }
1992
1993 if(level!=s->block_max_depth){
1994 if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
d6f41eed
MN
1995 put_rac(&s->c, &s->block_state[4 + s_context], 1);
1996 }else{
51d6a3cf
MN
1997 put_rac(&s->c, &s->block_state[4 + s_context], 0);
1998 encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
1999 encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
2000 encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
2001 encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
2002 return;
51d6a3cf
MN
2003 }
2004 }
2005 if(b->type & BLOCK_INTRA){
85fc0e75 2006 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
51d6a3cf
MN
2007 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
2008 put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
2009 put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
2010 put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
8c36eaaa 2011 set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
51d6a3cf 2012 }else{
85fc0e75 2013 pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
51d6a3cf 2014 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
8c36eaaa
LM
2015 if(s->ref_frames > 1)
2016 put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
51d6a3cf
MN
2017 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
2018 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
8c36eaaa 2019 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
51d6a3cf
MN
2020 }
2021}
2022
155ec6ed
MN
2023static void decode_q_branch(SnowContext *s, int level, int x, int y){
2024 const int w= s->b_width << s->block_max_depth;
155ec6ed
MN
2025 const int rem_depth= s->block_max_depth - level;
2026 const int index= (x + y*w) << rem_depth;
155ec6ed 2027 int trx= (x+1)<<rem_depth;
aadcc5ce
PI
2028 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2029 const BlockNode *top = y ? &s->block[index-w] : &null_block;
2030 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2031 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
155ec6ed 2032 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
115329f1 2033
155ec6ed 2034 if(s->keyframe){
8c36eaaa 2035 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
155ec6ed
MN
2036 return;
2037 }
2038
28869757 2039 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
1e90b34f 2040 int type, mx, my;
155ec6ed
MN
2041 int l = left->color[0];
2042 int cb= left->color[1];
2043 int cr= left->color[2];
8c36eaaa
LM
2044 int ref = 0;
2045 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
c26abfa5
DB
2046 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
2047 int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
115329f1 2048
28869757 2049 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
155ec6ed
MN
2050
2051 if(type){
85fc0e75 2052 pred_mv(s, &mx, &my, 0, left, top, tr);
155ec6ed
MN
2053 l += get_symbol(&s->c, &s->block_state[32], 1);
2054 cb+= get_symbol(&s->c, &s->block_state[64], 1);
2055 cr+= get_symbol(&s->c, &s->block_state[96], 1);
2056 }else{
8c36eaaa
LM
2057 if(s->ref_frames > 1)
2058 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
85fc0e75 2059 pred_mv(s, &mx, &my, ref, left, top, tr);
8c36eaaa
LM
2060 mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
2061 my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
155ec6ed 2062 }
8c36eaaa 2063 set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
155ec6ed
MN
2064 }else{
2065 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
2066 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
2067 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
2068 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
2069 }
2070}
2071
74e6a8aa 2072static void encode_blocks(SnowContext *s, int search){
155ec6ed
MN
2073 int x, y;
2074 int w= s->b_width;
2075 int h= s->b_height;
2076
74e6a8aa 2077 if(s->avctx->me_method == ME_ITER && !s->keyframe && search)
51d6a3cf
MN
2078 iterative_me(s);
2079
155ec6ed 2080 for(y=0; y<h; y++){
d06c75a8 2081 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
0ecca7a4
MN
2082 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2083 return;
2084 }
155ec6ed 2085 for(x=0; x<w; x++){
74e6a8aa 2086 if(s->avctx->me_method == ME_ITER || !search)
51d6a3cf
MN
2087 encode_q_branch2(s, 0, x, y);
2088 else
2089 encode_q_branch (s, 0, x, y);
155ec6ed
MN
2090 }
2091 }
2092}
2093
2094static void decode_blocks(SnowContext *s){
2095 int x, y;
2096 int w= s->b_width;
2097 int h= s->b_height;
2098
2099 for(y=0; y<h; y++){
2100 for(x=0; x<w; x++){
2101 decode_q_branch(s, 0, x, y);
2102 }
2103 }
791e7b83
MN
2104}
2105
/**
 * Motion-compensate one b_w x b_h block with 1/16 sample accuracy.
 *
 * Up to three half-sample planes are filtered from src into local buffers
 * (tmp2t[0]: horizontal, tmp2t[1]: vertical, tmp2t[2]: both directions, built
 * from the unrounded horizontal results kept in tmpIt), then dst is blended
 * from src and those planes.
 *
 * @param p    plane parameters; when NULL or p->fast_mc is set a fixed
 *             (20,-5,1) 6-tap filter is used, otherwise the 8-tap filter
 *             given by p->hcoeff
 * @param src  points (HTAPS_MAX/2-1) samples left of and above the integer
 *             position of the block (see the src adjustments below)
 * @param tmp  not referenced by this function
 * @param dx,dy fractional position in 1/16 samples, both below 16 (asserted)
 */
7d7f57d9 2106static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
/* blend weight of the first plane (out of 8), indexed by (dx&7) + 8*(dy&7) */
7942269b 2107 static const uint8_t weight[64]={
a68ca08e
MN
2108 8,7,6,5,4,3,2,1,
2109 7,7,0,0,0,0,0,1,
2110 6,0,6,0,0,0,2,0,
2111 5,0,0,5,0,3,0,0,
2112 4,0,0,0,4,0,0,0,
2113 3,0,0,5,0,3,0,0,
2114 2,0,6,0,0,0,2,0,
2115 1,7,0,0,0,0,0,1,
2116 };
2117
/* indexed by dx + 16*dy: high nibble = hpel[] index of the first plane, low
 * nibble = index of the second; 0xcc entries select needs[12] == 15 below,
 * i.e. the four-plane bilinear path */
7942269b 2118 static const uint8_t brane[256]={
a68ca08e
MN
2119 0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
2120 0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
2121 0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
2122 0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
2123 0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
2124 0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
2125 0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
2126 0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
2127 0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
2128 0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
2129 0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
2130 0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
2131 0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
2132 0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
2133 0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
2134 0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
2135 };
2136
/* per hpel[] index: bitmask of filter passes required (1: horizontal,
 * 2: vertical, 4: both directions); entries past the initializers are 0 */
7942269b 2137 static const uint8_t needs[16]={
a68ca08e
MN
2138 0,1,0,0,
2139 2,4,2,0,
2140 0,1,0,0,
2141 15
2142 };
2143
2144 int x, y, b, r, l;
61d6e445
MN
2145 int16_t tmpIt [64*(32+HTAPS_MAX)];
2146 uint8_t tmp2t[3][stride*(32+HTAPS_MAX)];
a68ca08e
MN
2147 int16_t *tmpI= tmpIt;
2148 uint8_t *tmp2= tmp2t[0];
f0a70840 2149 const uint8_t *hpel[11];
a68ca08e
MN
2150 assert(dx<16 && dy<16);
2151 r= brane[dx + 16*dy]&15;
2152 l= brane[dx + 16*dy]>>4;
2153
2154 b= needs[l] | needs[r];
/* without diagonal MC every plane is built and the bilinear path is taken */
7d7f57d9
MN
2155 if(p && !p->diag_mc)
2156 b= 15;
a68ca08e
MN
2157
/* horizontal pass: also needed (bit 4) as input of the 2-D pass, hence the
 * extra HTAPS_MAX-1 rows and the unrounded copy in tmpI */
2158 if(b&5){
61d6e445 2159 for(y=0; y < b_h+HTAPS_MAX-1; y++){
65dc0f53 2160 for(x=0; x < b_w; x++){
61d6e445
MN
2161 int a_1=src[x + HTAPS_MAX/2-4];
2162 int a0= src[x + HTAPS_MAX/2-3];
2163 int a1= src[x + HTAPS_MAX/2-2];
2164 int a2= src[x + HTAPS_MAX/2-1];
2165 int a3= src[x + HTAPS_MAX/2+0];
2166 int a4= src[x + HTAPS_MAX/2+1];
2167 int a5= src[x + HTAPS_MAX/2+2];
2168 int a6= src[x + HTAPS_MAX/2+3];
7d7f57d9
MN
2169 int am=0;
2170 if(!p || p->fast_mc){
2171 am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2172 tmpI[x]= am;
2173 am= (am+16)>>5;
2174 }else{
2175 am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
2176 tmpI[x]= am;
2177 am= (am+32)>>6;
2178 }
791e7b83 2179
65dc0f53
MN
/* clamp to 0..255: negative -> 0, above 255 -> -1 which is stored as 255 */
2180 if(am&(~255)) am= ~(am>>31);
2181 tmp2[x]= am;
2182 }
2183 tmpI+= 64;
2184 tmp2+= stride;
2185 src += stride;
791e7b83 2186 }
/* rewind src to the first row */
65dc0f53 2187 src -= stride*y;
a68ca08e 2188 }
61d6e445 2189 src += HTAPS_MAX/2 - 1;
a68ca08e 2190 tmp2= tmp2t[1];
115329f1 2191
/* vertical pass on the source; b_w+1 columns because hpel[6] reads this plane shifted by one sample */
a68ca08e 2192 if(b&2){
65dc0f53
MN
2193 for(y=0; y < b_h; y++){
2194 for(x=0; x < b_w+1; x++){
61d6e445
MN
2195 int a_1=src[x + (HTAPS_MAX/2-4)*stride];
2196 int a0= src[x + (HTAPS_MAX/2-3)*stride];
2197 int a1= src[x + (HTAPS_MAX/2-2)*stride];
2198 int a2= src[x + (HTAPS_MAX/2-1)*stride];
2199 int a3= src[x + (HTAPS_MAX/2+0)*stride];
2200 int a4= src[x + (HTAPS_MAX/2+1)*stride];
2201 int a5= src[x + (HTAPS_MAX/2+2)*stride];
2202 int a6= src[x + (HTAPS_MAX/2+3)*stride];
7d7f57d9
MN
2203 int am=0;
2204 if(!p || p->fast_mc)
2205 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
2206 else
2207 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
791e7b83 2208
65dc0f53
MN
2209 if(am&(~255)) am= ~(am>>31);
2210 tmp2[x]= am;
2211 }
2212 src += stride;
2213 tmp2+= stride;
a68ca08e 2214 }
65dc0f53 2215 src -= stride*y;
a68ca08e 2216 }
61d6e445 2217 src += stride*(HTAPS_MAX/2 - 1);
a68ca08e
MN
2218 tmp2= tmp2t[2];
2219 tmpI= tmpIt;
/* 2-D pass: vertical filter over the unrounded horizontal results, so both
 * filter gains are removed by the single final shift */
2220 if(b&4){
2221 for(y=0; y < b_h; y++){
2222 for(x=0; x < b_w; x++){
61d6e445
MN
2223 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
2224 int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
2225 int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
2226 int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
2227 int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
2228 int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
2229 int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
2230 int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
7d7f57d9
MN
2231 int am=0;
2232 if(!p || p->fast_mc)
2233 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
2234 else
2235 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
a68ca08e
MN
2236 if(am&(~255)) am= ~(am>>31);
2237 tmp2[x]= am;
2238 }
2239 tmpI+= 64;
2240 tmp2+= stride;
2241 }
2242 }
115329f1 2243
/* hpel[] is a grid with row pitch 4: the column is the horizontal and the
 * row the vertical half-sample step; entries 3 and 7 are never set or read */
a68ca08e 2244 hpel[ 0]= src;
61d6e445 2245 hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1);
a68ca08e
MN
2246 hpel[ 2]= src + 1;
2247
2248 hpel[ 4]= tmp2t[1];
2249 hpel[ 5]= tmp2t[2];
2250 hpel[ 6]= tmp2t[1] + 1;
2251
2252 hpel[ 8]= src + stride;
2253 hpel[ 9]= hpel[1] + stride;
2254 hpel[10]= hpel[8] + 1;
2255
/* all planes available: bilinear blend of the four surrounding half-sample planes */
2256 if(b==15){
f0a70840
BC
2257 const uint8_t *src1= hpel[dx/8 + dy/8*4 ];
2258 const uint8_t *src2= hpel[dx/8 + dy/8*4+1];
2259 const uint8_t *src3= hpel[dx/8 + dy/8*4+4];
2260 const uint8_t *src4= hpel[dx/8 + dy/8*4+5];
a68ca08e
MN
2261 dx&=7;
2262 dy&=7;
2263 for(y=0; y < b_h; y++){
2264 for(x=0; x < b_w; x++){
2265 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
2266 (8-dx)* dy *src3[x] + dx* dy *src4[x]+32)>>6;
2267 }
2268 src1+=stride;
2269 src2+=stride;
2270 src3+=stride;
2271 src4+=stride;
2272 dst +=stride;
2273 }
2274 }else{
/* otherwise: weighted average of the two planes selected by brane[] */
f0a70840
BC
2275 const uint8_t *src1= hpel[l];
2276 const uint8_t *src2= hpel[r];
a68ca08e
MN
2277 int a= weight[((dx&7) + (8*(dy&7)))];
2278 int b= 8-a;
2279 for(y=0; y < b_h; y++){
2280 for(x=0; x < b_w; x++){
2281 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
2282 }
2283 src1+=stride;
2284 src2+=stride;
2285 dst +=stride;
791e7b83
MN
2286 }
2287 }
2288}
2289
/* mca(dx,dy,b_w) defines mc_block_hpel<dx><dy><b_w>(dst, src, stride, h): a
 * wrapper with a (dst, src, stride, h) signature around mc_block() for a
 * square b_w x b_w block (h must equal b_w) at the fixed fractional position
 * (dx,dy). mc_block() is called with p == NULL and with src moved back by the
 * (HTAPS_MAX/2-1) sample margin in both directions. Instantiated below for
 * the positions (0,0), (8,0), (0,8), (8,8) and block sizes 16 and 8. */
791e7b83 2290#define mca(dx,dy,b_w)\
bad700e3 2291static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
61d6e445 2292 uint8_t tmp[stride*(b_w+HTAPS_MAX-1)];\
791e7b83 2293 assert(h==b_w);\
61d6e445 2294 mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, tmp, stride, b_w, b_w, dx, dy);\
791e7b83
MN
2295}
2296
2297mca( 0, 0,16)
2298mca( 8, 0,16)
2299mca( 0, 8,16)
2300mca( 8, 8,16)
d92b5807
MN
2301mca( 0, 0,8)
2302mca( 8, 0,8)
2303mca( 0, 8,8)
2304mca( 8, 8,8)
791e7b83 2305
8c36eaaa 2306static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
51d6a3cf 2307 if(block->type & BLOCK_INTRA){
ff158dc9 2308 int x, y;
2692ceab
MN
2309 const int color = block->color[plane_index];
2310 const int color4= color*0x01010101;
1015631b
LM
2311 if(b_w==32){
2312 for(y=0; y < b_h; y++){
2313 *(uint32_t*)&dst[0 + y*stride]= color4;
2314 *(uint32_t*)&dst[4 + y*stride]= color4;
2315 *(uint32_t*)&dst[8 + y*stride]= color4;
2316 *(uint32_t*)&dst[12+ y*stride]= color4;
2317 *(uint32_t*)&dst[16+ y*stride]= color4;
2318 *(uint32_t*)&dst[20+ y*stride]= color4;
2319 *(uint32_t*)&dst[24+ y*stride]= color4;
2320 *(uint32_t*)&dst[28+ y*stride]= color4;
2321 }
2322 }else if(b_w==16){
2692ceab
MN
2323 for(y=0; y < b_h; y++){
2324 *(uint32_t*)&dst[0 + y*stride]= color4;
2325 *(uint32_t*)&dst[4 + y*stride]= color4;
2326 *(uint32_t*)&dst[8 + y*stride]= color4;
2327 *(uint32_t*)&dst[12+ y*stride]= color4;
2328 }
2329 }else if(b_w==8){
2330 for(y=0; y < b_h; y++){
2331 *(uint32_t*)&dst[0 + y*stride]= color4;
2332 *(uint32_t*)&dst[4 + y*stride]= color4;
2333 }
2334 }else if(b_w==4){
2335 for(y=0; y < b_h; y++){
2336 *(uint32_t*)&dst[0 + y*stride]= color4;
2337 }
2338 }else{
2339 for(y=0; y < b_h; y++){
2340 for(x=0; x < b_w; x++){
2341 dst[x + y*stride]= color;
2342 }
ff158dc9
MN
2343 }
2344 }
2345 }else{
8c36eaaa 2346 uint8_t *src= s->last_picture[block->ref].data[plane_index];
ff158dc9
MN
2347 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
2348 int mx= block->mx*scale;
2349 int my= block->my*scale;
ec697587
MN
2350 const int dx= mx&15;
2351 const int dy= my&15;
80e44bc3 2352 const int tab_index= 3 - (b_w>>2) + (b_w>>4);
61d6e445
MN
2353 sx += (mx>>4) - (HTAPS_MAX/2-1);
2354 sy += (my>>4) - (HTAPS_MAX/2-1);
ff158dc9 2355 src += sx + sy*stride;
61d6e445
MN
2356 if( (unsigned)sx >= w - b_w - (HTAPS_MAX-2)
2357 || (unsigned)sy >= h - b_h - (HTAPS_MAX-2)){
2358 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h);
ff158dc9
MN
2359 src= tmp + MB_SIZE;
2360 }
87f20c2f
MN
2361// assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
2362// assert(!(b_w&(b_w-1)));
2692ceab 2363 assert(b_w>1 && b_h>1);
89438028 2364 assert((tab_index>=0 && tab_index<4) || b_w==32);
7d7f57d9
MN
2365 if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc )
2366 mc_block(&s->plane[plane_index], dst, src, tmp, stride, b_w, b_h, dx, dy);
1015631b
LM
2367 else if(b_w==32){
2368 int y;
2369 for(y=0; y<b_h; y+=16){
7d7f57d9
MN
2370 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
2371 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
1015631b
LM
2372 }
2373 }else if(b_w==b_h)
7d7f57d9 2374 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
2692ceab 2375 else if(b_w==2*b_h){
7d7f57d9
MN
2376 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 3 + 3*stride,stride);
2377 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
2692ceab
MN
2378 }else{
2379 assert(2*b_w==b_h);
7d7f57d9
MN
2380 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 3 + 3*stride ,stride);
2381 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
2692ceab 2382 }
ff158dc9
MN
2383 }
2384}
2385
9dd6c804 2386void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
059715a4
RE
2387 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
2388 int y, x;
d593e329 2389 IDWTELEM * dst;
059715a4 2390 for(y=0; y<b_h; y++){
19032450 2391 //FIXME ugly misuse of obmc_stride
9dd6c804
PI
2392 const uint8_t *obmc1= obmc + y*obmc_stride;
2393 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2394 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2395 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
059715a4
RE
2396 dst = slice_buffer_get_line(sb, src_y + y);
2397 for(x=0; x<b_w; x++){
2398 int v= obmc1[x] * block[3][x + y*src_stride]
2399 +obmc2[x] * block[2][x + y*src_stride]
2400 +obmc3[x] * block[1][x + y*src_stride]
2401 +obmc4[x] * block[0][x + y*src_stride];
2402
2403 v <<= 8 - LOG2_OBMC_MAX;
2404 if(FRAC_BITS != 8){
059715a4
RE
2405 v >>= 8 - FRAC_BITS;
2406 }
2407 if(add){
2408 v += dst[x + src_x];
2409 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2410 if(v&(~255)) v= ~(v>>31);
2411 dst8[x + y*src_stride] = v;
2412 }else{
2413 dst[x + src_x] -= v;
2414 }
2415 }
2416 }
2417}
2418
e6464f8b 2419//FIXME name cleanup (b_w, block_w, b_width stuff)
d593e329 2420static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
a0d1931c
Y
2421 const int b_width = s->b_width << s->block_max_depth;
2422 const int b_height= s->b_height << s->block_max_depth;
2423 const int b_stride= b_width;
2424 BlockNode *lt= &s->block[b_x + b_y*b_stride];
2425 BlockNode *rt= lt+1;
2426 BlockNode *lb= lt+b_stride;
2427 BlockNode *rb= lb+1;
115329f1 2428 uint8_t *block[4];
cc884a35 2429 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
bd2b6b33 2430 uint8_t *tmp = s->scratchbuf;
cc884a35 2431 uint8_t *ptmp;
a0d1931c
Y
2432 int x,y;
2433
2434 if(b_x<0){
2435 lt= rt;
2436 lb= rb;
2437 }else if(b_x + 1 >= b_width){
2438 rt= lt;
2439 rb= lb;
2440 }
2441 if(b_y<0){
2442 lt= lb;
2443 rt= rb;
2444 }else if(b_y + 1 >= b_height){
2445 lb= lt;
2446 rb= rt;
2447 }
115329f1 2448
e6464f8b 2449 if(src_x<0){ //FIXME merge with prev & always round internal width up to *16
a0d1931c
Y
2450 obmc -= src_x;
2451 b_w += src_x;
f7e89c73 2452 if(!sliced && !offset_dst)
1015631b 2453 dst -= src_x;
ff158dc9
MN
2454 src_x=0;
2455 }else if(src_x + b_w > w){
2456 b_w = w - src_x;
2457 }
2458 if(src_y<0){
2459 obmc -= src_y*obmc_stride;
2460 b_h += src_y;
f7e89c73 2461 if(!sliced && !offset_dst)
1015631b 2462 dst -= src_y*dst_stride;
ff158dc9
MN
2463 src_y=0;
2464 }else if(src_y + b_h> h){
2465 b_h = h - src_y;
791e7b83 2466 }
115329f1 2467
ff158dc9 2468 if(b_w<=0 || b_h<=0) return;
155ec6ed 2469
94ae6788
DB
2470 assert(src_stride > 2*MB_SIZE + 5);
2471
f7e89c73 2472 if(!sliced && offset_dst)
1015631b 2473 dst += src_x + src_y*dst_stride;
715a97f0 2474 dst8+= src_x + src_y*src_stride;
ff158dc9
MN
2475// src += src_x + src_y*src_stride;
2476
cc884a35
MN
2477 ptmp= tmp + 3*tmp_step;
2478 block[0]= ptmp;
2479 ptmp+=tmp_step;
8c36eaaa 2480 pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
ff158dc9
MN
2481
2482 if(same_block(lt, rt)){
2483 block[1]= block[0];
791e7b83 2484 }else{
cc884a35
MN
2485 block[1]= ptmp;
2486 ptmp+=tmp_step;
8c36eaaa 2487 pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
ff158dc9 2488 }
115329f1 2489
ff158dc9
MN
2490 if(same_block(lt, lb)){
2491 block[2]= block[0];
2492 }else if(same_block(rt, lb)){
2493 block[2]= block[1];
2494 }else{
cc884a35
MN
2495 block[2]= ptmp;
2496 ptmp+=tmp_step;
8c36eaaa 2497 pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
ff158dc9 2498 }
791e7b83 2499
ff158dc9
MN
2500 if(same_block(lt, rb) ){
2501 block[3]= block[0];
2502 }else if(same_block(rt, rb)){
2503 block[3]= block[1];
2504 }else if(same_block(lb, rb)){
2505 block[3]= block[2];
2506 }else{
cc884a35 2507 block[3]= ptmp;
8c36eaaa 2508 pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
ff158dc9
MN
2509 }
2510#if 0
2511 for(y=0; y<b_h; y++){
2512 for(x=0; x<b_w; x++){
2513 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2514 if(add) dst[x + y*dst_stride] += v;
2515 else dst[x + y*dst_stride] -= v;
2516 }
2517 }
2518 for(y=0; y<b_h; y++){
2519 uint8_t *obmc2= obmc + (obmc_stride>>1);
2520 for(x=0; x<b_w; x++){
2521 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2522 if(add) dst[x + y*dst_stride] += v;
2523 else dst[x + y*dst_stride] -= v;
2524 }
2525 }
2526 for(y=0; y<b_h; y++){
2527 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2528 for(x=0; x<b_w; x++){
2529 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2530 if(add) dst[x + y*dst_stride] += v;
2531 else dst[x + y*dst_stride] -= v;
2532 }
2533 }
2534 for(y=0; y<b_h; y++){
2535 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2536 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2537 for(x=0; x<b_w; x++){
2538 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2539 if(add) dst[x + y*dst_stride] += v;
2540 else dst[x + y*dst_stride] -= v;
2541 }
2542 }
2543#else
f7e89c73 2544 if(sliced){
f7e89c73 2545 s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
e73e4e75
DB
2546 }else{
2547 for(y=0; y<b_h; y++){
2548 //FIXME ugly misuse of obmc_stride
2549 const uint8_t *obmc1= obmc + y*obmc_stride;
2550 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2551 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2552 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2553 for(x=0; x<b_w; x++){
2554 int v= obmc1[x] * block[3][x + y*src_stride]
2555 +obmc2[x] * block[2][x + y*src_stride]
2556 +obmc3[x] * block[1][x + y*src_stride]
2557 +obmc4[x] * block[0][x + y*src_stride];
2558
2559 v <<= 8 - LOG2_OBMC_MAX;
2560 if(FRAC_BITS != 8){
2561 v >>= 8 - FRAC_BITS;
2562 }
2563 if(add){
2564 v += dst[x + y*dst_stride];
2565 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2566 if(v&(~255)) v= ~(v>>31);
2567 dst8[x + y*src_stride] = v;
2568 }else{
2569 dst[x + y*dst_stride] -= v;
2570 }
715a97f0 2571 }
791e7b83
MN
2572 }
2573 }
96e2fbf2 2574#endif /* 0 */
791e7b83
MN
2575}
2576
d593e329 2577static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){
a0d1931c
Y
2578 Plane *p= &s->plane[plane_index];
2579 const int mb_w= s->b_width << s->block_max_depth;
2580 const int mb_h= s->b_height << s->block_max_depth;
2581 int x, y, mb_x;
2582 int block_size = MB_SIZE >> s->block_max_depth;
2583 int block_w = plane_index ? block_size/2 : block_size;
2584 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2585 int obmc_stride= plane_index ? block_size : 2*block_size;
2586 int ref_stride= s->current_picture.linesize[plane_index];
a0d1931c
Y
2587 uint8_t *dst8= s->current_picture.data[plane_index];
2588 int w= p->width;
2589 int h= p->height;
115329f1 2590
a0d1931c
Y
2591 if(s->keyframe || (s->avctx->debug&512)){
2592 if(mb_y==mb_h)
2593 return;
2594
2595 if(add){
ef3dfbd4 2596 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
a0d1931c 2597// DWTELEM * line = slice_buffer_get_line(sb, y);
d593e329 2598 IDWTELEM * line = sb->line[y];
ef3dfbd4 2599 for(x=0; x<w; x++){
a0d1931c
Y
2600// int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2601 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2602 v >>= FRAC_BITS;
2603 if(v&(~255)) v= ~(v>>31);
2604 dst8[x + y*ref_stride]= v;
2605 }
2606 }
2607 }else{
ef3dfbd4 2608 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
a0d1931c 2609// DWTELEM * line = slice_buffer_get_line(sb, y);
d593e329 2610 IDWTELEM * line = sb->line[y];
ef3dfbd4 2611 for(x=0; x<w; x++){
a0d1931c
Y
2612 line[x] -= 128 << FRAC_BITS;
2613// buf[x + y*w]-= 128<<FRAC_BITS;
2614 }
2615 }
2616 }
2617
2618 return;
2619 }
115329f1 2620
e73e4e75
DB
2621 for(mb_x=0; mb_x<=mb_w; mb_x++){
2622 add_yblock(s, 1, sb, old_buffer, dst8, obmc,
2623 block_w*mb_x - block_w/2,
2624 block_w*mb_y - block_w/2,
2625 block_w, block_w,
2626 w, h,
2627 w, ref_stride, obmc_stride,
2628 mb_x - 1, mb_y - 1,
2629 add, 0, plane_index);
2630 }
a0d1931c
Y
2631}
2632
d593e329 2633static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){
791e7b83 2634 Plane *p= &s->plane[plane_index];
155ec6ed
MN
2635 const int mb_w= s->b_width << s->block_max_depth;
2636 const int mb_h= s->b_height << s->block_max_depth;
f9e6ebf7 2637 int x, y, mb_x;
155ec6ed
MN
2638 int block_size = MB_SIZE >> s->block_max_depth;
2639 int block_w = plane_index ? block_size/2 : block_size;
ff158dc9 2640 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
51d6a3cf 2641 const int obmc_stride= plane_index ? block_size : 2*block_size;
715a97f0 2642 int ref_stride= s->current_picture.linesize[plane_index];
715a97f0 2643 uint8_t *dst8= s->current_picture.data[plane_index];
791e7b83
MN
2644 int w= p->width;
2645 int h= p->height;
115329f1 2646
ff158dc9 2647 if(s->keyframe || (s->avctx->debug&512)){
f9e6ebf7
LM
2648 if(mb_y==mb_h)
2649 return;
2650
715a97f0 2651 if(add){
86e59cc0 2652 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
715a97f0
MN
2653 for(x=0; x<w; x++){
2654 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2655 v >>= FRAC_BITS;
2656 if(v&(~255)) v= ~(v>>31);
2657 dst8[x + y*ref_stride]= v;
2658 }
2659 }
2660 }else{
86e59cc0 2661 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
715a97f0
MN
2662 for(x=0; x<w; x++){
2663 buf[x + y*w]-= 128<<FRAC_BITS;
2664 }
ff158dc9 2665 }
791e7b83 2666 }
ff158dc9
MN
2667
2668 return;
791e7b83 2669 }
115329f1 2670
94ae6788
DB
2671 for(mb_x=0; mb_x<=mb_w; mb_x++){
2672 add_yblock(s, 0, NULL, buf, dst8, obmc,
2673 block_w*mb_x - block_w/2,
2674 block_w*mb_y - block_w/2,
2675 block_w, block_w,
2676 w, h,
2677 w, ref_stride, obmc_stride,
2678 mb_x - 1, mb_y - 1,
2679 add, 1, plane_index);
2680 }
f9e6ebf7
LM
2681}
2682
d593e329 2683static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){
f9e6ebf7
LM
2684 const int mb_h= s->b_height << s->block_max_depth;
2685 int mb_y;
2686 for(mb_y=0; mb_y<=mb_h; mb_y++)
2687 predict_slice(s, buf, plane_index, add, mb_y);
791e7b83
MN
2688}
2689
51d6a3cf
MN
2690static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
2691 int i, x2, y2;
2692 Plane *p= &s->plane[plane_index];
2693 const int block_size = MB_SIZE >> s->block_max_depth;
2694 const int block_w = plane_index ? block_size/2 : block_size;
2695 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2696 const int obmc_stride= plane_index ? block_size : 2*block_size;
2697 const int ref_stride= s->current_picture.linesize[plane_index];
51d6a3cf 2698 uint8_t *src= s-> input_picture.data[plane_index];
d593e329 2699 IDWTELEM *dst= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; //FIXME change to unsigned
51d6a3cf
MN
2700 const int b_stride = s->b_width << s->block_max_depth;
2701 const int w= p->width;
2702 const int h= p->height;
2703 int index= mb_x + mb_y*b_stride;
2704 BlockNode *b= &s->block[index];
2705 BlockNode backup= *b;
2706 int ab=0;
2707 int aa=0;
2708
2709 b->type|= BLOCK_INTRA;
2710 b->color[plane_index]= 0;
d593e329 2711 memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM));
51d6a3cf
MN
2712
2713 for(i=0; i<4; i++){
2714 int mb_x2= mb_x + (i &1) - 1;
2715 int mb_y2= mb_y + (i>>1) - 1;
2716 int x= block_w*mb_x2 + block_w/2;
2717 int y= block_w*mb_y2 + block_w/2;
2718
f7e89c73 2719 add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
1015631b 2720 x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
51d6a3cf
MN
2721
2722 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
2723 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
2724 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
2725 int obmc_v= obmc[index];
1015631b 2726 int d;
51d6a3cf
MN
2727 if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
2728 if(x<0) obmc_v += obmc[index + block_w];
2729 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
2730 if(x+block_w>w) obmc_v += obmc[index - block_w];
e6464f8b 2731 //FIXME precalculate this or simplify it somehow else
51d6a3cf 2732
1015631b
LM
2733 d = -dst[index] + (1<<(FRAC_BITS-1));
2734 dst[index] = d;
2735 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
e6464f8b 2736 aa += obmc_v * obmc_v; //FIXME precalculate this
51d6a3cf
MN
2737 }
2738 }
2739 }
2740 *b= backup;
2741
755bfeab 2742 return av_clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we should not need clipping
51d6a3cf
MN
2743}
2744
b104969f
LM
2745static inline int get_block_bits(SnowContext *s, int x, int y, int w){
2746 const int b_stride = s->b_width << s->block_max_depth;
2747 const int b_height = s->b_height<< s->block_max_depth;
2748 int index= x + y*b_stride;
aadcc5ce
PI
2749 const BlockNode *b = &s->block[index];
2750 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2751 const BlockNode *top = y ? &s->block[index-b_stride] : &null_block;
2752 const BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left;
2753 const BlockNode *tr = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
b104969f 2754 int dmx, dmy;
c26abfa5
DB
2755// int mx_context= av_log2(2*FFABS(left->mx - top->mx));
2756// int my_context= av_log2(2*FFABS(left->my - top->my));
b104969f
LM
2757
2758 if(x<0 || x>=b_stride || y>=b_height)
2759 return 0;
b104969f
LM
2760/*
27611 0 0
276201X 1-2 1
2763001XX 3-6 2-3
27640001XXX 7-14 4-7
276500001XXXX 15-30 8-15
2766*/
2767//FIXME try accurate rate
e6464f8b 2768//FIXME intra and inter predictors if surrounding blocks are not the same type
b104969f 2769 if(b->type & BLOCK_INTRA){
c26abfa5
DB
2770 return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0]))
2771 + av_log2(2*FFABS(left->color[1] - b->color[1]))
2772 + av_log2(2*FFABS(left->color[2] - b->color[2])));
85fc0e75
LM
2773 }else{
2774 pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
2775 dmx-= b->mx;
2776 dmy-= b->my;
c26abfa5
DB
2777 return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda
2778 + av_log2(2*FFABS(dmy))
8c36eaaa 2779 + av_log2(2*b->ref));
85fc0e75 2780 }
b104969f
LM
2781}
2782
1015631b 2783static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
51d6a3cf
MN
2784 Plane *p= &s->plane[plane_index];
2785 const int block_size = MB_SIZE >> s->block_max_depth;
2786 const int block_w = plane_index ? block_size/2 : block_size;
51d6a3cf
MN
2787 const int obmc_stride= plane_index ? block_size : 2*block_size;
2788 const int ref_stride= s->current_picture.linesize[plane_index];
51d6a3cf 2789 uint8_t *dst= s->current_picture.data[plane_index];
1015631b 2790 uint8_t *src= s-> input_picture.data[plane_index];
d593e329 2791 IDWTELEM *pred= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
bd2b6b33 2792 uint8_t *cur = s->scratchbuf;
61d6e445 2793 uint8_t tmp[ref_stride*(2*MB_SIZE+HTAPS_MAX-1)];
51d6a3cf
MN
2794 const int b_stride = s->b_width << s->block_max_depth;
2795 const int b_height = s->b_height<< s->block_max_depth;
2796 const int w= p->width;
2797 const int h= p->height;
1015631b 2798 int distortion;
51d6a3cf
MN
2799 int rate= 0;
2800 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
1015631b
LM
2801 int sx= block_w*mb_x - block_w/2;
2802 int sy= block_w*mb_y - block_w/2;
561a18d3
RE
2803 int x0= FFMAX(0,-sx);
2804 int y0= FFMAX(0,-sy);
2805 int x1= FFMIN(block_w*2, w-sx);
2806 int y1= FFMIN(block_w*2, h-sy);
1015631b
LM
2807 int i,x,y;
2808
8c36eaaa 2809 pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
1015631b
LM
2810
2811 for(y=y0; y<y1; y++){
2812 const uint8_t *obmc1= obmc_edged + y*obmc_stride;
d593e329 2813 const IDWTELEM *pred1 = pred + y*obmc_stride;
1015631b
LM
2814 uint8_t *cur1 = cur + y*ref_stride;
2815 uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
2816 for(x=x0; x<x1; x++){
d593e329 2817#if FRAC_BITS >= LOG2_OBMC_MAX
1015631b 2818 int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
d593e329
MN
2819#else
2820 int v = (cur1[x] * obmc1[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS);
2821#endif
1015631b
LM
2822 v = (v + pred1[x]) >> FRAC_BITS;
2823 if(v&(~255)) v= ~(v>>31);
2824 dst1[x] = v;
51d6a3cf 2825 }
1015631b 2826 }
51d6a3cf 2827
561a18d3
RE
2828 /* copy the regions where obmc[] = (uint8_t)256 */
2829 if(LOG2_OBMC_MAX == 8
2830 && (mb_x == 0 || mb_x == b_stride-1)
2831 && (mb_y == 0 || mb_y == b_height-1)){
2832 if(mb_x == 0)
2833 x1 = block_w;
2834 else
2835 x0 = block_w;
2836 if(mb_y == 0)
2837 y1 = block_w;
2838 else
2839 y0 = block_w;
2840 for(y=y0; y<y1; y++)
2841 memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
2842 }
2843
1015631b 2844 if(block_w==16){
871371a7
LM
2845 /* FIXME rearrange dsputil to fit 32x32 cmp functions */
2846 /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
e6464f8b
DB
2847 /* FIXME cmps overlap but do not cover the wavelet's whole support.
2848 * So improving the score of one block is not strictly guaranteed
2849 * to improve the score of the whole frame, thus iterative motion
2850 * estimation does not always converge. */
871371a7 2851 if(s->avctx->me_cmp == FF_CMP_W97)
486497e0 2852 distortion = w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
871371a7 2853 else if(s->avctx->me_cmp == FF_CMP_W53)
486497e0 2854 distortion = w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
871371a7
LM
2855 else{
2856 distortion = 0;
2857 for(i=0; i<4; i++){
2858 int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
2859 distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
2860 }
1015631b
LM
2861 }
2862 }else{
2863 assert(block_w==8);
2864 distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
51d6a3cf
MN
2865 }
2866
2867 if(plane_index==0){
2868 for(i=0; i<4; i++){
2869/* ..RRr
2870 * .RXx.
2871 * rxx..
2872 */
b104969f
LM
2873 rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
2874 }
48d1b9a1
LM
2875 if(mb_x == b_stride-2)
2876 rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
b104969f
LM
2877 }
2878 return distortion + rate*penalty_factor;
2879}
2880
2881static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
2882 int i, y2;
2883 Plane *p= &s->plane[plane_index];
2884 const int block_size = MB_SIZE >> s->block_max_depth;
2885 const int block_w = plane_index ? block_size/2 : block_size;
2886 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2887 const int obmc_stride= plane_index ? block_size : 2*block_size;
2888 const int ref_stride= s->current_picture.linesize[plane_index];
b104969f
LM
2889 uint8_t *dst= s->current_picture.data[plane_index];
2890 uint8_t *src= s-> input_picture.data[plane_index];
b5a33ff1
MN
2891 //FIXME zero_dst is const but add_yblock changes dst if add is 0 (this is never the case for dst=zero_dst
2892 // const has only been removed from zero_dst to suppress a warning
2893 static IDWTELEM zero_dst[4096]; //FIXME
b104969f 2894 const int b_stride = s->b_width << s->block_max_depth;
b104969f
LM
2895 const int w= p->width;
2896 const int h= p->height;
2897 int distortion= 0;
2898 int rate= 0;
2899 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
2900
2901 for(i=0; i<9; i++){
2902 int mb_x2= mb_x + (i%3) - 1;
2903 int mb_y2= mb_y + (i/3) - 1;
2904 int x= block_w*mb_x2 + block_w/2;
2905 int y= block_w*mb_y2 + block_w/2;
2906
f7e89c73 2907 add_yblock(s, 0, NULL, zero_dst, dst, obmc,
b104969f
LM
2908 x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
2909
2910 //FIXME find a cleaner/simpler way to skip the outside stuff
2911 for(y2= y; y2<0; y2++)
2912 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2913 for(y2= h; y2<y+block_w; y2++)
2914 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2915 if(x<0){
2916 for(y2= y; y2<y+block_w; y2++)
2917 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
51d6a3cf 2918 }
b104969f
LM
2919 if(x+block_w > w){
2920 for(y2= y; y2<y+block_w; y2++)
2921 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
2922 }
2923
2924 assert(block_w== 8 || block_w==16);
2925 distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
51d6a3cf
MN
2926 }
2927
b104969f
LM
2928 if(plane_index==0){
2929 BlockNode *b= &s->block[mb_x+mb_y*b_stride];
2930 int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
2931
2932/* ..RRRr
2933 * .RXXx.
2934 * .RXXx.
2935 * rxxx.
2936 */
2937 if(merged)
2938 rate = get_block_bits(s, mb_x, mb_y, 2);
2939 for(i=merged?4:0; i<9; i++){
2940 static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
2941 rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
2942 }
2943 }
51d6a3cf
MN
2944 return distortion + rate*penalty_factor;
2945}
2946
849f1035 2947static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
51d6a3cf
MN
2948 const int b_stride= s->b_width << s->block_max_depth;
2949 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
2950 BlockNode backup= *block;
2951 int rd, index, value;
2952
2953 assert(mb_x>=0 && mb_y>=0);
735f9f34 2954 assert(mb_x<b_stride);
51d6a3cf
MN
2955
2956 if(intra){
2957 block->color[0] = p[0];
2958 block->color[1] = p[1];
2959 block->color[2] = p[2];
2960 block->type |= BLOCK_INTRA;
2961 }else{
2962 index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
8c36eaaa 2963 value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12);
51d6a3cf
MN
2964 if(s->me_cache[index] == value)
2965 return 0;
2966 s->me_cache[index]= value;
2967
2968 block->mx= p[0];
2969 block->my= p[1];
2970 block->type &= ~BLOCK_INTRA;
2971 }
2972
1015631b 2973 rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
51d6a3cf
MN
2974
2975//FIXME chroma
2976 if(rd < *best_rd){
2977 *best_rd= rd;
2978 return 1;
2979 }else{
2980 *block= backup;
2981 return 0;
2982 }
2983}
2984
e6464f8b
DB
2985/* special case for int[2] args we discard afterwards,
2986 * fixes compilation problem with gcc 2.95 */
849f1035 2987static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
52137f2f 2988 int p[2] = {p0, p1};
fc8c4992 2989 return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
52137f2f
FR
2990}
2991
849f1035 2992static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
b104969f
LM
2993 const int b_stride= s->b_width << s->block_max_depth;
2994 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
2995 BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
2996 int rd, index, value;
2997
2998 assert(mb_x>=0 && mb_y>=0);
2999 assert(mb_x<b_stride);
3000 assert(((mb_x|mb_y)&1) == 0);
3001
3002 index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
8c36eaaa 3003 value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12);
b104969f
LM
3004 if(s->me_cache[index] == value)
3005 return 0;
3006 s->me_cache[index]= value;
3007
3008 block->mx= p0;
3009 block->my= p1;
8c36eaaa 3010 block->ref= ref;
b104969f
LM
3011 block->type &= ~BLOCK_INTRA;
3012 block[1]= block[b_stride]= block[b_stride+1]= *block;
3013
3014 rd= get_4block_rd(s, mb_x, mb_y, 0);
3015
3016//FIXME chroma
3017 if(rd < *best_rd){
3018 *best_rd= rd;
3019 return 1;
3020 }else{
3021 block[0]= backup[0];
3022 block[1]= backup[1];
3023 block[b_stride]= backup[2];
3024 block[b_stride+1]= backup[3];
3025 return 0;
3026 }
3027}
3028
51d6a3cf
MN
3029static void iterative_me(SnowContext *s){
3030 int pass, mb_x, mb_y;
3031 const int b_width = s->b_width << s->block_max_depth;
3032 const int b_height= s->b_height << s->block_max_depth;
3033 const int b_stride= b_width;
3034 int color[3];
3035
8f8ae495
LM
3036 {
3037 RangeCoder r = s->c;
3038 uint8_t state[sizeof(s->block_state)];
3039 memcpy(state, s->block_state, sizeof(s->block_state));
3040 for(mb_y= 0; mb_y<s->b_height; mb_y++)
3041 for(mb_x= 0; mb_x<s->b_width; mb_x++)
3042 encode_q_branch(s, 0, mb_x, mb_y);
3043 s->c = r;
3044 memcpy(s->block_state, state, sizeof(s->block_state));
3045 }
3046
871371a7 3047 for(pass=0; pass<25; pass++){
51d6a3cf
MN
3048 int change= 0;
3049
3050 for(mb_y= 0; mb_y<b_height; mb_y++){
3051 for(mb_x= 0; mb_x<b_width; mb_x++){
8c36eaaa
LM
3052 int dia_change, i, j, ref;
3053 int best_rd= INT_MAX, ref_rd;
3054 BlockNode backup, ref_b;
51d6a3cf
MN
3055 const int index= mb_x + mb_y * b_stride;
3056 BlockNode *block= &s->block[index];
7f21a9a7
LM
3057 BlockNode *tb = mb_y ? &s->block[index-b_stride ] : NULL;
3058 BlockNode *lb = mb_x ? &s->block[index -1] : NULL;
3059 BlockNode *rb = mb_x+1<b_width ? &s->block[index +1] : NULL;
3060 BlockNode *bb = mb_y+1<b_height ? &s->block[index+b_stride ] : NULL;
3061 BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : NULL;
3062 BlockNode *trb= mb_x+1<b_width && mb_y ? &s->block[index-b_stride+1] : NULL;
3063 BlockNode *blb= mb_x && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
3064 BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
1015631b
LM
3065 const int b_w= (MB_SIZE >> s->block_max_depth);
3066 uint8_t obmc_edged[b_w*2][b_w*2];
51d6a3cf
MN
3067
3068 if(pass && (block->type & BLOCK_OPT))
3069 continue;
3070 block->type |= BLOCK_OPT;
3071
3072 backup= *block;
3073
3074 if(!s->me_cache_generation)
3075 memset(s->me_cache, 0, sizeof(s->me_cache));
3076 s->me_cache_generation += 1<<22;
3077
e6464f8b 3078 //FIXME precalculate
1015631b
LM
3079 {
3080 int x, y;
3081 memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
3082 if(mb_x==0)
3083 for(y=0; y<b_w*2; y++)
3084 memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
3085 if(mb_x==b_stride-1)
3086 for(y=0; y<b_w*2; y++)
3087 memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
3088 if(mb_y==0){
3089 for(x=0; x<b_w*2; x++)
3090 obmc_edged[0][x] += obmc_edged[b_w-1][x];
3091 for(y=1; y<b_w; y++)
3092 memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
3093 }
3094 if(mb_y==b_height-1){
3095 for(x=0; x<b_w*2; x++)
3096 obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
3097 for(y=b_w; y<b_w*2-1; y++)
3098 memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
3099 }
3100 }
3101
3102 //skip stuff outside the picture
ef3dfbd4 3103 if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1){
1015631b
LM
3104 uint8_t *src= s-> input_picture.data[0];
3105 uint8_t *dst= s->current_picture.data[0];
3106 const int stride= s->current_picture.linesize[0];
3107 const int block_w= MB_SIZE >> s->block_max_depth;
3108 const int sx= block_w*mb_x - block_w/2;
3109 const int sy= block_w*mb_y - block_w/2;
3110 const int w= s->plane[0].width;
3111 const int h= s->plane[0].height;
3112 int y;
3113
3114 for(y=sy; y<0; y++)
3115 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3116 for(y=h; y<sy+block_w*2; y++)
3117 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3118 if(sx<0){
3119 for(y=sy; y<sy+block_w*2; y++)
3120 memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
3121 }
3122 if(sx+block_w*2 > w){
3123 for(y=sy; y<sy+block_w*2; y++)
3124 memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
3125 }
3126 }
3127
3128 // intra(black) = neighbors' contribution to the current block
3129 for(i=0; i<3; i++)
3130 color[i]= get_dc(s, mb_x, mb_y, i);
3131
755bfeab 3132 // get previous score (cannot be cached due to OBMC)
48d1b9a1
LM
3133 if(pass > 0 && (block->type&BLOCK_INTRA)){
3134 int color0[3]= {block->color[0], block->color[1], block->color[2]};
3135 check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
3136 }else
fc8c4992 3137 check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);
48d1b9a1 3138
8c36eaaa
LM
3139 ref_b= *block;
3140 ref_rd= best_rd;
3141 for(ref=0; ref < s->ref_frames; ref++){
3142 int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
3143 if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
3144 continue;
3145 block->ref= ref;
3146 best_rd= INT_MAX;
3147
3148 check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd);
3149 check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
7f21a9a7 3150 if(tb)
8c36eaaa 3151 check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd);
7f21a9a7 3152 if(lb)
8c36eaaa 3153 check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd);
7f21a9a7 3154 if(rb)
8c36eaaa 3155 check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd);
7f21a9a7 3156 if(bb)
8c36eaaa
LM
3157 check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd);
3158
3159 /* fullpel ME */
e6464f8b 3160 //FIXME avoid subpel interpolation / round to nearest integer
8c36eaaa
LM
3161 do{
3162 dia_change=0;
3163 for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
3164 for(j=0; j<i; j++){
3165 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3166 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3167 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3168 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3169 }
51d6a3cf 3170 }
8c36eaaa
LM
3171 }while(dia_change);
3172 /* subpel ME */
3173 do{
3174 static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
3175 dia_change=0;
3176 for(i=0; i<8; i++)
3177 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
3178 }while(dia_change);
3179 //FIXME or try the standard 2 pass qpel or similar
3180
3181 mvr[0][0]= block->mx;
3182 mvr[0][1]= block->my;
3183 if(ref_rd > best_rd){
3184 ref_rd= best_rd;
3185 ref_b= *block;
51d6a3cf 3186 }
8c36eaaa
LM
3187 }
3188 best_rd= ref_rd;
3189 *block= ref_b;
13705b69 3190#if 1
1015631b 3191 check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
51d6a3cf 3192 //FIXME RD style color selection
13705b69 3193#endif
51d6a3cf 3194 if(!same_block(block, &backup)){
7f21a9a7
LM
3195 if(tb ) tb ->type &= ~BLOCK_OPT;
3196 if(lb ) lb ->type &= ~BLOCK_OPT;
3197 if(rb ) rb ->type &= ~BLOCK_OPT;
3198 if(bb ) bb ->type &= ~BLOCK_OPT;
3199 if(tlb) tlb->type &= ~BLOCK_OPT;
3200 if(trb) trb->type &= ~BLOCK_OPT;
3201 if(blb) blb->type &= ~BLOCK_OPT;
3202 if(brb) brb->type &= ~BLOCK_OPT;
51d6a3cf
MN
3203 change ++;
3204 }
3205 }
3206 }
3207 av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
3208 if(!change)
3209 break;
3210 }
b104969f
LM
3211
3212 if(s->block_max_depth == 1){
3213 int change= 0;
3214 for(mb_y= 0; mb_y<b_height; mb_y+=2){
3215 for(mb_x= 0; mb_x<b_width; mb_x+=2){
7f21a9a7 3216 int i;
b104969f
LM
3217 int best_rd, init_rd;
3218 const int index= mb_x + mb_y * b_stride;
3219 BlockNode *b[4];
3220
3221 b[0]= &s->block[index];
3222 b[1]= b[0]+1;
3223 b[2]= b[0]+b_stride;
3224 b[3]= b[2]+1;
3225 if(same_block(b[0], b[1]) &&
3226 same_block(b[0], b[2]) &&
3227 same_block(b[0], b[3]))
3228 continue;
3229
3230 if(!s->me_cache_generation)
3231 memset(s->me_cache, 0, sizeof(s->me_cache));
3232 s->me_cache_generation += 1<<22;
3233
3234 init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
3235
8c36eaaa 3236 //FIXME more multiref search?
b104969f
LM
3237 check_4block_inter(s, mb_x, mb_y,
3238 (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
8c36eaaa 3239 (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);
b104969f
LM
3240
3241 for(i=0; i<4; i++)
3242 if(!(b[i]->type&BLOCK_INTRA))
8c36eaaa 3243 check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);
b104969f
LM
3244
3245 if(init_rd != best_rd)
3246 change++;
3247 }
3248 }
3249 av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
3250 }
51d6a3cf
MN
3251}
3252
d593e329 3253static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){
791e7b83
MN
3254 const int w= b->width;
3255 const int h= b->height;
f66e4f5f 3256 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
b538791b 3257 const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS);
da66b631 3258 int x,y, thres1, thres2;
791e7b83 3259
d593e329
MN
3260 if(s->qlog == LOSSLESS_QLOG){
3261 for(y=0; y<h; y++)
3262 for(x=0; x<w; x++)
3263 dst[x + y*stride]= src[x + y*stride];
3264 return;
3265 }
115329f1 3266
791e7b83 3267 bias= bias ? 0 : (3*qmul)>>3;
da66b631
MN
3268 thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
3269 thres2= 2*thres1;
115329f1 3270
791e7b83
MN
3271 if(!bias){
3272 for(y=0; y<h; y++){
3273 for(x=0; x<w; x++){
da66b631 3274 int i= src[x + y*stride];
115329f1 3275
da66b631
MN
3276 if((unsigned)(i+thres1) > thres2){
3277 if(i>=0){
3278 i<<= QEXPSHIFT;
3279 i/= qmul; //FIXME optimize
d593e329 3280 dst[x + y*stride]= i;
da66b631
MN
3281 }else{
3282 i= -i;
3283 i<<= QEXPSHIFT;
3284 i/= qmul; //FIXME optimize
d593e329 3285 dst[x + y*stride]= -i;
da66b631
MN
3286 }
3287 }else
d593e329 3288 dst[x + y*stride]= 0;
791e7b83
MN
3289 }
3290 }
3291 }else{