Correct x/ymin to avoid segfault due to out of picture reads.
[libav.git] / libavcodec / snow.c
CommitLineData
791e7b83
MN
1/*
2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3 *
b78e7197
DB
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
791e7b83
MN
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
b78e7197 9 * version 2.1 of the License, or (at your option) any later version.
791e7b83 10 *
b78e7197 11 * FFmpeg is distributed in the hope that it will be useful,
791e7b83
MN
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
b78e7197 17 * License along with FFmpeg; if not, write to the Free Software
5509bffa 18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
791e7b83
MN
19 */
20
21#include "avcodec.h"
791e7b83 22#include "dsputil.h"
059715a4 23#include "snow.h"
28869757
MN
24
25#include "rangecoder.h"
199436b9 26#include "mathops.h"
791e7b83
MN
27
28#include "mpegvideo.h"
29
30#undef NDEBUG
31#include <assert.h>
32
791e7b83
MN
33static const int8_t quant3[256]={
34 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
35 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
36 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
37 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
43-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
44-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
45-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
46-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
47-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
48-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
49-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
50};
51static const int8_t quant3b[256]={
52 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
53 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
54 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
55 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
57 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
60-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
61-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
62-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
63-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
64-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
65-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
66-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
68};
538a3841
MN
69static const int8_t quant3bA[256]={
70 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
71 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
72 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
73 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
74 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
75 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
76 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
77 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
78 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
79 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
80 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
81 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
82 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
83 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
84 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
85 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
86};
791e7b83
MN
87static const int8_t quant5[256]={
88 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
89 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
90 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
91 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
92 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
93 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
94 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
95 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
96-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
97-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
98-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
99-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
100-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
101-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
102-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
103-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
104};
105static const int8_t quant7[256]={
106 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
107 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
108 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
109 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
110 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
111 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
112 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
113 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
114-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
115-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
116-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
117-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
118-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
119-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
120-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
121-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
122};
123static const int8_t quant9[256]={
124 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
125 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
126 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
127 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
128 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
129 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
130 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
131 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
132-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
133-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
134-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
135-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
136-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
137-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
138-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
139-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
140};
141static const int8_t quant11[256]={
142 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
143 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
144 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
145 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
146 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
147 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
148 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
149 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
150-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
151-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
152-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
153-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
154-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
155-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
156-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
157-4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
158};
159static const int8_t quant13[256]={
160 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
161 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
162 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
163 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
164 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
165 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
166 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
167 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
168-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
169-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
170-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
171-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
172-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
173-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
174-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
175-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
176};
177
791e7b83
MN
178#if 0 //64*cubic
179static const uint8_t obmc32[1024]={
fa731ccd
MN
180 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
181 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
182 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
183 0, 0, 4, 4, 8, 8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12, 8, 8, 4, 4, 0, 0,
184 0, 0, 4, 8, 8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12, 8, 8, 4, 0, 0,
185 0, 4, 4, 8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12, 8, 4, 4, 0,
186 0, 4, 4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12, 4, 4, 0,
187 0, 4, 8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16, 8, 4, 0,
188 0, 4, 8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16, 8, 4, 0,
189 0, 4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12, 4, 0,
190 0, 4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12, 4, 0,
191 0, 4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12, 4, 0,
192 0, 4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16, 4, 0,
193 0, 8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16, 8, 0,
194 0, 4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16, 4, 0,
195 1, 8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16, 8, 1,
196 1, 8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16, 8, 1,
197 0, 4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16, 4, 0,
198 0, 8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16, 8, 0,
199 0, 4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16, 4, 0,
200 0, 4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12, 4, 0,
201 0, 4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12, 4, 0,
202 0, 4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12, 4, 0,
203 0, 4, 8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16, 8, 4, 0,
204 0, 4, 8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16, 8, 4, 0,
205 0, 4, 4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12, 4, 4, 0,
206 0, 4, 4, 8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12, 8, 4, 4, 0,
207 0, 0, 4, 8, 8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12, 8, 8, 4, 0, 0,
208 0, 0, 4, 4, 8, 8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12, 8, 8, 4, 4, 0, 0,
209 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
210 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
211 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
791e7b83
MN
212//error:0.000022
213};
214static const uint8_t obmc16[256]={
fa731ccd
MN
215 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
216 0, 4, 4, 8, 16, 20, 20, 24, 24, 20, 20, 16, 8, 4, 4, 0,
217 0, 4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16, 4, 0,
218 0, 8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24, 8, 0,
219 0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16, 0,
220 0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20, 0,
221 4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20, 4,
222 4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24, 4,
223 4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24, 4,
224 4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20, 4,
225 0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20, 0,
226 0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16, 0,
227 0, 8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24, 8, 0,
228 0, 4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16, 4, 0,
229 0, 4, 4, 8, 16, 20, 20, 24, 24, 20, 20, 16, 8, 4, 4, 0,
230 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
791e7b83
MN
231//error:0.000033
232};
233#elif 1 // 64*linear
234static const uint8_t obmc32[1024]={
561a18d3
RE
235 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
236 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
237 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
238 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
239 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
240 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
241 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
242 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
243 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
244 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
245 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
246 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
247 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
248 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
249 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
250 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
251 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
252 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
253 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
254 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
255 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
256 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
257 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
258 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
259 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
260 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
261 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
262 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
263 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
264 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
265 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
266 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
791e7b83
MN
267 //error:0.000020
268};
269static const uint8_t obmc16[256]={
561a18d3
RE
270 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
271 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
272 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
273 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
274 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
275 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
276 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
277 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
278 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
279 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
280 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
281 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
282 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
283 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
284 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
285 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
791e7b83
MN
286//error:0.000015
287};
288#else //64*cos
289static const uint8_t obmc32[1024]={
fa731ccd
MN
290 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
291 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
292 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
293 0, 0, 4, 4, 4, 8, 8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12, 8, 8, 4, 4, 4, 0, 0,
294 0, 0, 4, 4, 8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12, 8, 4, 4, 0, 0,
295 0, 0, 4, 8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12, 8, 4, 0, 0,
296 0, 4, 4, 8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16, 8, 4, 4, 0,
297 0, 4, 8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12, 8, 4, 0,
298 0, 4, 8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16, 8, 4, 0,
299 0, 4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12, 4, 0,
300 0, 4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12, 4, 0,
301 0, 4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12, 4, 0,
302 0, 4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12, 4, 0,
303 0, 4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12, 4, 0,
304 0, 4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16, 4, 0,
305 1, 4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16, 4, 1,
306 1, 4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16, 4, 1,
307 0, 4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16, 4, 0,
308 0, 4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12, 4, 0,
309 0, 4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12, 4, 0,
310 0, 4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12, 4, 0,
311 0, 4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12, 4, 0,
312 0, 4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12, 4, 0,
313 0, 4, 8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16, 8, 4, 0,
314 0, 4, 8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12, 8, 4, 0,
315 0, 4, 4, 8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16, 8, 4, 4, 0,
316 0, 0, 4, 8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12, 8, 4, 0, 0,
317 0, 0, 4, 4, 8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12, 8, 4, 4, 0, 0,
318 0, 0, 4, 4, 4, 8, 8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12, 8, 8, 4, 4, 4, 0, 0,
319 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
320 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
321 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
791e7b83
MN
322//error:0.000022
323};
324static const uint8_t obmc16[256]={
fa731ccd
MN
325 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
326 0, 0, 4, 8, 12, 16, 20, 20, 20, 20, 16, 12, 8, 4, 0, 0,
327 0, 4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12, 4, 0,
328 0, 8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24, 8, 0,
329 0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12, 0,
330 4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16, 4,
331 4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20, 4,
332 0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20, 0,
333 0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20, 0,
334 4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20, 4,
335 4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16, 4,
336 0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12, 0,
337 0, 8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24, 8, 0,
338 0, 4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12, 4, 0,
339 0, 0, 4, 8, 12, 16, 20, 20, 20, 20, 16, 12, 8, 4, 0, 0,
340 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
791e7b83
MN
341//error:0.000022
342};
96e2fbf2 343#endif /* 0 */
791e7b83 344
155ec6ed
MN
345//linear *64
346static const uint8_t obmc8[64]={
561a18d3
RE
347 4, 12, 20, 28, 28, 20, 12, 4,
348 12, 36, 60, 84, 84, 60, 36, 12,
349 20, 60,100,140,140,100, 60, 20,
350 28, 84,140,196,196,140, 84, 28,
351 28, 84,140,196,196,140, 84, 28,
352 20, 60,100,140,140,100, 60, 20,
353 12, 36, 60, 84, 84, 60, 36, 12,
354 4, 12, 20, 28, 28, 20, 12, 4,
155ec6ed
MN
355//error:0.000000
356};
357
358//linear *64
359static const uint8_t obmc4[16]={
561a18d3
RE
360 16, 48, 48, 16,
361 48,144,144, 48,
362 48,144,144, 48,
363 16, 48, 48, 16,
155ec6ed
MN
364//error:0.000000
365};
366
cf2baeb3 367static const uint8_t * const obmc_tab[4]={
155ec6ed
MN
368 obmc32, obmc16, obmc8, obmc4
369};
370
85fc0e75
LM
371static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
372
155ec6ed
MN
/**
 * One node of the block array (see SnowContext.block).
 * NOTE(review): mx/my look like a motion vector and ref like a reference
 * frame index -- confirm against the users of this struct.
 */
typedef struct BlockNode{
    int16_t mx;
    int16_t my;
    uint8_t ref;
    uint8_t color[3];
    uint8_t type;               ///< bit flags, see BLOCK_* below
//#define TYPE_SPLIT    1
#define BLOCK_INTRA   1
#define BLOCK_OPT     2
//#define TYPE_NOCOLOR  4
    uint8_t level;              //FIXME merge into type?
}BlockNode;
385
51d6a3cf
MN
386static const BlockNode null_block= { //FIXME add border maybe
387 .color= {128,128,128},
388 .mx= 0,
389 .my= 0,
8c36eaaa 390 .ref= 0,
51d6a3cf
MN
391 .type= 0,
392 .level= 0,
393};
394
155ec6ed
MN
395#define LOG2_MB_SIZE 4
396#define MB_SIZE (1<<LOG2_MB_SIZE)
b538791b 397#define ENCODER_EXTRA_BITS 4
61d6e445 398#define HTAPS_MAX 8
155ec6ed 399
a0d1931c
Y
/** A coefficient value paired with an x position. */
struct x_and_coeff{
    int16_t  x;
    uint16_t coeff;
};
typedef struct x_and_coeff x_and_coeff;
404
791e7b83
MN
405typedef struct SubBand{
406 int level;
407 int stride;
408 int width;
409 int height;
e6464f8b 410 int qlog; ///< log(qscale)/log[2^(1/6)]
791e7b83 411 DWTELEM *buf;
d593e329 412 IDWTELEM *ibuf;
a0d1931c
Y
413 int buf_x_offset;
414 int buf_y_offset;
415 int stride_line; ///< Stride measured in lines, not pixels.
416 x_and_coeff * x_coeff;
791e7b83
MN
417 struct SubBand *parent;
418 uint8_t state[/*7*2*/ 7 + 512][32];
419}SubBand;
420
421typedef struct Plane{
422 int width;
423 int height;
424 SubBand band[MAX_DECOMPOSITIONS][4];
7d7f57d9
MN
425
426 int htaps;
61d6e445 427 int8_t hcoeff[HTAPS_MAX/2];
7d7f57d9
MN
428 int diag_mc;
429 int fast_mc;
430
431 int last_htaps;
61d6e445 432 int8_t last_hcoeff[HTAPS_MAX/2];
7d7f57d9 433 int last_diag_mc;
791e7b83
MN
434}Plane;
435
436typedef struct SnowContext{
e6464f8b 437// MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX)
791e7b83
MN
438
439 AVCodecContext *avctx;
28869757 440 RangeCoder c;
791e7b83 441 DSPContext dsp;
51d6a3cf
MN
442 AVFrame new_picture;
443 AVFrame input_picture; ///< new_picture with the internal linesizes
791e7b83 444 AVFrame current_picture;
8c36eaaa 445 AVFrame last_picture[MAX_REF_FRAMES];
5be3a818 446 uint8_t *halfpel_plane[MAX_REF_FRAMES][4][4];
791e7b83
MN
447 AVFrame mconly_picture;
448// uint8_t q_context[16];
449 uint8_t header_state[32];
155ec6ed 450 uint8_t block_state[128 + 32*128];
791e7b83 451 int keyframe;
19aa028d 452 int always_reset;
791e7b83
MN
453 int version;
454 int spatial_decomposition_type;
396a5e68 455 int last_spatial_decomposition_type;
791e7b83
MN
456 int temporal_decomposition_type;
457 int spatial_decomposition_count;
8db13728 458 int last_spatial_decomposition_count;
791e7b83 459 int temporal_decomposition_count;
8c36eaaa
LM
460 int max_ref_frames;
461 int ref_frames;
462 int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
463 uint32_t *ref_scores[MAX_REF_FRAMES];
791e7b83 464 DWTELEM *spatial_dwt_buffer;
d593e329 465 IDWTELEM *spatial_idwt_buffer;
791e7b83
MN
466 int colorspace_type;
467 int chroma_h_shift;
468 int chroma_v_shift;
469 int spatial_scalability;
470 int qlog;
396a5e68 471 int last_qlog;
155ec6ed
MN
472 int lambda;
473 int lambda2;
4e64bead 474 int pass1_rc;
791e7b83 475 int mv_scale;
396a5e68 476 int last_mv_scale;
791e7b83 477 int qbias;
396a5e68 478 int last_qbias;
791e7b83 479#define QBIAS_SHIFT 3
155ec6ed
MN
480 int b_width;
481 int b_height;
482 int block_max_depth;
396a5e68 483 int last_block_max_depth;
791e7b83 484 Plane plane[MAX_PLANES];
155ec6ed 485 BlockNode *block;
51d6a3cf
MN
486#define ME_CACHE_SIZE 1024
487 int me_cache[ME_CACHE_SIZE];
488 int me_cache_generation;
a0d1931c 489 slice_buffer sb;
155ec6ed 490
e6464f8b 491 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX)
bd2b6b33
MR
492
493 uint8_t *scratchbuf;
791e7b83
MN
494}SnowContext;
495
f9e6ebf7 496typedef struct {
d593e329
MN
497 IDWTELEM *b0;
498 IDWTELEM *b1;
499 IDWTELEM *b2;
500 IDWTELEM *b3;
f9e6ebf7 501 int y;
fe5c7e58 502} DWTCompose;
f9e6ebf7 503
a0d1931c
Y
504#define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
505//#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
506
51d6a3cf
MN
507static void iterative_me(SnowContext *s);
508
d593e329 509static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM * base_buffer)
a0d1931c
Y
510{
511 int i;
115329f1 512
a0d1931c
Y
513 buf->base_buffer = base_buffer;
514 buf->line_count = line_count;
515 buf->line_width = line_width;
516 buf->data_count = max_allocated_lines;
d593e329
MN
517 buf->line = av_mallocz (sizeof(IDWTELEM *) * line_count);
518 buf->data_stack = av_malloc (sizeof(IDWTELEM *) * max_allocated_lines);
115329f1 519
ef3dfbd4 520 for(i = 0; i < max_allocated_lines; i++){
d593e329 521 buf->data_stack[i] = av_malloc (sizeof(IDWTELEM) * line_width);
a0d1931c 522 }
115329f1 523
a0d1931c
Y
524 buf->data_stack_top = max_allocated_lines - 1;
525}
526
d593e329 527static IDWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
a0d1931c 528{
d593e329 529 IDWTELEM * buffer;
115329f1 530
a0d1931c
Y
531 assert(buf->data_stack_top >= 0);
532// assert(!buf->line[line]);
533 if (buf->line[line])
534 return buf->line[line];
115329f1 535
a0d1931c
Y
536 buffer = buf->data_stack[buf->data_stack_top];
537 buf->data_stack_top--;
538 buf->line[line] = buffer;
115329f1 539
a0d1931c
Y
540 return buffer;
541}
542
543static void slice_buffer_release(slice_buffer * buf, int line)
544{
d593e329 545 IDWTELEM * buffer;
a0d1931c
Y
546
547 assert(line >= 0 && line < buf->line_count);
548 assert(buf->line[line]);
549
a0d1931c
Y
550 buffer = buf->line[line];
551 buf->data_stack_top++;
552 buf->data_stack[buf->data_stack_top] = buffer;
553 buf->line[line] = NULL;
a0d1931c
Y
554}
555
556static void slice_buffer_flush(slice_buffer * buf)
557{
558 int i;
ef3dfbd4 559 for(i = 0; i < buf->line_count; i++){
a0d1931c 560 if (buf->line[i])
a0d1931c 561 slice_buffer_release(buf, i);
a0d1931c
Y
562 }
563}
564
565static void slice_buffer_destroy(slice_buffer * buf)
566{
567 int i;
568 slice_buffer_flush(buf);
115329f1 569
ef3dfbd4 570 for(i = buf->data_count - 1; i >= 0; i--){
e7c8206e 571 av_freep(&buf->data_stack[i]);
a0d1931c 572 }
e7c8206e 573 av_freep(&buf->data_stack);
e7c8206e 574 av_freep(&buf->line);
a0d1931c
Y
575}
576
bb270c08 577#ifdef __sgi
2554db9b 578// Avoid a name clash on SGI IRIX
bb270c08 579#undef qexp
2554db9b 580#endif
034aff03 581#define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
c97de57c 582static uint8_t qexp[QROOT];
791e7b83
MN
583
/**
 * Reflect v back into the range [0, m].
 *
 * Values below 0 are reflected around 0 and values above m around m,
 * repeatedly, until the result lies inside the range.
 *
 * @param v coordinate to fold
 * @param m largest valid coordinate
 * @return the folded coordinate; 0 when m is 0
 */
static inline int mirror(int v, int m){
    /* With m == 0 the reflection v += 2*m makes no progress and the loop
     * below would never terminate for v != 0; the only valid index is 0. */
    if(!m)
        return 0;

    while((unsigned)v > (unsigned)m){
        v=-v;
        if(v<0) v+= 2*m;
    }
    return v;
}
591
28869757 592static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
791e7b83
MN
593 int i;
594
595 if(v){
c26abfa5 596 const int a= FFABS(v);
791e7b83
MN
597 const int e= av_log2(a);
598#if 1
115329f1 599 const int el= FFMIN(e, 10);
28869757 600 put_rac(c, state+0, 0);
791e7b83
MN
601
602 for(i=0; i<el; i++){
28869757 603 put_rac(c, state+1+i, 1); //1..10
791e7b83
MN
604 }
605 for(; i<e; i++){
28869757 606 put_rac(c, state+1+9, 1); //1..10
791e7b83 607 }
28869757 608 put_rac(c, state+1+FFMIN(i,9), 0);
791e7b83
MN
609
610 for(i=e-1; i>=el; i--){
28869757 611 put_rac(c, state+22+9, (a>>i)&1); //22..31
791e7b83
MN
612 }
613 for(; i>=0; i--){
28869757 614 put_rac(c, state+22+i, (a>>i)&1); //22..31
791e7b83
MN
615 }
616
617 if(is_signed)
28869757 618 put_rac(c, state+11 + el, v < 0); //11..21
791e7b83 619#else
115329f1 620
28869757 621 put_rac(c, state+0, 0);
791e7b83
MN
622 if(e<=9){
623 for(i=0; i<e; i++){
28869757 624 put_rac(c, state+1+i, 1); //1..10
791e7b83 625 }
28869757 626 put_rac(c, state+1+i, 0);
791e7b83
MN
627
628 for(i=e-1; i>=0; i--){
28869757 629 put_rac(c, state+22+i, (a>>i)&1); //22..31
791e7b83
MN
630 }
631
632 if(is_signed)
28869757 633 put_rac(c, state+11 + e, v < 0); //11..21
791e7b83
MN
634 }else{
635 for(i=0; i<e; i++){
28869757 636 put_rac(c, state+1+FFMIN(i,9), 1); //1..10
791e7b83 637 }
e1b3d272 638 put_rac(c, state+1+9, 0);
791e7b83
MN
639
640 for(i=e-1; i>=0; i--){
28869757 641 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
791e7b83
MN
642 }
643
644 if(is_signed)
e1b3d272 645 put_rac(c, state+11 + 10, v < 0); //11..21
791e7b83 646 }
96e2fbf2 647#endif /* 1 */
791e7b83 648 }else{
28869757 649 put_rac(c, state+0, 1);
791e7b83
MN
650 }
651}
652
28869757
MN
653static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
654 if(get_rac(c, state+0))
791e7b83
MN
655 return 0;
656 else{
7c2425d2
LM
657 int i, e, a;
658 e= 0;
28869757 659 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
7c2425d2 660 e++;
791e7b83 661 }
7c2425d2 662
791e7b83 663 a= 1;
7c2425d2 664 for(i=e-1; i>=0; i--){
28869757 665 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
791e7b83
MN
666 }
667
3788e661
MN
668 e= -(is_signed && get_rac(c, state+11 + FFMIN(e,10))); //11..21
669 return (a^e)-e;
791e7b83
MN
670 }
671}
672
28869757 673static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
4f4e9633 674 int i;
0635cbfc 675 int r= log2>=0 ? 1<<log2 : 1;
4f4e9633
MN
676
677 assert(v>=0);
0635cbfc
MN
678 assert(log2>=-4);
679
680 while(v >= r){
28869757 681 put_rac(c, state+4+log2, 1);
0635cbfc 682 v -= r;
4f4e9633 683 log2++;
0635cbfc 684 if(log2>0) r+=r;
4f4e9633 685 }
28869757 686 put_rac(c, state+4+log2, 0);
115329f1 687
4f4e9633 688 for(i=log2-1; i>=0; i--){
28869757 689 put_rac(c, state+31-i, (v>>i)&1);
4f4e9633 690 }
4f4e9633
MN
691}
692
28869757 693static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
4f4e9633 694 int i;
0635cbfc 695 int r= log2>=0 ? 1<<log2 : 1;
4f4e9633
MN
696 int v=0;
697
0635cbfc
MN
698 assert(log2>=-4);
699
28869757 700 while(get_rac(c, state+4+log2)){
0635cbfc 701 v+= r;
4f4e9633 702 log2++;
0635cbfc 703 if(log2>0) r+=r;
4f4e9633 704 }
115329f1 705
4f4e9633 706 for(i=log2-1; i>=0; i--){
28869757 707 v+= get_rac(c, state+31-i)<<i;
4f4e9633
MN
708 }
709
710 return v;
711}
712
9d14ffbc
LB
713static av_always_inline void
714lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
715 int dst_step, int src_step, int ref_step,
716 int width, int mul, int add, int shift,
717 int highpass, int inverse){
791e7b83
MN
718 const int mirror_left= !highpass;
719 const int mirror_right= (width&1) ^ highpass;
720 const int w= (width>>1) - 1 + (highpass & width);
721 int i;
722
723#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
724 if(mirror_left){
725 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
726 dst += dst_step;
727 src += src_step;
728 }
115329f1 729
791e7b83 730 for(i=0; i<w; i++){
9d14ffbc
LB
731 dst[i*dst_step] =
732 LIFT(src[i*src_step],
733 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
734 inverse);
791e7b83 735 }
115329f1 736
791e7b83 737 if(mirror_right){
9d14ffbc
LB
738 dst[w*dst_step] =
739 LIFT(src[w*src_step],
740 ((mul*2*ref[w*ref_step]+add)>>shift),
741 inverse);
791e7b83
MN
742 }
743}
744
9d14ffbc
LB
745static av_always_inline void
746inv_lift(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
747 int dst_step, int src_step, int ref_step,
748 int width, int mul, int add, int shift,
749 int highpass, int inverse){
d593e329
MN
750 const int mirror_left= !highpass;
751 const int mirror_right= (width&1) ^ highpass;
752 const int w= (width>>1) - 1 + (highpass & width);
753 int i;
754
755#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
756 if(mirror_left){
757 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
758 dst += dst_step;
759 src += src_step;
760 }
761
762 for(i=0; i<w; i++){
9d14ffbc
LB
763 dst[i*dst_step] =
764 LIFT(src[i*src_step],
765 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
766 inverse);
d593e329
MN
767 }
768
769 if(mirror_right){
9d14ffbc
LB
770 dst[w*dst_step] =
771 LIFT(src[w*src_step],
772 ((mul*2*ref[w*ref_step]+add)>>shift),
773 inverse);
d593e329
MN
774 }
775}
776
059715a4 777#ifndef liftS
9d14ffbc
LB
778static av_always_inline void
779liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
780 int dst_step, int src_step, int ref_step,
781 int width, int mul, int add, int shift,
782 int highpass, int inverse){
f5a71928
MN
783 const int mirror_left= !highpass;
784 const int mirror_right= (width&1) ^ highpass;
785 const int w= (width>>1) - 1 + (highpass & width);
786 int i;
787
788 assert(shift == 4);
9d14ffbc
LB
789#define LIFTS(src, ref, inv) \
790 ((inv) ? \
791 (src) + (((ref) + 4*(src))>>shift): \
792 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
f5a71928
MN
793 if(mirror_left){
794 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
795 dst += dst_step;
796 src += src_step;
797 }
115329f1 798
f5a71928 799 for(i=0; i<w; i++){
9d14ffbc
LB
800 dst[i*dst_step] =
801 LIFTS(src[i*src_step],
802 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
803 inverse);
f5a71928 804 }
115329f1 805
f5a71928 806 if(mirror_right){
9d14ffbc
LB
807 dst[w*dst_step] =
808 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
f5a71928
MN
809 }
810}
9d14ffbc
LB
811static av_always_inline void
812inv_liftS(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
813 int dst_step, int src_step, int ref_step,
814 int width, int mul, int add, int shift,
815 int highpass, int inverse){
d593e329
MN
816 const int mirror_left= !highpass;
817 const int mirror_right= (width&1) ^ highpass;
818 const int w= (width>>1) - 1 + (highpass & width);
819 int i;
820
821 assert(shift == 4);
9d14ffbc
LB
822#define LIFTS(src, ref, inv) \
823 ((inv) ? \
824 (src) + (((ref) + 4*(src))>>shift): \
825 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
d593e329
MN
826 if(mirror_left){
827 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
828 dst += dst_step;
829 src += src_step;
830 }
831
832 for(i=0; i<w; i++){
9d14ffbc
LB
833 dst[i*dst_step] =
834 LIFTS(src[i*src_step],
835 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
836 inverse);
d593e329
MN
837 }
838
839 if(mirror_right){
9d14ffbc
LB
840 dst[w*dst_step] =
841 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
d593e329
MN
842 }
843}
301df480 844#endif /* ! liftS */
f5a71928 845
aa25a462
RFI
846static void horizontal_decompose53i(DWTELEM *b, int width){
847 DWTELEM temp[width];
791e7b83 848 const int width2= width>>1;
62ab0b78 849 int x;
791e7b83
MN
850 const int w2= (width+1)>>1;
851
852 for(x=0; x<width2; x++){
853 temp[x ]= b[2*x ];
854 temp[x+w2]= b[2*x + 1];
855 }
856 if(width&1)
857 temp[x ]= b[2*x ];
858#if 0
62ab0b78
AJ
859 {
860 int A1,A2,A3,A4;
791e7b83
MN
861 A2= temp[1 ];
862 A4= temp[0 ];
863 A1= temp[0+width2];
864 A1 -= (A2 + A4)>>1;
865 A4 += (A1 + 1)>>1;
866 b[0+width2] = A1;
867 b[0 ] = A4;
868 for(x=1; x+1<width2; x+=2){
869 A3= temp[x+width2];
870 A4= temp[x+1 ];
871 A3 -= (A2 + A4)>>1;
872 A2 += (A1 + A3 + 2)>>2;
873 b[x+width2] = A3;
874 b[x ] = A2;
875
876 A1= temp[x+1+width2];
877 A2= temp[x+2 ];
878 A1 -= (A2 + A4)>>1;
879 A4 += (A1 + A3 + 2)>>2;
880 b[x+1+width2] = A1;
881 b[x+1 ] = A4;
882 }
883 A3= temp[width-1];
884 A3 -= A2;
885 A2 += (A1 + A3 + 2)>>2;
886 b[width -1] = A3;
887 b[width2-1] = A2;
62ab0b78 888 }
115329f1 889#else
791e7b83
MN
890 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
891 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0);
301df480 892#endif /* 0 */
791e7b83
MN
893}
894
aa25a462 895static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 896 int i;
115329f1 897
791e7b83
MN
898 for(i=0; i<width; i++){
899 b1[i] -= (b0[i] + b2[i])>>1;
900 }
901}
902
aa25a462 903static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 904 int i;
115329f1 905
791e7b83
MN
906 for(i=0; i<width; i++){
907 b1[i] += (b0[i] + b2[i] + 2)>>2;
908 }
909}
910
aa25a462 911static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
39c61bbb 912 int y;
791e7b83
MN
913 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
914 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride;
115329f1 915
791e7b83
MN
916 for(y=-2; y<height; y+=2){
917 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
918 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
919
13705b69
MN
920 if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
921 if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
115329f1 922
13705b69
MN
923 if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
924 if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
115329f1 925
791e7b83
MN
926 b0=b2;
927 b1=b3;
928 }
929}
930
aa25a462
RFI
931static void horizontal_decompose97i(DWTELEM *b, int width){
932 DWTELEM temp[width];
791e7b83
MN
933 const int w2= (width+1)>>1;
934
ce611a27
MN
935 lift (temp+w2, b +1, b , 1, 2, 2, width, W_AM, W_AO, W_AS, 1, 1);
936 liftS(temp , b , temp+w2, 1, 2, 1, width, W_BM, W_BO, W_BS, 0, 0);
ff06e067 937 lift (b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
791e7b83
MN
938 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
939}
940
941
aa25a462 942static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 943 int i;
115329f1 944
791e7b83
MN
945 for(i=0; i<width; i++){
946 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
947 }
948}
949
aa25a462 950static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 951 int i;
115329f1 952
791e7b83 953 for(i=0; i<width; i++){
791e7b83 954 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
791e7b83
MN
955 }
956}
957
aa25a462 958static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 959 int i;
115329f1 960
791e7b83 961 for(i=0; i<width; i++){
f5a71928 962#ifdef liftS
791e7b83 963 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
f5a71928 964#else
ce611a27 965 b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + W_BO*5 + (5<<27)) / (5*16) - (1<<23);
f5a71928 966#endif
791e7b83
MN
967 }
968}
969
aa25a462 970static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 971 int i;
115329f1 972
791e7b83
MN
973 for(i=0; i<width; i++){
974 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
975 }
976}
977
aa25a462 978static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
39c61bbb 979 int y;
791e7b83
MN
980 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
981 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride;
982 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
983 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
115329f1 984
791e7b83
MN
985 for(y=-4; y<height; y+=2){
986 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
987 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
988
13705b69
MN
989 if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
990 if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
115329f1 991
13705b69
MN
992 if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
993 if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
994 if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
995 if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
791e7b83 996
791e7b83
MN
997 b0=b2;
998 b1=b3;
999 b2=b4;
1000 b3=b5;
1001 }
1002}
1003
aa25a462 1004void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
791e7b83 1005 int level;
115329f1 1006
46c281e8
MN
1007 for(level=0; level<decomposition_count; level++){
1008 switch(type){
d4b287ed
LM
1009 case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1010 case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
791e7b83
MN
1011 }
1012 }
1013}
1014
d593e329
MN
1015static void horizontal_compose53i(IDWTELEM *b, int width){
1016 IDWTELEM temp[width];
791e7b83
MN
1017 const int width2= width>>1;
1018 const int w2= (width+1)>>1;
62ab0b78 1019 int x;
791e7b83
MN
1020
1021#if 0
62ab0b78 1022 int A1,A2,A3,A4;
791e7b83
MN
1023 A2= temp[1 ];
1024 A4= temp[0 ];
1025 A1= temp[0+width2];
1026 A1 -= (A2 + A4)>>1;
1027 A4 += (A1 + 1)>>1;
1028 b[0+width2] = A1;
1029 b[0 ] = A4;
1030 for(x=1; x+1<width2; x+=2){
1031 A3= temp[x+width2];
1032 A4= temp[x+1 ];
1033 A3 -= (A2 + A4)>>1;
1034 A2 += (A1 + A3 + 2)>>2;
1035 b[x+width2] = A3;
1036 b[x ] = A2;
1037
1038 A1= temp[x+1+width2];
1039 A2= temp[x+2 ];
1040 A1 -= (A2 + A4)>>1;
1041 A4 += (A1 + A3 + 2)>>2;
1042 b[x+1+width2] = A1;
1043 b[x+1 ] = A4;
1044 }
1045 A3= temp[width-1];
1046 A3 -= A2;
1047 A2 += (A1 + A3 + 2)>>2;
1048 b[width -1] = A3;
1049 b[width2-1] = A2;
115329f1 1050#else
d593e329
MN
1051 inv_lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1);
1052 inv_lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
96e2fbf2 1053#endif /* 0 */
791e7b83
MN
1054 for(x=0; x<width2; x++){
1055 b[2*x ]= temp[x ];
1056 b[2*x + 1]= temp[x+w2];
1057 }
1058 if(width&1)
1059 b[2*x ]= temp[x ];
1060}
1061
d593e329 1062static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
791e7b83 1063 int i;
115329f1 1064
791e7b83
MN
1065 for(i=0; i<width; i++){
1066 b1[i] += (b0[i] + b2[i])>>1;
1067 }
1068}
1069
d593e329 1070static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
791e7b83 1071 int i;
115329f1 1072
791e7b83
MN
1073 for(i=0; i<width; i++){
1074 b1[i] -= (b0[i] + b2[i] + 2)>>2;
1075 }
1076}
1077
fe5c7e58 1078static void spatial_compose53i_buffered_init(DWTCompose *cs, slice_buffer * sb, int height, int stride_line){
a0d1931c
Y
1079 cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1080 cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line);
1081 cs->y = -1;
1082}
1083
fe5c7e58 1084static void spatial_compose53i_init(DWTCompose *cs, IDWTELEM *buffer, int height, int stride){
f9e6ebf7
LM
1085 cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1086 cs->b1 = buffer + mirror(-1 , height-1)*stride;
1087 cs->y = -1;
1088}
1089
fe5c7e58 1090static void spatial_compose53i_dy_buffered(DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line){
a0d1931c 1091 int y= cs->y;
115329f1 1092
d593e329
MN
1093 IDWTELEM *b0= cs->b0;
1094 IDWTELEM *b1= cs->b1;
1095 IDWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
1096 IDWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
a0d1931c 1097
13705b69
MN
1098 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1099 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
a0d1931c 1100
13705b69
MN
1101 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1102 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
a0d1931c
Y
1103
1104 cs->b0 = b2;
1105 cs->b1 = b3;
1106 cs->y += 2;
1107}
1108
fe5c7e58 1109static void spatial_compose53i_dy(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride){
f9e6ebf7 1110 int y= cs->y;
d593e329
MN
1111 IDWTELEM *b0= cs->b0;
1112 IDWTELEM *b1= cs->b1;
1113 IDWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1114 IDWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
791e7b83 1115
13705b69
MN
1116 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1117 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
791e7b83 1118
13705b69
MN
1119 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1120 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
791e7b83 1121
f9e6ebf7
LM
1122 cs->b0 = b2;
1123 cs->b1 = b3;
1124 cs->y += 2;
1125}
1126
1918057c 1127static void av_unused spatial_compose53i(IDWTELEM *buffer, int width, int height, int stride){
fe5c7e58 1128 DWTCompose cs;
9a3bb2b8
MN
1129 spatial_compose53i_init(&cs, buffer, height, stride);
1130 while(cs.y <= height)
1131 spatial_compose53i_dy(&cs, buffer, width, height, stride);
1132}
1133
791e7b83 1134
d593e329
MN
1135void ff_snow_horizontal_compose97i(IDWTELEM *b, int width){
1136 IDWTELEM temp[width];
791e7b83
MN
1137 const int w2= (width+1)>>1;
1138
d593e329 1139 inv_lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
ff06e067 1140 inv_lift (temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
d593e329
MN
1141 inv_liftS(b , temp , temp+w2, 2, 1, 1, width, W_BM, W_BO, W_BS, 0, 1);
1142 inv_lift (b+1 , temp+w2, b , 2, 1, 2, width, W_AM, W_AO, W_AS, 1, 0);
791e7b83
MN
1143}
1144
d593e329 1145static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
791e7b83 1146 int i;
115329f1 1147
791e7b83
MN
1148 for(i=0; i<width; i++){
1149 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1150 }
1151}
1152
d593e329 1153static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
791e7b83 1154 int i;
115329f1 1155
791e7b83 1156 for(i=0; i<width; i++){
791e7b83 1157 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
791e7b83
MN
1158 }
1159}
1160
d593e329 1161static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
791e7b83 1162 int i;
115329f1 1163
791e7b83 1164 for(i=0; i<width; i++){
f5a71928 1165#ifdef liftS
791e7b83 1166 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
f5a71928
MN
1167#else
1168 b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
1169#endif
791e7b83
MN
1170 }
1171}
1172
d593e329 1173static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
791e7b83 1174 int i;
115329f1 1175
791e7b83
MN
1176 for(i=0; i<width; i++){
1177 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1178 }
1179}
1180
d593e329 1181void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
565a45ac 1182 int i;
115329f1 1183
565a45ac 1184 for(i=0; i<width; i++){
565a45ac 1185 b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
565a45ac 1186 b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
f5a71928 1187#ifdef liftS
565a45ac 1188 b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
f5a71928
MN
1189#else
1190 b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
1191#endif
565a45ac
MN
1192 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1193 }
1194}
1195
fe5c7e58 1196static void spatial_compose97i_buffered_init(DWTCompose *cs, slice_buffer * sb, int height, int stride_line){
a0d1931c
Y
1197 cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1198 cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line);
1199 cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1200 cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
1201 cs->y = -3;
1202}
1203
fe5c7e58 1204static void spatial_compose97i_init(DWTCompose *cs, IDWTELEM *buffer, int height, int stride){
f9e6ebf7
LM
1205 cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1206 cs->b1 = buffer + mirror(-3 , height-1)*stride;
1207 cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1208 cs->b3 = buffer + mirror(-3+2, height-1)*stride;
1209 cs->y = -3;
1210}
791e7b83 1211
fe5c7e58 1212static void spatial_compose97i_dy_buffered(DSPContext *dsp, DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line){
a0d1931c 1213 int y = cs->y;
115329f1 1214
d593e329
MN
1215 IDWTELEM *b0= cs->b0;
1216 IDWTELEM *b1= cs->b1;
1217 IDWTELEM *b2= cs->b2;
1218 IDWTELEM *b3= cs->b3;
1219 IDWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
1220 IDWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
115329f1 1221
565a45ac 1222 if(y>0 && y+4<height){
059715a4 1223 dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
565a45ac 1224 }else{
13705b69
MN
1225 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1226 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1227 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1228 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
565a45ac 1229 }
a0d1931c 1230
a4873f7d
LT
1231 if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width);
1232 if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width);
a0d1931c
Y
1233
1234 cs->b0=b2;
1235 cs->b1=b3;
1236 cs->b2=b4;
1237 cs->b3=b5;
1238 cs->y += 2;
1239}
1240
fe5c7e58 1241static void spatial_compose97i_dy(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride){
f9e6ebf7 1242 int y = cs->y;
d593e329
MN
1243 IDWTELEM *b0= cs->b0;
1244 IDWTELEM *b1= cs->b1;
1245 IDWTELEM *b2= cs->b2;
1246 IDWTELEM *b3= cs->b3;
1247 IDWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1248 IDWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
791e7b83 1249
a4873f7d
LT
1250 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1251 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1252 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1253 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
791e7b83 1254
a4873f7d
LT
1255 if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width);
1256 if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width);
f9e6ebf7
LM
1257
1258 cs->b0=b2;
1259 cs->b1=b3;
1260 cs->b2=b4;
1261 cs->b3=b5;
1262 cs->y += 2;
1263}
1264
1918057c 1265static void av_unused spatial_compose97i(IDWTELEM *buffer, int width, int height, int stride){
fe5c7e58 1266 DWTCompose cs;
9a3bb2b8
MN
1267 spatial_compose97i_init(&cs, buffer, height, stride);
1268 while(cs.y <= height)
1269 spatial_compose97i_dy(&cs, buffer, width, height, stride);
1270}
1271
fe5c7e58 1272static void ff_spatial_idwt_buffered_init(DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
a0d1931c
Y
1273 int level;
1274 for(level=decomposition_count-1; level>=0; level--){
1275 switch(type){
d4b287ed
LM
1276 case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1277 case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
a0d1931c
Y
1278 }
1279 }
1280}
1281
fe5c7e58 1282static void ff_spatial_idwt_init(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
f9e6ebf7
LM
1283 int level;
1284 for(level=decomposition_count-1; level>=0; level--){
1285 switch(type){
d4b287ed
LM
1286 case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1287 case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
f9e6ebf7 1288 }
791e7b83
MN
1289 }
1290}
1291
fe5c7e58 1292static void ff_spatial_idwt_slice(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
f9e6ebf7 1293 const int support = type==1 ? 3 : 5;
791e7b83 1294 int level;
f9e6ebf7 1295 if(type==2) return;
791e7b83 1296
46c281e8 1297 for(level=decomposition_count-1; level>=0; level--){
f9e6ebf7
LM
1298 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1299 switch(type){
d4b287ed 1300 case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
94ae6788 1301 break;
d4b287ed 1302 case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
94ae6788 1303 break;
f9e6ebf7 1304 }
791e7b83
MN
1305 }
1306 }
1307}
1308
fe5c7e58 1309static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, DWTCompose *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
a0d1931c
Y
1310 const int support = type==1 ? 3 : 5;
1311 int level;
1312 if(type==2) return;
1313
1314 for(level=decomposition_count-1; level>=0; level--){
1315 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1316 switch(type){
d4b287ed 1317 case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
94ae6788 1318 break;
d4b287ed 1319 case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
94ae6788 1320 break;
a0d1931c
Y
1321 }
1322 }
1323 }
1324}
1325
d593e329 1326static void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
fe5c7e58 1327 DWTCompose cs[MAX_DECOMPOSITIONS];
f9e6ebf7
LM
1328 int y;
1329 ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1330 for(y=0; y<height; y+=4)
1331 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
f9e6ebf7
LM
1332}
1333
/**
 * Range-code the coefficients of one subband.
 *
 * Two passes over the w x h coefficients in src:
 *  1. collect the lengths of the zero runs that occur at positions whose
 *     neighbours (left, top-left, top, top-right) and parent sample are all
 *     zero;
 *  2. write the number of runs, then per coefficient either a significance
 *     bit (when some neighbour/parent is non-zero) or the run lengths,
 *     followed by magnitude-1 and sign for every non-zero coefficient.
 *
 * @param parent coefficients of the parent band, or NULL
 * @param stride distance between rows of src; parent rows are addressed
 *               with 2*stride
 * @return 0 on success, -1 when the output buffer has less than w*40 bytes
 *         left at the start of a row
 */
d593e329 1334static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
4f4e9633
MN
1335 const int w= b->width;
1336 const int h= b->height;
1337 int x, y;
1338
791e7b83 1339 if(1){
791e7b83 1340 int run=0;
/* one slot per coefficient: variable-length array on the stack */
a8d73e56 1341 int runs[w*h];
791e7b83 1342 int run_index=0;
b44985ba 1343 int max_index;
115329f1 1344
/* pass 1: gather run lengths */
791e7b83
MN
1345 for(y=0; y<h; y++){
1346 for(x=0; x<w; x++){
78486403 1347 int v, p=0;
6b2f6646 1348 int /*ll=0, */l=0, lt=0, t=0, rt=0;
a8d73e56 1349 v= src[x + y*stride];
791e7b83
MN
1350
/* neighbours from the row above, where they exist */
1351 if(y){
a8d73e56 1352 t= src[x + (y-1)*stride];
791e7b83 1353 if(x){
a8d73e56 1354 lt= src[x - 1 + (y-1)*stride];
791e7b83
MN
1355 }
1356 if(x + 1 < w){
a8d73e56 1357 rt= src[x + 1 + (y-1)*stride];
791e7b83
MN
1358 }
1359 }
1360 if(x){
a8d73e56 1361 l= src[x - 1 + y*stride];
6b2f6646
MN
1362 /*if(x > 1){
1363 if(orientation==1) ll= src[y + (x-2)*stride];
1364 else ll= src[x - 2 + y*stride];
791e7b83
MN
1365 }*/
1366 }
/* parent sample at half resolution, when inside the parent band */
78486403 1367 if(parent){
a8d73e56
MN
1368 int px= x>>1;
1369 int py= y>>1;
115329f1 1370 if(px<b->parent->width && py<b->parent->height)
78486403
MN
1371 p= parent[px + py*2*stride];
1372 }
/* all-zero context: this position belongs to the run-length coding */
1373 if(!(/*ll|*/l|lt|t|rt|p)){
791e7b83
MN
1374 if(v){
1375 runs[run_index++]= run;
1376 run=0;
1377 }else{
1378 run++;
1379 }
1380 }
1381 }
1382 }
/* max_index = number of runs that end in a non-zero coefficient; the
   trailing run is stored too but never written (see the <= max_index tests) */
b44985ba 1383 max_index= run_index;
791e7b83
MN
1384 runs[run_index++]= run;
1385 run_index=0;
1386 run= runs[run_index++];
1387
b44985ba
MN
1388 put_symbol2(&s->c, b->state[30], max_index, 0);
1389 if(run_index <= max_index)
1390 put_symbol2(&s->c, b->state[1], run, 3);
115329f1 1391
/* pass 2: write the coefficients */
791e7b83 1392 for(y=0; y<h; y++){
/* give up when fewer than w*40 bytes of output space remain */
d06c75a8 1393 if(s->c.bytestream_end - s->c.bytestream < w*40){
0ecca7a4
MN
1394 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1395 return -1;
1396 }
791e7b83 1397 for(x=0; x<w; x++){
78486403 1398 int v, p=0;
6b2f6646 1399 int /*ll=0, */l=0, lt=0, t=0, rt=0;
a8d73e56 1400 v= src[x + y*stride];
791e7b83
MN
1401
1402 if(y){
a8d73e56 1403 t= src[x + (y-1)*stride];
791e7b83 1404 if(x){
a8d73e56 1405 lt= src[x - 1 + (y-1)*stride];
791e7b83
MN
1406 }
1407 if(x + 1 < w){
a8d73e56 1408 rt= src[x + 1 + (y-1)*stride];
791e7b83
MN
1409 }
1410 }
1411 if(x){
a8d73e56 1412 l= src[x - 1 + y*stride];
6b2f6646
MN
1413 /*if(x > 1){
1414 if(orientation==1) ll= src[y + (x-2)*stride];
1415 else ll= src[x - 2 + y*stride];
791e7b83
MN
1416 }*/
1417 }
78486403 1418 if(parent){
a8d73e56
MN
1419 int px= x>>1;
1420 int py= y>>1;
115329f1 1421 if(px<b->parent->width && py<b->parent->height)
78486403
MN
1422 p= parent[px + py*2*stride];
1423 }
/* non-zero context: write a significance bit for v */
1424 if(/*ll|*/l|lt|t|rt|p){
c26abfa5 1425 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
6b2f6646 1426
28869757 1427 put_rac(&s->c, &b->state[0][context], !!v);
791e7b83
MN
1428 }else{
/* zero context: a run that reached 0 means this coefficient is
   non-zero, so fetch (and, if it is a real run, write) the next run */
1429 if(!run){
1430 run= runs[run_index++];
4f4e9633 1431
b44985ba
MN
1432 if(run_index <= max_index)
1433 put_symbol2(&s->c, b->state[1], run, 3);
791e7b83
MN
1434 assert(v);
1435 }else{
1436 run--;
1437 assert(!v);
1438 }
1439 }
/* non-zero coefficient: magnitude-1, then the sign with a state chosen
   from the left and top neighbours via quant3bA */
1440 if(v){
c26abfa5
DB
1441 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1442 int l2= 2*FFABS(l) + (l<0);
1443 int t2= 2*FFABS(t) + (t<0);
6b2f6646 1444
c26abfa5 1445 put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
538a3841 1446 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
791e7b83
MN
1447 }
1448 }
1449 }
791e7b83 1450 }
0ecca7a4 1451 return 0;
791e7b83
MN
1452}
1453
d593e329 1454static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
4f4e9633
MN
1455// encode_subband_qtree(s, b, src, parent, stride, orientation);
1456// encode_subband_z0run(s, b, src, parent, stride, orientation);
0ecca7a4 1457 return encode_subband_c0run(s, b, src, parent, stride, orientation);
4f4e9633
MN
1458// encode_subband_dzr(s, b, src, parent, stride, orientation);
1459}
1460
/**
 * Decode the coefficients of one subband into the sparse list b->x_coeff.
 *
 * Every non-zero coefficient becomes one (x, coeff) entry. coeff is
 * 2*(decoded symbol + 1) plus, in its lowest bit, the bit decoded with the
 * sign state. Each row is terminated by an entry with x = w+1, and one more
 * such marker follows the last row.
 *
 * @param parent parent band whose x_coeff list supplies the parent context,
 *               or NULL
 */
a0d1931c 1461static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
791e7b83
MN
1462 const int w= b->width;
1463 const int h= b->height;
1464 int x,y;
115329f1 1465
791e7b83 1466 if(1){
b44985ba 1467 int run, runs;
/* xc: write position; prev_xc: read position in the previous row's entries;
   prev2_xc: start of the row being written (becomes prev_xc for the next row) */
cbb1d2b1
MN
1468 x_and_coeff *xc= b->x_coeff;
1469 x_and_coeff *prev_xc= NULL;
1470 x_and_coeff *prev2_xc= xc;
1471 x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
1472 x_and_coeff *prev_parent_xc= parent_xc;
791e7b83 1473
/* number of coded runs first; once none are left, run is INT_MAX */
b44985ba
MN
1474 runs= get_symbol2(&s->c, b->state[30], 0);
1475 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1476 else run= INT_MAX;
1477
791e7b83 1478 for(y=0; y<h; y++){
0cea8a03
MN
1479 int v=0;
1480 int lt=0, t=0, rt=0;
1481
/* preload rt with the previous row's coefficient at x == 0, if present */
cbb1d2b1
MN
1482 if(y && prev_xc->x == 0){
1483 rt= prev_xc->coeff;
0cea8a03 1484 }
791e7b83 1485 for(x=0; x<w; x++){
0cea8a03
MN
1486 int p=0;
/* left neighbour is the value decoded at the previous position */
1487 const int l= v;
115329f1 1488
/* slide the previous-row window one position to the right */
0cea8a03 1489 lt= t; t= rt;
791e7b83 1490
ff765159 1491 if(y){
cbb1d2b1
MN
1492 if(prev_xc->x <= x)
1493 prev_xc++;
1494 if(prev_xc->x == x + 1)
1495 rt= prev_xc->coeff;
ff765159
MN
1496 else
1497 rt=0;
1498 }
/* parent coefficient at x>>1, if the parent list has an entry there */
cbb1d2b1
MN
1499 if(parent_xc){
1500 if(x>>1 > parent_xc->x){
1501 parent_xc++;
7b49c309 1502 }
cbb1d2b1
MN
1503 if(x>>1 == parent_xc->x){
1504 p= parent_xc->coeff;
ff765159 1505 }
78486403
MN
1506 }
/* non-zero context: significance bit, then magnitude and sign */
1507 if(/*ll|*/l|lt|t|rt|p){
c26abfa5 1508 int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
6b2f6646 1509
28869757 1510 v=get_rac(&s->c, &b->state[0][context]);
3c096ac7
MN
1511 if(v){
1512 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
1513 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
115329f1 1514
cbb1d2b1
MN
1515 xc->x=x;
1516 (xc++)->coeff= v;
3c096ac7 1517 }
791e7b83
MN
1518 }else{
/* zero context, run exhausted: a non-zero coefficient follows */
1519 if(!run){
b44985ba
MN
1520 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1521 else run= INT_MAX;
3c096ac7
MN
1522 v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
1523 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
115329f1 1524
cbb1d2b1
MN
1525 xc->x=x;
1526 (xc++)->coeff= v;
791e7b83 1527 }else{
/* inside a zero run: skip ahead, limited by the run length, the next
   previous-row entry (or the row end on row 0) and the next parent entry */
99cd59e5 1528 int max_run;
791e7b83
MN
1529 run--;
1530 v=0;
3c1adccd 1531
cbb1d2b1 1532 if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
99cd59e5 1533 else max_run= FFMIN(run, w-x-1);
cbb1d2b1
MN
1534 if(parent_xc)
1535 max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
99cd59e5
MN
1536 x+= max_run;
1537 run-= max_run;
791e7b83
MN
1538 }
1539 }
7b49c309 1540 }
cbb1d2b1
MN
1541 (xc++)->x= w+1; //end marker
1542 prev_xc= prev2_xc;
1543 prev2_xc= xc;
115329f1 1544
/* after an odd row move on to the next parent row (past its end marker);
   after an even row rewind, so one parent row serves two rows here */
cbb1d2b1 1545 if(parent_xc){
7b49c309 1546 if(y&1){
cbb1d2b1
MN
1547 while(parent_xc->x != parent->width+1)
1548 parent_xc++;
1549 parent_xc++;
1550 prev_parent_xc= parent_xc;
7b49c309 1551 }else{
cbb1d2b1 1552 parent_xc= prev_parent_xc;
791e7b83
MN
1553 }
1554 }
1555 }
a0d1931c 1556
cbb1d2b1 1557 (xc++)->x= w+1; //end marker
a0d1931c
Y
1558 }
1559}
1560
1561static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1562 const int w= b->width;
62ab0b78 1563 int y;
f66e4f5f 1564 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
c97de57c 1565 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
a0d1931c
Y
1566 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1567 int new_index = 0;
115329f1 1568
d593e329 1569 if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){
a0d1931c
Y
1570 qadd= 0;
1571 qmul= 1<<QEXPSHIFT;
1572 }
1573
1574 /* If we are on the second or later slice, restore our index. */
1575 if (start_y != 0)
1576 new_index = save_state[0];
1577
115329f1 1578
a0d1931c
Y
1579 for(y=start_y; y<h; y++){
1580 int x = 0;
1581 int v;
d593e329
MN
1582 IDWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1583 memset(line, 0, b->width*sizeof(IDWTELEM));
a0d1931c
Y
1584 v = b->x_coeff[new_index].coeff;
1585 x = b->x_coeff[new_index++].x;
ef3dfbd4 1586 while(x < w){
538a3841
MN
1587 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1588 register int u= -(v&1);
1589 line[x] = (t^u) - u;
1590
a0d1931c
Y
1591 v = b->x_coeff[new_index].coeff;
1592 x = b->x_coeff[new_index++].x;
1593 }
791e7b83 1594 }
115329f1 1595
a0d1931c
Y
1596 /* Save our variables for the next slice. */
1597 save_state[0] = new_index;
115329f1 1598
a0d1931c 1599 return;
791e7b83
MN
1600}
1601
396a5e68 1602static void reset_contexts(SnowContext *s){ //FIXME better initial contexts
791e7b83
MN
1603 int plane_index, level, orientation;
1604
19aa028d 1605 for(plane_index=0; plane_index<3; plane_index++){
4f90f33a 1606 for(level=0; level<MAX_DECOMPOSITIONS; level++){
791e7b83 1607 for(orientation=level ? 1:0; orientation<4; orientation++){
28869757 1608 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
791e7b83
MN
1609 }
1610 }
1611 }
28869757
MN
1612 memset(s->header_state, MID_STATE, sizeof(s->header_state));
1613 memset(s->block_state, MID_STATE, sizeof(s->block_state));
155ec6ed
MN
1614}
1615
1616static int alloc_blocks(SnowContext *s){
1617 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1618 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
115329f1 1619
155ec6ed
MN
1620 s->b_width = w;
1621 s->b_height= h;
115329f1 1622
dc7f45a0 1623 av_free(s->block);
155ec6ed
MN
1624 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
1625 return 0;
1626}
1627
28869757
MN
1628static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1629 uint8_t *bytestream= d->bytestream;
1630 uint8_t *bytestream_start= d->bytestream_start;
155ec6ed 1631 *d= *s;
28869757
MN
1632 d->bytestream= bytestream;
1633 d->bytestream_start= bytestream_start;
155ec6ed
MN
1634}
1635
/**
 * Sums the samples of a w x w square.
 * near copy & paste from dsputil, FIXME
 *
 * @param pix       top-left sample of the square
 * @param line_size distance between the starts of two rows
 * @param w         side length of the square
 * @return sum of the w*w samples (0 if w <= 0)
 */
static int pix_sum(uint8_t * pix, int line_size, int w)
{
    int total = 0;
    int row, col;

    for (row = 0; row < w; row++) {
        const uint8_t *line = pix + row * line_size;

        for (col = 0; col < w; col++)
            total += line[col];
    }
    return total;
}
1651
1652//near copy & paste from dsputil, FIXME
1653static int pix_norm1(uint8_t * pix, int line_size, int w)
1654{
1655 int s, i, j;
1d503957 1656 uint32_t *sq = ff_squareTbl + 256;
155ec6ed
MN
1657
1658 s = 0;
1659 for (i = 0; i < w; i++) {
1660 for (j = 0; j < w; j ++) {
1661 s += sq[pix[0]];
1662 pix ++;
1663 }
1664 pix += line_size - w;
1665 }
1666 return s;
1667}
1668
8c36eaaa 1669static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
155ec6ed
MN
1670 const int w= s->b_width << s->block_max_depth;
1671 const int rem_depth= s->block_max_depth - level;
1672 const int index= (x + y*w) << rem_depth;
1673 const int block_w= 1<<rem_depth;
1674 BlockNode block;
1675 int i,j;
115329f1 1676
155ec6ed
MN
1677 block.color[0]= l;
1678 block.color[1]= cb;
1679 block.color[2]= cr;
1680 block.mx= mx;
1681 block.my= my;
8c36eaaa 1682 block.ref= ref;
155ec6ed
MN
1683 block.type= type;
1684 block.level= level;
1685
1686 for(j=0; j<block_w; j++){
1687 for(i=0; i<block_w; i++){
1688 s->block[index + i + j*w]= block;
1689 }
1690 }
1691}
1692
1693static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
1694 const int offset[3]= {
1695 y*c-> stride + x,
1696 ((y*c->uvstride + x)>>1),
1697 ((y*c->uvstride + x)>>1),
1698 };
1699 int i;
1700 for(i=0; i<3; i++){
1701 c->src[0][i]= src [i];
1702 c->ref[0][i]= ref [i] + offset[i];
1703 }
1704 assert(!ref_index);
1705}
1706
85fc0e75 1707static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
aadcc5ce 1708 const BlockNode *left, const BlockNode *top, const BlockNode *tr){
85fc0e75
LM
1709 if(s->ref_frames == 1){
1710 *mx = mid_pred(left->mx, top->mx, tr->mx);
1711 *my = mid_pred(left->my, top->my, tr->my);
1712 }else{
1713 const int *scale = scale_mv_ref[ref];
6884c36c
PI
1714 *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8,
1715 (top ->mx * scale[top ->ref] + 128) >>8,
1716 (tr ->mx * scale[tr ->ref] + 128) >>8);
1717 *my = mid_pred((left->my * scale[left->ref] + 128) >>8,
1718 (top ->my * scale[top ->ref] + 128) >>8,
1719 (tr ->my * scale[tr ->ref] + 128) >>8);
85fc0e75
LM
1720 }
1721}
1722
155ec6ed
MN
/* Shorthand names for rows of a local predictor array called P; the encoder
 * below declares it as int P[10][2] and stores x in [0] and y in [1]. */
1723//FIXME copy&paste
/* vector of the left neighbour */
1724#define P_LEFT P[1]
/* vector of the top neighbour */
1725#define P_TOP P[2]
/* vector of the top-right neighbour */
1726#define P_TOPRIGHT P[3]
/* component-wise median of the three neighbours above */
1727#define P_MEDIAN P[4]
1728#define P_MV1 P[9]
1729#define FLAG_QPEL 1 //must be 1
1730
/**
 * Chooses and encodes the coding mode of block (x, y) at quadtree level
 * `level`, and returns the cost of the chosen mode.
 *
 * Three candidates are evaluated:
 *  - inter: motion search over all reference frames, coded into the scratch
 *    coder pc (cost: score)
 *  - intra: mean colour per plane, coded into the scratch coder ic
 *    (cost: iscore)
 *  - split: only if level != block_max_depth, the four children are encoded
 *    recursively into the real coder s->c (cost: score2)
 * If the split is not the cheapest, the bytes it wrote are overwritten with
 * the scratch output of the cheaper of inter/intra, and s->c and
 * s->block_state are replaced by that candidate's copies.
 *
 * On keyframes nothing is coded: the block is set to intra with the left
 * neighbour's colours and 0 is returned.
 */
1731static int encode_q_branch(SnowContext *s, int level, int x, int y){
/* scratch output and state copies for the inter (p_) and intra (i_) trials */
1732 uint8_t p_buffer[1024];
1733 uint8_t i_buffer[1024];
1734 uint8_t p_state[sizeof(s->block_state)];
1735 uint8_t i_state[sizeof(s->block_state)];
28869757
MN
1736 RangeCoder pc, ic;
/* output position of the real coder on entry; the winning trial is copied here */
1737 uint8_t *pbbak= s->c.bytestream;
1738 uint8_t *pbbak_start= s->c.bytestream_start;
1e6b5700 1739 int score, score2, iscore, i_len, p_len, block_s, sum, base_bits;
155ec6ed
MN
1740 const int w= s->b_width << s->block_max_depth;
1741 const int h= s->b_height << s->block_max_depth;
1742 const int rem_depth= s->block_max_depth - level;
1743 const int index= (x + y*w) << rem_depth;
1744 const int block_w= 1<<(LOG2_MB_SIZE - level);
155ec6ed
MN
1745 int trx= (x+1)<<rem_depth;
1746 int try= (y+1)<<rem_depth;
aadcc5ce
PI
/* neighbours outside the block grid are replaced by null_block */
1747 const BlockNode *left = x ? &s->block[index-1] : &null_block;
1748 const BlockNode *top = y ? &s->block[index-w] : &null_block;
1749 const BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
1750 const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
1751 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
1752 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
155ec6ed
MN
/* colour predictors: the left neighbour's colours */
1753 int pl = left->color[0];
1754 int pcb= left->color[1];
1755 int pcr= left->color[2];
85fc0e75 1756 int pmx, pmy;
155ec6ed 1757 int mx=0, my=0;
51d6a3cf 1758 int l,cr,cb;
155ec6ed
MN
1759 const int stride= s->current_picture.linesize[0];
1760 const int uvstride= s->current_picture.linesize[1];
51d6a3cf
MN
/* top-left sample of this block in each input plane (chroma at half offset) */
1761 uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w,
1762 s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
1763 s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
155ec6ed
MN
1764 int P[10][2];
1765 int16_t last_mv[3][2];
1766 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
1767 const int shift= 1+qpel;
1768 MotionEstContext *c= &s->m.me;
8c36eaaa 1769 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
c26abfa5
DB
1770 int mx_context= av_log2(2*FFABS(left->mx - top->mx));
1771 int my_context= av_log2(2*FFABS(left->my - top->my));
155ec6ed 1772 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
8c36eaaa 1773 int ref, best_ref, ref_score, ref_mx, ref_my;
155ec6ed
MN
1774
1775 assert(sizeof(s->block_state) >= 256);
/* keyframes: no search, no bits written, block inherits the left colours */
1776 if(s->keyframe){
85fc0e75 1777 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
155ec6ed
MN
1778 return 0;
1779 }
1780
155ec6ed
MN
1781// clip predictors / edge ?
1782
1783 P_LEFT[0]= left->mx;
1784 P_LEFT[1]= left->my;
1785 P_TOP [0]= top->mx;
1786 P_TOP [1]= top->my;
1787 P_TOPRIGHT[0]= tr->mx;
1788 P_TOPRIGHT[1]= tr->my;
115329f1 1789
155ec6ed
MN
/* candidate vectors: the value currently stored for this block, and the right and bottom neighbours */
1790 last_mv[0][0]= s->block[index].mx;
1791 last_mv[0][1]= s->block[index].my;
1792 last_mv[1][0]= right->mx;
1793 last_mv[1][1]= right->my;
1794 last_mv[2][0]= bottom->mx;
1795 last_mv[2][1]= bottom->my;
115329f1 1796
155ec6ed 1797 s->m.mb_stride=2;
115329f1 1798 s->m.mb_x=
155ec6ed 1799 s->m.mb_y= 0;
e2158da8 1800 c->skip= 0;
155ec6ed 1801
e2158da8
PI
1802 assert(c-> stride == stride);
1803 assert(c->uvstride == uvstride);
115329f1 1804
155ec6ed
MN
1805 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
1806 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
1807 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
1808 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
115329f1 1809
9a3eaeeb
MN
/* search window relative to this block: the block may move at most 16-3
 * samples beyond the area covered by the block grid on any side */
1810 c->xmin = - x*block_w - 16+3;
1811 c->ymin = - y*block_w - 16+3;
1812 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-3;
1813 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-3;
155ec6ed
MN
1814
/* clip the neighbour predictors to the search window (upper bounds, plus the lower x bound of the top-right one) */
1815 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
115329f1 1816 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
155ec6ed
MN
1817 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
1818 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
1819 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
1820 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
1821 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
1822
1823 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
1824 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
1825
/* first block row: predict from the left neighbour only, otherwise from the median */
1826 if (!y) {
1827 c->pred_x= P_LEFT[0];
1828 c->pred_y= P_LEFT[1];
1829 } else {
1830 c->pred_x = P_MEDIAN[0];
1831 c->pred_y = P_MEDIAN[1];
1832 }
1833
8c36eaaa
LM
/* motion search in every reference frame; keep the frame/vector with the lowest score */
1834 score= INT_MAX;
1835 best_ref= 0;
1836 for(ref=0; ref<s->ref_frames; ref++){
1837 init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);
1838
1839 ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
1840 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
155ec6ed 1841
8c36eaaa
LM
1842 assert(ref_mx >= c->xmin);
1843 assert(ref_mx <= c->xmax);
1844 assert(ref_my >= c->ymin);
1845 assert(ref_my <= c->ymax);
115329f1 1846
e2158da8 1847 ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
8c36eaaa
LM
1848 ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
/* penalty that grows with the reference index (zero for ref 0) */
1849 ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
/* remember the per-reference result when the ref_mvs array for this reference is allocated */
1850 if(s->ref_mvs[ref]){
1851 s->ref_mvs[ref][index][0]= ref_mx;
1852 s->ref_mvs[ref][index][1]= ref_my;
1853 s->ref_scores[ref][index]= ref_score;
1854 }
1855 if(score > ref_score){
1856 score= ref_score;
1857 best_ref= ref;
1858 mx= ref_mx;
1859 my= ref_my;
1860 }
1861 }
755bfeab 1862 //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2
115329f1 1863
155ec6ed 1864 // subpel search
/* inter trial: code the block as inter into p_buffer using copies of the coder and its states */
61d49d12 1865 base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start);
155ec6ed 1866 pc= s->c;
28869757
MN
1867 pc.bytestream_start=
1868 pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
155ec6ed
MN
1869 memcpy(p_state, s->block_state, sizeof(s->block_state));
1870
1871 if(level!=s->block_max_depth)
28869757
MN
1872 put_rac(&pc, &p_state[4 + s_context], 1);
1873 put_rac(&pc, &p_state[1 + left->type + top->type], 0);
8c36eaaa
LM
1874 if(s->ref_frames > 1)
1875 put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
85fc0e75 1876 pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
8c36eaaa
LM
1877 put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
1878 put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
28869757 1879 p_len= pc.bytestream - pc.bytestream_start;
/* add the rate term: bits spent by this trial weighted by lambda2 */
1e6b5700 1880 score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT;
155ec6ed
MN
1881
/* intra trial: l/cb/cr are the rounded plane means; iscore starts as the
 * sum of squared luma differences from l (sum(p^2) - 2*l*sum(p) + l*l*N) */
1882 block_s= block_w*block_w;
51d6a3cf 1883 sum = pix_sum(current_data[0], stride, block_w);
155ec6ed 1884 l= (sum + block_s/2)/block_s;
51d6a3cf 1885 iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
115329f1 1886
155ec6ed 1887 block_s= block_w*block_w>>2;
51d6a3cf 1888 sum = pix_sum(current_data[1], uvstride, block_w>>1);
155ec6ed
MN
1889 cb= (sum + block_s/2)/block_s;
1890// iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
51d6a3cf 1891 sum = pix_sum(current_data[2], uvstride, block_w>>1);
155ec6ed
MN
1892 cr= (sum + block_s/2)/block_s;
1893// iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
1894
1895 ic= s->c;
28869757
MN
1896 ic.bytestream_start=
1897 ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
155ec6ed
MN
1898 memcpy(i_state, s->block_state, sizeof(s->block_state));
1899 if(level!=s->block_max_depth)
28869757
MN
1900 put_rac(&ic, &i_state[4 + s_context], 1);
1901 put_rac(&ic, &i_state[1 + left->type + top->type], 1);
155ec6ed
MN
1902 put_symbol(&ic, &i_state[32], l-pl , 1);
1903 put_symbol(&ic, &i_state[64], cb-pcb, 1);
1904 put_symbol(&ic, &i_state[96], cr-pcr, 1);
28869757 1905 i_len= ic.bytestream - ic.bytestream_start;
1e6b5700 1906 iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT;
155ec6ed
MN
1907
1908// assert(score==256*256*256*64-1);
1909 assert(iscore < 255*255*256 + s->lambda2*10);
1910 assert(iscore >= 0);
1911 assert(l>=0 && l<=255);
1912 assert(pl>=0 && pl<=255);
1913
/* top-level blocks also feed the scene change score of the motion estimator */
1914 if(level==0){
1915 int varc= iscore >> 8;
1916 int vard= score >> 8;
1917 if (vard <= 64 || vard < varc)
1918 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
1919 else
1920 c->scene_change_score+= s->m.qscale;
1921 }
115329f1 1922
/* split trial: written directly to the real coder; kept only if cheaper than both other candidates */
155ec6ed 1923 if(level!=s->block_max_depth){
28869757 1924 put_rac(&s->c, &s->block_state[4 + s_context], 0);
155ec6ed
MN
1925 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
1926 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
1927 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
1928 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
1929 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
115329f1 1930
155ec6ed
MN
1931 if(score2 < score && score2 < iscore)
1932 return score2;
1933 }
115329f1 1934
/* commit the cheaper of intra/inter: copy its bytes to the saved output
 * position and adopt its coder and state copies (this discards anything the
 * split trial wrote) */
155ec6ed 1935 if(iscore < score){
85fc0e75 1936 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
28869757 1937 memcpy(pbbak, i_buffer, i_len);
155ec6ed 1938 s->c= ic;
28869757
MN
1939 s->c.bytestream_start= pbbak_start;
1940 s->c.bytestream= pbbak + i_len;
8c36eaaa 1941 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
155ec6ed
MN
1942 memcpy(s->block_state, i_state, sizeof(s->block_state));
1943 return iscore;
1944 }else{
28869757 1945 memcpy(pbbak, p_buffer, p_len);
155ec6ed 1946 s->c= pc;
28869757
MN
1947 s->c.bytestream_start= pbbak_start;
1948 s->c.bytestream= pbbak + p_len;
8c36eaaa 1949 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
155ec6ed
MN
1950 memcpy(s->block_state, p_state, sizeof(s->block_state));
1951 return score;
1952 }
1953}
1954
849f1035 1955static av_always_inline int same_block(BlockNode *a, BlockNode *b){
51d6a3cf
MN
1956 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
1957 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
1958 }else{
8c36eaaa 1959 return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
51d6a3cf
MN
1960 }
1961}
1962
1963static void encode_q_branch2(SnowContext *s, int level, int x, int y){
1964 const int w= s->b_width << s->block_max_depth;
1965 const int rem_depth= s->block_max_depth - level;
1966 const int index= (x + y*w) << rem_depth;
1967 int trx= (x+1)<<rem_depth;
1968 BlockNode *b= &s->block[index];
aadcc5ce
PI
1969 const BlockNode *left = x ? &s->block[index-1] : &null_block;
1970 const BlockNode *top = y ? &s->block[index-w] : &null_block;
1971 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
1972 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
51d6a3cf
MN
1973 int pl = left->color[0];
1974 int pcb= left->color[1];
1975 int pcr= left->color[2];
85fc0e75 1976 int pmx, pmy;
8c36eaaa 1977 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
c26abfa5
DB
1978 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref;
1979 int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref;
51d6a3cf
MN
1980 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1981
1982 if(s->keyframe){
85fc0e75 1983 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
51d6a3cf
MN
1984 return;
1985 }
1986
1987 if(level!=s->block_max_depth){
1988 if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
d6f41eed
MN
1989 put_rac(&s->c, &s->block_state[4 + s_context], 1);
1990 }else{
51d6a3cf
MN
1991 put_rac(&s->c, &s->block_state[4 + s_context], 0);
1992 encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
1993 encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
1994 encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
1995 encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
1996 return;
51d6a3cf
MN
1997 }
1998 }
1999 if(b->type & BLOCK_INTRA){
85fc0e75 2000 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
51d6a3cf
MN
2001 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
2002 put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
2003 put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
2004 put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
8c36eaaa 2005 set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
51d6a3cf 2006 }else{
85fc0e75 2007 pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
51d6a3cf 2008 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
8c36eaaa
LM
2009 if(s->ref_frames > 1)
2010 put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
51d6a3cf
MN
2011 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
2012 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
8c36eaaa 2013 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
51d6a3cf
MN
2014 }
2015}
2016
155ec6ed
MN
2017static void decode_q_branch(SnowContext *s, int level, int x, int y){
2018 const int w= s->b_width << s->block_max_depth;
155ec6ed
MN
2019 const int rem_depth= s->block_max_depth - level;
2020 const int index= (x + y*w) << rem_depth;
155ec6ed 2021 int trx= (x+1)<<rem_depth;
aadcc5ce
PI
2022 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2023 const BlockNode *top = y ? &s->block[index-w] : &null_block;
2024 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2025 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
155ec6ed 2026 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
115329f1 2027
155ec6ed 2028 if(s->keyframe){
8c36eaaa 2029 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
155ec6ed
MN
2030 return;
2031 }
2032
28869757 2033 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
1e90b34f 2034 int type, mx, my;
155ec6ed
MN
2035 int l = left->color[0];
2036 int cb= left->color[1];
2037 int cr= left->color[2];
8c36eaaa
LM
2038 int ref = 0;
2039 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
c26abfa5
DB
2040 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
2041 int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
115329f1 2042
28869757 2043 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
155ec6ed
MN
2044
2045 if(type){
85fc0e75 2046 pred_mv(s, &mx, &my, 0, left, top, tr);
155ec6ed
MN
2047 l += get_symbol(&s->c, &s->block_state[32], 1);
2048 cb+= get_symbol(&s->c, &s->block_state[64], 1);
2049 cr+= get_symbol(&s->c, &s->block_state[96], 1);
2050 }else{
8c36eaaa
LM
2051 if(s->ref_frames > 1)
2052 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
85fc0e75 2053 pred_mv(s, &mx, &my, ref, left, top, tr);
8c36eaaa
LM
2054 mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
2055 my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
155ec6ed 2056 }
8c36eaaa 2057 set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
155ec6ed
MN
2058 }else{
2059 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
2060 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
2061 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
2062 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
2063 }
2064}
2065
74e6a8aa 2066static void encode_blocks(SnowContext *s, int search){
155ec6ed
MN
2067 int x, y;
2068 int w= s->b_width;
2069 int h= s->b_height;
2070
74e6a8aa 2071 if(s->avctx->me_method == ME_ITER && !s->keyframe && search)
51d6a3cf
MN
2072 iterative_me(s);
2073
155ec6ed 2074 for(y=0; y<h; y++){
d06c75a8 2075 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
0ecca7a4
MN
2076 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2077 return;
2078 }
155ec6ed 2079 for(x=0; x<w; x++){
74e6a8aa 2080 if(s->avctx->me_method == ME_ITER || !search)
51d6a3cf
MN
2081 encode_q_branch2(s, 0, x, y);
2082 else
2083 encode_q_branch (s, 0, x, y);
155ec6ed
MN
2084 }
2085 }
2086}
2087
2088static void decode_blocks(SnowContext *s){
2089 int x, y;
2090 int w= s->b_width;
2091 int h= s->b_height;
2092
2093 for(y=0; y<h; y++){
2094 for(x=0; x<w; x++){
2095 decode_q_branch(s, 0, x, y);
2096 }
2097 }
791e7b83
MN
2098}
2099
/**
 * Interpolates a b_w x b_h block at the sub-sample position (dx, dy), both
 * below 16, from src into dst.
 *
 * Up to three half-sample planes are built first (horizontally filtered,
 * vertically filtered, filtered in both directions) and the result is then
 * blended from entries of the hpel[] table, which holds the integer
 * position and the half-sample planes.
 *
 * With p == NULL or p->fast_mc a fixed 6-tap filter (20, -5, 1) is used,
 * otherwise the 8-tap filter given by p->hcoeff. The tmp parameter is not
 * referenced in this function; the scratch planes are local arrays.
 */
7d7f57d9 2100static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
/* weight of the first of the two blended planes, indexed by (dx&7) + 8*(dy&7); the second gets 8 minus this */
7942269b 2101 static const uint8_t weight[64]={
a68ca08e
MN
2102 8,7,6,5,4,3,2,1,
2103 7,7,0,0,0,0,0,1,
2104 6,0,6,0,0,0,2,0,
2105 5,0,0,5,0,3,0,0,
2106 4,0,0,0,4,0,0,0,
2107 3,0,0,5,0,3,0,0,
2108 2,0,6,0,0,0,2,0,
2109 1,7,0,0,0,0,0,1,
2110 };
2111
/* indexed by dx + 16*dy; each entry packs two hpel[] indices: high nibble l, low nibble r */
7942269b 2112 static const uint8_t brane[256]={
a68ca08e
MN
2113 0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
2114 0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
2115 0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
2116 0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
2117 0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
2118 0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
2119 0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
2120 0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
2121 0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
2122 0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
2123 0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
2124 0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
2125 0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
2126 0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
2127 0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
2128 0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
2129 };
2130
/* maps an hpel[] index (a nibble from brane[]) to the bitmask of filter passes it needs; 15 requests all passes */
7942269b 2131 static const uint8_t needs[16]={
a68ca08e
MN
2132 0,1,0,0,
2133 2,4,2,0,
2134 0,1,0,0,
2135 15
2136 };
2137
2138 int x, y, b, r, l;
61d6e445
MN
/* tmpIt: unclipped horizontal filter output, 64 entries per row; tmp2t: three clipped 8-bit scratch planes */
2139 int16_t tmpIt [64*(32+HTAPS_MAX)];
2140 uint8_t tmp2t[3][stride*(32+HTAPS_MAX)];
a68ca08e
MN
2141 int16_t *tmpI= tmpIt;
2142 uint8_t *tmp2= tmp2t[0];
f0a70840 2143 const uint8_t *hpel[11];
a68ca08e
MN
2144 assert(dx<16 && dy<16);
2145 r= brane[dx + 16*dy]&15;
2146 l= brane[dx + 16*dy]>>4;
2147
2148 b= needs[l] | needs[r];
7d7f57d9
MN
/* without diagonal MC every pass is computed and the four-point blend below is used */
2149 if(p && !p->diag_mc)
2150 b= 15;
a68ca08e
MN
2151
/* pass 1: horizontal filter over b_h+HTAPS_MAX-1 rows; raw sums go to tmpI, clipped samples to tmp2t[0] */
2152 if(b&5){
61d6e445 2153 for(y=0; y < b_h+HTAPS_MAX-1; y++){
65dc0f53 2154 for(x=0; x < b_w; x++){
61d6e445
MN
2155 int a_1=src[x + HTAPS_MAX/2-4];
2156 int a0= src[x + HTAPS_MAX/2-3];
2157 int a1= src[x + HTAPS_MAX/2-2];
2158 int a2= src[x + HTAPS_MAX/2-1];
2159 int a3= src[x + HTAPS_MAX/2+0];
2160 int a4= src[x + HTAPS_MAX/2+1];
2161 int a5= src[x + HTAPS_MAX/2+2];
2162 int a6= src[x + HTAPS_MAX/2+3];
7d7f57d9
MN
2163 int am=0;
2164 if(!p || p->fast_mc){
2165 am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2166 tmpI[x]= am;
2167 am= (am+16)>>5;
2168 }else{
2169 am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
2170 tmpI[x]= am;
2171 am= (am+32)>>6;
2172 }
791e7b83 2173
65dc0f53
MN
/* out of 0..255: negative becomes 0, too large becomes all ones, i.e. 255 once stored as uint8_t (assumes arithmetic right shift) */
2174 if(am&(~255)) am= ~(am>>31);
2175 tmp2[x]= am;
2176 }
2177 tmpI+= 64;
2178 tmp2+= stride;
2179 src += stride;
791e7b83 2180 }
/* rewind src to the first row */
65dc0f53 2181 src -= stride*y;
a68ca08e 2182 }
/* skip the horizontal filter margin */
61d6e445 2183 src += HTAPS_MAX/2 - 1;
a68ca08e 2184 tmp2= tmp2t[1];
115329f1 2185
/* pass 2: vertical filter of the source, b_w+1 columns, into tmp2t[1] */
a68ca08e 2186 if(b&2){
65dc0f53
MN
2187 for(y=0; y < b_h; y++){
2188 for(x=0; x < b_w+1; x++){
61d6e445
MN
2189 int a_1=src[x + (HTAPS_MAX/2-4)*stride];
2190 int a0= src[x + (HTAPS_MAX/2-3)*stride];
2191 int a1= src[x + (HTAPS_MAX/2-2)*stride];
2192 int a2= src[x + (HTAPS_MAX/2-1)*stride];
2193 int a3= src[x + (HTAPS_MAX/2+0)*stride];
2194 int a4= src[x + (HTAPS_MAX/2+1)*stride];
2195 int a5= src[x + (HTAPS_MAX/2+2)*stride];
2196 int a6= src[x + (HTAPS_MAX/2+3)*stride];
7d7f57d9
MN
2197 int am=0;
2198 if(!p || p->fast_mc)
2199 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
2200 else
2201 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
791e7b83 2202
65dc0f53
MN
2203 if(am&(~255)) am= ~(am>>31);
2204 tmp2[x]= am;
2205 }
2206 src += stride;
2207 tmp2+= stride;
a68ca08e 2208 }
65dc0f53 2209 src -= stride*y;
a68ca08e 2210 }
/* skip the vertical filter margin; src now points at the integer position of the block */
61d6e445 2211 src += stride*(HTAPS_MAX/2 - 1);
a68ca08e
MN
2212 tmp2= tmp2t[2];
2213 tmpI= tmpIt;
/* pass 3: vertical filter of the raw horizontal sums from pass 1, into tmp2t[2] */
2214 if(b&4){
2215 for(y=0; y < b_h; y++){
2216 for(x=0; x < b_w; x++){
61d6e445
MN
2217 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
2218 int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
2219 int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
2220 int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
2221 int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
2222 int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
2223 int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
2224 int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
7d7f57d9
MN
2225 int am=0;
2226 if(!p || p->fast_mc)
2227 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
2228 else
2229 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
a68ca08e
MN
2230 if(am&(~255)) am= ~(am>>31);
2231 tmp2[x]= am;
2232 }
2233 tmpI+= 64;
2234 tmp2+= stride;
2235 }
2236 }
115329f1 2237
/* hpel[] is indexed as column + 4*row on a half-sample grid with columns and rows 0..2; slots 3 and 7 are never assigned */
a68ca08e 2238 hpel[ 0]= src;
61d6e445 2239 hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1);
a68ca08e
MN
2240 hpel[ 2]= src + 1;
2241
2242 hpel[ 4]= tmp2t[1];
2243 hpel[ 5]= tmp2t[2];
2244 hpel[ 6]= tmp2t[1] + 1;
2245
2246 hpel[ 8]= src + stride;
2247 hpel[ 9]= hpel[1] + stride;
2248 hpel[10]= hpel[8] + 1;
2249
/* all passes available: bilinear blend of the four grid points around (dx, dy), weights in eighths */
2250 if(b==15){
f0a70840
BC
2251 const uint8_t *src1= hpel[dx/8 + dy/8*4 ];
2252 const uint8_t *src2= hpel[dx/8 + dy/8*4+1];
2253 const uint8_t *src3= hpel[dx/8 + dy/8*4+4];
2254 const uint8_t *src4= hpel[dx/8 + dy/8*4+5];
a68ca08e
MN
2255 dx&=7;
2256 dy&=7;
2257 for(y=0; y < b_h; y++){
2258 for(x=0; x < b_w; x++){
2259 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
2260 (8-dx)* dy *src3[x] + dx* dy *src4[x]+32)>>6;
2261 }
2262 src1+=stride;
2263 src2+=stride;
2264 src3+=stride;
2265 src4+=stride;
2266 dst +=stride;
2267 }
2268 }else{
/* otherwise: weighted average of the two planes selected by brane[] */
f0a70840
BC
2269 const uint8_t *src1= hpel[l];
2270 const uint8_t *src2= hpel[r];
a68ca08e
MN
2271 int a= weight[((dx&7) + (8*(dy&7)))];
2272 int b= 8-a;
2273 for(y=0; y < b_h; y++){
2274 for(x=0; x < b_w; x++){
2275 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
2276 }
2277 src1+=stride;
2278 src2+=stride;
2279 dst +=stride;
791e7b83
MN
2280 }
2281 }
2282}
2283
/* mca(dx,dy,b_w) defines mc_block_hpel<dx><dy><b_w>(dst, src, stride, h): a
 * wrapper that calls mc_block() for a square b_w x b_w block (h must equal
 * b_w) with a NULL plane and the fixed offset (dx, dy). src is moved back by
 * HTAPS_MAX/2-1 samples and rows before the call. */
791e7b83 2284#define mca(dx,dy,b_w)\
bad700e3 2285static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
61d6e445 2286 uint8_t tmp[stride*(b_w+HTAPS_MAX-1)];\
791e7b83 2287 assert(h==b_w);\
61d6e445 2288 mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, tmp, stride, b_w, b_w, dx, dy);\
791e7b83
MN
2289}
2290
/* instances for offsets 0 and 8 in each direction, block sizes 16 and 8 */
2291mca( 0, 0,16)
2292mca( 8, 0,16)
2293mca( 0, 8,16)
2294mca( 8, 8,16)
d92b5807
MN
2295mca( 0, 0,8)
2296mca( 8, 0,8)
2297mca( 0, 8,8)
2298mca( 8, 8,8)
791e7b83 2299
8c36eaaa 2300static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
51d6a3cf 2301 if(block->type & BLOCK_INTRA){
ff158dc9 2302 int x, y;
2692ceab
MN
2303 const int color = block->color[plane_index];
2304 const int color4= color*0x01010101;
1015631b
LM
2305 if(b_w==32){
2306 for(y=0; y < b_h; y++){
2307 *(uint32_t*)&dst[0 + y*stride]= color4;
2308 *(uint32_t*)&dst[4 + y*stride]= color4;
2309 *(uint32_t*)&dst[8 + y*stride]= color4;
2310 *(uint32_t*)&dst[12+ y*stride]= color4;
2311 *(uint32_t*)&dst[16+ y*stride]= color4;
2312 *(uint32_t*)&dst[20+ y*stride]= color4;
2313 *(uint32_t*)&dst[24+ y*stride]= color4;
2314 *(uint32_t*)&dst[28+ y*stride]= color4;
2315 }
2316 }else if(b_w==16){
2692ceab
MN
2317 for(y=0; y < b_h; y++){
2318 *(uint32_t*)&dst[0 + y*stride]= color4;
2319 *(uint32_t*)&dst[4 + y*stride]= color4;
2320 *(uint32_t*)&dst[8 + y*stride]= color4;
2321 *(uint32_t*)&dst[12+ y*stride]= color4;
2322 }
2323 }else if(b_w==8){
2324 for(y=0; y < b_h; y++){
2325 *(uint32_t*)&dst[0 + y*stride]= color4;
2326 *(uint32_t*)&dst[4 + y*stride]= color4;
2327 }
2328 }else if(b_w==4){
2329 for(y=0; y < b_h; y++){
2330 *(uint32_t*)&dst[0 + y*stride]= color4;
2331 }
2332 }else{
2333 for(y=0; y < b_h; y++){
2334 for(x=0; x < b_w; x++){
2335 dst[x + y*stride]= color;
2336 }
ff158dc9
MN
2337 }
2338 }
2339 }else{
8c36eaaa 2340 uint8_t *src= s->last_picture[block->ref].data[plane_index];
ff158dc9
MN
2341 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
2342 int mx= block->mx*scale;
2343 int my= block->my*scale;
ec697587
MN
2344 const int dx= mx&15;
2345 const int dy= my&15;
80e44bc3 2346 const int tab_index= 3 - (b_w>>2) + (b_w>>4);
61d6e445
MN
2347 sx += (mx>>4) - (HTAPS_MAX/2-1);
2348 sy += (my>>4) - (HTAPS_MAX/2-1);
ff158dc9 2349 src += sx + sy*stride;
61d6e445
MN
2350 if( (unsigned)sx >= w - b_w - (HTAPS_MAX-2)
2351 || (unsigned)sy >= h - b_h - (HTAPS_MAX-2)){
2352 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h);
ff158dc9
MN
2353 src= tmp + MB_SIZE;
2354 }
87f20c2f
MN
2355// assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
2356// assert(!(b_w&(b_w-1)));
2692ceab 2357 assert(b_w>1 && b_h>1);
89438028 2358 assert((tab_index>=0 && tab_index<4) || b_w==32);
7d7f57d9
MN
2359 if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc )
2360 mc_block(&s->plane[plane_index], dst, src, tmp, stride, b_w, b_h, dx, dy);
1015631b
LM
2361 else if(b_w==32){
2362 int y;
2363 for(y=0; y<b_h; y+=16){
7d7f57d9
MN
2364 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
2365 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
1015631b
LM
2366 }
2367 }else if(b_w==b_h)
7d7f57d9 2368 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
2692ceab 2369 else if(b_w==2*b_h){
7d7f57d9
MN
2370 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 3 + 3*stride,stride);
2371 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
2692ceab
MN
2372 }else{
2373 assert(2*b_w==b_h);
7d7f57d9
MN
2374 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 3 + 3*stride ,stride);
2375 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
2692ceab 2376 }
ff158dc9
MN
2377 }
2378}
2379
9dd6c804 2380void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
059715a4
RE
2381 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
2382 int y, x;
d593e329 2383 IDWTELEM * dst;
059715a4 2384 for(y=0; y<b_h; y++){
19032450 2385 //FIXME ugly misuse of obmc_stride
9dd6c804
PI
2386 const uint8_t *obmc1= obmc + y*obmc_stride;
2387 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2388 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2389 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
059715a4
RE
2390 dst = slice_buffer_get_line(sb, src_y + y);
2391 for(x=0; x<b_w; x++){
2392 int v= obmc1[x] * block[3][x + y*src_stride]
2393 +obmc2[x] * block[2][x + y*src_stride]
2394 +obmc3[x] * block[1][x + y*src_stride]
2395 +obmc4[x] * block[0][x + y*src_stride];
2396
2397 v <<= 8 - LOG2_OBMC_MAX;
2398 if(FRAC_BITS != 8){
059715a4
RE
2399 v >>= 8 - FRAC_BITS;
2400 }
2401 if(add){
2402 v += dst[x + src_x];
2403 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2404 if(v&(~255)) v= ~(v>>31);
2405 dst8[x + y*src_stride] = v;
2406 }else{
2407 dst[x + src_x] -= v;
2408 }
2409 }
2410 }
2411}
2412
e6464f8b 2413//FIXME name cleanup (b_w, block_w, b_width stuff)
d593e329 2414static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
a0d1931c
Y
2415 const int b_width = s->b_width << s->block_max_depth;
2416 const int b_height= s->b_height << s->block_max_depth;
2417 const int b_stride= b_width;
2418 BlockNode *lt= &s->block[b_x + b_y*b_stride];
2419 BlockNode *rt= lt+1;
2420 BlockNode *lb= lt+b_stride;
2421 BlockNode *rb= lb+1;
115329f1 2422 uint8_t *block[4];
cc884a35 2423 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
bd2b6b33 2424 uint8_t *tmp = s->scratchbuf;
cc884a35 2425 uint8_t *ptmp;
a0d1931c
Y
2426 int x,y;
2427
2428 if(b_x<0){
2429 lt= rt;
2430 lb= rb;
2431 }else if(b_x + 1 >= b_width){
2432 rt= lt;
2433 rb= lb;
2434 }
2435 if(b_y<0){
2436 lt= lb;
2437 rt= rb;
2438 }else if(b_y + 1 >= b_height){
2439 lb= lt;
2440 rb= rt;
2441 }
115329f1 2442
e6464f8b 2443 if(src_x<0){ //FIXME merge with prev & always round internal width up to *16
a0d1931c
Y
2444 obmc -= src_x;
2445 b_w += src_x;
f7e89c73 2446 if(!sliced && !offset_dst)
1015631b 2447 dst -= src_x;
ff158dc9
MN
2448 src_x=0;
2449 }else if(src_x + b_w > w){
2450 b_w = w - src_x;
2451 }
2452 if(src_y<0){
2453 obmc -= src_y*obmc_stride;
2454 b_h += src_y;
f7e89c73 2455 if(!sliced && !offset_dst)
1015631b 2456 dst -= src_y*dst_stride;
ff158dc9
MN
2457 src_y=0;
2458 }else if(src_y + b_h> h){
2459 b_h = h - src_y;
791e7b83 2460 }
115329f1 2461
ff158dc9 2462 if(b_w<=0 || b_h<=0) return;
155ec6ed 2463
94ae6788
DB
2464 assert(src_stride > 2*MB_SIZE + 5);
2465
f7e89c73 2466 if(!sliced && offset_dst)
1015631b 2467 dst += src_x + src_y*dst_stride;
715a97f0 2468 dst8+= src_x + src_y*src_stride;
ff158dc9
MN
2469// src += src_x + src_y*src_stride;
2470
cc884a35
MN
2471 ptmp= tmp + 3*tmp_step;
2472 block[0]= ptmp;
2473 ptmp+=tmp_step;
8c36eaaa 2474 pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
ff158dc9
MN
2475
2476 if(same_block(lt, rt)){
2477 block[1]= block[0];
791e7b83 2478 }else{
cc884a35
MN
2479 block[1]= ptmp;
2480 ptmp+=tmp_step;
8c36eaaa 2481 pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
ff158dc9 2482 }
115329f1 2483
ff158dc9
MN
2484 if(same_block(lt, lb)){
2485 block[2]= block[0];
2486 }else if(same_block(rt, lb)){
2487 block[2]= block[1];
2488 }else{
cc884a35
MN
2489 block[2]= ptmp;
2490 ptmp+=tmp_step;
8c36eaaa 2491 pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
ff158dc9 2492 }
791e7b83 2493
ff158dc9
MN
2494 if(same_block(lt, rb) ){
2495 block[3]= block[0];
2496 }else if(same_block(rt, rb)){
2497 block[3]= block[1];
2498 }else if(same_block(lb, rb)){
2499 block[3]= block[2];
2500 }else{
cc884a35 2501 block[3]= ptmp;
8c36eaaa 2502 pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
ff158dc9
MN
2503 }
2504#if 0
2505 for(y=0; y<b_h; y++){
2506 for(x=0; x<b_w; x++){
2507 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2508 if(add) dst[x + y*dst_stride] += v;
2509 else dst[x + y*dst_stride] -= v;
2510 }
2511 }
2512 for(y=0; y<b_h; y++){
2513 uint8_t *obmc2= obmc + (obmc_stride>>1);
2514 for(x=0; x<b_w; x++){
2515 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2516 if(add) dst[x + y*dst_stride] += v;
2517 else dst[x + y*dst_stride] -= v;
2518 }
2519 }
2520 for(y=0; y<b_h; y++){
2521 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2522 for(x=0; x<b_w; x++){
2523 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2524 if(add) dst[x + y*dst_stride] += v;
2525 else dst[x + y*dst_stride] -= v;
2526 }
2527 }
2528 for(y=0; y<b_h; y++){
2529 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2530 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2531 for(x=0; x<b_w; x++){
2532 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2533 if(add) dst[x + y*dst_stride] += v;
2534 else dst[x + y*dst_stride] -= v;
2535 }
2536 }
2537#else
f7e89c73 2538 if(sliced){
f7e89c73 2539 s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
e73e4e75
DB
2540 }else{
2541 for(y=0; y<b_h; y++){
2542 //FIXME ugly misuse of obmc_stride
2543 const uint8_t *obmc1= obmc + y*obmc_stride;
2544 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2545 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2546 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2547 for(x=0; x<b_w; x++){
2548 int v= obmc1[x] * block[3][x + y*src_stride]
2549 +obmc2[x] * block[2][x + y*src_stride]
2550 +obmc3[x] * block[1][x + y*src_stride]
2551 +obmc4[x] * block[0][x + y*src_stride];
2552
2553 v <<= 8 - LOG2_OBMC_MAX;
2554 if(FRAC_BITS != 8){
2555 v >>= 8 - FRAC_BITS;
2556 }
2557 if(add){
2558 v += dst[x + y*dst_stride];
2559 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2560 if(v&(~255)) v= ~(v>>31);
2561 dst8[x + y*src_stride] = v;
2562 }else{
2563 dst[x + y*dst_stride] -= v;
2564 }
715a97f0 2565 }
791e7b83
MN
2566 }
2567 }
96e2fbf2 2568#endif /* 0 */
791e7b83
MN
2569}
2570
d593e329 2571static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){
a0d1931c
Y
2572 Plane *p= &s->plane[plane_index];
2573 const int mb_w= s->b_width << s->block_max_depth;
2574 const int mb_h= s->b_height << s->block_max_depth;
2575 int x, y, mb_x;
2576 int block_size = MB_SIZE >> s->block_max_depth;
2577 int block_w = plane_index ? block_size/2 : block_size;
2578 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2579 int obmc_stride= plane_index ? block_size : 2*block_size;
2580 int ref_stride= s->current_picture.linesize[plane_index];
a0d1931c
Y
2581 uint8_t *dst8= s->current_picture.data[plane_index];
2582 int w= p->width;
2583 int h= p->height;
115329f1 2584
a0d1931c
Y
2585 if(s->keyframe || (s->avctx->debug&512)){
2586 if(mb_y==mb_h)
2587 return;
2588
2589 if(add){
ef3dfbd4 2590 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
a0d1931c 2591// DWTELEM * line = slice_buffer_get_line(sb, y);
d593e329 2592 IDWTELEM * line = sb->line[y];
ef3dfbd4 2593 for(x=0; x<w; x++){
a0d1931c
Y
2594// int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2595 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2596 v >>= FRAC_BITS;
2597 if(v&(~255)) v= ~(v>>31);
2598 dst8[x + y*ref_stride]= v;
2599 }
2600 }
2601 }else{
ef3dfbd4 2602 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
a0d1931c 2603// DWTELEM * line = slice_buffer_get_line(sb, y);
d593e329 2604 IDWTELEM * line = sb->line[y];
ef3dfbd4 2605 for(x=0; x<w; x++){
a0d1931c
Y
2606 line[x] -= 128 << FRAC_BITS;
2607// buf[x + y*w]-= 128<<FRAC_BITS;
2608 }
2609 }
2610 }
2611
2612 return;
2613 }
115329f1 2614
e73e4e75
DB
2615 for(mb_x=0; mb_x<=mb_w; mb_x++){
2616 add_yblock(s, 1, sb, old_buffer, dst8, obmc,
2617 block_w*mb_x - block_w/2,
2618 block_w*mb_y - block_w/2,
2619 block_w, block_w,
2620 w, h,
2621 w, ref_stride, obmc_stride,
2622 mb_x - 1, mb_y - 1,
2623 add, 0, plane_index);
2624 }
a0d1931c
Y
2625}
2626
d593e329 2627static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){
791e7b83 2628 Plane *p= &s->plane[plane_index];
155ec6ed
MN
2629 const int mb_w= s->b_width << s->block_max_depth;
2630 const int mb_h= s->b_height << s->block_max_depth;
f9e6ebf7 2631 int x, y, mb_x;
155ec6ed
MN
2632 int block_size = MB_SIZE >> s->block_max_depth;
2633 int block_w = plane_index ? block_size/2 : block_size;
ff158dc9 2634 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
51d6a3cf 2635 const int obmc_stride= plane_index ? block_size : 2*block_size;
715a97f0 2636 int ref_stride= s->current_picture.linesize[plane_index];
715a97f0 2637 uint8_t *dst8= s->current_picture.data[plane_index];
791e7b83
MN
2638 int w= p->width;
2639 int h= p->height;
115329f1 2640
ff158dc9 2641 if(s->keyframe || (s->avctx->debug&512)){
f9e6ebf7
LM
2642 if(mb_y==mb_h)
2643 return;
2644
715a97f0 2645 if(add){
86e59cc0 2646 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
715a97f0
MN
2647 for(x=0; x<w; x++){
2648 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2649 v >>= FRAC_BITS;
2650 if(v&(~255)) v= ~(v>>31);
2651 dst8[x + y*ref_stride]= v;
2652 }
2653 }
2654 }else{
86e59cc0 2655 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
715a97f0
MN
2656 for(x=0; x<w; x++){
2657 buf[x + y*w]-= 128<<FRAC_BITS;
2658 }
ff158dc9 2659 }
791e7b83 2660 }
ff158dc9
MN
2661
2662 return;
791e7b83 2663 }
115329f1 2664
94ae6788
DB
2665 for(mb_x=0; mb_x<=mb_w; mb_x++){
2666 add_yblock(s, 0, NULL, buf, dst8, obmc,
2667 block_w*mb_x - block_w/2,
2668 block_w*mb_y - block_w/2,
2669 block_w, block_w,
2670 w, h,
2671 w, ref_stride, obmc_stride,
2672 mb_x - 1, mb_y - 1,
2673 add, 1, plane_index);
2674 }
f9e6ebf7
LM
2675}
2676
d593e329 2677static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){
f9e6ebf7
LM
2678 const int mb_h= s->b_height << s->block_max_depth;
2679 int mb_y;
2680 for(mb_y=0; mb_y<=mb_h; mb_y++)
2681 predict_slice(s, buf, plane_index, add, mb_y);
791e7b83
MN
2682}
2683
51d6a3cf
MN
2684static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
2685 int i, x2, y2;
2686 Plane *p= &s->plane[plane_index];
2687 const int block_size = MB_SIZE >> s->block_max_depth;
2688 const int block_w = plane_index ? block_size/2 : block_size;
2689 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2690 const int obmc_stride= plane_index ? block_size : 2*block_size;
2691 const int ref_stride= s->current_picture.linesize[plane_index];
51d6a3cf 2692 uint8_t *src= s-> input_picture.data[plane_index];
d593e329 2693 IDWTELEM *dst= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; //FIXME change to unsigned
51d6a3cf
MN
2694 const int b_stride = s->b_width << s->block_max_depth;
2695 const int w= p->width;
2696 const int h= p->height;
2697 int index= mb_x + mb_y*b_stride;
2698 BlockNode *b= &s->block[index];
2699 BlockNode backup= *b;
2700 int ab=0;
2701 int aa=0;
2702
2703 b->type|= BLOCK_INTRA;
2704 b->color[plane_index]= 0;
d593e329 2705 memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM));
51d6a3cf
MN
2706
2707 for(i=0; i<4; i++){
2708 int mb_x2= mb_x + (i &1) - 1;
2709 int mb_y2= mb_y + (i>>1) - 1;
2710 int x= block_w*mb_x2 + block_w/2;
2711 int y= block_w*mb_y2 + block_w/2;
2712
f7e89c73 2713 add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
1015631b 2714 x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
51d6a3cf
MN
2715
2716 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
2717 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
2718 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
2719 int obmc_v= obmc[index];
1015631b 2720 int d;
51d6a3cf
MN
2721 if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
2722 if(x<0) obmc_v += obmc[index + block_w];
2723 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
2724 if(x+block_w>w) obmc_v += obmc[index - block_w];
e6464f8b 2725 //FIXME precalculate this or simplify it somehow else
51d6a3cf 2726
1015631b
LM
2727 d = -dst[index] + (1<<(FRAC_BITS-1));
2728 dst[index] = d;
2729 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
e6464f8b 2730 aa += obmc_v * obmc_v; //FIXME precalculate this
51d6a3cf
MN
2731 }
2732 }
2733 }
2734 *b= backup;
2735
755bfeab 2736 return av_clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we should not need clipping
51d6a3cf
MN
2737}
2738
b104969f
LM
2739static inline int get_block_bits(SnowContext *s, int x, int y, int w){
2740 const int b_stride = s->b_width << s->block_max_depth;
2741 const int b_height = s->b_height<< s->block_max_depth;
2742 int index= x + y*b_stride;
aadcc5ce
PI
2743 const BlockNode *b = &s->block[index];
2744 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2745 const BlockNode *top = y ? &s->block[index-b_stride] : &null_block;
2746 const BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left;
2747 const BlockNode *tr = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
b104969f 2748 int dmx, dmy;
c26abfa5
DB
2749// int mx_context= av_log2(2*FFABS(left->mx - top->mx));
2750// int my_context= av_log2(2*FFABS(left->my - top->my));
b104969f
LM
2751
2752 if(x<0 || x>=b_stride || y>=b_height)
2753 return 0;
b104969f
LM
2754/*
27551 0 0
275601X 1-2 1
2757001XX 3-6 2-3
27580001XXX 7-14 4-7
275900001XXXX 15-30 8-15
2760*/
2761//FIXME try accurate rate
e6464f8b 2762//FIXME intra and inter predictors if surrounding blocks are not the same type
b104969f 2763 if(b->type & BLOCK_INTRA){
c26abfa5
DB
2764 return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0]))
2765 + av_log2(2*FFABS(left->color[1] - b->color[1]))
2766 + av_log2(2*FFABS(left->color[2] - b->color[2])));
85fc0e75
LM
2767 }else{
2768 pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
2769 dmx-= b->mx;
2770 dmy-= b->my;
c26abfa5
DB
2771 return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda
2772 + av_log2(2*FFABS(dmy))
8c36eaaa 2773 + av_log2(2*b->ref));
85fc0e75 2774 }
b104969f
LM
2775}
2776
1015631b 2777static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
51d6a3cf
MN
2778 Plane *p= &s->plane[plane_index];
2779 const int block_size = MB_SIZE >> s->block_max_depth;
2780 const int block_w = plane_index ? block_size/2 : block_size;
51d6a3cf
MN
2781 const int obmc_stride= plane_index ? block_size : 2*block_size;
2782 const int ref_stride= s->current_picture.linesize[plane_index];
51d6a3cf 2783 uint8_t *dst= s->current_picture.data[plane_index];
1015631b 2784 uint8_t *src= s-> input_picture.data[plane_index];
d593e329 2785 IDWTELEM *pred= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
bd2b6b33 2786 uint8_t *cur = s->scratchbuf;
61d6e445 2787 uint8_t tmp[ref_stride*(2*MB_SIZE+HTAPS_MAX-1)];
51d6a3cf
MN
2788 const int b_stride = s->b_width << s->block_max_depth;
2789 const int b_height = s->b_height<< s->block_max_depth;
2790 const int w= p->width;
2791 const int h= p->height;
1015631b 2792 int distortion;
51d6a3cf
MN
2793 int rate= 0;
2794 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
1015631b
LM
2795 int sx= block_w*mb_x - block_w/2;
2796 int sy= block_w*mb_y - block_w/2;
561a18d3
RE
2797 int x0= FFMAX(0,-sx);
2798 int y0= FFMAX(0,-sy);
2799 int x1= FFMIN(block_w*2, w-sx);
2800 int y1= FFMIN(block_w*2, h-sy);
1015631b
LM
2801 int i,x,y;
2802
8c36eaaa 2803 pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
1015631b
LM
2804
2805 for(y=y0; y<y1; y++){
2806 const uint8_t *obmc1= obmc_edged + y*obmc_stride;
d593e329 2807 const IDWTELEM *pred1 = pred + y*obmc_stride;
1015631b
LM
2808 uint8_t *cur1 = cur + y*ref_stride;
2809 uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
2810 for(x=x0; x<x1; x++){
d593e329 2811#if FRAC_BITS >= LOG2_OBMC_MAX
1015631b 2812 int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
d593e329
MN
2813#else
2814 int v = (cur1[x] * obmc1[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS);
2815#endif
1015631b
LM
2816 v = (v + pred1[x]) >> FRAC_BITS;
2817 if(v&(~255)) v= ~(v>>31);
2818 dst1[x] = v;
51d6a3cf 2819 }
1015631b 2820 }
51d6a3cf 2821
561a18d3
RE
2822 /* copy the regions where obmc[] = (uint8_t)256 */
2823 if(LOG2_OBMC_MAX == 8
2824 && (mb_x == 0 || mb_x == b_stride-1)
2825 && (mb_y == 0 || mb_y == b_height-1)){
2826 if(mb_x == 0)
2827 x1 = block_w;
2828 else
2829 x0 = block_w;
2830 if(mb_y == 0)
2831 y1 = block_w;
2832 else
2833 y0 = block_w;
2834 for(y=y0; y<y1; y++)
2835 memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
2836 }
2837
1015631b 2838 if(block_w==16){
871371a7
LM
2839 /* FIXME rearrange dsputil to fit 32x32 cmp functions */
2840 /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
e6464f8b
DB
2841 /* FIXME cmps overlap but do not cover the wavelet's whole support.
2842 * So improving the score of one block is not strictly guaranteed
2843 * to improve the score of the whole frame, thus iterative motion
2844 * estimation does not always converge. */
871371a7 2845 if(s->avctx->me_cmp == FF_CMP_W97)
486497e0 2846 distortion = w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
871371a7 2847 else if(s->avctx->me_cmp == FF_CMP_W53)
486497e0 2848 distortion = w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
871371a7
LM
2849 else{
2850 distortion = 0;
2851 for(i=0; i<4; i++){
2852 int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
2853 distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
2854 }
1015631b
LM
2855 }
2856 }else{
2857 assert(block_w==8);
2858 distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
51d6a3cf
MN
2859 }
2860
2861 if(plane_index==0){
2862 for(i=0; i<4; i++){
2863/* ..RRr
2864 * .RXx.
2865 * rxx..
2866 */
b104969f
LM
2867 rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
2868 }
48d1b9a1
LM
2869 if(mb_x == b_stride-2)
2870 rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
b104969f
LM
2871 }
2872 return distortion + rate*penalty_factor;
2873}
2874
2875static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
2876 int i, y2;
2877 Plane *p= &s->plane[plane_index];
2878 const int block_size = MB_SIZE >> s->block_max_depth;
2879 const int block_w = plane_index ? block_size/2 : block_size;
2880 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2881 const int obmc_stride= plane_index ? block_size : 2*block_size;
2882 const int ref_stride= s->current_picture.linesize[plane_index];
b104969f
LM
2883 uint8_t *dst= s->current_picture.data[plane_index];
2884 uint8_t *src= s-> input_picture.data[plane_index];
b5a33ff1
MN
2885 //FIXME zero_dst is const but add_yblock changes dst if add is 0 (this is never the case for dst=zero_dst
2886 // const has only been removed from zero_dst to suppress a warning
2887 static IDWTELEM zero_dst[4096]; //FIXME
b104969f 2888 const int b_stride = s->b_width << s->block_max_depth;
b104969f
LM
2889 const int w= p->width;
2890 const int h= p->height;
2891 int distortion= 0;
2892 int rate= 0;
2893 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
2894
2895 for(i=0; i<9; i++){
2896 int mb_x2= mb_x + (i%3) - 1;
2897 int mb_y2= mb_y + (i/3) - 1;
2898 int x= block_w*mb_x2 + block_w/2;
2899 int y= block_w*mb_y2 + block_w/2;
2900
f7e89c73 2901 add_yblock(s, 0, NULL, zero_dst, dst, obmc,
b104969f
LM
2902 x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
2903
2904 //FIXME find a cleaner/simpler way to skip the outside stuff
2905 for(y2= y; y2<0; y2++)
2906 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2907 for(y2= h; y2<y+block_w; y2++)
2908 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2909 if(x<0){
2910 for(y2= y; y2<y+block_w; y2++)
2911 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
51d6a3cf 2912 }
b104969f
LM
2913 if(x+block_w > w){
2914 for(y2= y; y2<y+block_w; y2++)
2915 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
2916 }
2917
2918 assert(block_w== 8 || block_w==16);
2919 distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
51d6a3cf
MN
2920 }
2921
b104969f
LM
2922 if(plane_index==0){
2923 BlockNode *b= &s->block[mb_x+mb_y*b_stride];
2924 int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
2925
2926/* ..RRRr
2927 * .RXXx.
2928 * .RXXx.
2929 * rxxx.
2930 */
2931 if(merged)
2932 rate = get_block_bits(s, mb_x, mb_y, 2);
2933 for(i=merged?4:0; i<9; i++){
2934 static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
2935 rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
2936 }
2937 }
51d6a3cf
MN
2938 return distortion + rate*penalty_factor;
2939}
2940
849f1035 2941static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
51d6a3cf
MN
2942 const int b_stride= s->b_width << s->block_max_depth;
2943 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
2944 BlockNode backup= *block;
2945 int rd, index, value;
2946
2947 assert(mb_x>=0 && mb_y>=0);
735f9f34 2948 assert(mb_x<b_stride);
51d6a3cf
MN
2949
2950 if(intra){
2951 block->color[0] = p[0];
2952 block->color[1] = p[1];
2953 block->color[2] = p[2];
2954 block->type |= BLOCK_INTRA;
2955 }else{
2956 index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
8c36eaaa 2957 value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12);
51d6a3cf
MN
2958 if(s->me_cache[index] == value)
2959 return 0;
2960 s->me_cache[index]= value;
2961
2962 block->mx= p[0];
2963 block->my= p[1];
2964 block->type &= ~BLOCK_INTRA;
2965 }
2966
1015631b 2967 rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
51d6a3cf
MN
2968
2969//FIXME chroma
2970 if(rd < *best_rd){
2971 *best_rd= rd;
2972 return 1;
2973 }else{
2974 *block= backup;
2975 return 0;
2976 }
2977}
2978
e6464f8b
DB
2979/* special case for int[2] args we discard afterwards,
2980 * fixes compilation problem with gcc 2.95 */
849f1035 2981static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
52137f2f 2982 int p[2] = {p0, p1};
fc8c4992 2983 return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
52137f2f
FR
2984}
2985
849f1035 2986static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
b104969f
LM
2987 const int b_stride= s->b_width << s->block_max_depth;
2988 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
2989 BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
2990 int rd, index, value;
2991
2992 assert(mb_x>=0 && mb_y>=0);
2993 assert(mb_x<b_stride);
2994 assert(((mb_x|mb_y)&1) == 0);
2995
2996 index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
8c36eaaa 2997 value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12);
b104969f
LM
2998 if(s->me_cache[index] == value)
2999 return 0;
3000 s->me_cache[index]= value;
3001
3002 block->mx= p0;
3003 block->my= p1;
8c36eaaa 3004 block->ref= ref;
b104969f
LM
3005 block->type &= ~BLOCK_INTRA;
3006 block[1]= block[b_stride]= block[b_stride+1]= *block;
3007
3008 rd= get_4block_rd(s, mb_x, mb_y, 0);
3009
3010//FIXME chroma
3011 if(rd < *best_rd){
3012 *best_rd= rd;
3013 return 1;
3014 }else{
3015 block[0]= backup[0];
3016 block[1]= backup[1];
3017 block[b_stride]= backup[2];
3018 block[b_stride+1]= backup[3];
3019 return 0;
3020 }
3021}
3022
51d6a3cf
MN
3023static void iterative_me(SnowContext *s){
3024 int pass, mb_x, mb_y;
3025 const int b_width = s->b_width << s->block_max_depth;
3026 const int b_height= s->b_height << s->block_max_depth;
3027 const int b_stride= b_width;
3028 int color[3];
3029
8f8ae495
LM
3030 {
3031 RangeCoder r = s->c;
3032 uint8_t state[sizeof(s->block_state)];
3033 memcpy(state, s->block_state, sizeof(s->block_state));
3034 for(mb_y= 0; mb_y<s->b_height; mb_y++)
3035 for(mb_x= 0; mb_x<s->b_width; mb_x++)
3036 encode_q_branch(s, 0, mb_x, mb_y);
3037 s->c = r;
3038 memcpy(s->block_state, state, sizeof(s->block_state));
3039 }
3040
871371a7 3041 for(pass=0; pass<25; pass++){
51d6a3cf
MN
3042 int change= 0;
3043
3044 for(mb_y= 0; mb_y<b_height; mb_y++){
3045 for(mb_x= 0; mb_x<b_width; mb_x++){
8c36eaaa
LM
3046 int dia_change, i, j, ref;
3047 int best_rd= INT_MAX, ref_rd;
3048 BlockNode backup, ref_b;
51d6a3cf
MN
3049 const int index= mb_x + mb_y * b_stride;
3050 BlockNode *block= &s->block[index];
7f21a9a7
LM
3051 BlockNode *tb = mb_y ? &s->block[index-b_stride ] : NULL;
3052 BlockNode *lb = mb_x ? &s->block[index -1] : NULL;
3053 BlockNode *rb = mb_x+1<b_width ? &s->block[index +1] : NULL;
3054 BlockNode *bb = mb_y+1<b_height ? &s->block[index+b_stride ] : NULL;
3055 BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : NULL;
3056 BlockNode *trb= mb_x+1<b_width && mb_y ? &s->block[index-b_stride+1] : NULL;
3057 BlockNode *blb= mb_x && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
3058 BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
1015631b
LM
3059 const int b_w= (MB_SIZE >> s->block_max_depth);
3060 uint8_t obmc_edged[b_w*2][b_w*2];
51d6a3cf
MN
3061
3062 if(pass && (block->type & BLOCK_OPT))
3063 continue;
3064 block->type |= BLOCK_OPT;
3065
3066 backup= *block;
3067
3068 if(!s->me_cache_generation)
3069 memset(s->me_cache, 0, sizeof(s->me_cache));
3070 s->me_cache_generation += 1<<22;
3071
e6464f8b 3072 //FIXME precalculate
1015631b
LM
3073 {
3074 int x, y;
3075 memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
3076 if(mb_x==0)
3077 for(y=0; y<b_w*2; y++)
3078 memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
3079 if(mb_x==b_stride-1)
3080 for(y=0; y<b_w*2; y++)
3081 memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
3082 if(mb_y==0){
3083 for(x=0; x<b_w*2; x++)
3084 obmc_edged[0][x] += obmc_edged[b_w-1][x];
3085 for(y=1; y<b_w; y++)
3086 memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
3087 }
3088 if(mb_y==b_height-1){
3089 for(x=0; x<b_w*2; x++)
3090 obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
3091 for(y=b_w; y<b_w*2-1; y++)
3092 memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
3093 }
3094 }
3095
3096 //skip stuff outside the picture
ef3dfbd4 3097 if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1){
1015631b
LM
3098 uint8_t *src= s-> input_picture.data[0];
3099 uint8_t *dst= s->current_picture.data[0];
3100 const int stride= s->current_picture.linesize[0];
3101 const int block_w= MB_SIZE >> s->block_max_depth;
3102 const int sx= block_w*mb_x - block_w/2;
3103 const int sy= block_w*mb_y - block_w/2;
3104 const int w= s->plane[0].width;
3105 const int h= s->plane[0].height;
3106 int y;
3107
3108 for(y=sy; y<0; y++)
3109 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3110 for(y=h; y<sy+block_w*2; y++)
3111 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3112 if(sx<0){
3113 for(y=sy; y<sy+block_w*2; y++)
3114 memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
3115 }
3116 if(sx+block_w*2 > w){
3117 for(y=sy; y<sy+block_w*2; y++)
3118 memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
3119 }
3120 }
3121
3122 // intra(black) = neighbors' contribution to the current block
3123 for(i=0; i<3; i++)
3124 color[i]= get_dc(s, mb_x, mb_y, i);
3125
755bfeab 3126 // get previous score (cannot be cached due to OBMC)
48d1b9a1
LM
3127 if(pass > 0 && (block->type&BLOCK_INTRA)){
3128 int color0[3]= {block->color[0], block->color[1], block->color[2]};
3129 check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
3130 }else
fc8c4992 3131 check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);
48d1b9a1 3132
8c36eaaa
LM
3133 ref_b= *block;
3134 ref_rd= best_rd;
3135 for(ref=0; ref < s->ref_frames; ref++){
3136 int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
3137 if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
3138 continue;
3139 block->ref= ref;
3140 best_rd= INT_MAX;
3141
3142 check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd);
3143 check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
7f21a9a7 3144 if(tb)
8c36eaaa 3145 check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd);
7f21a9a7 3146 if(lb)
8c36eaaa 3147 check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd);
7f21a9a7 3148 if(rb)
8c36eaaa 3149 check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd);
7f21a9a7 3150 if(bb)
8c36eaaa
LM
3151 check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd);
3152
3153 /* fullpel ME */
e6464f8b 3154 //FIXME avoid subpel interpolation / round to nearest integer
8c36eaaa
LM
3155 do{
3156 dia_change=0;
3157 for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
3158 for(j=0; j<i; j++){
3159 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3160 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3161 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3162 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3163 }
51d6a3cf 3164 }
8c36eaaa
LM
3165 }while(dia_change);
3166 /* subpel ME */
3167 do{
3168 static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
3169 dia_change=0;
3170 for(i=0; i<8; i++)
3171 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
3172 }while(dia_change);
3173 //FIXME or try the standard 2 pass qpel or similar
3174
3175 mvr[0][0]= block->mx;
3176 mvr[0][1]= block->my;
3177 if(ref_rd > best_rd){
3178 ref_rd= best_rd;
3179 ref_b= *block;
51d6a3cf 3180 }
8c36eaaa
LM
3181 }
3182 best_rd= ref_rd;
3183 *block= ref_b;
13705b69 3184#if 1
1015631b 3185 check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
51d6a3cf 3186 //FIXME RD style color selection
13705b69 3187#endif
51d6a3cf 3188 if(!same_block(block, &backup)){
7f21a9a7
LM
3189 if(tb ) tb ->type &= ~BLOCK_OPT;
3190 if(lb ) lb ->type &= ~BLOCK_OPT;
3191 if(rb ) rb ->type &= ~BLOCK_OPT;
3192 if(bb ) bb ->type &= ~BLOCK_OPT;
3193 if(tlb) tlb->type &= ~BLOCK_OPT;
3194 if(trb) trb->type &= ~BLOCK_OPT;
3195 if(blb) blb->type &= ~BLOCK_OPT;
3196 if(brb) brb->type &= ~BLOCK_OPT;
51d6a3cf
MN
3197 change ++;
3198 }
3199 }
3200 }
3201 av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
3202 if(!change)
3203 break;
3204 }
b104969f
LM
3205
3206 if(s->block_max_depth == 1){
3207 int change= 0;
3208 for(mb_y= 0; mb_y<b_height; mb_y+=2){
3209 for(mb_x= 0; mb_x<b_width; mb_x+=2){
7f21a9a7 3210 int i;
b104969f
LM
3211 int best_rd, init_rd;
3212 const int index= mb_x + mb_y * b_stride;
3213 BlockNode *b[4];
3214
3215 b[0]= &s->block[index];
3216 b[1]= b[0]+1;
3217 b[2]= b[0]+b_stride;
3218 b[3]= b[2]+1;
3219 if(same_block(b[0], b[1]) &&
3220 same_block(b[0], b[2]) &&
3221 same_block(b[0], b[3]))
3222 continue;
3223
3224 if(!s->me_cache_generation)
3225 memset(s->me_cache, 0, sizeof(s->me_cache));
3226 s->me_cache_generation += 1<<22;
3227
3228 init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
3229
8c36eaaa 3230 //FIXME more multiref search?
b104969f
LM
3231 check_4block_inter(s, mb_x, mb_y,
3232 (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
8c36eaaa 3233 (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);
b104969f
LM
3234
3235 for(i=0; i<4; i++)
3236 if(!(b[i]->type&BLOCK_INTRA))
8c36eaaa 3237 check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);
b104969f
LM
3238
3239 if(init_rd != best_rd)
3240 change++;
3241 }
3242 }
3243 av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
3244 }
51d6a3cf
MN
3245}
3246
d593e329 3247static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){
791e7b83
MN
3248 const int w= b->width;
3249 const int h= b->height;
f66e4f5f 3250 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
b538791b 3251 const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS);
da66b631 3252 int x,y, thres1, thres2;
791e7b83 3253
d593e329
MN
3254 if(s->qlog == LOSSLESS_QLOG){
3255 for(y=0; y<h; y++)
3256 for(x=0; x<w; x++)
3257 dst[x + y*stride]= src[x + y*stride];
3258 return;
3259 }
115329f1 3260
791e7b83 3261 bias= bias ? 0 : (3*qmul)>>3;
da66b631
MN
3262 thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
3263 thres2= 2*thres1;
115329f1 3264
791e7b83
MN
3265 if(!bias){
3266 for(y=0; y<h; y++){
3267 for(x=0; x<w; x++){
da66b631 3268 int i= src[x + y*stride];
115329f1 3269
da66b631
MN
3270 if((unsigned)(i+thres1) > thres2){
3271 if(i>=0){
3272 i<<= QEXPSHIFT;
3273 i/= qmul; //FIXME optimize
d593e329 3274 dst[x + y*stride]= i;
da66b631
MN
3275 }else{
3276 i= -i;
3277 i<<= QEXPSHIFT;
3278 i/= qmul; //FIXME optimize
d593e329 3279 dst[x + y*stride]= -i;
da66b631
MN
3280 }
3281 }else
d593e329 3282 dst[x + y*stride]= 0;
791e7b83
MN
3283 }
3284 }
3285 }else{
3286 for(y=0; y<h; y++){
3287 for(x=0; x<w; x++){
115329f1
DB
3288 int i= src[x + y*stride];
3289
da66b631
MN
3290 if((unsigned)(i+thres1) > thres2){