4:4:4 H.264 decoding support
[libav.git] / libavcodec / snow.c
CommitLineData
791e7b83
MN
/*
 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
20
94ca624f 21#include "libavutil/intmath.h"
791e7b83 22#include "avcodec.h"
791e7b83 23#include "dsputil.h"
05aec7bb 24#include "dwt.h"
059715a4 25#include "snow.h"
28869757
MN
26
27#include "rangecoder.h"
199436b9 28#include "mathops.h"
791e7b83
MN
29
30#include "mpegvideo.h"
c26e58e3 31#include "h263.h"
791e7b83
MN
32
33#undef NDEBUG
34#include <assert.h>
35
791e7b83
MN
/**
 * 3-level quantizer lookup with one entry per byte value.
 * Entries 0, 1 and 255 map to 0, entries 2..127 map to +1 and
 * entries 128..254 map to -1.
 * NOTE(review): presumably indexed with a value masked by 0xFF so that
 * 128..255 stand for negative inputs -- confirm against the callers.
 */
static const int8_t quant3[256]={
     0, 0, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,
     1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,
     1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,
     1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,
     1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,
     1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,
     1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,
     1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1, 0,
};
/**
 * 3-level quantizer lookup with one entry per byte value.
 * Entry 0 maps to 0, entries 1..127 map to +1 and entries 128..255
 * map to -1.
 * NOTE(review): presumably indexed with a value masked by 0xFF so that
 * 128..255 stand for negative inputs -- confirm against the callers.
 */
static const int8_t quant3b[256]={
     0, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,
     1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,
     1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,
     1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,
     1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,
     1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,
     1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,
     1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
};
538a3841
MN
/**
 * Parity-based 3-level lookup with one entry per byte value.
 * Entries 0 and 1 map to 0; every later even index maps to +1 and every
 * later odd index maps to -1.
 * unpack_coeffs() indexes it with (coefficient & 0xFF) of the left and top
 * neighbours to select a range coder context.
 */
static const int8_t quant3bA[256]={
     0, 0, 1,-1,  1,-1, 1,-1,  1,-1, 1,-1,  1,-1, 1,-1,
     1,-1, 1,-1,  1,-1, 1,-1,  1,-1, 1,-1,  1,-1, 1,-1,
     1,-1, 1,-1,  1,-1, 1,-1,  1,-1, 1,-1,  1,-1, 1,-1,
     1,-1, 1,-1,  1,-1, 1,-1,  1,-1, 1,-1,  1,-1, 1,-1,
     1,-1, 1,-1,  1,-1, 1,-1,  1,-1, 1,-1,  1,-1, 1,-1,
     1,-1, 1,-1,  1,-1, 1,-1,  1,-1, 1,-1,  1,-1, 1,-1,
     1,-1, 1,-1,  1,-1, 1,-1,  1,-1, 1,-1,  1,-1, 1,-1,
     1,-1, 1,-1,  1,-1, 1,-1,  1,-1, 1,-1,  1,-1, 1,-1,
     1,-1, 1,-1,  1,-1, 1,-1,  1,-1, 1,-1,  1,-1, 1,-1,
     1,-1, 1,-1,  1,-1, 1,-1,  1,-1, 1,-1,  1,-1, 1,-1,
     1,-1, 1,-1,  1,-1, 1,-1,  1,-1, 1,-1,  1,-1, 1,-1,
     1,-1, 1,-1,  1,-1, 1,-1,  1,-1, 1,-1,  1,-1, 1,-1,
     1,-1, 1,-1,  1,-1, 1,-1,  1,-1, 1,-1,  1,-1, 1,-1,
     1,-1, 1,-1,  1,-1, 1,-1,  1,-1, 1,-1,  1,-1, 1,-1,
     1,-1, 1,-1,  1,-1, 1,-1,  1,-1, 1,-1,  1,-1, 1,-1,
     1,-1, 1,-1,  1,-1, 1,-1,  1,-1, 1,-1,  1,-1, 1,-1,
};
791e7b83
MN
/**
 * 5-level quantizer lookup with one entry per byte value.
 *   [0] = 0, [1..3] = 1, [4..127] = 2,
 *   [128..252] = -2, [253..255] = -1
 * NOTE(review): presumably indexed with a value masked by 0xFF so that
 * 128..255 stand for negative inputs -- confirm against the callers.
 */
static const int8_t quant5[256]={
     0, 1, 1, 1,  2, 2, 2, 2,  2, 2, 2, 2,  2, 2, 2, 2,
     2, 2, 2, 2,  2, 2, 2, 2,  2, 2, 2, 2,  2, 2, 2, 2,
     2, 2, 2, 2,  2, 2, 2, 2,  2, 2, 2, 2,  2, 2, 2, 2,
     2, 2, 2, 2,  2, 2, 2, 2,  2, 2, 2, 2,  2, 2, 2, 2,
     2, 2, 2, 2,  2, 2, 2, 2,  2, 2, 2, 2,  2, 2, 2, 2,
     2, 2, 2, 2,  2, 2, 2, 2,  2, 2, 2, 2,  2, 2, 2, 2,
     2, 2, 2, 2,  2, 2, 2, 2,  2, 2, 2, 2,  2, 2, 2, 2,
     2, 2, 2, 2,  2, 2, 2, 2,  2, 2, 2, 2,  2, 2, 2, 2,
    -2,-2,-2,-2, -2,-2,-2,-2, -2,-2,-2,-2, -2,-2,-2,-2,
    -2,-2,-2,-2, -2,-2,-2,-2, -2,-2,-2,-2, -2,-2,-2,-2,
    -2,-2,-2,-2, -2,-2,-2,-2, -2,-2,-2,-2, -2,-2,-2,-2,
    -2,-2,-2,-2, -2,-2,-2,-2, -2,-2,-2,-2, -2,-2,-2,-2,
    -2,-2,-2,-2, -2,-2,-2,-2, -2,-2,-2,-2, -2,-2,-2,-2,
    -2,-2,-2,-2, -2,-2,-2,-2, -2,-2,-2,-2, -2,-2,-2,-2,
    -2,-2,-2,-2, -2,-2,-2,-2, -2,-2,-2,-2, -2,-2,-2,-2,
    -2,-2,-2,-2, -2,-2,-2,-2, -2,-2,-2,-2, -2,-1,-1,-1,
};
/**
 * 7-level quantizer lookup with one entry per byte value.
 *   [0] = 0, [1..2] = 1, [3..39] = 2, [40..127] = 3,
 *   [128..216] = -3, [217..253] = -2, [254..255] = -1
 * NOTE(review): presumably indexed with a value masked by 0xFF so that
 * 128..255 stand for negative inputs -- confirm against the callers.
 */
static const int8_t quant7[256]={
     0, 1, 1, 2,  2, 2, 2, 2,  2, 2, 2, 2,  2, 2, 2, 2,
     2, 2, 2, 2,  2, 2, 2, 2,  2, 2, 2, 2,  2, 2, 2, 2,
     2, 2, 2, 2,  2, 2, 2, 2,  3, 3, 3, 3,  3, 3, 3, 3,
     3, 3, 3, 3,  3, 3, 3, 3,  3, 3, 3, 3,  3, 3, 3, 3,
     3, 3, 3, 3,  3, 3, 3, 3,  3, 3, 3, 3,  3, 3, 3, 3,
     3, 3, 3, 3,  3, 3, 3, 3,  3, 3, 3, 3,  3, 3, 3, 3,
     3, 3, 3, 3,  3, 3, 3, 3,  3, 3, 3, 3,  3, 3, 3, 3,
     3, 3, 3, 3,  3, 3, 3, 3,  3, 3, 3, 3,  3, 3, 3, 3,
    -3,-3,-3,-3, -3,-3,-3,-3, -3,-3,-3,-3, -3,-3,-3,-3,
    -3,-3,-3,-3, -3,-3,-3,-3, -3,-3,-3,-3, -3,-3,-3,-3,
    -3,-3,-3,-3, -3,-3,-3,-3, -3,-3,-3,-3, -3,-3,-3,-3,
    -3,-3,-3,-3, -3,-3,-3,-3, -3,-3,-3,-3, -3,-3,-3,-3,
    -3,-3,-3,-3, -3,-3,-3,-3, -3,-3,-3,-3, -3,-3,-3,-3,
    -3,-3,-3,-3, -3,-3,-3,-3, -3,-2,-2,-2, -2,-2,-2,-2,
    -2,-2,-2,-2, -2,-2,-2,-2, -2,-2,-2,-2, -2,-2,-2,-2,
    -2,-2,-2,-2, -2,-2,-2,-2, -2,-2,-2,-2, -2,-2,-1,-1,
};
/**
 * 9-level quantizer lookup with one entry per byte value.
 *   [0] = 0, [1..2] = 1, [3..6] = 2, [7..20] = 3, [21..127] = 4,
 *   [128..235] = -4, [236..249] = -3, [250..253] = -2, [254..255] = -1
 * NOTE(review): presumably indexed with a value masked by 0xFF so that
 * 128..255 stand for negative inputs -- confirm against the callers.
 */
static const int8_t quant9[256]={
     0, 1, 1, 2,  2, 2, 2, 3,  3, 3, 3, 3,  3, 3, 3, 3,
     3, 3, 3, 3,  3, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
     4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
     4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
     4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
     4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
     4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
     4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
    -4,-4,-4,-4, -4,-4,-4,-4, -4,-4,-4,-4, -4,-4,-4,-4,
    -4,-4,-4,-4, -4,-4,-4,-4, -4,-4,-4,-4, -4,-4,-4,-4,
    -4,-4,-4,-4, -4,-4,-4,-4, -4,-4,-4,-4, -4,-4,-4,-4,
    -4,-4,-4,-4, -4,-4,-4,-4, -4,-4,-4,-4, -4,-4,-4,-4,
    -4,-4,-4,-4, -4,-4,-4,-4, -4,-4,-4,-4, -4,-4,-4,-4,
    -4,-4,-4,-4, -4,-4,-4,-4, -4,-4,-4,-4, -4,-4,-4,-4,
    -4,-4,-4,-4, -4,-4,-4,-4, -4,-4,-4,-4, -3,-3,-3,-3,
    -3,-3,-3,-3, -3,-3,-3,-3, -3,-3,-2,-2, -2,-2,-1,-1,
};
/**
 * 11-level quantizer lookup with one entry per byte value.
 *   [0] = 0, [1] = 1, [2..4] = 2, [5..11] = 3, [12..34] = 4, [35..127] = 5,
 *   [128..221] = -5, [222..244] = -4, [245..251] = -3, [252..254] = -2,
 *   [255] = -1
 * NOTE(review): presumably indexed with a value masked by 0xFF so that
 * 128..255 stand for negative inputs -- confirm against the callers.
 */
static const int8_t quant11[256]={
     0, 1, 2, 2,  2, 3, 3, 3,  3, 3, 3, 3,  4, 4, 4, 4,
     4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
     4, 4, 4, 5,  5, 5, 5, 5,  5, 5, 5, 5,  5, 5, 5, 5,
     5, 5, 5, 5,  5, 5, 5, 5,  5, 5, 5, 5,  5, 5, 5, 5,
     5, 5, 5, 5,  5, 5, 5, 5,  5, 5, 5, 5,  5, 5, 5, 5,
     5, 5, 5, 5,  5, 5, 5, 5,  5, 5, 5, 5,  5, 5, 5, 5,
     5, 5, 5, 5,  5, 5, 5, 5,  5, 5, 5, 5,  5, 5, 5, 5,
     5, 5, 5, 5,  5, 5, 5, 5,  5, 5, 5, 5,  5, 5, 5, 5,
    -5,-5,-5,-5, -5,-5,-5,-5, -5,-5,-5,-5, -5,-5,-5,-5,
    -5,-5,-5,-5, -5,-5,-5,-5, -5,-5,-5,-5, -5,-5,-5,-5,
    -5,-5,-5,-5, -5,-5,-5,-5, -5,-5,-5,-5, -5,-5,-5,-5,
    -5,-5,-5,-5, -5,-5,-5,-5, -5,-5,-5,-5, -5,-5,-5,-5,
    -5,-5,-5,-5, -5,-5,-5,-5, -5,-5,-5,-5, -5,-5,-5,-5,
    -5,-5,-5,-5, -5,-5,-5,-5, -5,-5,-5,-5, -5,-5,-4,-4,
    -4,-4,-4,-4, -4,-4,-4,-4, -4,-4,-4,-4, -4,-4,-4,-4,
    -4,-4,-4,-4, -4,-3,-3,-3, -3,-3,-3,-3, -2,-2,-2,-1,
};
/**
 * 13-level quantizer lookup with one entry per byte value.
 *   [0] = 0, [1] = 1, [2..3] = 2, [4..7] = 3, [8..16] = 4, [17..49] = 5,
 *   [50..127] = 6,
 *   [128..206] = -6, [207..239] = -5, [240..248] = -4, [249..252] = -3,
 *   [253..254] = -2, [255] = -1
 * NOTE(review): presumably indexed with a value masked by 0xFF so that
 * 128..255 stand for negative inputs -- confirm against the callers.
 */
static const int8_t quant13[256]={
     0, 1, 2, 2,  3, 3, 3, 3,  4, 4, 4, 4,  4, 4, 4, 4,
     4, 5, 5, 5,  5, 5, 5, 5,  5, 5, 5, 5,  5, 5, 5, 5,
     5, 5, 5, 5,  5, 5, 5, 5,  5, 5, 5, 5,  5, 5, 5, 5,
     5, 5, 6, 6,  6, 6, 6, 6,  6, 6, 6, 6,  6, 6, 6, 6,
     6, 6, 6, 6,  6, 6, 6, 6,  6, 6, 6, 6,  6, 6, 6, 6,
     6, 6, 6, 6,  6, 6, 6, 6,  6, 6, 6, 6,  6, 6, 6, 6,
     6, 6, 6, 6,  6, 6, 6, 6,  6, 6, 6, 6,  6, 6, 6, 6,
     6, 6, 6, 6,  6, 6, 6, 6,  6, 6, 6, 6,  6, 6, 6, 6,
    -6,-6,-6,-6, -6,-6,-6,-6, -6,-6,-6,-6, -6,-6,-6,-6,
    -6,-6,-6,-6, -6,-6,-6,-6, -6,-6,-6,-6, -6,-6,-6,-6,
    -6,-6,-6,-6, -6,-6,-6,-6, -6,-6,-6,-6, -6,-6,-6,-6,
    -6,-6,-6,-6, -6,-6,-6,-6, -6,-6,-6,-6, -6,-6,-6,-6,
    -6,-6,-6,-6, -6,-6,-6,-6, -6,-6,-6,-6, -6,-6,-6,-5,
    -5,-5,-5,-5, -5,-5,-5,-5, -5,-5,-5,-5, -5,-5,-5,-5,
    -5,-5,-5,-5, -5,-5,-5,-5, -5,-5,-5,-5, -5,-5,-5,-5,
    -4,-4,-4,-4, -4,-4,-4,-4, -4,-3,-3,-3, -3,-2,-2,-1,
};
180
791e7b83
MN
/*
 * Overlapped block motion compensation (OBMC) windows, 64*linear variant.
 *
 * Only the linear windows are kept: the cubic and cosine variants that used
 * to sit next to them were guarded by constant preprocessor conditions
 * ("#if 0" / the "#else" of "#elif 1") and were therefore never compiled.
 *
 * Both tables are symmetric horizontally and vertically, and for a window of
 * size N the four weights at (x,y), (x+N/2,y), (x,y+N/2) and (x+N/2,y+N/2)
 * add up to 256 for every x,y < N/2.
 */

/** 32x32 OBMC window, stored row by row. */
static const uint8_t obmc32[1024]={
  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,
  0,  4,  4,  4,  8,  8,  8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12,  8,  8,  8,  4,  4,  4,  0,
  0,  4,  8,  8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12,  8,  8,  4,  0,
  0,  4,  8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12,  8,  4,  0,
  4,  8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12,  8,  4,
  4,  8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12,  8,  4,
  4,  8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16,  8,  4,
  4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12,  4,
  4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12,  4,
  4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16,  4,
  4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16,  4,
  4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16,  4,
  8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20,  8,
  8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20,  8,
  8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20,  8,
  8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24,  8,
  8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24,  8,
  8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20,  8,
  8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20,  8,
  8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20,  8,
  4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16,  4,
  4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16,  4,
  4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16,  4,
  4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12,  4,
  4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12,  4,
  4,  8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16,  8,  4,
  4,  8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12,  8,  4,
  4,  8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12,  8,  4,
  0,  4,  8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12,  8,  4,  0,
  0,  4,  8,  8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12,  8,  8,  4,  0,
  0,  4,  4,  4,  8,  8,  8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12,  8,  8,  8,  4,  4,  4,  0,
  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,
 //error:0.000020
};

/** 16x16 OBMC window, stored row by row. */
static const uint8_t obmc16[256]={
  0,  4,  4,  8,  8, 12, 12, 16, 16, 12, 12,  8,  8,  4,  4,  0,
  4,  8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16,  8,  4,
  4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16,  4,
  8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20,  8,
  8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28,  8,
 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
  8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28,  8,
  8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20,  8,
  4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16,  4,
  4,  8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16,  8,  4,
  0,  4,  4,  8,  8, 12, 12, 16, 16, 12, 12,  8,  8,  4,  4,  0,
//error:0.000015
};
791e7b83 347
155ec6ed
MN
/**
 * 8x8 OBMC window (linear *64), stored row by row.
 * Symmetric in both directions; the weights at (x,y), (x+4,y), (x,y+4) and
 * (x+4,y+4) add up to 256 for every x,y < 4.
 */
static const uint8_t obmc8[64]={
      4,  12,  20,  28,  28,  20,  12,   4,
     12,  36,  60,  84,  84,  60,  36,  12,
     20,  60, 100, 140, 140, 100,  60,  20,
     28,  84, 140, 196, 196, 140,  84,  28,
     28,  84, 140, 196, 196, 140,  84,  28,
     20,  60, 100, 140, 140, 100,  60,  20,
     12,  36,  60,  84,  84,  60,  36,  12,
      4,  12,  20,  28,  28,  20,  12,   4,
//error:0.000000
};
360
/**
 * 4x4 OBMC window (linear *64), stored row by row.
 * Symmetric in both directions; the weights at (x,y), (x+2,y), (x,y+2) and
 * (x+2,y+2) add up to 256 for every x,y < 2.
 */
static const uint8_t obmc4[16]={
     16,  48,  48,  16,
     48, 144, 144,  48,
     48, 144, 144,  48,
     16,  48,  48,  16,
//error:0.000000
};
369
cf2baeb3 370static const uint8_t * const obmc_tab[4]={
155ec6ed
MN
371 obmc32, obmc16, obmc8, obmc4
372};
373
85fc0e75
LM
374static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
375
155ec6ed
MN
/**
 * One node of the block array (see SnowContext.block).
 * NOTE(review): mx/my look like the motion vector components and ref like
 * the index of the reference frame -- confirm against the block coding code.
 */
typedef struct BlockNode{
    int16_t mx;
    int16_t my;
    uint8_t ref;
    uint8_t color[3];
    uint8_t type;           ///< combination of the BLOCK_* flags below
//#define TYPE_SPLIT    1
#define BLOCK_INTRA   1
#define BLOCK_OPT     2
//#define TYPE_NOCOLOR  4
    uint8_t level; //FIXME merge into type?
}BlockNode;
388
51d6a3cf
MN
389static const BlockNode null_block= { //FIXME add border maybe
390 .color= {128,128,128},
391 .mx= 0,
392 .my= 0,
8c36eaaa 393 .ref= 0,
51d6a3cf
MN
394 .type= 0,
395 .level= 0,
396};
397
155ec6ed
MN
/* Block size as a power of two and as a pixel count (1<<4 == 16). */
#define LOG2_MB_SIZE        4
#define MB_SIZE             (1 << LOG2_MB_SIZE)
#define ENCODER_EXTRA_BITS  4
/* Upper bound on filter taps; Plane stores HTAPS_MAX/2 coefficients. */
#define HTAPS_MAX           8
155ec6ed 402
a0d1931c
Y
/**
 * One decoded coefficient of a subband row, stored sparsely:
 * unpack_coeffs() writes the column into x and the coded value into coeff.
 */
typedef struct x_and_coeff{
    int16_t  x;
    uint16_t coeff;
} x_and_coeff;
407
791e7b83
MN
408typedef struct SubBand{
409 int level;
410 int stride;
411 int width;
412 int height;
e6464f8b 413 int qlog; ///< log(qscale)/log[2^(1/6)]
791e7b83 414 DWTELEM *buf;
d593e329 415 IDWTELEM *ibuf;
a0d1931c
Y
416 int buf_x_offset;
417 int buf_y_offset;
418 int stride_line; ///< Stride measured in lines, not pixels.
419 x_and_coeff * x_coeff;
791e7b83
MN
420 struct SubBand *parent;
421 uint8_t state[/*7*2*/ 7 + 512][32];
422}SubBand;
423
424typedef struct Plane{
425 int width;
426 int height;
427 SubBand band[MAX_DECOMPOSITIONS][4];
7d7f57d9
MN
428
429 int htaps;
61d6e445 430 int8_t hcoeff[HTAPS_MAX/2];
7d7f57d9
MN
431 int diag_mc;
432 int fast_mc;
433
434 int last_htaps;
61d6e445 435 int8_t last_hcoeff[HTAPS_MAX/2];
7d7f57d9 436 int last_diag_mc;
791e7b83
MN
437}Plane;
438
439typedef struct SnowContext{
791e7b83
MN
440
441 AVCodecContext *avctx;
28869757 442 RangeCoder c;
791e7b83 443 DSPContext dsp;
05aec7bb 444 DWTContext dwt;
51d6a3cf
MN
445 AVFrame new_picture;
446 AVFrame input_picture; ///< new_picture with the internal linesizes
791e7b83 447 AVFrame current_picture;
8c36eaaa 448 AVFrame last_picture[MAX_REF_FRAMES];
5be3a818 449 uint8_t *halfpel_plane[MAX_REF_FRAMES][4][4];
791e7b83
MN
450 AVFrame mconly_picture;
451// uint8_t q_context[16];
452 uint8_t header_state[32];
155ec6ed 453 uint8_t block_state[128 + 32*128];
791e7b83 454 int keyframe;
19aa028d 455 int always_reset;
791e7b83
MN
456 int version;
457 int spatial_decomposition_type;
396a5e68 458 int last_spatial_decomposition_type;
791e7b83
MN
459 int temporal_decomposition_type;
460 int spatial_decomposition_count;
8db13728 461 int last_spatial_decomposition_count;
791e7b83 462 int temporal_decomposition_count;
8c36eaaa
LM
463 int max_ref_frames;
464 int ref_frames;
465 int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
466 uint32_t *ref_scores[MAX_REF_FRAMES];
791e7b83 467 DWTELEM *spatial_dwt_buffer;
d593e329 468 IDWTELEM *spatial_idwt_buffer;
791e7b83
MN
469 int colorspace_type;
470 int chroma_h_shift;
471 int chroma_v_shift;
472 int spatial_scalability;
473 int qlog;
396a5e68 474 int last_qlog;
155ec6ed
MN
475 int lambda;
476 int lambda2;
4e64bead 477 int pass1_rc;
791e7b83 478 int mv_scale;
396a5e68 479 int last_mv_scale;
791e7b83 480 int qbias;
396a5e68 481 int last_qbias;
791e7b83 482#define QBIAS_SHIFT 3
155ec6ed
MN
483 int b_width;
484 int b_height;
485 int block_max_depth;
396a5e68 486 int last_block_max_depth;
791e7b83 487 Plane plane[MAX_PLANES];
155ec6ed 488 BlockNode *block;
51d6a3cf
MN
489#define ME_CACHE_SIZE 1024
490 int me_cache[ME_CACHE_SIZE];
491 int me_cache_generation;
a0d1931c 492 slice_buffer sb;
155ec6ed 493
e6464f8b 494 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX)
bd2b6b33
MR
495
496 uint8_t *scratchbuf;
791e7b83
MN
497}SnowContext;
498
bb270c08 499#ifdef __sgi
2554db9b 500// Avoid a name clash on SGI IRIX
bb270c08 501#undef qexp
2554db9b 502#endif
034aff03 503#define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
c97de57c 504static uint8_t qexp[QROOT];
791e7b83 505
28869757 506static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
791e7b83
MN
507 int i;
508
509 if(v){
c26abfa5 510 const int a= FFABS(v);
791e7b83
MN
511 const int e= av_log2(a);
512#if 1
115329f1 513 const int el= FFMIN(e, 10);
28869757 514 put_rac(c, state+0, 0);
791e7b83
MN
515
516 for(i=0; i<el; i++){
28869757 517 put_rac(c, state+1+i, 1); //1..10
791e7b83
MN
518 }
519 for(; i<e; i++){
28869757 520 put_rac(c, state+1+9, 1); //1..10
791e7b83 521 }
28869757 522 put_rac(c, state+1+FFMIN(i,9), 0);
791e7b83
MN
523
524 for(i=e-1; i>=el; i--){
28869757 525 put_rac(c, state+22+9, (a>>i)&1); //22..31
791e7b83
MN
526 }
527 for(; i>=0; i--){
28869757 528 put_rac(c, state+22+i, (a>>i)&1); //22..31
791e7b83
MN
529 }
530
531 if(is_signed)
28869757 532 put_rac(c, state+11 + el, v < 0); //11..21
791e7b83 533#else
115329f1 534
28869757 535 put_rac(c, state+0, 0);
791e7b83
MN
536 if(e<=9){
537 for(i=0; i<e; i++){
28869757 538 put_rac(c, state+1+i, 1); //1..10
791e7b83 539 }
28869757 540 put_rac(c, state+1+i, 0);
791e7b83
MN
541
542 for(i=e-1; i>=0; i--){
28869757 543 put_rac(c, state+22+i, (a>>i)&1); //22..31
791e7b83
MN
544 }
545
546 if(is_signed)
28869757 547 put_rac(c, state+11 + e, v < 0); //11..21
791e7b83
MN
548 }else{
549 for(i=0; i<e; i++){
28869757 550 put_rac(c, state+1+FFMIN(i,9), 1); //1..10
791e7b83 551 }
e1b3d272 552 put_rac(c, state+1+9, 0);
791e7b83
MN
553
554 for(i=e-1; i>=0; i--){
28869757 555 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
791e7b83
MN
556 }
557
558 if(is_signed)
e1b3d272 559 put_rac(c, state+11 + 10, v < 0); //11..21
791e7b83 560 }
96e2fbf2 561#endif /* 1 */
791e7b83 562 }else{
28869757 563 put_rac(c, state+0, 1);
791e7b83
MN
564 }
565}
566
28869757
MN
567static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
568 if(get_rac(c, state+0))
791e7b83
MN
569 return 0;
570 else{
7c2425d2
LM
571 int i, e, a;
572 e= 0;
28869757 573 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
7c2425d2 574 e++;
791e7b83 575 }
7c2425d2 576
791e7b83 577 a= 1;
7c2425d2 578 for(i=e-1; i>=0; i--){
28869757 579 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
791e7b83
MN
580 }
581
3788e661
MN
582 e= -(is_signed && get_rac(c, state+11 + FFMIN(e,10))); //11..21
583 return (a^e)-e;
791e7b83
MN
584 }
585}
586
28869757 587static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
4f4e9633 588 int i;
0635cbfc 589 int r= log2>=0 ? 1<<log2 : 1;
4f4e9633
MN
590
591 assert(v>=0);
0635cbfc
MN
592 assert(log2>=-4);
593
594 while(v >= r){
28869757 595 put_rac(c, state+4+log2, 1);
0635cbfc 596 v -= r;
4f4e9633 597 log2++;
0635cbfc 598 if(log2>0) r+=r;
4f4e9633 599 }
28869757 600 put_rac(c, state+4+log2, 0);
115329f1 601
4f4e9633 602 for(i=log2-1; i>=0; i--){
28869757 603 put_rac(c, state+31-i, (v>>i)&1);
4f4e9633 604 }
4f4e9633
MN
605}
606
28869757 607static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
4f4e9633 608 int i;
0635cbfc 609 int r= log2>=0 ? 1<<log2 : 1;
4f4e9633
MN
610 int v=0;
611
0635cbfc
MN
612 assert(log2>=-4);
613
28869757 614 while(get_rac(c, state+4+log2)){
0635cbfc 615 v+= r;
4f4e9633 616 log2++;
0635cbfc 617 if(log2>0) r+=r;
4f4e9633 618 }
115329f1 619
4f4e9633 620 for(i=log2-1; i>=0; i--){
28869757 621 v+= get_rac(c, state+31-i)<<i;
4f4e9633
MN
622 }
623
624 return v;
625}
626
a0d1931c 627static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
791e7b83
MN
628 const int w= b->width;
629 const int h= b->height;
630 int x,y;
115329f1 631
4536c8e6
JM
632 int run, runs;
633 x_and_coeff *xc= b->x_coeff;
634 x_and_coeff *prev_xc= NULL;
635 x_and_coeff *prev2_xc= xc;
636 x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
637 x_and_coeff *prev_parent_xc= parent_xc;
638
639 runs= get_symbol2(&s->c, b->state[30], 0);
640 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
641 else run= INT_MAX;
642
643 for(y=0; y<h; y++){
644 int v=0;
645 int lt=0, t=0, rt=0;
646
647 if(y && prev_xc->x == 0){
648 rt= prev_xc->coeff;
649 }
650 for(x=0; x<w; x++){
651 int p=0;
652 const int l= v;
653
654 lt= t; t= rt;
655
656 if(y){
657 if(prev_xc->x <= x)
658 prev_xc++;
659 if(prev_xc->x == x + 1)
660 rt= prev_xc->coeff;
661 else
662 rt=0;
0cea8a03 663 }
4536c8e6
JM
664 if(parent_xc){
665 if(x>>1 > parent_xc->x){
666 parent_xc++;
ff765159 667 }
4536c8e6
JM
668 if(x>>1 == parent_xc->x){
669 p= parent_xc->coeff;
78486403 670 }
4536c8e6
JM
671 }
672 if(/*ll|*/l|lt|t|rt|p){
673 int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
6b2f6646 674
4536c8e6
JM
675 v=get_rac(&s->c, &b->state[0][context]);
676 if(v){
677 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
678 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
115329f1 679
4536c8e6
JM
680 xc->x=x;
681 (xc++)->coeff= v;
682 }
683 }else{
684 if(!run){
685 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
686 else run= INT_MAX;
687 v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
688 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
689
690 xc->x=x;
691 (xc++)->coeff= v;
791e7b83 692 }else{
4536c8e6
JM
693 int max_run;
694 run--;
695 v=0;
696
697 if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
698 else max_run= FFMIN(run, w-x-1);
699 if(parent_xc)
700 max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
701 x+= max_run;
702 run-= max_run;
791e7b83 703 }
7b49c309 704 }
4536c8e6
JM
705 }
706 (xc++)->x= w+1; //end marker
707 prev_xc= prev2_xc;
708 prev2_xc= xc;
115329f1 709
4536c8e6
JM
710 if(parent_xc){
711 if(y&1){
712 while(parent_xc->x != parent->width+1)
cbb1d2b1 713 parent_xc++;
4536c8e6
JM
714 parent_xc++;
715 prev_parent_xc= parent_xc;
716 }else{
717 parent_xc= prev_parent_xc;
791e7b83
MN
718 }
719 }
4536c8e6 720 }
a0d1931c 721
4536c8e6 722 (xc++)->x= w+1; //end marker
a0d1931c
Y
723}
724
725static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
726 const int w= b->width;
62ab0b78 727 int y;
f66e4f5f 728 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
c97de57c 729 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
a0d1931c
Y
730 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
731 int new_index = 0;
115329f1 732
d593e329 733 if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){
a0d1931c
Y
734 qadd= 0;
735 qmul= 1<<QEXPSHIFT;
736 }
737
738 /* If we are on the second or later slice, restore our index. */
739 if (start_y != 0)
740 new_index = save_state[0];
741
115329f1 742
a0d1931c
Y
743 for(y=start_y; y<h; y++){
744 int x = 0;
745 int v;
d593e329
MN
746 IDWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
747 memset(line, 0, b->width*sizeof(IDWTELEM));
a0d1931c
Y
748 v = b->x_coeff[new_index].coeff;
749 x = b->x_coeff[new_index++].x;
ef3dfbd4 750 while(x < w){
538a3841
MN
751 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
752 register int u= -(v&1);
753 line[x] = (t^u) - u;
754
a0d1931c
Y
755 v = b->x_coeff[new_index].coeff;
756 x = b->x_coeff[new_index++].x;
757 }
791e7b83 758 }
115329f1 759
a0d1931c
Y
760 /* Save our variables for the next slice. */
761 save_state[0] = new_index;
115329f1 762
a0d1931c 763 return;
791e7b83
MN
764}
765
396a5e68 766static void reset_contexts(SnowContext *s){ //FIXME better initial contexts
791e7b83
MN
767 int plane_index, level, orientation;
768
19aa028d 769 for(plane_index=0; plane_index<3; plane_index++){
4f90f33a 770 for(level=0; level<MAX_DECOMPOSITIONS; level++){
791e7b83 771 for(orientation=level ? 1:0; orientation<4; orientation++){
28869757 772 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
791e7b83
MN
773 }
774 }
775 }
28869757
MN
776 memset(s->header_state, MID_STATE, sizeof(s->header_state));
777 memset(s->block_state, MID_STATE, sizeof(s->block_state));
155ec6ed
MN
778}
779
780static int alloc_blocks(SnowContext *s){
781 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
782 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
115329f1 783
155ec6ed
MN
784 s->b_width = w;
785 s->b_height= h;
115329f1 786
dc7f45a0 787 av_free(s->block);
155ec6ed
MN
788 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
789 return 0;
790}
791
28869757
MN
792static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
793 uint8_t *bytestream= d->bytestream;
794 uint8_t *bytestream_start= d->bytestream_start;
155ec6ed 795 *d= *s;
28869757
MN
796 d->bytestream= bytestream;
797 d->bytestream_start= bytestream_start;
155ec6ed
MN
798}
799
8c36eaaa 800static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
155ec6ed
MN
801 const int w= s->b_width << s->block_max_depth;
802 const int rem_depth= s->block_max_depth - level;
803 const int index= (x + y*w) << rem_depth;
804 const int block_w= 1<<rem_depth;
805 BlockNode block;
806 int i,j;
115329f1 807
155ec6ed
MN
808 block.color[0]= l;
809 block.color[1]= cb;
810 block.color[2]= cr;
811 block.mx= mx;
812 block.my= my;
8c36eaaa 813 block.ref= ref;
155ec6ed
MN
814 block.type= type;
815 block.level= level;
816
817 for(j=0; j<block_w; j++){
818 for(i=0; i<block_w; i++){
819 s->block[index + i + j*w]= block;
820 }
821 }
822}
823
824static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
825 const int offset[3]= {
826 y*c-> stride + x,
827 ((y*c->uvstride + x)>>1),
828 ((y*c->uvstride + x)>>1),
829 };
830 int i;
831 for(i=0; i<3; i++){
832 c->src[0][i]= src [i];
833 c->ref[0][i]= ref [i] + offset[i];
834 }
835 assert(!ref_index);
836}
837
85fc0e75 838static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
aadcc5ce 839 const BlockNode *left, const BlockNode *top, const BlockNode *tr){
85fc0e75
LM
840 if(s->ref_frames == 1){
841 *mx = mid_pred(left->mx, top->mx, tr->mx);
842 *my = mid_pred(left->my, top->my, tr->my);
843 }else{
844 const int *scale = scale_mv_ref[ref];
6884c36c
PI
845 *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8,
846 (top ->mx * scale[top ->ref] + 128) >>8,
847 (tr ->mx * scale[tr ->ref] + 128) >>8);
848 *my = mid_pred((left->my * scale[left->ref] + 128) >>8,
849 (top ->my * scale[top ->ref] + 128) >>8,
850 (tr ->my * scale[tr ->ref] + 128) >>8);
85fc0e75
LM
851 }
852}
853
d773d855
DB
854static av_always_inline int same_block(BlockNode *a, BlockNode *b){
855 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
856 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
857 }else{
858 return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
859 }
860}
155ec6ed 861
d773d855
DB
862static void decode_q_branch(SnowContext *s, int level, int x, int y){
863 const int w= s->b_width << s->block_max_depth;
155ec6ed
MN
864 const int rem_depth= s->block_max_depth - level;
865 const int index= (x + y*w) << rem_depth;
155ec6ed 866 int trx= (x+1)<<rem_depth;
aadcc5ce
PI
867 const BlockNode *left = x ? &s->block[index-1] : &null_block;
868 const BlockNode *top = y ? &s->block[index-w] : &null_block;
aadcc5ce
PI
869 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
870 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
155ec6ed
MN
871 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
872
155ec6ed 873 if(s->keyframe){
d773d855
DB
874 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
875 return;
155ec6ed
MN
876 }
877
28869757 878 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
1e90b34f 879 int type, mx, my;
155ec6ed
MN
880 int l = left->color[0];
881 int cb= left->color[1];
882 int cr= left->color[2];
8c36eaaa
LM
883 int ref = 0;
884 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
c26abfa5
DB
885 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
886 int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
115329f1 887
28869757 888 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
155ec6ed
MN
889
890 if(type){
85fc0e75 891 pred_mv(s, &mx, &my, 0, left, top, tr);
155ec6ed
MN
892 l += get_symbol(&s->c, &s->block_state[32], 1);
893 cb+= get_symbol(&s->c, &s->block_state[64], 1);
894 cr+= get_symbol(&s->c, &s->block_state[96], 1);
895 }else{
8c36eaaa
LM
896 if(s->ref_frames > 1)
897 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
85fc0e75 898 pred_mv(s, &mx, &my, ref, left, top, tr);
8c36eaaa
LM
899 mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
900 my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
155ec6ed 901 }
8c36eaaa 902 set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
155ec6ed
MN
903 }else{
904 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
905 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
906 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
907 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
908 }
909}
910
d773d855 911static void decode_blocks(SnowContext *s){
155ec6ed
MN
912 int x, y;
913 int w= s->b_width;
914 int h= s->b_height;
915
916 for(y=0; y<h; y++){
917 for(x=0; x<w; x++){
918 decode_q_branch(s, 0, x, y);
919 }
920 }
791e7b83
MN
921}
922
5262f7ed 923static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, int stride, int b_w, int b_h, int dx, int dy){
7942269b 924 static const uint8_t weight[64]={
a68ca08e
MN
925 8,7,6,5,4,3,2,1,
926 7,7,0,0,0,0,0,1,
927 6,0,6,0,0,0,2,0,
928 5,0,0,5,0,3,0,0,
929 4,0,0,0,4,0,0,0,
930 3,0,0,5,0,3,0,0,
931 2,0,6,0,0,0,2,0,
932 1,7,0,0,0,0,0,1,
933 };
934
7942269b 935 static const uint8_t brane[256]={
a68ca08e
MN
936 0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
937 0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
938 0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
939 0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
940 0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
941 0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
942 0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
943 0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
944 0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
945 0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
946 0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
947 0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
948 0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
949 0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
950 0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
951 0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
952 };
953
7942269b 954 static const uint8_t needs[16]={
a68ca08e
MN
955 0,1,0,0,
956 2,4,2,0,
957 0,1,0,0,
958 15
959 };
960
961 int x, y, b, r, l;
61d6e445
MN
962 int16_t tmpIt [64*(32+HTAPS_MAX)];
963 uint8_t tmp2t[3][stride*(32+HTAPS_MAX)];
a68ca08e
MN
964 int16_t *tmpI= tmpIt;
965 uint8_t *tmp2= tmp2t[0];
f0a70840 966 const uint8_t *hpel[11];
a68ca08e
MN
967 assert(dx<16 && dy<16);
968 r= brane[dx + 16*dy]&15;
969 l= brane[dx + 16*dy]>>4;
970
971 b= needs[l] | needs[r];
7d7f57d9
MN
972 if(p && !p->diag_mc)
973 b= 15;
a68ca08e
MN
974
975 if(b&5){
61d6e445 976 for(y=0; y < b_h+HTAPS_MAX-1; y++){
65dc0f53 977 for(x=0; x < b_w; x++){
61d6e445
MN
978 int a_1=src[x + HTAPS_MAX/2-4];
979 int a0= src[x + HTAPS_MAX/2-3];
980 int a1= src[x + HTAPS_MAX/2-2];
981 int a2= src[x + HTAPS_MAX/2-1];
982 int a3= src[x + HTAPS_MAX/2+0];
983 int a4= src[x + HTAPS_MAX/2+1];
984 int a5= src[x + HTAPS_MAX/2+2];
985 int a6= src[x + HTAPS_MAX/2+3];
7d7f57d9
MN
986 int am=0;
987 if(!p || p->fast_mc){
988 am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
989 tmpI[x]= am;
990 am= (am+16)>>5;
991 }else{
992 am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
993 tmpI[x]= am;
994 am= (am+32)>>6;
995 }
791e7b83 996
65dc0f53
MN
997 if(am&(~255)) am= ~(am>>31);
998 tmp2[x]= am;
999 }
1000 tmpI+= 64;
1001 tmp2+= stride;
1002 src += stride;
791e7b83 1003 }
65dc0f53 1004 src -= stride*y;
a68ca08e 1005 }
61d6e445 1006 src += HTAPS_MAX/2 - 1;
a68ca08e 1007 tmp2= tmp2t[1];
115329f1 1008
a68ca08e 1009 if(b&2){
65dc0f53
MN
1010 for(y=0; y < b_h; y++){
1011 for(x=0; x < b_w+1; x++){
61d6e445
MN
1012 int a_1=src[x + (HTAPS_MAX/2-4)*stride];
1013 int a0= src[x + (HTAPS_MAX/2-3)*stride];
1014 int a1= src[x + (HTAPS_MAX/2-2)*stride];
1015 int a2= src[x + (HTAPS_MAX/2-1)*stride];
1016 int a3= src[x + (HTAPS_MAX/2+0)*stride];
1017 int a4= src[x + (HTAPS_MAX/2+1)*stride];
1018 int a5= src[x + (HTAPS_MAX/2+2)*stride];
1019 int a6= src[x + (HTAPS_MAX/2+3)*stride];
7d7f57d9
MN
1020 int am=0;
1021 if(!p || p->fast_mc)
1022 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
1023 else
1024 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
791e7b83 1025
65dc0f53
MN
1026 if(am&(~255)) am= ~(am>>31);
1027 tmp2[x]= am;
1028 }
1029 src += stride;
1030 tmp2+= stride;
a68ca08e 1031 }
65dc0f53 1032 src -= stride*y;
a68ca08e 1033 }
61d6e445 1034 src += stride*(HTAPS_MAX/2 - 1);
a68ca08e
MN
1035 tmp2= tmp2t[2];
1036 tmpI= tmpIt;
1037 if(b&4){
1038 for(y=0; y < b_h; y++){
1039 for(x=0; x < b_w; x++){
61d6e445
MN
1040 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
1041 int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
1042 int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
1043 int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
1044 int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
1045 int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
1046 int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
1047 int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
7d7f57d9
MN
1048 int am=0;
1049 if(!p || p->fast_mc)
1050 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
1051 else
1052 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
a68ca08e
MN
1053 if(am&(~255)) am= ~(am>>31);
1054 tmp2[x]= am;
1055 }
1056 tmpI+= 64;
1057 tmp2+= stride;
1058 }
1059 }
115329f1 1060
a68ca08e 1061 hpel[ 0]= src;
61d6e445 1062 hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1);
a68ca08e
MN
1063 hpel[ 2]= src + 1;
1064
1065 hpel[ 4]= tmp2t[1];
1066 hpel[ 5]= tmp2t[2];
1067 hpel[ 6]= tmp2t[1] + 1;
1068
1069 hpel[ 8]= src + stride;
1070 hpel[ 9]= hpel[1] + stride;
1071 hpel[10]= hpel[8] + 1;
1072
1073 if(b==15){
f0a70840
BC
1074 const uint8_t *src1= hpel[dx/8 + dy/8*4 ];
1075 const uint8_t *src2= hpel[dx/8 + dy/8*4+1];
1076 const uint8_t *src3= hpel[dx/8 + dy/8*4+4];
1077 const uint8_t *src4= hpel[dx/8 + dy/8*4+5];
a68ca08e
MN
1078 dx&=7;
1079 dy&=7;
1080 for(y=0; y < b_h; y++){
1081 for(x=0; x < b_w; x++){
1082 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
1083 (8-dx)* dy *src3[x] + dx* dy *src4[x]+32)>>6;
1084 }
1085 src1+=stride;
1086 src2+=stride;
1087 src3+=stride;
1088 src4+=stride;
1089 dst +=stride;
1090 }
1091 }else{
f0a70840
BC
1092 const uint8_t *src1= hpel[l];
1093 const uint8_t *src2= hpel[r];
a68ca08e
MN
1094 int a= weight[((dx&7) + (8*(dy&7)))];
1095 int b= 8-a;
1096 for(y=0; y < b_h; y++){
1097 for(x=0; x < b_w; x++){
1098 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
1099 }
1100 src1+=stride;
1101 src2+=stride;
1102 dst +=stride;
791e7b83
MN
1103 }
1104 }
1105}
1106
791e7b83 1107#define mca(dx,dy,b_w)\
bad700e3 1108static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
791e7b83 1109 assert(h==b_w);\
5262f7ed 1110 mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, stride, b_w, b_w, dx, dy);\
791e7b83
MN
1111}
1112
1113mca( 0, 0,16)
1114mca( 8, 0,16)
1115mca( 0, 8,16)
1116mca( 8, 8,16)
d92b5807
MN
1117mca( 0, 0,8)
1118mca( 8, 0,8)
1119mca( 0, 8,8)
1120mca( 8, 8,8)
791e7b83 1121
8c36eaaa 1122static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
51d6a3cf 1123 if(block->type & BLOCK_INTRA){
ff158dc9 1124 int x, y;
2692ceab
MN
1125 const int color = block->color[plane_index];
1126 const int color4= color*0x01010101;
1015631b
LM
1127 if(b_w==32){
1128 for(y=0; y < b_h; y++){
1129 *(uint32_t*)&dst[0 + y*stride]= color4;
1130 *(uint32_t*)&dst[4 + y*stride]= color4;
1131 *(uint32_t*)&dst[8 + y*stride]= color4;
1132 *(uint32_t*)&dst[12+ y*stride]= color4;
1133 *(uint32_t*)&dst[16+ y*stride]= color4;
1134 *(uint32_t*)&dst[20+ y*stride]= color4;
1135 *(uint32_t*)&dst[24+ y*stride]= color4;
1136 *(uint32_t*)&dst[28+ y*stride]= color4;
1137 }
1138 }else if(b_w==16){
2692ceab
MN
1139 for(y=0; y < b_h; y++){
1140 *(uint32_t*)&dst[0 + y*stride]= color4;
1141 *(uint32_t*)&dst[4 + y*stride]= color4;
1142 *(uint32_t*)&dst[8 + y*stride]= color4;
1143 *(uint32_t*)&dst[12+ y*stride]= color4;
1144 }
1145 }else if(b_w==8){
1146 for(y=0; y < b_h; y++){
1147 *(uint32_t*)&dst[0 + y*stride]= color4;
1148 *(uint32_t*)&dst[4 + y*stride]= color4;
1149 }
1150 }else if(b_w==4){
1151 for(y=0; y < b_h; y++){
1152 *(uint32_t*)&dst[0 + y*stride]= color4;
1153 }
1154 }else{
1155 for(y=0; y < b_h; y++){
1156 for(x=0; x < b_w; x++){
1157 dst[x + y*stride]= color;
1158 }
ff158dc9
MN
1159 }
1160 }
1161 }else{
8c36eaaa 1162 uint8_t *src= s->last_picture[block->ref].data[plane_index];
ff158dc9
MN
1163 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
1164 int mx= block->mx*scale;
1165 int my= block->my*scale;
ec697587
MN
1166 const int dx= mx&15;
1167 const int dy= my&15;
80e44bc3 1168 const int tab_index= 3 - (b_w>>2) + (b_w>>4);
61d6e445
MN
1169 sx += (mx>>4) - (HTAPS_MAX/2-1);
1170 sy += (my>>4) - (HTAPS_MAX/2-1);
ff158dc9 1171 src += sx + sy*stride;
61d6e445
MN
1172 if( (unsigned)sx >= w - b_w - (HTAPS_MAX-2)
1173 || (unsigned)sy >= h - b_h - (HTAPS_MAX-2)){
2e279598 1174 s->dsp.emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h);
ff158dc9
MN
1175 src= tmp + MB_SIZE;
1176 }
87f20c2f
MN
1177// assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
1178// assert(!(b_w&(b_w-1)));
2692ceab 1179 assert(b_w>1 && b_h>1);
89438028 1180 assert((tab_index>=0 && tab_index<4) || b_w==32);
7d7f57d9 1181 if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc )
5262f7ed 1182 mc_block(&s->plane[plane_index], dst, src, stride, b_w, b_h, dx, dy);
1015631b
LM
1183 else if(b_w==32){
1184 int y;
1185 for(y=0; y<b_h; y+=16){
7d7f57d9
MN
1186 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
1187 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
1015631b
LM
1188 }
1189 }else if(b_w==b_h)
7d7f57d9 1190 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
2692ceab 1191 else if(b_w==2*b_h){
7d7f57d9
MN
1192 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 3 + 3*stride,stride);
1193 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
2692ceab
MN
1194 }else{
1195 assert(2*b_w==b_h);
7d7f57d9
MN
1196 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 3 + 3*stride ,stride);
1197 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
2692ceab 1198 }
ff158dc9
MN
1199 }
1200}
1201
9dd6c804 1202void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
059715a4
RE
1203 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
1204 int y, x;
d593e329 1205 IDWTELEM * dst;
059715a4 1206 for(y=0; y<b_h; y++){
19032450 1207 //FIXME ugly misuse of obmc_stride
9dd6c804
PI
1208 const uint8_t *obmc1= obmc + y*obmc_stride;
1209 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
1210 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
1211 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
059715a4
RE
1212 dst = slice_buffer_get_line(sb, src_y + y);
1213 for(x=0; x<b_w; x++){
1214 int v= obmc1[x] * block[3][x + y*src_stride]
1215 +obmc2[x] * block[2][x + y*src_stride]
1216 +obmc3[x] * block[1][x + y*src_stride]
1217 +obmc4[x] * block[0][x + y*src_stride];
1218
1219 v <<= 8 - LOG2_OBMC_MAX;
1220 if(FRAC_BITS != 8){
059715a4
RE
1221 v >>= 8 - FRAC_BITS;
1222 }
1223 if(add){
1224 v += dst[x + src_x];
1225 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
1226 if(v&(~255)) v= ~(v>>31);
1227 dst8[x + y*src_stride] = v;
1228 }else{
1229 dst[x + src_x] -= v;
1230 }
1231 }
1232 }
1233}
1234
e6464f8b 1235//FIXME name cleanup (b_w, block_w, b_width stuff)
d593e329 1236static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
a0d1931c
Y
1237 const int b_width = s->b_width << s->block_max_depth;
1238 const int b_height= s->b_height << s->block_max_depth;
1239 const int b_stride= b_width;
1240 BlockNode *lt= &s->block[b_x + b_y*b_stride];
1241 BlockNode *rt= lt+1;
1242 BlockNode *lb= lt+b_stride;
1243 BlockNode *rb= lb+1;
115329f1 1244 uint8_t *block[4];
cc884a35 1245 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
bd2b6b33 1246 uint8_t *tmp = s->scratchbuf;
cc884a35 1247 uint8_t *ptmp;
a0d1931c
Y
1248 int x,y;
1249
1250 if(b_x<0){
1251 lt= rt;
1252 lb= rb;
1253 }else if(b_x + 1 >= b_width){
1254 rt= lt;
1255 rb= lb;
1256 }
1257 if(b_y<0){
1258 lt= lb;
1259 rt= rb;
1260 }else if(b_y + 1 >= b_height){
1261 lb= lt;
1262 rb= rt;
1263 }
115329f1 1264
e6464f8b 1265 if(src_x<0){ //FIXME merge with prev & always round internal width up to *16
a0d1931c
Y
1266 obmc -= src_x;
1267 b_w += src_x;
f7e89c73 1268 if(!sliced && !offset_dst)
1015631b 1269 dst -= src_x;
ff158dc9
MN
1270 src_x=0;
1271 }else if(src_x + b_w > w){
1272 b_w = w - src_x;
1273 }
1274 if(src_y<0){
1275 obmc -= src_y*obmc_stride;
1276 b_h += src_y;
f7e89c73 1277 if(!sliced && !offset_dst)
1015631b 1278 dst -= src_y*dst_stride;
ff158dc9
MN
1279 src_y=0;
1280 }else if(src_y + b_h> h){
1281 b_h = h - src_y;
791e7b83 1282 }
115329f1 1283
ff158dc9 1284 if(b_w<=0 || b_h<=0) return;
155ec6ed 1285
94ae6788
DB
1286 assert(src_stride > 2*MB_SIZE + 5);
1287
f7e89c73 1288 if(!sliced && offset_dst)
1015631b 1289 dst += src_x + src_y*dst_stride;
715a97f0 1290 dst8+= src_x + src_y*src_stride;
ff158dc9
MN
1291// src += src_x + src_y*src_stride;
1292
cc884a35
MN
1293 ptmp= tmp + 3*tmp_step;
1294 block[0]= ptmp;
1295 ptmp+=tmp_step;
8c36eaaa 1296 pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
ff158dc9
MN
1297
1298 if(same_block(lt, rt)){
1299 block[1]= block[0];
791e7b83 1300 }else{
cc884a35
MN
1301 block[1]= ptmp;
1302 ptmp+=tmp_step;
8c36eaaa 1303 pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
ff158dc9 1304 }
115329f1 1305
ff158dc9
MN
1306 if(same_block(lt, lb)){
1307 block[2]= block[0];
1308 }else if(same_block(rt, lb)){
1309 block[2]= block[1];
1310 }else{
cc884a35
MN
1311 block[2]= ptmp;
1312 ptmp+=tmp_step;
8c36eaaa 1313 pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
ff158dc9 1314 }
791e7b83 1315
ff158dc9
MN
1316 if(same_block(lt, rb) ){
1317 block[3]= block[0];
1318 }else if(same_block(rt, rb)){
1319 block[3]= block[1];
1320 }else if(same_block(lb, rb)){
1321 block[3]= block[2];
1322 }else{
cc884a35 1323 block[3]= ptmp;
8c36eaaa 1324 pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
ff158dc9
MN
1325 }
1326#if 0
1327 for(y=0; y<b_h; y++){
1328 for(x=0; x<b_w; x++){
1329 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
1330 if(add) dst[x + y*dst_stride] += v;
1331 else dst[x + y*dst_stride] -= v;
1332 }
1333 }
1334 for(y=0; y<b_h; y++){
1335 uint8_t *obmc2= obmc + (obmc_stride>>1);
1336 for(x=0; x<b_w; x++){
1337 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
1338 if(add) dst[x + y*dst_stride] += v;
1339 else dst[x + y*dst_stride] -= v;
1340 }
1341 }
1342 for(y=0; y<b_h; y++){
1343 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
1344 for(x=0; x<b_w; x++){
1345 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
1346 if(add) dst[x + y*dst_stride] += v;
1347 else dst[x + y*dst_stride] -= v;
1348 }
1349 }
1350 for(y=0; y<b_h; y++){
1351 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
1352 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
1353 for(x=0; x<b_w; x++){
1354 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
1355 if(add) dst[x + y*dst_stride] += v;
1356 else dst[x + y*dst_stride] -= v;
1357 }
1358 }
1359#else
f7e89c73 1360 if(sliced){
05aec7bb 1361 s->dwt.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
e73e4e75
DB
1362 }else{
1363 for(y=0; y<b_h; y++){
1364 //FIXME ugly misuse of obmc_stride
1365 const uint8_t *obmc1= obmc + y*obmc_stride;
1366 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
1367 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
1368 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
1369 for(x=0; x<b_w; x++){
1370 int v= obmc1[x] * block[3][x + y*src_stride]
1371 +obmc2[x] * block[2][x + y*src_stride]
1372 +obmc3[x] * block[1][x + y*src_stride]
1373 +obmc4[x] * block[0][x + y*src_stride];
1374
1375 v <<= 8 - LOG2_OBMC_MAX;
1376 if(FRAC_BITS != 8){
1377 v >>= 8 - FRAC_BITS;
1378 }
1379 if(add){
1380 v += dst[x + y*dst_stride];
1381 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
1382 if(v&(~255)) v= ~(v>>31);
1383 dst8[x + y*src_stride] = v;
1384 }else{
1385 dst[x + y*dst_stride] -= v;
1386 }
715a97f0 1387 }
791e7b83
MN
1388 }
1389 }
96e2fbf2 1390#endif /* 0 */
791e7b83
MN
1391}
1392
d593e329 1393static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){
a0d1931c
Y
1394 Plane *p= &s->plane[plane_index];
1395 const int mb_w= s->b_width << s->block_max_depth;
1396 const int mb_h= s->b_height << s->block_max_depth;
1397 int x, y, mb_x;
1398 int block_size = MB_SIZE >> s->block_max_depth;
1399 int block_w = plane_index ? block_size/2 : block_size;
1400 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
1401 int obmc_stride= plane_index ? block_size : 2*block_size;
1402 int ref_stride= s->current_picture.linesize[plane_index];
a0d1931c
Y
1403 uint8_t *dst8= s->current_picture.data[plane_index];
1404 int w= p->width;
1405 int h= p->height;
115329f1 1406
a0d1931c
Y
1407 if(s->keyframe || (s->avctx->debug&512)){
1408 if(mb_y==mb_h)
1409 return;
1410
1411 if(add){
ef3dfbd4 1412 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
a0d1931c 1413// DWTELEM * line = slice_buffer_get_line(sb, y);
d593e329 1414 IDWTELEM * line = sb->line[y];
ef3dfbd4 1415 for(x=0; x<w; x++){
a0d1931c
Y
1416// int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
1417 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
1418 v >>= FRAC_BITS;
1419 if(v&(~255)) v= ~(v>>31);
1420 dst8[x + y*ref_stride]= v;
1421 }
1422 }
1423 }else{
ef3dfbd4 1424 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
a0d1931c 1425// DWTELEM * line = slice_buffer_get_line(sb, y);
d593e329 1426 IDWTELEM * line = sb->line[y];
ef3dfbd4 1427 for(x=0; x<w; x++){
a0d1931c
Y
1428 line[x] -= 128 << FRAC_BITS;
1429// buf[x + y*w]-= 128<<FRAC_BITS;
1430 }
1431 }
1432 }
1433
1434 return;
1435 }
115329f1 1436
e73e4e75
DB
1437 for(mb_x=0; mb_x<=mb_w; mb_x++){
1438 add_yblock(s, 1, sb, old_buffer, dst8, obmc,
1439 block_w*mb_x - block_w/2,
1440 block_w*mb_y - block_w/2,
1441 block_w, block_w,
1442 w, h,
1443 w, ref_stride, obmc_stride,
1444 mb_x - 1, mb_y - 1,
1445 add, 0, plane_index);
1446 }
a0d1931c
Y
1447}
1448
d593e329 1449static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){
791e7b83 1450 Plane *p= &s->plane[plane_index];
155ec6ed
MN
1451 const int mb_w= s->b_width << s->block_max_depth;
1452 const int mb_h= s->b_height << s->block_max_depth;
f9e6ebf7 1453 int x, y, mb_x;
155ec6ed
MN
1454 int block_size = MB_SIZE >> s->block_max_depth;
1455 int block_w = plane_index ? block_size/2 : block_size;
ff158dc9 1456 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
51d6a3cf 1457 const int obmc_stride= plane_index ? block_size : 2*block_size;
715a97f0 1458 int ref_stride= s->current_picture.linesize[plane_index];
715a97f0 1459 uint8_t *dst8= s->current_picture.data[plane_index];
791e7b83
MN
1460 int w= p->width;
1461 int h= p->height;
115329f1 1462
ff158dc9 1463 if(s->keyframe || (s->avctx->debug&512)){
f9e6ebf7
LM
1464 if(mb_y==mb_h)
1465 return;
1466
715a97f0 1467 if(add){
86e59cc0 1468 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
715a97f0
MN
1469 for(x=0; x<w; x++){
1470 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
1471 v >>= FRAC_BITS;
1472 if(v&(~255)) v= ~(v>>31);
1473 dst8[x + y*ref_stride]= v;
1474 }
1475 }
1476 }else{
86e59cc0 1477 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
715a97f0
MN
1478 for(x=0; x<w; x++){
1479 buf[x + y*w]-= 128<<FRAC_BITS;
1480 }
ff158dc9 1481 }
791e7b83 1482 }
ff158dc9
MN
1483
1484 return;
791e7b83 1485 }
115329f1 1486
94ae6788
DB
1487 for(mb_x=0; mb_x<=mb_w; mb_x++){
1488 add_yblock(s, 0, NULL, buf, dst8, obmc,
1489 block_w*mb_x - block_w/2,
1490 block_w*mb_y - block_w/2,
1491 block_w, block_w,
1492 w, h,
1493 w, ref_stride, obmc_stride,
1494 mb_x - 1, mb_y - 1,
1495 add, 1, plane_index);
1496 }
f9e6ebf7
LM
1497}
1498
d593e329 1499static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){
f9e6ebf7
LM
1500 const int mb_h= s->b_height << s->block_max_depth;
1501 int mb_y;
1502 for(mb_y=0; mb_y<=mb_h; mb_y++)
1503 predict_slice(s, buf, plane_index, add, mb_y);
791e7b83
MN
1504}
1505
d773d855
DB
1506static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){
1507 const int w= b->width;
1508 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
1509 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1510 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1511 int x,y;
51d6a3cf 1512
d773d855 1513 if(s->qlog == LOSSLESS_QLOG) return;
51d6a3cf 1514
d773d855
DB
1515 for(y=start_y; y<end_y; y++){
1516// DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
1517 IDWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
1518 for(x=0; x<w; x++){
1519 int i= line[x];
1520 if(i<0){
1521 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
1522 }else if(i>0){
1523 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT));
1524 }
1525 }
1526 }
1527}
51d6a3cf 1528
d773d855
DB
/*
 * Add a spatial prediction to every coefficient of rows [start_y, end_y) of
 * subband b, in place, reading the rows through the slice buffer.
 *
 * The predictor for a sample is built from samples that were already updated:
 * the left neighbour in the same row and samples of the previous row (prev).
 * In this body the parameters src, stride and inverse are never referenced.
 */
1529static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
1530 const int w= b->width;
1531 int x,y;
51d6a3cf 1532
d773d855
DB
1533 IDWTELEM * line=0; // silence silly "could be used without having been initialized" warning
1534 IDWTELEM * prev;
51d6a3cf 1535
d773d855
DB
/* When not starting at the top, fetch row start_y-1 so the first iteration has a valid prev. */
1536 if (start_y != 0)
1537 line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
1538
1539 for(y=start_y; y<end_y; y++){
1540 prev = line;
1541// line = slice_buffer_get_line_from_address(sb, src + (y * stride));
1542 line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
1543 for(x=0; x<w; x++){
1544 if(x){
1545 if(use_median){
/* median of left, top and top-right; falls back to left on row 0 or in the last column */
1546 if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
1547 else line[x] += line[x - 1];
1548 }else{
/* median of left, top and the gradient left + top - top-left; falls back to left on row 0 */
1549 if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
1550 else line[x] += line[x - 1];
1551 }
1552 }else{
/* first column: only the sample above is available; the very first sample is left as is */
1553 if(y) line[x] += prev[x];
51d6a3cf
MN
1554 }
1555 }
1556 }
51d6a3cf
MN
1557}
1558
d773d855
DB
1559static void decode_qlogs(SnowContext *s){
1560 int plane_index, level, orientation;
b104969f 1561
d773d855
DB
1562 for(plane_index=0; plane_index<3; plane_index++){
1563 for(level=0; level<s->spatial_decomposition_count; level++){
1564 for(orientation=level ? 1:0; orientation<4; orientation++){
1565 int q;
1566 if (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
1567 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
1568 else q= get_symbol(&s->c, s->header_state, 1);
1569 s->plane[plane_index].band[level][orientation].qlog= q;
1570 }
1571 }
85fc0e75 1572 }
b104969f
LM
1573}
1574
d773d855
DB
/*
 * GET_S(dst, check): read one symbol with get_symbol(&s->c, s->header_state, 0)
 * into the caller's local variable `tmp`, evaluate `check` (which is expected
 * to test `tmp`), and store tmp into dst only if the check holds.
 *
 * If the check fails an error is logged and the macro executes `return -1;`
 * in the ENCLOSING function, leaving dst unmodified. The expansion is several
 * statements, not a single expression, and requires `s` and `tmp` in scope.
 */
1575#define GET_S(dst, check) \
1576 tmp= get_symbol(&s->c, s->header_state, 0);\
1577 if(!(check)){\
1578 av_log(s->avctx, AV_LOG_ERROR, "Error " #dst " is %d\n", tmp);\
1579 return -1;\
1580 }\
1581 dst= tmp;
1015631b 1582
d773d855
DB
1583static int decode_header(SnowContext *s){
1584 int plane_index, tmp;
1585 uint8_t kstate[32];
1015631b 1586
d773d855
DB
1587 memset(kstate, MID_STATE, sizeof(kstate));
1588
1589 s->keyframe= get_rac(&s->c, kstate);
1590 if(s->keyframe || s->always_reset){
1591 reset_contexts(s);
1592 s->spatial_decomposition_type=
1593 s->qlog=
1594 s->qbias=
1595 s->mv_scale=
1596 s->block_max_depth= 0;
1015631b 1597 }
d773d855
DB
1598 if(s->keyframe){
1599 GET_S(s->version, tmp <= 0U)
1600 s->always_reset= get_rac(&s->c, s->header_state);
1601 s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
1602 s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
1603 GET_S(s->spatial_decomposition_count, 0 < tmp && tmp <= MAX_DECOMPOSITIONS)
1604 s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
1605 s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
1606 s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
1607 s->spatial_scalability= get_rac(&s->c, s->header_state);
1608// s->rate_scalability= get_rac(&s->c, s->header_state);
1609 GET_S(s->max_ref_frames, tmp < (unsigned)MAX_REF_FRAMES)
1610 s->max_ref_frames++;
51d6a3cf 1611
d773d855 1612 decode_qlogs(s);
561a18d3
RE
1613 }
1614
d773d855
DB
1615 if(!s->keyframe){
1616 if(get_rac(&s->c, s->header_state)){
1617 for(plane_index=0; plane_index<2; plane_index++){
1618 int htaps, i, sum=0;
1619 Plane *p= &s->plane[plane_index];
1620 p->diag_mc= get_rac(&s->c, s->header_state);
1621 htaps= get_symbol(&s->c, s->header_state, 0)*2 + 2;
1622 if((unsigned)htaps > HTAPS_MAX || htaps==0)
1623 return -1;
1624 p->htaps= htaps;
1625 for(i= htaps/2; i; i--){
1626 p->hcoeff[i]= get_symbol(&s->c, s->header_state, 0) * (1-2*(i&1));
1627 sum += p->hcoeff[i];
1628 }
1629 p->hcoeff[0]= 32-sum;
1630 }
1631 s->plane[2].diag_mc= s->plane[1].diag_mc;
1632 s->plane[2].htaps = s->plane[1].htaps;
1633 memcpy(s->plane[2].hcoeff, s->plane[1].hcoeff, sizeof(s->plane[1].hcoeff));
1015631b 1634 }
d773d855
DB
1635 if(get_rac(&s->c, s->header_state)){
1636 GET_S(s->spatial_decomposition_count, 0 < tmp && tmp <= MAX_DECOMPOSITIONS)
1637 decode_qlogs(s);
b104969f
LM
1638 }
1639 }
b104969f 1640
d773d855
DB
1641 s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1);
1642 if(s->spatial_decomposition_type > 1U){
1643 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
1644 return -1;
1645 }
1646 if(FFMIN(s->avctx-> width>>s->chroma_h_shift,
1647 s->avctx->height>>s->chroma_v_shift) >> (s->spatial_decomposition_count-1) <= 0){
1648 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_count %d too large for size", s->spatial_decomposition_count);
1649 return -1;
1650 }
b104969f 1651
d773d855
DB
1652 s->qlog += get_symbol(&s->c, s->header_state, 1);
1653 s->mv_scale += get_symbol(&s->c, s->header_state, 1);
1654 s->qbias += get_symbol(&s->c, s->header_state, 1);
1655 s->block_max_depth+= get_symbol(&s->c, s->header_state, 1);
1656 if(s->block_max_depth > 1 || s->block_max_depth < 0){
1657 av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
1658 s->block_max_depth= 0;
1659 return -1;
51d6a3cf
MN
1660 }
1661
d773d855
DB
1662 return 0;
1663}
b104969f 1664
d773d855
DB
1665static void init_qexp(void){
1666 int i;
1667 double v=128;
1668
1669 for(i=0; i<QROOT; i++){
1670 qexp[i]= lrintf(v);
1671 v *= pow(2, 1.0 / QROOT);
b104969f 1672 }
51d6a3cf
MN
1673}
1674
d773d855
DB
1675static av_cold int common_init(AVCodecContext *avctx){
1676 SnowContext *s = avctx->priv_data;
1677 int width, height;
1678 int i, j;
51d6a3cf 1679
d773d855
DB
1680 s->avctx= avctx;
1681 s->max_ref_frames=1; //just make sure its not an invalid value in case of no initial keyframe
51d6a3cf 1682
d773d855 1683 dsputil_init(&s->dsp, avctx);
05aec7bb 1684 ff_dwt_init(&s->dwt);
51d6a3cf 1685
d773d855
DB
1686#define mcf(dx,dy)\
1687 s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\
1688 s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
1689 s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
1690 s->dsp.put_qpel_pixels_tab [1][dy+dx/4]=\
1691 s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
1692 s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
51d6a3cf 1693
d773d855
DB
1694 mcf( 0, 0)
1695 mcf( 4, 0)
1696 mcf( 8, 0)
1697 mcf(12, 0)
1698 mcf( 0, 4)
1699 mcf( 4, 4)
1700 mcf( 8, 4)
1701 mcf(12, 4)
1702 mcf( 0, 8)
1703 mcf( 4, 8)
1704 mcf( 8, 8)
1705 mcf(12, 8)
1706 mcf( 0,12)
1707 mcf( 4,12)
1708 mcf( 8,12)
1709 mcf(12,12)
51d6a3cf 1710
d773d855
DB
1711#define mcfh(dx,dy)\
1712 s->dsp.put_pixels_tab [0][dy/4+dx/8]=\
1713 s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
1714 mc_block_hpel ## dx ## dy ## 16;\
1715 s->dsp.put_pixels_tab [1][dy/4+dx/8]=\
1716 s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
1717 mc_block_hpel ## dx ## dy ## 8;
51d6a3cf 1718
d773d855
DB
1719 mcfh(0, 0)
1720 mcfh(8, 0)
1721 mcfh(0, 8)
1722 mcfh(8, 8)
52137f2f 1723
d773d855
DB
1724 if(!qexp[0])
1725 init_qexp();
b104969f 1726
d773d855 1727// dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
b104969f 1728
d773d855
DB
1729 width= s->avctx->width;
1730 height= s->avctx->height;
b104969f 1731
d773d855
DB
1732 s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM));
1733 s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this does not belong here
b104969f 1734
d773d855
DB
1735 for(i=0; i<MAX_REF_FRAMES; i++)
1736 for(j=0; j<MAX_REF_FRAMES; j++)
1737 scale_mv_ref[i][j] = 256*(i+1)/(j+1);
b104969f 1738
d773d855
DB
1739 s->avctx->get_buffer(s->avctx, &s->mconly_picture);
1740 s->scratchbuf = av_malloc(s->mconly_picture.linesize[0]*7*MB_SIZE);
1741
1742 return 0;
b104969f
LM
1743}
1744
d773d855
DB
/*
 * Derive the per-plane and per-subband geometry from the values parsed out of
 * the header (chroma shifts, spatial_decomposition_count) and (re)allocate
 * each subband's x_coeff array.
 *
 * Always returns 0.
 * NOTE(review): the result of av_mallocz() for x_coeff is not checked here.
 */
1745static int common_init_after_header(AVCodecContext *avctx){
1746 SnowContext *s = avctx->priv_data;
1747 int plane_index, level, orientation;
51d6a3cf 1748
d773d855
DB
1749 for(plane_index=0; plane_index<3; plane_index++){
1750 int w= s->avctx->width;
1751 int h= s->avctx->height;
1752
/* planes 1 and 2 are subsampled by the chroma shifts */
1753 if(plane_index){
1754 w>>= s->chroma_h_shift;
1755 h>>= s->chroma_v_shift;
1756 }
1757 s->plane[plane_index].width = w;
1758 s->plane[plane_index].height= h;
1759
/* walk from the highest level index down to 0; w and h are halved (rounding up) after each level */
1760 for(level=s->spatial_decomposition_count-1; level>=0; level--){
1761 for(orientation=level ? 1 : 0; orientation<4; orientation++){
1762 SubBand *b= &s->plane[plane_index].band[level][orientation];
1763
1764 b->buf= s->spatial_dwt_buffer;
1765 b->level= level;
1766 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
1767 b->width = (w + !(orientation&1))>>1;
1768 b->height= (h + !(orientation>1))>>1;
1769
1770 b->stride_line = 1 << (s->spatial_decomposition_count - level);
1771 b->buf_x_offset = 0;
1772 b->buf_y_offset = 0;
1773
/* bit 0 of orientation: band starts in the right half of the row */
1774 if(orientation&1){
1775 b->buf += (w+1)>>1;
1776 b->buf_x_offset = (w+1)>>1;
1777 }
/* orientation 2 and 3: band starts half a band-stride further down */
1778 if(orientation>1){
1779 b->buf += b->stride>>1;
1780 b->buf_y_offset = b->stride_line >> 1;
1781 }
/* ibuf is placed at the same element offset inside the idwt buffer as buf has inside the dwt buffer */
1782 b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);
1783
1784 if(level)
1785 b->parent= &s->plane[plane_index].band[level-1][orientation];
1786 //FIXME avoid this realloc
1787 av_freep(&b->x_coeff);
1788 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
1789 }
1790 w= (w+1)>>1;
1791 h= (h+1)>>1;
1792 }
1793 }
1794
1795 return 0;
1796}
1797
1798#define QUANTIZE2 0
1799
1800#if QUANTIZE2==1
1801#define Q2_STEP 8
1802
/*
 * Accumulate, per score cell, the squared difference between the two
 * plane-sized images r0 and r1 (only compiled when QUANTIZE2==1).
 *
 * The score array is cleared first. Each pixel difference is rounded to a
 * multiple of 16 before squaring. The cell index of a pixel is its position,
 * shifted by the band offset (xo, yo), divided by step and by Q2_STEP.
 * The local `b` is assigned but not used in this body.
 */
1803static void find_sse(SnowContext *s, Plane *p, int *score, int score_stride, IDWTELEM *r0, IDWTELEM *r1, int level, int orientation){
1804 SubBand *b= &p->band[level][orientation];
1805 int x, y;
1806 int xo=0;
1807 int yo=0;
1808 int step= 1 << (s->spatial_decomposition_count - level);
1809
1810 if(orientation&1)
1811 xo= step>>1;
1812 if(orientation&2)
1813 yo= step>>1;
1814
1815 //FIXME bias for nonzero ?
1816 //FIXME optimize
1817 memset(score, 0, sizeof(*score)*score_stride*((p->height + Q2_STEP-1)/Q2_STEP));
1818 for(y=0; y<p->height; y++){
1819 for(x=0; x<p->width; x++){
1820 int sx= (x-xo + step/2) / step / Q2_STEP;
1821 int sy= (y-yo + step/2) / step / Q2_STEP;
1822 int v= r0[x + y*p->width] - r1[x + y*p->width];
1823 assert(sx>=0 && sy>=0 && sx < score_stride);
/* round the difference to the nearest multiple of 16 */
1824 v= ((v+8)>>4)<<4;
1825 score[sx + sy*score_stride] += v*v;
1826 assert(score[sx + sy*score_stride] >= 0);
1827 }
1828 }
1829}
1830
/*
 * Call dequantize() on every subband of plane p, addressing each band inside
 * `buffer` at the same offset its ibuf has inside s->spatial_idwt_buffer
 * (only compiled when QUANTIZE2==1). width and height are not used.
 */
1831static void dequantize_all(SnowContext *s, Plane *p, IDWTELEM *buffer, int width, int height){
1832 int level, orientation;
1833
1834 for(level=0; level<s->spatial_decomposition_count; level++){
1835 for(orientation=level ? 1 : 0; orientation<4; orientation++){
1836 SubBand *b= &p->band[level][orientation];
1837 IDWTELEM *dst= buffer + (b->ibuf - s->spatial_idwt_buffer);
1838
1839 dequantize(s, b, dst, b->stride);
1840 }
1841 }
1842}
1843
/*
 * Forward-transform `buffer`, quantize every subband, then try shrinking
 * quantized coefficients toward zero and keep a change when the resulting
 * error score is not worse than the previous best plus a lambda-derived
 * threshold (only compiled when QUANTIZE2==1).
 *
 * The final quantized coefficients are copied into s->spatial_idwt_buffer.
 *
 * NOTE(review): best_dequant, idwt2_buffer, best_score and score are
 * variable-length arrays on the stack sized from width/height/stride.
 */
1844static void dwt_quantize(SnowContext *s, Plane *p, DWTELEM *buffer, int width, int height, int stride, int type){
1845 int level, orientation, ys, xs, x, y, pass;
1846 IDWTELEM best_dequant[height * stride];
1847 IDWTELEM idwt2_buffer[height * stride];
1848 const int score_stride= (width + 10)/Q2_STEP;
1849 int best_score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
1850 int score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
1851 int threshold= (s->m.lambda * s->m.lambda) >> 6;
1852
1853 //FIXME pass the copy cleanly ?
1854
1855// memcpy(dwt_buffer, buffer, height * stride * sizeof(DWTELEM));
1856 ff_spatial_dwt(buffer, width, height, stride, type, s->spatial_decomposition_count);
1857
/* initial quantization of every band into best_dequant */
1858 for(level=0; level<s->spatial_decomposition_count; level++){
1859 for(orientation=level ? 1 : 0; orientation<4; orientation++){
1860 SubBand *b= &p->band[level][orientation];
1861 IDWTELEM *dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
1862 DWTELEM *src= buffer + (b-> buf - s->spatial_dwt_buffer);
1863 assert(src == b->buf); // code does not depend on this but it is true currently
1864
1865 quantize(s, b, dst, src, b->stride, s->qbias);
1866 }
1867 }
/* single refinement pass; skipped entirely when qbias is 0 */
1868 for(pass=0; pass<1; pass++){
1869 if(s->qbias == 0) //keyframe
1870 continue;
1871 for(level=0; level<s->spatial_decomposition_count; level++){
1872 for(orientation=level ? 1 : 0; orientation<4; orientation++){
1873 SubBand *b= &p->band[level][orientation];
1874 IDWTELEM *dst= idwt2_buffer + (b->ibuf - s->spatial_idwt_buffer);
1875 IDWTELEM *best_dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
1876
/* each (xs, ys) phase selects one coefficient out of every Q2_STEP x Q2_STEP cell */
1877 for(ys= 0; ys<Q2_STEP; ys++){
1878 for(xs= 0; xs<Q2_STEP; xs++){
/* reference score: reconstruct from the current best coefficients */
1879 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
1880 dequantize_all(s, p, idwt2_buffer, width, height);
1881 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
1882 find_sse(s, p, best_score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
/* candidate: move the selected coefficients one step toward zero and score again */
1883 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
1884 for(y=ys; y<b->height; y+= Q2_STEP){
1885 for(x=xs; x<b->width; x+= Q2_STEP){
1886 if(dst[x + y*b->stride]<0) dst[x + y*b->stride]++;
1887 if(dst[x + y*b->stride]>0) dst[x + y*b->stride]--;
1888 //FIXME try more than just --
1889 }
1890 }
1891 dequantize_all(s, p, idwt2_buffer, width, height);
1892 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
1893 find_sse(s, p, score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
/* commit the change per cell where the candidate score is within threshold of the best */
1894 for(y=ys; y<b->height; y+= Q2_STEP){
1895 for(x=xs; x<b->width; x+= Q2_STEP){
1896 int score_idx= x/Q2_STEP + (y/Q2_STEP)*score_stride;
1897 if(score[score_idx] <= best_score[score_idx] + threshold){
1898 best_score[score_idx]= score[score_idx];
1899 if(best_dst[x + y*b->stride]<0) best_dst[x + y*b->stride]++;
1900 if(best_dst[x + y*b->stride]>0) best_dst[x + y*b->stride]--;
1901 //FIXME copy instead
1902 }
1903 }
1904 }
1905 }
1906 }
1907 }
1908 }
1909 }
1910 memcpy(s->spatial_idwt_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); //FIXME work with that directly instead of copy at the end
1911}
1912
1913#endif /* QUANTIZE2==1 */
1914
1915#define USE_HALFPEL_PLANE 0
1916
/*
 * Build three interpolated copies of each plane of `frame` with a 6-tap
 * (1, -5, 20, 20, -5, 1)/32 filter and store their pointers in halfpel[1..3].
 * halfpel[0][p] simply aliases the frame data.
 *
 * halfpel[1] filters along a row, halfpel[2] filters along a column, and
 * halfpel[3] applies the column filter to halfpel[1].
 *
 * The stored pointers are offset by EDGE_WIDTH*(1+ls) from the start of the
 * allocation; release_buffer() subtracts the same offset before freeing.
 * Planes 1 and 2 use width and height shifted right by one.
 *
 * NOTE(review): av_malloc() results are not checked, and the filters read
 * source samples outside the w x h area (see the FIXME at the end).
 */
1917static void halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){
1918 int p,x,y;
1919
1920 assert(!(s->avctx->flags & CODEC_FLAG_EMU_EDGE));
1921
1922 for(p=0; p<3; p++){
1923 int is_chroma= !!p;
1924 int w= s->avctx->width >>is_chroma;
1925 int h= s->avctx->height >>is_chroma;
1926 int ls= frame->linesize[p];
1927 uint8_t *src= frame->data[p];
1928
1929 halfpel[1][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
1930 halfpel[2][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
1931 halfpel[3][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
1932
1933 halfpel[0][p]= src;
/* filter along the row */
1934 for(y=0; y<h; y++){
1935 for(x=0; x<w; x++){
1936 int i= y*ls + x;
1937
1938 halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5;
1939 }
1940 }
/* filter along the column */
1941 for(y=0; y<h; y++){
1942 for(x=0; x<w; x++){
1943 int i= y*ls + x;
1944
1945 halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
1946 }
1947 }
/* column filter applied to the row-filtered plane */
1948 src= halfpel[1][p];
1949 for(y=0; y<h; y++){
1950 for(x=0; x<w; x++){
1951 int i= y*ls + x;
1952
1953 halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
1954 }
1955 }
1956
1957//FIXME border!
1958 }
1959}
1960
1961static void release_buffer(AVCodecContext *avctx){
1962 SnowContext *s = avctx->priv_data;
1963 int i;
1964
1965 if(s->last_picture[s->max_ref_frames-1].data[0]){
1966 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
1967 for(i=0; i<9; i++)
1968 if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3])
1969 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3]));
1970 }
1971}
1972
/*
 * Prepare the context for coding the next frame: pad the edges of the
 * previous current_picture, rotate the reference list, work out how many
 * references are usable and obtain a buffer for the new current_picture.
 *
 * Returns 0 on success, -1 if a non-keyframe has no reference frame or if
 * get_buffer() fails.
 */
1973static int frame_start(SnowContext *s){
1974 AVFrame tmp;
1975 int w= s->avctx->width; //FIXME round up to x16 ?
1976 int h= s->avctx->height;
1977
/* planes 1 and 2 are padded with half the size and half the edge width */
1978 if(s->current_picture.data[0]){
1500be13
AS
1979 s->dsp.draw_edges(s->current_picture.data[0],
1980 s->current_picture.linesize[0], w , h ,
c9c49387 1981 EDGE_WIDTH , EDGE_WIDTH , EDGE_TOP | EDGE_BOTTOM);
1500be13
AS
1982 s->dsp.draw_edges(s->current_picture.data[1],
1983 s->current_picture.linesize[1], w>>1, h>>1,
c9c49387 1984 EDGE_WIDTH/2, EDGE_WIDTH/2, EDGE_TOP | EDGE_BOTTOM);
1500be13
AS
1985 s->dsp.draw_edges(s->current_picture.data[2],
1986 s->current_picture.linesize[2], w>>1, h>>1,
c9c49387 1987 EDGE_WIDTH/2, EDGE_WIDTH/2, EDGE_TOP | EDGE_BOTTOM);
d773d855
DB
1988 }
1989
1990 release_buffer(s->avctx);
1991
/*
 * Rotate: the AVFrame of the oldest slot is kept in tmp, every reference
 * moves one slot back, the previous current picture becomes last_picture[0]
 * and tmp is reused as the new current_picture.
 */
1992 tmp= s->last_picture[s->max_ref_frames-1];
1993 memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame));
1994 memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4);
1995 if(USE_HALFPEL_PLANE && s->current_picture.data[0])
1996 halfpel_interpol(s, s->halfpel_plane[0], &s->current_picture);
1997 s->last_picture[0]= s->current_picture;
1998 s->current_picture= tmp;
1999
2000 if(s->keyframe){
2001 s->ref_frames= 0;
2002 }else{
2003 int i;
/* count references from the newest one, stopping after the first one that is a keyframe */
2004 for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++)
2005 if(i && s->last_picture[i-1].key_frame)
2006 break;
2007 s->ref_frames= i;
2008 if(s->ref_frames==0){
2009 av_log(s->avctx,AV_LOG_ERROR, "No reference frames\n");
2010 return -1;
2011 }
2012 }
2013
2014 s->current_picture.reference= 1;
2015 if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
2016 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
2017 return -1;
2018 }
2019
2020 s->current_picture.key_frame= s->keyframe;
2021
2022 return 0;
2023}
2024
2025static av_cold void common_end(SnowContext *s){
2026 int plane_index, level, orientation, i;
2027
2028 av_freep(&s->spatial_dwt_buffer);
2029 av_freep(&s->spatial_idwt_buffer);
2030
2031 s->m.me.temp= NULL;
2032 av_freep(&s->m.me.scratchpad);
2033 av_freep(&s->m.me.map);
2034 av_freep(&s->m.me.score_map);
2035 av_freep(&s->m.obmc_scratchpad);
2036
2037 av_freep(&s->block);
2038 av_freep(&s->scratchbuf);
2039
2040 for(i=0; i<MAX_REF_FRAMES; i++){
2041 av_freep(&s->ref_mvs[i]);
2042 av_freep(&s->ref_scores[i]);
2043 if(s->last_picture[i].data[0])
2044 s->avctx->release_buffer(s->avctx, &s->last_picture[i]);
2045 }
2046
2047 for(plane_index=0; plane_index<3; plane_index++){
2048 for(level=s->spatial_decomposition_count-1; level>=0; level--){
2049 for(orientation=level ? 1 : 0; orientation<4; orientation++){
2050 SubBand *b= &s->plane[plane_index].band[level][orientation];
2051
2052 av_freep(&b->x_coeff);
2053 }
2054 }
2055 }
e8c6411c
JM
2056 if (s->mconly_picture.data[0])
2057 s->avctx->release_buffer(s->avctx, &s->mconly_picture);
2058 if (s->current_picture.data[0])
2059 s->avctx->release_buffer(s->avctx, &s->current_picture);
d773d855
DB
2060}
2061
2062static av_cold int decode_init(AVCodecContext *avctx)
2063{
2064 avctx->pix_fmt= PIX_FMT_YUV420P;
2065
2066 common_init(avctx);
2067
2068 return 0;
2069}
2070
/*
 * Decode one packet into a picture.
 *
 * Sequence visible below: set up the range decoder on the packet, parse the
 * header, recompute the band layout and rebuild the slice buffer, call
 * alloc_blocks() and frame_start(), decode the block data, then for every
 * plane unpack all coefficients and run a slice-by-slice pipeline of subband
 * decoding, inverse DWT and prediction.
 *
 * On success *picture is set (current_picture, or mconly_picture when
 * debug&2048), *data_size is sizeof(AVFrame) and the number of bytes the
 * range coder consumed is returned. Returns -1 if the header or
 * frame_start() fails.
 *
 * NOTE(review): the return value of common_init_after_header() is ignored.
 */
2071static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPacket *avpkt){
2072 const uint8_t *buf = avpkt->data;
2073 int buf_size = avpkt->size;
2074 SnowContext *s = avctx->priv_data;
2075 RangeCoder * const c= &s->c;
2076 int bytes_read;
2077 AVFrame *picture = data;
2078 int level, orientation, plane_index;
2079
2080 ff_init_range_decoder(c, buf, buf_size);
2081 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
2082
975a1447 2083 s->current_picture.pict_type= AV_PICTURE_TYPE_I; //FIXME I vs. P
d773d855
DB
2084 if(decode_header(s)<0)
2085 return -1;
2086 common_init_after_header(avctx);
2087
2088 // realloc slice buffer for the case that spatial_decomposition_count changed
33996217
MR
2089 ff_slice_buffer_destroy(&s->sb);
2090 ff_slice_buffer_init(&s->sb, s->plane[0].height, (MB_SIZE >> s->block_max_depth) + s->spatial_decomposition_count * 8 + 1, s->plane[0].width, s->spatial_idwt_buffer);
d773d855
DB
2091
/* fast_mc is set only for the filter diag_mc, htaps 6, hcoeff {40, -10, 2} */
2092 for(plane_index=0; plane_index<3; plane_index++){
2093 Plane *p= &s->plane[plane_index];
2094 p->fast_mc= p->diag_mc && p->htaps==6 && p->hcoeff[0]==40
2095 && p->hcoeff[1]==-10
2096 && p->hcoeff[2]==2;
2097 }
2098
2099 alloc_blocks(s);
2100
2101 if(frame_start(s) < 0)
2102 return -1;
2103 //keyframe flag duplication mess FIXME
2104 if(avctx->debug&FF_DEBUG_PICT_INFO)
2105 av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
2106
2107 decode_blocks(s);
2108
2109 for(plane_index=0; plane_index<3; plane_index++){
2110 Plane *p= &s->plane[plane_index];
2111 int w= p->width;
2112 int h= p->height;
2113 int x, y;
2114 int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
2115
/* debug&2048: run the prediction alone and copy its result into mconly_picture */
2116 if(s->avctx->debug&2048){
2117 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
2118 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
2119
2120 for(y=0; y<h; y++){
2121 for(x=0; x<w; x++){
2122 int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
2123 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
2124 }
2125 }
2126 }
2127
/* unpack the coefficients of every band of this plane before any slice is processed */
2128 {
2129 for(level=0; level<s->spatial_decomposition_count; level++){
2130 for(orientation=level ? 1 : 0; orientation<4; orientation++){
2131 SubBand *b= &p->band[level][orientation];
2132 unpack_coeffs(s, b, b->parent, orientation);
2133 }
2134 }
2135 }
2136
2137 {
2138 const int mb_h= s->b_height << s->block_max_depth;
2139 const int block_size = MB_SIZE >> s->block_max_depth;
2140 const int block_w = plane_index ? block_size/2 : block_size;
2141 int mb_y;
2142 DWTCompose cs[MAX_DECOMPOSITIONS];
2143 int yd=0, yq=0;
2144 int y;
2145 int end_y;
2146
2147 ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
/* one iteration per block row, mb_h included */
2148 for(mb_y=0; mb_y<=mb_h; mb_y++){
2149
2150 int slice_starty = block_w*mb_y;
2151 int slice_h = block_w*(mb_y+1);
/* unless keyframe or debug&512, the slice window is moved up by half a block */
2152 if (!(s->keyframe || s->avctx->debug&512)){
2153 slice_starty = FFMAX(0, slice_starty - (block_w >> 1));
2154 slice_h -= (block_w >> 1);
2155 }
2156
/* decode, for each band, the band rows this slice needs (range scaled to the band's level plus a margin) */
2157 for(level=0; level<s->spatial_decomposition_count; level++){
2158 for(orientation=level ? 1 : 0; orientation<4; orientation++){
2159 SubBand *b= &p->band[level][orientation];
2160 int start_y;
2161 int end_y;
2162 int our_mb_start = mb_y;
2163 int our_mb_end = (mb_y + 1);
2164 const int extra= 3;
2165 start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0);
2166 end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra);
2167 if (!(s->keyframe || s->avctx->debug&512)){
2168 start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level)));
2169 end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level)));
2170 }
2171 start_y = FFMIN(b->height, start_y);
2172 end_y = FFMIN(b->height, end_y);
2173
2174 if (start_y != end_y){
/* orientation 0 (band[0][0]) additionally gets correlate and dequantize passes */
2175 if (orientation == 0){
2176 SubBand * correlate_band = &p->band[0][0];
2177 int correlate_end_y = FFMIN(b->height, end_y + 1);
2178 int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0));
2179 decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]);
2180 correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y);
2181 dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, start_y, end_y);
2182 }
2183 else
2184 decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
2185 }
2186 }
2187 }
2188
/* advance the inverse DWT in steps of 4 lines up to the end of this slice */
2189 for(; yd<slice_h; yd+=4){
05aec7bb 2190 ff_spatial_idwt_buffered_slice(&s->dwt, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
d773d855
DB
2191 }
2192
/* lossless: the reconstructed lines are shifted up by FRAC_BITS */
2193 if(s->qlog == LOSSLESS_QLOG){
2194 for(; yq<slice_h && yq<h; yq++){
2195 IDWTELEM * line = slice_buffer_get_line(&s->sb, yq);
2196 for(x=0; x<w; x++){
2197 line[x] <<= FRAC_BITS;
2198 }
2199 }
2200 }
2201
2202 predict_slice_buffered(s, &s->sb, s->spatial_idwt_buffer, plane_index, 1, mb_y);
2203
/* hand the finished lines of this slice back to the slice buffer */
2204 y = FFMIN(p->height, slice_starty);
2205 end_y = FFMIN(p->height, slice_h);
2206 while(y < end_y)
33996217 2207 ff_slice_buffer_release(&s->sb, y++);
d773d855
DB
2208 }
2209
33996217 2210 ff_slice_buffer_flush(&s->sb);
d773d855
DB
2211 }
2212
2213 }
2214
2215 emms_c();
2216
2217 release_buffer(avctx);
2218
2219 if(!(s->avctx->debug&2048))
2220 *picture= s->current_picture;
2221 else
2222 *picture= s->mconly_picture;
2223
2224 *data_size = sizeof(AVFrame);
2225
2226 bytes_read= c->bytestream - c->bytestream_start;
2227 if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
2228
2229 return bytes_read;
2230}
2231
2232static av_cold int decode_end(AVCodecContext *avctx)
2233{
2234 SnowContext *s = avctx->priv_data;
2235
33996217 2236 ff_slice_buffer_destroy(&s->sb);
d773d855
DB
2237
2238 common_end(s);
2239
2240 return 0;
2241}
2242
/*
 * Codec registration entry for the Snow decoder.
 * The leading fields are given positionally, so their order must match the
 * AVCodec declaration; only long_name uses a designated initializer.
 * NOTE(review): which AVCodec members the two NULL entries correspond to is
 * not visible in this file - confirm against the AVCodec declaration.
 */
d36beb3f 2243AVCodec ff_snow_decoder = {
d773d855 2244 "snow",
72415b2a 2245 AVMEDIA_TYPE_VIDEO,
d773d855
DB
2246 CODEC_ID_SNOW,
2247 sizeof(SnowContext),
2248 decode_init,
2249 NULL,
2250 decode_end,
2251 decode_frame,
2252 CODEC_CAP_DR1 /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
2253 NULL,
2254 .long_name = NULL_IF_CONFIG_SMALL("Snow"),
2255};
2256
2257#if CONFIG_SNOW_ENCODER
/*
 * Encoder init callback.
 *
 * Refuses to run unless strict_std_compliance allows experimental codecs,
 * rejects the 9/7 wavelet combined with qscale and global_quality 0, derives
 * the basic coding parameters from the codec flags, installs the default
 * 6-tap motion-compensation filter on all planes, runs the shared
 * initialisation and allocates the motion-estimation buffers.
 *
 * Returns 0 on success, -1 on an unsupported configuration or if rate
 * control initialisation fails.
 *
 * NOTE(review): the return values of common_init(), of the av_mallocz()
 * calls and of get_buffer() for input_picture are not checked here.
 */
2258static av_cold int encode_init(AVCodecContext *avctx)
2259{
2260 SnowContext *s = avctx->priv_data;
2261 int plane_index;
2262
2263 if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
2264 av_log(avctx, AV_LOG_ERROR, "This codec is under development, files encoded with it may not be decodable with future versions!!!\n"
2265 "Use vstrict=-2 / -strict -2 to use it anyway.\n");
2266 return -1;
2267 }
2268
2269 if(avctx->prediction_method == DWT_97
2270 && (avctx->flags & CODEC_FLAG_QSCALE)
2271 && avctx->global_quality == 0){
2272 av_log(avctx, AV_LOG_ERROR, "The 9/7 wavelet is incompatible with lossless mode.\n");
2273 return -1;
2274 }
2275
2276 s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
2277
2278 s->mv_scale = (avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
2279 s->block_max_depth= (avctx->flags & CODEC_FLAG_4MV ) ? 1 : 0;
2280
/* same filter that decode_frame() recognises as the fast_mc case */
2281 for(plane_index=0; plane_index<3; plane_index++){
2282 s->plane[plane_index].diag_mc= 1;
2283 s->plane[plane_index].htaps= 6;
2284 s->plane[plane_index].hcoeff[0]= 40;
2285 s->plane[plane_index].hcoeff[1]= -10;
2286 s->plane[plane_index].hcoeff[2]= 2;
2287 s->plane[plane_index].fast_mc= 1;
2288 }
2289
2290 common_init(avctx);
2291 alloc_blocks(s);
2292
2293 s->version=0;
2294
2295 s->m.avctx = avctx;
2296 s->m.flags = avctx->flags;
2297 s->m.bit_rate= avctx->bit_rate;
2298
/* me.temp and me.scratchpad share one allocation */
2299 s->m.me.temp =
2300 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
2301 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
2302 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
2303 s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
2304 h263_encode_init(&s->m); //mv_penalty
2305
/* clamp the requested reference count to 1..MAX_REF_FRAMES */
2306 s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1);
2307
2308 if(avctx->flags&CODEC_FLAG_PASS1){
2309 if(!avctx->stats_out)
2310 avctx->stats_out = av_mallocz(256);
2311 }
2312 if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){
2313 if(ff_rate_control_init(&s->m) < 0)
2314 return -1;
2315 }
2316 s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2));
2317
2318 avctx->coded_frame= &s->current_picture;
2319 switch(avctx->pix_fmt){
2320// case PIX_FMT_YUV444P:
2321// case PIX_FMT_YUV422P:
2322 case PIX_FMT_YUV420P:
2323 case PIX_FMT_GRAY8:
2324// case PIX_FMT_YUV411P:
2325// case PIX_FMT_YUV410P:
2326 s->colorspace_type= 0;
2327 break;
2328/* case PIX_FMT_RGB32:
2329 s->colorspace= 1;
2330 break;*/
2331 default:
2332 av_log(avctx, AV_LOG_ERROR, "pixel format not supported\n");
2333 return -1;
2334 }
2335// avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
/* chroma subsampling is fixed to a shift of 1 in both directions */
2336 s->chroma_h_shift= 1;
2337 s->chroma_v_shift= 1;
2338
2339 ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
2340 ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
2341
2342 s->avctx->get_buffer(s->avctx, &s->input_picture);
2343
/* the iterative motion search keeps per-reference vector and score tables, one entry per smallest block */
2344 if(s->avctx->me_method == ME_ITER){
2345 int i;
2346 int size= s->b_width * s->b_height << 2*s->block_max_depth;
2347 for(i=0; i<s->max_ref_frames; i++){
2348 s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2]));
2349 s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t));
2350 }
2351 }
2352
2353 return 0;
2354}
2355
2356//near copy & paste from dsputil, FIXME
/**
 * Sum all samples of a w x w block.
 *
 * @param pix       top-left sample of the block
 * @param line_size distance in bytes between the starts of consecutive rows
 * @param w         width and height of the block
 * @return the sum of the w*w samples (0 when w <= 0)
 */
static int pix_sum(uint8_t * pix, int line_size, int w)
{
    int total = 0;
    int row, col;

    for (row = 0; row < w; row++) {
        const uint8_t *line = pix + row * line_size;

        for (col = 0; col < w; col++)
            total += line[col];
    }
    return total;
}
2371
2372//near copy & paste from dsputil, FIXME
2373static int pix_norm1(uint8_t * pix, int line_size, int w)
2374{
2375 int s, i, j;
2376 uint32_t *sq = ff_squareTbl + 256;
2377
2378 s = 0;
2379 for (i = 0; i < w; i++) {
2380 for (j = 0; j < w; j ++) {
2381 s += sq[pix[0]];
2382 pix ++;
2383 }
2384 pix += line_size - w;
2385 }
2386 return s;
2387}
2388
2389//FIXME copy&paste
2390#define P_LEFT P[1]
2391#define P_TOP P[2]
2392#define P_TOPRIGHT P[3]
2393#define P_MEDIAN P[4]
2394#define P_MV1 P[9]
2395#define FLAG_QPEL 1 //must be 1
2396
2397static int encode_q_branch(SnowContext *s, int level, int x, int y){
2398 uint8_t p_buffer[1024];
2399 uint8_t i_buffer[1024];
2400 uint8_t p_state[sizeof(s->block_state)];
2401 uint8_t i_state[sizeof(s->block_state)];
2402 RangeCoder pc, ic;
2403 uint8_t *pbbak= s->c.bytestream;
2404 uint8_t *pbbak_start= s->c.bytestream_start;
2405 int score, score2, iscore, i_len, p_len, block_s, sum, base_bits;
2406 const int w= s->b_width << s->block_max_depth;
2407 const int h= s->b_height << s->block_max_depth;
2408 const int rem_depth= s->block_max_depth - level;
2409 const int index= (x + y*w) << rem_depth;
2410 const int block_w= 1<<(LOG2_MB_SIZE - level);
2411 int trx= (x+1)<<rem_depth;
2412 int try= (y+1)<<rem_depth;
2413 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2414 const BlockNode *top = y ? &s->block[index-w] : &null_block;
2415 const BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
2416 const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
2417 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2418 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2419 int pl = left->color[0];
2420 int pcb= left->color[1];
2421 int pcr= left->color[2];
2422 int pmx, pmy;
2423 int mx=0, my=0;
2424 int l,cr,cb;
2425 const int stride= s->current_picture.linesize[0];
2426 const int uvstride= s->current_picture.linesize[1];
2427 uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w,
2428 s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
2429 s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
2430 int P[10][2];
2431 int16_t last_mv[3][2];
2432 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
2433 const int shift= 1+qpel;
2434 MotionEstContext *c= &s->m.me;
2435 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2436 int mx_context= av_log2(2*FFABS(left->mx - top->mx));
2437 int my_context= av_log2(2*FFABS(left->my - top->my));
2438 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2439 int ref, best_ref, ref_score, ref_mx, ref_my;
2440
2441 assert(sizeof(s->block_state) >= 256);
2442 if(s->keyframe){
2443 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
2444 return 0;
8f8ae495
LM
2445 }
2446
d773d855 2447// clip predictors / edge ?
51d6a3cf 2448
d773d855
DB
2449 P_LEFT[0]= left->mx;
2450 P_LEFT[1]= left->my;
2451 P_TOP [0]= top->mx;
2452 P_TOP [1]= top->my;
2453 P_TOPRIGHT[0]= tr->mx;
2454 P_TOPRIGHT[1]= tr->my;
51d6a3cf 2455
d773d855
DB
2456 last_mv[0][0]= s->block[index].mx;
2457 last_mv[0][1]= s->block[index].my;
2458 last_mv[1][0]= right->mx;
2459 last_mv[1][1]= right->my;
2460 last_mv[2][0]= bottom->mx;
2461 last_mv[2][1]= bottom->my;
51d6a3cf 2462
d773d855
DB
2463 s->m.mb_stride=2;
2464 s->m.mb_x=
2465 s->m.mb_y= 0;
2466 c->skip= 0;
51d6a3cf 2467
d773d855
DB
2468 assert(c-> stride == stride);
2469 assert(c->uvstride == uvstride);
51d6a3cf 2470
d773d855
DB
2471 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
2472 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
2473 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
2474 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
1015631b 2475
d773d855
DB
2476 c->xmin = - x*block_w - 16+3;
2477 c->ymin = - y*block_w - 16+3;
2478 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-3;
2479 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-3;
1015631b 2480
d773d855
DB
2481 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
2482 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
2483 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
2484 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
2485 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
2486 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
2487 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
1015631b 2488
d773d855
DB
2489 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
2490 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
1015631b 2491
d773d855
DB
2492 if (!y) {
2493 c->pred_x= P_LEFT[0];
2494 c->pred_y= P_LEFT[1];
2495 } else {
2496 c->pred_x = P_MEDIAN[0];
2497 c->pred_y = P_MEDIAN[1];
2498 }
48d1b9a1 2499
d773d855
DB
2500 score= INT_MAX;
2501 best_ref= 0;
2502 for(ref=0; ref<s->ref_frames; ref++){
2503 init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);
8c36eaaa 2504
d773d855
DB
2505 ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
2506 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
8c36eaaa 2507
d773d855
DB
2508 assert(ref_mx >= c->xmin);
2509 assert(ref_mx <= c->xmax);
2510 assert(ref_my >= c->ymin);
2511 assert(ref_my <= c->ymax);
8c36eaaa 2512
d773d855
DB
2513 ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
2514 ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
2515 ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
2516 if(s->ref_mvs[ref]){
2517 s->ref_mvs[ref][index][0]= ref_mx;
2518 s->ref_mvs[ref][index][1]= ref_my;
2519 s->ref_scores[ref][index]= ref_score;
2520 }
2521 if(score > ref_score){
2522 score= ref_score;
2523 best_ref= ref;
2524 mx= ref_mx;
2525 my= ref_my;
51d6a3cf 2526 }
51d6a3cf 2527 }
d773d855 2528 //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2
b104969f 2529
d773d855
DB
2530 // subpel search
2531 base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start);
2532 pc= s->c;
2533 pc.bytestream_start=
2534 pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
2535 memcpy(p_state, s->block_state, sizeof(s->block_state));
b104969f 2536
d773d855
DB
2537 if(level!=s->block_max_depth)
2538 put_rac(&pc, &p_state[4 + s_context], 1);
2539 put_rac(&pc, &p_state[1 + left->type + top->type], 0);
2540 if(s->ref_frames > 1)
2541 put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
2542 pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
2543 put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
2544 put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
2545 p_len= pc.bytestream - pc.bytestream_start;
2546 score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT;
b104969f 2547
d773d855
DB
2548 block_s= block_w*block_w;
2549 sum = pix_sum(current_data[0], stride, block_w);
2550 l= (sum + block_s/2)/block_s;
2551 iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
b104969f 2552
d773d855
DB
2553 block_s= block_w*block_w>>2;
2554 sum = pix_sum(current_data[1], uvstride, block_w>>1);
2555 cb= (sum + block_s/2)/block_s;
2556// iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
2557 sum = pix_sum(current_data[2], uvstride, block_w>>1);
2558 cr= (sum + block_s/2)/block_s;
2559// iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
b104969f 2560
d773d855
DB
2561 ic= s->c;
2562 ic.bytestream_start=
2563 ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
2564 memcpy(i_state, s->block_state, sizeof(s->block_state));
2565 if(level!=s->block_max_depth)
2566 put_rac(&ic, &i_state[4 + s_context], 1);
2567 put_rac(&ic, &i_state[1 + left->type + top->type], 1);
2568 put_symbol(&ic, &i_state[32], l-pl , 1);
2569 put_symbol(&ic, &i_state[64], cb-pcb, 1);
2570 put_symbol(&ic, &i_state[96], cr-pcr, 1);
2571 i_len= ic.bytestream - ic.bytestream_start;
2572 iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT;
b104969f 2573
d773d855
DB
2574// assert(score==256*256*256*64-1);
2575 assert(iscore < 255*255*256 + s->lambda2*10);
2576 assert(iscore >= 0);
2577 assert(l>=0 && l<=255);
2578 assert(pl>=0 && pl<=255);
b104969f 2579
d773d855
DB
2580 if(level==0){
2581 int varc= iscore >> 8;
2582 int vard= score >> 8;
2583 if (vard <= 64 || vard < varc)
2584 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
2585 else
2586 c->scene_change_score+= s->m.qscale;
b104969f 2587 }
51d6a3cf 2588
d773d855
DB
2589 if(level!=s->block_max_depth){
2590 put_rac(&s->c, &s->block_state[4 + s_context], 0);
2591 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
2592 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
2593 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
2594 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
2595 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
791e7b83 2596
d773d855
DB
2597 if(score2 < score && score2 < iscore)
2598 return score2;
d593e329 2599 }
115329f1 2600
d773d855
DB
2601 if(iscore < score){
2602 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
2603 memcpy(pbbak, i_buffer, i_len);
2604 s->c= ic;
2605 s->c.bytestream_start= pbbak_start;
2606 s->c.bytestream= pbbak + i_len;
2607 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
2608 memcpy(s->block_state, i_state, sizeof(s->block_state));
2609 return iscore;
2610 }else{
2611 memcpy(pbbak, p_buffer, p_len);
2612 s->c= pc;
2613 s->c.bytestream_start= pbbak_start;
2614 s->c.bytestream= pbbak + p_len;
2615 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
2616 memcpy(s->block_state, p_state, sizeof(s->block_state));
2617 return score;
2618 }
2619}
115329f1 2620
d773d855
DB
2621static void encode_q_branch2(SnowContext *s, int level, int x, int y){
2622 const int w= s->b_width << s->block_max_depth;
2623 const int rem_depth= s->block_max_depth - level;
2624 const int index= (x + y*w) << rem_depth;
2625 int trx= (x+1)<<rem_depth;
2626 BlockNode *b= &s->block[index];
2627 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2628 const BlockNode *top = y ? &s->block[index-w] : &null_block;
2629 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2630 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2631 int pl = left->color[0];
2632 int pcb= left->color[1];
2633 int pcr= left->color[2];
2634 int pmx, pmy;
2635 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2636 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref;
2637 int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref;
2638 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
115329f1 2639
d773d855
DB
2640 if(s->keyframe){
2641 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
2642 return;
2643 }
115329f1 2644
d773d855
DB
2645 if(level!=s->block_max_depth){
2646 if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
2647 put_rac(&s->c, &s->block_state[4 + s_context], 1);
2648 }else{
2649 put_rac(&s->c, &s->block_state[4 + s_context], 0);
2650 encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
2651 encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
2652 encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
2653 encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
2654 return;
791e7b83
MN
2655 }
2656 }
d773d855
DB
2657 if(b->type & BLOCK_INTRA){
2658 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
2659 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
2660 put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
2661 put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
2662 put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
2663 set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
2664 }else{
2665 pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
2666 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
2667 if(s->ref_frames > 1)
2668 put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
2669 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
2670 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
2671 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
2672 }
791e7b83
MN
2673}
2674
d773d855
DB
2675static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
2676 int i, x2, y2;
2677 Plane *p= &s->plane[plane_index];
2678 const int block_size = MB_SIZE >> s->block_max_depth;
2679 const int block_w = plane_index ? block_size/2 : block_size;
2680 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2681 const int obmc_stride= plane_index ? block_size : 2*block_size;
2682 const int ref_stride= s->current_picture.linesize[plane_index];
2683 uint8_t *src= s-> input_picture.data[plane_index];
2684 IDWTELEM *dst= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; //FIXME change to unsigned
2685 const int b_stride = s->b_width << s->block_max_depth;
2686 const int w= p->width;
2687 const int h= p->height;
2688 int index= mb_x + mb_y*b_stride;
2689 BlockNode *b= &s->block[index];
2690 BlockNode backup= *b;
2691 int ab=0;
2692 int aa=0;
115329f1 2693
d773d855
DB
2694 b->type|= BLOCK_INTRA;
2695 b->color[plane_index]= 0;
2696 memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM));
2697
2698 for(i=0; i<4; i++){
2699 int mb_x2= mb_x + (i &1) - 1;
2700 int mb_y2= mb_y + (i>>1) - 1;
2701 int x= block_w*mb_x2 + block_w/2;
2702 int y= block_w*mb_y2 + block_w/2;
2703
2704 add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
2705 x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
2706
2707 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
2708 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
2709 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
2710 int obmc_v= obmc[index];
2711 int d;
2712 if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
2713 if(x<0) obmc_v += obmc[index + block_w];
2714 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
2715 if(x+block_w>w) obmc_v += obmc[index - block_w];
2716 //FIXME precalculate this or simplify it somehow else
115329f1 2717
d773d855
DB
2718 d = -dst[index] + (1<<(FRAC_BITS-1));
2719 dst[index] = d;
2720 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
2721 aa += obmc_v * obmc_v; //FIXME precalculate this
a0d1931c
Y
2722 }
2723 }
2724 }
d773d855 2725 *b= backup;
a0d1931c 2726
d773d855
DB
2727 return av_clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we should not need clipping
2728}
115329f1 2729
d773d855
DB
2730static inline int get_block_bits(SnowContext *s, int x, int y, int w){
2731 const int b_stride = s->b_width << s->block_max_depth;
2732 const int b_height = s->b_height<< s->block_max_depth;
2733 int index= x + y*b_stride;
2734 const BlockNode *b = &s->block[index];
2735 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2736 const BlockNode *top = y ? &s->block[index-b_stride] : &null_block;
2737 const BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left;
2738 const BlockNode *tr = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
2739 int dmx, dmy;
2740// int mx_context= av_log2(2*FFABS(left->mx - top->mx));
2741// int my_context= av_log2(2*FFABS(left->my - top->my));
115329f1 2742
d773d855
DB
2743 if(x<0 || x>=b_stride || y>=b_height)
2744 return 0;
2745/*
27461 0 0
274701X 1-2 1
2748001XX 3-6 2-3
27490001XXX 7-14 4-7
275000001XXXX 15-30 8-15
2751*/
2752//FIXME try accurate rate
2753//FIXME intra and inter predictors if surrounding blocks are not the same type
2754 if(b->type & BLOCK_INTRA){
2755 return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0]))
2756 + av_log2(2*FFABS(left->color[1] - b->color[1]))
2757 + av_log2(2*FFABS(left->color[2] - b->color[2])));
2758 }else{
2759 pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
2760 dmx-= b->mx;
2761 dmy-= b->my;
2762 return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda
2763 + av_log2(2*FFABS(dmy))
2764 + av_log2(2*b->ref));
791e7b83
MN
2765 }
2766}
2767
d773d855
DB
2768static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
2769 Plane *p= &s->plane[plane_index];
2770 const int block_size = MB_SIZE >> s->block_max_depth;
2771 const int block_w = plane_index ? block_size/2 : block_size;
2772 const int obmc_stride= plane_index ? block_size : 2*block_size;
2773 const int ref_stride= s->current_picture.linesize[plane_index];
2774 uint8_t *dst= s->current_picture.data[plane_index];
2775 uint8_t *src= s-> input_picture.data[plane_index];
2776 IDWTELEM *pred= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
2777 uint8_t *cur = s->scratchbuf;
2778 uint8_t tmp[ref_stride*(2*MB_SIZE+HTAPS_MAX-1)];
2779 const int b_stride = s->b_width << s->block_max_depth;
2780 const int b_height = s->b_height<< s->block_max_depth;
2781 const int w= p->width;
2782 const int h= p->height;
2783 int distortion;
2784 int rate= 0;
2785 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
2786 int sx= block_w*mb_x - block_w/2;
2787 int sy= block_w*mb_y - block_w/2;
2788 int x0= FFMAX(0,-sx);
2789 int y0= FFMAX(0,-sy);
2790 int x1= FFMIN(block_w*2, w-sx);
2791 int y1= FFMIN(block_w*2, h-sy);
2792 int i,x,y;
115329f1 2793
d773d855 2794 pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
115329f1 2795
d773d855
DB
2796 for(y=y0; y<y1; y++){
2797 const uint8_t *obmc1= obmc_edged + y*obmc_stride;
2798 const IDWTELEM *pred1 = pred + y*obmc_stride;
2799 uint8_t *cur1 = cur + y*ref_stride;
2800 uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
2801 for(x=x0; x<x1; x++){
2802#if FRAC_BITS >= LOG2_OBMC_MAX
2803 int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
2804#else
2805 int v = (cur1[x] * obmc1[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS);
2806#endif
2807 v = (v + pred1[x]) >> FRAC_BITS;
2808 if(v&(~255)) v= ~(v>>31);
2809 dst1[x] = v;
791e7b83
MN
2810 }
2811 }
115329f1 2812
d773d855
DB
2813 /* copy the regions where obmc[] = (uint8_t)256 */
2814 if(LOG2_OBMC_MAX == 8
2815 && (mb_x == 0 || mb_x == b_stride-1)
2816 && (mb_y == 0 || mb_y == b_height-1)){
2817 if(mb_x == 0)
2818 x1 = block_w;
2819 else
2820 x0 = block_w;
2821 if(mb_y == 0)
2822 y1 = block_w;
2823 else
2824 y0 = block_w;
2825 for(y=y0; y<y1; y++)
2826 memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
2827 }
115329f1 2828
d773d855
DB
2829 if(block_w==16){
2830 /* FIXME rearrange dsputil to fit 32x32 cmp functions */
2831 /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
2832 /* FIXME cmps overlap but do not cover the wavelet's whole support.
2833 * So improving the score of one block is not strictly guaranteed
2834 * to improve the score of the whole frame, thus iterative motion
2835 * estimation does not always converge. */
2836 if(s->avctx->me_cmp == FF_CMP_W97)
33996217 2837 distortion = ff_w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
d773d855 2838 else if(s->avctx->me_cmp == FF_CMP_W53)
33996217 2839 distortion = ff_w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
d773d855
DB
2840 else{
2841 distortion = 0;
2842 for(i=0; i<4; i++){
2843 int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
2844 distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
a0d1931c
Y
2845 }
2846 }
d773d855
DB
2847 }else{
2848 assert(block_w==8);
2849 distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
2850 }
2851
2852 if(plane_index==0){
2853 for(i=0; i<4; i++){
2854/* ..RRr
2855 * .RXx.
2856 * rxx..
2857 */
2858 rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
2859 }
2860 if(mb_x == b_stride-2)
2861 rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
a0d1931c 2862 }
d773d855 2863 return distortion + rate*penalty_factor;
a0d1931c
Y
2864}
2865
d773d855
DB
2866static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
2867 int i, y2;
2868 Plane *p= &s->plane[plane_index];
2869 const int block_size = MB_SIZE >> s->block_max_depth;
2870 const int block_w = plane_index ? block_size/2 : block_size;
2871 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2872 const int obmc_stride= plane_index ? block_size : 2*block_size;
2873 const int ref_stride= s->current_picture.linesize[plane_index];
2874 uint8_t *dst= s->current_picture.data[plane_index];
2875 uint8_t *src= s-> input_picture.data[plane_index];
2876 //FIXME zero_dst is const but add_yblock changes dst if add is 0 (this is never the case for dst=zero_dst
2877 // const has only been removed from zero_dst to suppress a warning
2878 static IDWTELEM zero_dst[4096]; //FIXME
2879 const int b_stride = s->b_width << s->block_max_depth;
2880 const int w= p->width;
2881 const int h= p->height;
2882 int distortion= 0;
2883 int rate= 0;
2884 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
115329f1 2885
d773d855
DB
2886 for(i=0; i<9; i++){
2887 int mb_x2= mb_x + (i%3) - 1;
2888 int mb_y2= mb_y + (i/3) - 1;
2889 int x= block_w*mb_x2 + block_w/2;
2890 int y= block_w*mb_y2 + block_w/2;
115329f1 2891
d773d855
DB
2892 add_yblock(s, 0, NULL, zero_dst, dst, obmc,
2893 x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
2894
2895 //FIXME find a cleaner/simpler way to skip the outside stuff
2896 for(y2= y; y2<0; y2++)
2897 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2898 for(y2= h; y2<y+block_w; y2++)
2899 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2900 if(x<0){
2901 for(y2= y; y2<y+block_w; y2++)
2902 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
2903 }
2904 if(x+block_w > w){
2905 for(y2= y; y2<y+block_w; y2++)
2906 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
791e7b83 2907 }
d773d855
DB
2908
2909 assert(block_w== 8 || block_w==16);
2910 distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
791e7b83 2911 }
791e7b83 2912
d773d855
DB
2913 if(plane_index==0){
2914 BlockNode *b= &s->block[mb_x+mb_y*b_stride];
2915 int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
e559c30a 2916
d773d855
DB
2917/* ..RRRr
2918 * .RXXx.
2919 * .RXXx.
2920 * rxxx.
2921 */
2922 if(merged)
2923 rate = get_block_bits(s, mb_x, mb_y, 2);
2924 for(i=merged?4:0; i<9; i++){
2925 static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
2926 rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
e559c30a
MN
2927 }
2928 }
d773d855 2929 return distortion + rate*penalty_factor;
e559c30a
MN
2930}
2931
d773d855
DB
2932static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
2933 const int w= b->width;
2934 const int h= b->height;
2935 int x, y;
526e037b 2936
d773d855
DB
2937 if(1){
2938 int run=0;
2939 int runs[w*h];
2940 int run_index=0;
2941 int max_index;
791e7b83 2942
d773d855
DB
2943 for(y=0; y<h; y++){
2944 for(x=0; x<w; x++){
2945 int v, p=0;
2946 int /*ll=0, */l=0, lt=0, t=0, rt=0;
2947 v= src[x + y*stride];
e559c30a 2948
d773d855
DB
2949 if(y){
2950 t= src[x + (y-1)*stride];
2951 if(x){
2952 lt= src[x - 1 + (y-1)*stride];
2953 }
2954 if(x + 1 < w){
2955 rt= src[x + 1 + (y-1)*stride];
2956 }
2957 }
2958 if(x){
2959 l= src[x - 1 + y*stride];
2960 /*if(x > 1){
2961 if(orientation==1) ll= src[y + (x-2)*stride];
2962 else ll= src[x - 2 + y*stride];
2963 }*/
2964 }
2965 if(parent){
2966 int px= x>>1;
2967 int py= y>>1;
2968 if(px<b->parent->width && py<b->parent->height)
2969 p= parent[px + py*2*stride];
2970 }
2971 if(!(/*ll|*/l|lt|t|rt|p)){
2972 if(v){
2973 runs[run_index++]= run;
2974 run=0;
2975 }else{
2976 run++;
2977 }
2978 }
e559c30a
MN
2979 }
2980 }
d773d855
DB
2981 max_index= run_index;
2982 runs[run_index++]= run;
2983 run_index=0;
2984 run= runs[run_index++];
158f189f 2985
d773d855
DB
2986 put_symbol2(&s->c, b->state[30], max_index, 0);
2987 if(run_index <= max_index)
2988 put_symbol2(&s->c, b->state[1], run, 3);
28869757 2989
d773d855
DB
2990 for(y=0; y<h; y++){
2991 if(s->c.bytestream_end - s->c.bytestream < w*40){
2992 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2993 return -1;
2994 }
2995 for(x=0; x<w; x++){
2996 int v, p=0;
2997 int /*ll=0, */l=0, lt=0, t=0, rt=0;
2998 v= src[x + y*stride];
791e7b83 2999
d773d855
DB
3000 if(y){
3001 t= src[x + (y-1)*stride];
3002 if(x){
3003 lt= src[x - 1 + (y-1)*stride];
3004 }
3005 if(x + 1 < w){
3006 rt= src[x + 1 + (y-1)*stride];
3007 }
3008 }
3009 if(x){
3010 l= src[x - 1 + y*stride];
3011 /*if(x > 1){
3012 if(orientation==1) ll= src[y + (x-2)*stride];
3013 else ll= src[x - 2 + y*stride];
3014 }*/
3015 }
3016 if(parent){
3017 int px= x>>1;
3018 int py= y>>1;
3019 if(px<b->parent->width && py<b->parent->height)
3020 p= parent[px + py*2*stride];
3021 }
3022 if(/*ll|*/l|lt|t|rt|p){
3023 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
791e7b83 3024
d773d855
DB
3025 put_rac(&s->c, &b->state[0][context], !!v);
3026 }else{
3027 if(!run){
3028 run= runs[run_index++];
115329f1 3029
d773d855
DB
3030 if(run_index <= max_index)
3031 put_symbol2(&s->c, b->state[1], run, 3);
3032 assert(v);
3033 }else{
3034 run--;
3035 assert(!v);
3036 }
3037 }
3038 if(v){
3039 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
3040 int l2= 2*FFABS(l) + (l<0);
3041 int t2= 2*FFABS(t) + (t<0);
3042
3043 put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
3044 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
7d7f57d9 3045 }
7d7f57d9 3046 }
e559c30a 3047 }
7d7f57d9 3048 }
791e7b83
MN
3049 return 0;
3050}
3051
d773d855
DB
3052static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
3053// encode_subband_qtree(s, b, src, parent, stride, orientation);
3054// encode_subband_z0run(s, b, src, parent, stride, orientation);
3055 return encode_subband_c0run(s, b, src, parent, stride, orientation);
3056// encode_subband_dzr(s, b, src, parent, stride, orientation);
c97de57c
MN
3057}
3058
d773d855
DB
3059static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
3060 const int b_stride= s->b_width << s->block_max_depth;
3061 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
3062 BlockNode backup= *block;
3063 int rd, index, value;
115329f1 3064
d773d855
DB
3065 assert(mb_x>=0 && mb_y>=0);
3066 assert(mb_x<b_stride);
791e7b83 3067
d773d855
DB
3068 if(intra){
3069 block->color[0] = p[0];
3070 block->color[1] = p[1];
3071 block->color[2] = p[2];
3072 block->type |= BLOCK_INTRA;
3073 }else{
3074 index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
3075 value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12);
3076 if(s->me_cache[index] == value)
3077 return 0;
3078 s->me_cache[index]= value;
791e7b83 3079
d773d855
DB
3080 block->mx= p[0];
3081 block->my= p[1];
3082 block->type &= ~BLOCK_INTRA;
3083 }
791e7b83 3084
d773d855 3085 rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
791e7b83 3086
d773d855
DB
3087//FIXME chroma
3088 if(rd < *best_rd){
3089 *best_rd= rd;
3090 return 1;
3091 }else{
3092 *block= backup;
3093 return 0;
3094 }
3095}
c97de57c 3096
d773d855
DB
3097/* special case for int[2] args we discard afterwards,
3098 * fixes compilation problem with gcc 2.95 */
3099static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
3100 int p[2] = {p0, p1};
3101 return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
3102}
c97de57c 3103
d773d855
DB
3104static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
3105 const int b_stride= s->b_width << s->block_max_depth;
3106 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
3107 BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
3108 int rd, index, value;
115329f1 3109
d773d855
DB
3110 assert(mb_x>=0 && mb_y>=0);
3111 assert(mb_x<b_stride);
3112 assert(((mb_x|mb_y)&1) == 0);
155ec6ed 3113
d773d855
DB
3114 index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
3115 value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12);
3116 if(s->me_cache[index] == value)
3117 return 0;
3118 s->me_cache[index]= value;
115329f1 3119
d773d855
DB
3120 block->mx= p0;
3121 block->my= p1;
3122 block->ref= ref;
3123 block->type &= ~BLOCK_INTRA;
3124 block[1]= block[b_stride]= block[b_stride+1]= *block;
4f90f33a 3125
d773d855 3126 rd= get_4block_rd(s, mb_x, mb_y, 0);
4f90f33a 3127
d773d855
DB
3128//FIXME chroma
3129 if(rd < *best_rd){
3130 *best_rd= rd;
3131 return 1;
3132 }else{
3133 block[0]= backup[0];
3134 block[1]= backup[1];
3135 block[b_stride]= backup[2];
3136 block[b_stride+1]= backup[3];
3137 return 0;
3138 }
4f90f33a
MN
3139}
3140
d773d855
DB
3141static void iterative_me(SnowContext *s){
3142 int pass, mb_x, mb_y;
3143 const int b_width = s->b_width << s->block_max_depth;
3144 const int b_height= s->b_height << s->block_max_depth;
3145 const int b_stride= b_width;
3146 int color[3];
7d7f57d9 3147
d773d855
DB
3148 {
3149 RangeCoder r = s->c;
3150 uint8_t state[sizeof(s->block_state)];
3151 memcpy(state, s->block_state, sizeof(s->block_state));
3152 for(mb_y= 0; mb_y<s->b_height; mb_y++)
3153 for(mb_x= 0; mb_x<s->b_width; mb_x++)
3154 encode_q_branch(s, 0, mb_x, mb_y);
3155 s->c = r;
3156 memcpy(s->block_state, state, sizeof(s->block_state));
3157 }
115329f1 3158
d773d855
DB
3159 for(pass=0; pass<25; pass++){
3160 int change= 0;
115329f1 3161
d773d855
DB
3162 for(mb_y= 0; mb_y<b_height; mb_y++){
3163 for(mb_x= 0; mb_x<b_width; mb_x++){
3164 int dia_change, i, j, ref;
3165 int best_rd= INT_MAX, ref_rd;
3166 BlockNode backup, ref_b;
3167 const int index= mb_x + mb_y * b_stride;
3168 BlockNode *block= &s->block[index];
3169 BlockNode *tb = mb_y ? &s->block[index-b_stride ] : NULL;
3170 BlockNode *lb = mb_x ? &s->block[index -1] : NULL;
3171 BlockNode *rb = mb_x+1<b_width ? &s->block[index +1] : NULL;
3172 BlockNode *bb = mb_y+1<b_height ? &s->block[index+b_stride ] : NULL;
3173 BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : NULL;
3174 BlockNode *trb= mb_x+1<b_width && mb_y ? &s->block[index-b_stride+1] : NULL;
3175 BlockNode *blb= mb_x && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
3176 BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
3177 const int b_w= (MB_SIZE >> s->block_max_depth);
3178 uint8_t obmc_edged[b_w*2][b_w*2];
115329f1 3179
d773d855
DB
3180 if(pass && (block->type & BLOCK_OPT))
3181 continue;
3182 block->type |= BLOCK_OPT;
115329f1 3183
d773d855 3184 backup= *block;
115329f1 3185
d773d855
DB
3186 if(!s->me_cache_generation)
3187 memset(s->me_cache, 0, sizeof(s->me_cache));
3188 s->me_cache_generation += 1<<22;
791e7b83 3189
d773d855
DB
3190 //FIXME precalculate
3191 {
3192 int x, y;
3193 memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
3194 if(mb_x==0)
3195 for(y=0; y<b_w*2; y++)
3196 memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
3197 if(mb_x==b_stride-1)
3198 for(y=0; y<b_w*2; y++)
3199 memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
3200 if(mb_y==0){
3201 for(x=0; x<b_w*2; x++)
3202 obmc_edged[0][x] += obmc_edged[b_w-1][x];
3203 for(y=1; y<b_w; y++)
3204 memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
3205 }
3206 if(mb_y==b_height-1){
3207 for(x=0; x<b_w*2; x++)
3208 obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
3209 for(y=b_w; y<b_w*2-1; y++)
3210 memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
3211 }
3212 }
383f62fd 3213
d773d855
DB
3214 //skip stuff outside the picture
3215 if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1){
3216 uint8_t *src= s-> input_picture.data[0];
3217 uint8_t *dst= s->current_picture.data[0];
3218 const int stride= s->current_picture.linesize[0];
3219 const int block_w= MB_SIZE >> s->block_max_depth;
3220 const int sx= block_w*mb_x - block_w/2;
3221 const int sy= block_w*mb_y - block_w/2;
3222 const int w= s->plane[0].width;
3223 const int h= s->plane[0].height;
3224 int y;
4e64bead 3225
d773d855
DB
3226 for(y=sy; y<0; y++)
3227 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3228 for(y=h; y<sy+block_w*2; y++)
3229 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3230 if(sx<0){
3231 for(y=sy; y<sy+block_w*2; y++)
3232 memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
3233 }
3234 if(sx+block_w*2 > w){
3235 for(y=sy; y<sy+block_w*2; y++)
3236 memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
3237 }
3238 }
4e64bead 3239
d773d855
DB
3240 // intra(black) = neighbors' contribution to the current block
3241 for(i=0; i<3; i++)
3242 color[i]= get_dc(s, mb_x, mb_y, i);
4e64bead 3243
d773d855
DB
3244 // get previous score (cannot be cached due to OBMC)
3245 if(pass > 0 && (block->type&BLOCK_INTRA)){
3246 int color0[3]= {block->color[0], block->color[1], block->color[2]};
3247 check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
3248 }else
3249 check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);
4e64bead 3250
d773d855
DB
3251 ref_b= *block;
3252 ref_rd= best_rd;
3253 for(ref=0; ref < s->ref_frames; ref++){
3254 int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
3255 if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
3256 continue;
3257 block->ref= ref;
3258 best_rd= INT_MAX;
791e7b83 3259
d773d855
DB
3260 check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd);
3261 check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
3262 if(tb)
3263 check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd);
3264 if(lb)
3265 check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd);
3266 if(rb)
3267 check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd);
3268 if(bb)
3269 check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd);
791e7b83 3270
d773d855
DB
3271 /* fullpel ME */
3272 //FIXME avoid subpel interpolation / round to nearest integer
3273 do{
3274 dia_change=0;
3275 for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
3276 for(j=0; j<i; j++){
3277 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3278 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3279 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3280 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3281 }
3282 }
3283 }while(dia_change);
3284 /* subpel ME */
3285 do{
3286 static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
3287 dia_change=0;
3288 for(i=0; i<8; i++)
3289 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
3290 }while(dia_change);
3291 //FIXME or try the standard 2 pass qpel or similar
115329f1 3292
d773d855
DB
3293 mvr[0][0]= block->mx;
3294 mvr[0][1]= block->my;
3295 if(ref_rd > best_rd){
3296 ref_rd= best_rd;
3297 ref_b= *block;
3298 }
3299 }
3300 best_rd= ref_rd;
3301 *block= ref_b;
d773d855
DB
3302 check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
3303 //FIXME RD style color selection
d773d855
DB
3304 if(!same_block(block, &backup)){
3305 if(tb ) tb ->type &= ~BLOCK_OPT;
3306 if(lb ) lb ->type &= ~BLOCK_OPT;
3307 if(rb ) rb ->type &= ~BLOCK_OPT;
3308 if(bb ) bb ->type &= ~BLOCK_OPT;
3309 if(tlb) tlb->type &= ~BLOCK_OPT;
3310 if(trb) trb->type &= ~BLOCK_OPT;
3311 if(blb) blb->type &= ~BLOCK_OPT;
3312 if(brb) brb->type &= ~BLOCK_OPT;
3313 change ++;
791e7b83
MN
3314 }
3315 }
791e7b83 3316 }
2da16f28 3317 av_log(s->avctx, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
d773d855
DB
3318 if(!change)
3319 break;
791e7b83 3320 }
791e7b83 3321
d773d855
DB
3322 if(s->block_max_depth == 1){
3323 int change= 0;
3324 for(mb_y= 0; mb_y<b_height; mb_y+=2){
3325 for(mb_x= 0; mb_x<b_width; mb_x+=2){
3326 int i;
3327 int best_rd, init_rd;
3328 const int index= mb_x + mb_y * b_stride;
3329 BlockNode *b[4];
ff193e64 3330
d773d855
DB
3331 b[0]= &s->block[index];
3332 b[1]= b[0]+1;
3333 b[2]= b[0]+b_stride;
3334 b[3]= b[2]+1;
3335 if(same_block(b[0], b[1]) &&
3336 same_block(b[0], b[2]) &&
3337 same_block(b[0], b[3]))
3338 continue;
ff193e64 3339
d773d855
DB
3340 if(!s->me_cache_generation)
3341 memset(s->me_cache, 0, sizeof(s->me_cache));
3342 s->me_cache_generation += 1<<22;
ff193e64 3343
d773d855 3344 init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
ff193e64 3345
d773d855
DB
3346 //FIXME more multiref search?
3347 check_4block_inter(s, mb_x, mb_y,
3348 (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
3349 (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);
3350
3351 for(i=0; i<4; i++)
3352 if(!(b[i]->type&BLOCK_INTRA))
3353 check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);
3354
3355 if(init_rd != best_rd)
3356 change++;
3357 }
ff193e64 3358 }
2da16f28 3359 av_log(s->avctx, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
ff193e64
MN
3360 }
3361}
3362
d773d855
DB
3363static void encode_blocks(SnowContext *s, int search){
3364 int x, y;
3365 int w= s->b_width;
3366 int h= s->b_height;
ff193e64 3367
d773d855
DB
3368 if(s->avctx->me_method == ME_ITER && !s->keyframe && search)
3369 iterative_me(s);
ff193e64 3370
d773d855
DB
3371 for(y=0; y<h; y++){
3372 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
3373 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
3374 return;
3375 }
3376 for(x=0; x<w; x++){
3377 if(s->avctx->me_method == ME_ITER || !search)
3378 encode_q_branch2(s, 0, x, y);
3379 else
3380 encode_q_branch (s, 0, x, y);
ff193e64
MN
3381 }
3382 }
3383}
3384
d773d855
DB
3385static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){
3386 const int w= b->width;
3387 const int h= b->height;
3388 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3389 const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS);
3390 int x,y, thres1, thres2;
ff193e64 3391
d773d855
DB
3392 if(s->qlog == LOSSLESS_QLOG){
3393 for(y=0; y<h; y++)
3394 for(x=0; x<w; x++)
3395 dst[x + y*stride]= src[x + y*stride];
3396 return;
3397 }
ff193e64 3398
d773d855
DB
3399 bias= bias ? 0 : (3*qmul)>>3;
3400 thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
3401 thres2= 2*thres1;
ff193e64 3402
d773d855
DB
3403 if(!bias){
3404 for(y=0; y<h; y++){
3405 for(x=0; x<w; x++){
3406 int i= src[x + y*stride];
ff193e64 3407
d773d855
DB
3408 if((unsigned)(i+thres1) > thres2){
3409 if(i>=0){
3410 i<<= QEXPSHIFT;
3411 i/= qmul; //FIXME optimize
3412 dst[x + y*stride]= i;
3413 }else{
3414 i= -i;
3415 i<<= QEXPSHIFT;
3416 i/= qmul; //FIXME optimize
3417 dst[x + y*stride]= -i;
3418 }
3419 }else
3420 dst[x + y*stride]= 0;
3421 }
ff193e64 3422 }
d773d855
DB
3423 }else{
3424 for(y=0; y<h; y++){
3425 for(x=0; x<w; x++){
3426 int i= src[x + y*stride];
ff193e64 3427
d773d855
DB
3428 if((unsigned)(i+thres1) > thres2){
3429 if(i>=0){
3430 i<<= QEXPSHIFT;
3431 i= (i + bias) / qmul; //FIXME optimize
3432 dst[x + y*stride]= i;
3433 }else{
3434 i= -i;
3435 i<<= QEXPSHIFT;
3436 i= (i + bias) / qmul; //FIXME optimize
3437 dst[x + y*stride]= -i;
ff193e64 3438 }
d773d855
DB
3439 }else
3440 dst[x + y*stride]= 0;
ff193e64
MN
3441 }
3442 }
3443 }
ff193e64
MN
3444}
3445
d773d855
DB
3446static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){
3447 const int w= b->width;
3448 const int h= b->height;
3449 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3450 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3451 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3452 int x,y;
4f90f33a 3453
d773d855 3454 if(s->qlog == LOSSLESS_QLOG) return;
4f90f33a 3455
d773d855
DB
3456 for(y=0; y<h; y++){
3457 for(x=0; x<w; x++){
3458 int i= src[x + y*stride];
3459 if(i<0){
3460 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3461 }else if(i>0){
3462 src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT));
3463 }
3464 }
4f90f33a 3465 }
d773d855 3466}
4f90f33a 3467
d773d855
DB
3468static void decorrelate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
3469 const int w= b->width;
3470 const int h= b->height;
3471 int x,y;
2cd34043 3472
d773d855
DB
3473 for(y=h-1; y>=0; y--){
3474 for(x=w-1; x>=0; x--){
3475 int i= x + y*stride;
3476
3477 if(x){
3478 if(use_median){
3479 if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3480 else src[i] -= src[i - 1];
3481 }else{
3482 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3483 else src[i] -= src[i - 1];
3484 }
3485 }else{
3486 if(y) src[i] -= src[i - stride];
3487 }
3488 }
791e7b83 3489 }
d773d855 3490}
51d6a3cf 3491
d773d855
DB
3492static void correlate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
3493 const int w= b->width;
3494 const int h= b->height;
3495 int x,y;
51d6a3cf 3496
d773d855
DB
3497 for(y=0; y<h; y++){
3498 for(x=0; x<w; x++){
3499 int i= x + y*stride;
51d6a3cf 3500
d773d855
DB
3501 if(x){
3502 if(use_median){
3503 if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3504 else src[i] += src[i - 1];
3505 }else{
3506 if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3507 else src[i] += src[i - 1];
3508 }
3509 }else{
3510 if(y) src[i] += src[i - stride];
3511 }
8c36eaaa
LM
3512 }
3513 }
791e7b83
MN
3514}
3515
d773d855
DB
3516static void encode_qlogs(SnowContext *s){
3517 int plane_index, level, orientation;
87246a01 3518
d773d855
DB
3519 for(plane_index=0; plane_index<2; plane_index++){
3520 for(level=0; level<s->spatial_decomposition_count; level++){
3521 for(orientation=level ? 1:0; orientation<4; orientation++){
3522 if(orientation==2) continue;
3523 put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
3524 }
3525 }
3526 }
3527}
5be3a818 3528
d773d855
DB
3529static void encode_header(SnowContext *s){
3530 int plane_index, i;
3531 uint8_t kstate[32];
5be3a818 3532
d773d855 3533 memset(kstate, MID_STATE, sizeof(kstate));
5be3a818 3534
d773d855
DB
3535 put_rac(&s->c, kstate, s->keyframe);
3536 if(s->keyframe || s->always_reset){
3537 reset_contexts(s);
3538 s->last_spatial_decomposition_type=
3539 s->last_qlog=
3540 s->last_qbias=
3541 s->last_mv_scale=
3542 s->last_block_max_depth= 0;
3543 for(plane_index=0; plane_index<2; plane_index++){
3544 Plane *p= &s->plane[plane_index];
3545 p->last_htaps=0;
3546 p->last_diag_mc=0;
3547 memset(p->last_hcoeff, 0, sizeof(p->last_hcoeff));