Move ff_sqrt() to libavutil/intmath.h
[libav.git] / libavcodec / snow.c
CommitLineData
791e7b83
MN
1/*
2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3 *
b78e7197
DB
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
791e7b83
MN
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
b78e7197 9 * version 2.1 of the License, or (at your option) any later version.
791e7b83 10 *
b78e7197 11 * FFmpeg is distributed in the hope that it will be useful,
791e7b83
MN
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
b78e7197 17 * License along with FFmpeg; if not, write to the Free Software
5509bffa 18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
791e7b83
MN
19 */
20
94ca624f 21#include "libavutil/intmath.h"
791e7b83 22#include "avcodec.h"
791e7b83 23#include "dsputil.h"
059715a4 24#include "snow.h"
28869757
MN
25
26#include "rangecoder.h"
199436b9 27#include "mathops.h"
791e7b83
MN
28
29#include "mpegvideo.h"
c26e58e3 30#include "h263.h"
791e7b83
MN
31
32#undef NDEBUG
33#include <assert.h>
34
791e7b83
MN
/**
 * 256-entry lookup table mapping an 8-bit index to one of {-1, 0, 1}.
 * Indices 0, 1 and 255 map to 0, indices 2..127 map to 1 and
 * indices 128..254 map to -1.
 * NOTE(review): presumably indexed with a signed difference truncated to
 * 8 bits (so 128..255 stand for negative values) - confirm at the use sites.
 */
static const int8_t quant3[256]={
 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
};
/**
 * 256-entry lookup table mapping an 8-bit index to one of {-1, 0, 1}.
 * Only index 0 maps to 0; indices 1..127 map to 1 and 128..255 map to -1.
 * NOTE(review): presumably a sign function over a value truncated to 8 bits -
 * confirm at the use sites.
 */
static const int8_t quant3b[256]={
 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
};
538a3841
MN
/**
 * 256-entry lookup table mapping an 8-bit index to one of {-1, 0, 1}.
 * Indices 0 and 1 map to 0; from index 2 on, even indices map to 1 and
 * odd indices map to -1.
 * NOTE(review): the alternating layout suggests the sign is carried in the
 * lowest bit of the index - confirm at the use sites.
 */
static const int8_t quant3bA[256]={
 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
};
791e7b83
MN
/**
 * 256-entry lookup table mapping an 8-bit index to one of {-2,...,2}.
 * Index 0 maps to 0, 1..3 to 1, 4..127 to 2, 128..252 to -2 and
 * 253..255 to -1.
 * NOTE(review): presumably a 5-level quantizer over a signed value
 * truncated to 8 bits - confirm at the use sites.
 */
static const int8_t quant5[256]={
 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
};
/**
 * 256-entry lookup table mapping an 8-bit index to one of {-3,...,3}.
 * The lower half (0..127) holds the non-negative levels 0,1,2,3 with growing
 * run lengths; the upper half (128..255) holds the negative levels, ending
 * in -1 at indices 254 and 255.
 * NOTE(review): presumably a 7-level quantizer over a signed value
 * truncated to 8 bits - confirm at the use sites.
 */
static const int8_t quant7[256]={
 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
};
/**
 * 256-entry lookup table mapping an 8-bit index to one of {-4,...,4}.
 * The lower half (0..127) holds the non-negative levels 0..4 with growing
 * run lengths; the upper half (128..255) holds the negative levels, ending
 * in -1 at indices 254 and 255.
 * NOTE(review): presumably a 9-level quantizer over a signed value
 * truncated to 8 bits - confirm at the use sites.
 */
static const int8_t quant9[256]={
 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
};
/**
 * 256-entry lookup table mapping an 8-bit index to one of {-5,...,5}.
 * The lower half (0..127) holds the non-negative levels 0..5 with growing
 * run lengths; the upper half (128..255) holds the negative levels, ending
 * in -1 at index 255.
 * NOTE(review): presumably an 11-level quantizer over a signed value
 * truncated to 8 bits - confirm at the use sites.
 */
static const int8_t quant11[256]={
 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
-4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
};
/**
 * 256-entry lookup table mapping an 8-bit index to one of {-6,...,6}.
 * The lower half (0..127) holds the non-negative levels 0..6 with growing
 * run lengths; the upper half (128..255) holds the negative levels, ending
 * in -1 at index 255.
 * NOTE(review): presumably a 13-level quantizer over a signed value
 * truncated to 8 bits - confirm at the use sites.
 */
static const int8_t quant13[256]={
 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
};
179
791e7b83
MN
/*
 * 32x32 (obmc32) and 16x16 (obmc16) weight windows, stored row by row.
 * Three alternative window shapes are kept in the source (cubic, linear,
 * cos, per the branch comments); only the "64*linear" branch is compiled.
 * In the compiled (linear) tables every row is a palindrome and row r equals
 * row (size-1-r).
 */
#if 0 //64*cubic
static const uint8_t obmc32[1024]={
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
 0, 0, 4, 4, 8, 8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12, 8, 8, 4, 4, 0, 0,
 0, 0, 4, 8, 8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12, 8, 8, 4, 0, 0,
 0, 4, 4, 8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12, 8, 4, 4, 0,
 0, 4, 4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12, 4, 4, 0,
 0, 4, 8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16, 8, 4, 0,
 0, 4, 8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16, 8, 4, 0,
 0, 4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12, 4, 0,
 0, 4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12, 4, 0,
 0, 4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12, 4, 0,
 0, 4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16, 4, 0,
 0, 8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16, 8, 0,
 0, 4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16, 4, 0,
 1, 8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16, 8, 1,
 1, 8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16, 8, 1,
 0, 4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16, 4, 0,
 0, 8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16, 8, 0,
 0, 4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16, 4, 0,
 0, 4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12, 4, 0,
 0, 4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12, 4, 0,
 0, 4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12, 4, 0,
 0, 4, 8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16, 8, 4, 0,
 0, 4, 8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16, 8, 4, 0,
 0, 4, 4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12, 4, 4, 0,
 0, 4, 4, 8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12, 8, 4, 4, 0,
 0, 0, 4, 8, 8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12, 8, 8, 4, 0, 0,
 0, 0, 4, 4, 8, 8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12, 8, 8, 4, 4, 0, 0,
 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
//error:0.000022
};
static const uint8_t obmc16[256]={
 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
 0, 4, 4, 8, 16, 20, 20, 24, 24, 20, 20, 16, 8, 4, 4, 0,
 0, 4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16, 4, 0,
 0, 8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24, 8, 0,
 0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16, 0,
 0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20, 0,
 4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20, 4,
 4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24, 4,
 4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24, 4,
 4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20, 4,
 0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20, 0,
 0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16, 0,
 0, 8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24, 8, 0,
 0, 4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16, 4, 0,
 0, 4, 4, 8, 16, 20, 20, 24, 24, 20, 20, 16, 8, 4, 4, 0,
 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
//error:0.000033
};
#elif 1 // 64*linear
static const uint8_t obmc32[1024]={
 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
 //error:0.000020
};
static const uint8_t obmc16[256]={
 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
//error:0.000015
};
#else //64*cos
static const uint8_t obmc32[1024]={
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
 0, 0, 4, 4, 4, 8, 8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12, 8, 8, 4, 4, 4, 0, 0,
 0, 0, 4, 4, 8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12, 8, 4, 4, 0, 0,
 0, 0, 4, 8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12, 8, 4, 0, 0,
 0, 4, 4, 8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16, 8, 4, 4, 0,
 0, 4, 8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12, 8, 4, 0,
 0, 4, 8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16, 8, 4, 0,
 0, 4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12, 4, 0,
 0, 4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12, 4, 0,
 0, 4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12, 4, 0,
 0, 4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12, 4, 0,
 0, 4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12, 4, 0,
 0, 4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16, 4, 0,
 1, 4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16, 4, 1,
 1, 4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16, 4, 1,
 0, 4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16, 4, 0,
 0, 4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12, 4, 0,
 0, 4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12, 4, 0,
 0, 4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12, 4, 0,
 0, 4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12, 4, 0,
 0, 4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12, 4, 0,
 0, 4, 8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16, 8, 4, 0,
 0, 4, 8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12, 8, 4, 0,
 0, 4, 4, 8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16, 8, 4, 4, 0,
 0, 0, 4, 8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12, 8, 4, 0, 0,
 0, 0, 4, 4, 8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12, 8, 4, 4, 0, 0,
 0, 0, 4, 4, 4, 8, 8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12, 8, 8, 4, 4, 4, 0, 0,
 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
//error:0.000022
};
static const uint8_t obmc16[256]={
 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
 0, 0, 4, 8, 12, 16, 20, 20, 20, 20, 16, 12, 8, 4, 0, 0,
 0, 4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12, 4, 0,
 0, 8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24, 8, 0,
 0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12, 0,
 4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16, 4,
 4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20, 4,
 0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20, 0,
 0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20, 0,
 4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20, 4,
 4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16, 4,
 0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12, 0,
 0, 8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24, 8, 0,
 0, 4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12, 4, 0,
 0, 0, 4, 8, 12, 16, 20, 20, 20, 20, 16, 12, 8, 4, 0, 0,
 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
//error:0.000022
};
#endif /* 0 */
791e7b83 346
155ec6ed
MN
/**
 * 8x8 weight window (linear *64), stored row by row.
 * The table is symmetric horizontally and vertically and its 64 entries
 * sum to 4096.
 */
//linear *64
static const uint8_t obmc8[64]={
 4, 12, 20, 28, 28, 20, 12, 4,
 12, 36, 60, 84, 84, 60, 36, 12,
 20, 60,100,140,140,100, 60, 20,
 28, 84,140,196,196,140, 84, 28,
 28, 84,140,196,196,140, 84, 28,
 20, 60,100,140,140,100, 60, 20,
 12, 36, 60, 84, 84, 60, 36, 12,
 4, 12, 20, 28, 28, 20, 12, 4,
//error:0.000000
};
359
/**
 * 4x4 weight window (linear *64), stored row by row.
 * The table is symmetric horizontally and vertically and its 16 entries
 * sum to 1024.
 */
//linear *64
static const uint8_t obmc4[16]={
 16, 48, 48, 16,
 48,144,144, 48,
 48,144,144, 48,
 16, 48, 48, 16,
//error:0.000000
};
368
cf2baeb3 369static const uint8_t * const obmc_tab[4]={
155ec6ed
MN
370 obmc32, obmc16, obmc8, obmc4
371};
372
85fc0e75
LM
373static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
374
155ec6ed
MN
/**
 * Per-block record: a 2-component vector, a reference index, a 3-component
 * colour, a type bit mask and a level.
 * NOTE(review): mx/my are presumably the motion vector and color[] the
 * per-plane colour of intra blocks - confirm against the users of this struct.
 */
typedef struct BlockNode{
    int16_t mx;
    int16_t my;
    uint8_t ref;
    uint8_t color[3];
    uint8_t type;       ///< bit mask of the BLOCK_* flags below
//#define TYPE_SPLIT    1
#define BLOCK_INTRA   1
#define BLOCK_OPT     2
//#define TYPE_NOCOLOR  4
    uint8_t level; //FIXME merge into type?
}BlockNode;
387
51d6a3cf
MN
388static const BlockNode null_block= { //FIXME add border maybe
389 .color= {128,128,128},
390 .mx= 0,
391 .my= 0,
8c36eaaa 392 .ref= 0,
51d6a3cf
MN
393 .type= 0,
394 .level= 0,
395};
396
155ec6ed
MN
#define LOG2_MB_SIZE 4                  ///< log2 of MB_SIZE
#define MB_SIZE (1<<LOG2_MB_SIZE)       ///< 16
#define ENCODER_EXTRA_BITS 4
#define HTAPS_MAX 8                     ///< upper bound used to size Plane.hcoeff[] (HTAPS_MAX/2 entries)
155ec6ed 401
a0d1931c
Y
/**
 * Pair of a signed 16-bit x value and an unsigned 16-bit coefficient.
 * NOTE(review): presumably one non-zero coefficient of a subband line together
 * with its x position - confirm where SubBand.x_coeff is filled.
 */
typedef struct x_and_coeff{
    int16_t x;
    uint16_t coeff;
} x_and_coeff;
406
791e7b83
MN
407typedef struct SubBand{
408 int level;
409 int stride;
410 int width;
411 int height;
e6464f8b 412 int qlog; ///< log(qscale)/log[2^(1/6)]
791e7b83 413 DWTELEM *buf;
d593e329 414 IDWTELEM *ibuf;
a0d1931c
Y
415 int buf_x_offset;
416 int buf_y_offset;
417 int stride_line; ///< Stride measured in lines, not pixels.
418 x_and_coeff * x_coeff;
791e7b83
MN
419 struct SubBand *parent;
420 uint8_t state[/*7*2*/ 7 + 512][32];
421}SubBand;
422
423typedef struct Plane{
424 int width;
425 int height;
426 SubBand band[MAX_DECOMPOSITIONS][4];
7d7f57d9
MN
427
428 int htaps;
61d6e445 429 int8_t hcoeff[HTAPS_MAX/2];
7d7f57d9
MN
430 int diag_mc;
431 int fast_mc;
432
433 int last_htaps;
61d6e445 434 int8_t last_hcoeff[HTAPS_MAX/2];
7d7f57d9 435 int last_diag_mc;
791e7b83
MN
436}Plane;
437
438typedef struct SnowContext{
791e7b83
MN
439
440 AVCodecContext *avctx;
28869757 441 RangeCoder c;
791e7b83 442 DSPContext dsp;
51d6a3cf
MN
443 AVFrame new_picture;
444 AVFrame input_picture; ///< new_picture with the internal linesizes
791e7b83 445 AVFrame current_picture;
8c36eaaa 446 AVFrame last_picture[MAX_REF_FRAMES];
5be3a818 447 uint8_t *halfpel_plane[MAX_REF_FRAMES][4][4];
791e7b83
MN
448 AVFrame mconly_picture;
449// uint8_t q_context[16];
450 uint8_t header_state[32];
155ec6ed 451 uint8_t block_state[128 + 32*128];
791e7b83 452 int keyframe;
19aa028d 453 int always_reset;
791e7b83
MN
454 int version;
455 int spatial_decomposition_type;
396a5e68 456 int last_spatial_decomposition_type;
791e7b83
MN
457 int temporal_decomposition_type;
458 int spatial_decomposition_count;
8db13728 459 int last_spatial_decomposition_count;
791e7b83 460 int temporal_decomposition_count;
8c36eaaa
LM
461 int max_ref_frames;
462 int ref_frames;
463 int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
464 uint32_t *ref_scores[MAX_REF_FRAMES];
791e7b83 465 DWTELEM *spatial_dwt_buffer;
d593e329 466 IDWTELEM *spatial_idwt_buffer;
791e7b83
MN
467 int colorspace_type;
468 int chroma_h_shift;
469 int chroma_v_shift;
470 int spatial_scalability;
471 int qlog;
396a5e68 472 int last_qlog;
155ec6ed
MN
473 int lambda;
474 int lambda2;
4e64bead 475 int pass1_rc;
791e7b83 476 int mv_scale;
396a5e68 477 int last_mv_scale;
791e7b83 478 int qbias;
396a5e68 479 int last_qbias;
791e7b83 480#define QBIAS_SHIFT 3
155ec6ed
MN
481 int b_width;
482 int b_height;
483 int block_max_depth;
396a5e68 484 int last_block_max_depth;
791e7b83 485 Plane plane[MAX_PLANES];
155ec6ed 486 BlockNode *block;
51d6a3cf
MN
487#define ME_CACHE_SIZE 1024
488 int me_cache[ME_CACHE_SIZE];
489 int me_cache_generation;
a0d1931c 490 slice_buffer sb;
155ec6ed 491
e6464f8b 492 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX)
bd2b6b33
MR
493
494 uint8_t *scratchbuf;
791e7b83
MN
495}SnowContext;
496
f9e6ebf7 497typedef struct {
d593e329
MN
498 IDWTELEM *b0;
499 IDWTELEM *b1;
500 IDWTELEM *b2;
501 IDWTELEM *b3;
f9e6ebf7 502 int y;
fe5c7e58 503} DWTCompose;
f9e6ebf7 504
a0d1931c
Y
/**
 * Return the storage of line line_num, taking the already assigned pointer
 * when there is one and calling slice_buffer_load_line() otherwise.
 * Both arguments are evaluated more than once, so they must not have side
 * effects.
 */
#define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
//#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
507
d593e329 508static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM * base_buffer)
a0d1931c
Y
509{
510 int i;
115329f1 511
a0d1931c
Y
512 buf->base_buffer = base_buffer;
513 buf->line_count = line_count;
514 buf->line_width = line_width;
515 buf->data_count = max_allocated_lines;
d593e329
MN
516 buf->line = av_mallocz (sizeof(IDWTELEM *) * line_count);
517 buf->data_stack = av_malloc (sizeof(IDWTELEM *) * max_allocated_lines);
115329f1 518
ef3dfbd4 519 for(i = 0; i < max_allocated_lines; i++){
d593e329 520 buf->data_stack[i] = av_malloc (sizeof(IDWTELEM) * line_width);
a0d1931c 521 }
115329f1 522
a0d1931c
Y
523 buf->data_stack_top = max_allocated_lines - 1;
524}
525
d593e329 526static IDWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
a0d1931c 527{
d593e329 528 IDWTELEM * buffer;
115329f1 529
a0d1931c
Y
530 assert(buf->data_stack_top >= 0);
531// assert(!buf->line[line]);
532 if (buf->line[line])
533 return buf->line[line];
115329f1 534
a0d1931c
Y
535 buffer = buf->data_stack[buf->data_stack_top];
536 buf->data_stack_top--;
537 buf->line[line] = buffer;
115329f1 538
a0d1931c
Y
539 return buffer;
540}
541
542static void slice_buffer_release(slice_buffer * buf, int line)
543{
d593e329 544 IDWTELEM * buffer;
a0d1931c
Y
545
546 assert(line >= 0 && line < buf->line_count);
547 assert(buf->line[line]);
548
a0d1931c
Y
549 buffer = buf->line[line];
550 buf->data_stack_top++;
551 buf->data_stack[buf->data_stack_top] = buffer;
552 buf->line[line] = NULL;
a0d1931c
Y
553}
554
555static void slice_buffer_flush(slice_buffer * buf)
556{
557 int i;
ef3dfbd4 558 for(i = 0; i < buf->line_count; i++){
a0d1931c 559 if (buf->line[i])
a0d1931c 560 slice_buffer_release(buf, i);
a0d1931c
Y
561 }
562}
563
564static void slice_buffer_destroy(slice_buffer * buf)
565{
566 int i;
567 slice_buffer_flush(buf);
115329f1 568
ef3dfbd4 569 for(i = buf->data_count - 1; i >= 0; i--){
e7c8206e 570 av_freep(&buf->data_stack[i]);
a0d1931c 571 }
e7c8206e 572 av_freep(&buf->data_stack);
e7c8206e 573 av_freep(&buf->line);
a0d1931c
Y
574}
575
bb270c08 576#ifdef __sgi
2554db9b 577// Avoid a name clash on SGI IRIX
bb270c08 578#undef qexp
2554db9b 579#endif
034aff03 580#define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
c97de57c 581static uint8_t qexp[QROOT];
791e7b83
MN
582
/**
 * Fold v into the range [0, m] by reflecting it at 0 and at m until it lies
 * inside the range.
 *
 * @param v value to fold
 * @param m upper bound of the range; must be > 0 for out-of-range v,
 *          as the loop does not terminate for m == 0 and v != 0
 * @return the reflected value
 */
static inline int mirror(int v, int m){
    // The unsigned comparison is true both for v > m and for v < 0.
    while((unsigned)v > (unsigned)m){
        v=-v;
        if(v<0) v+= 2*m;
    }
    return v;
}
590
28869757 591static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
791e7b83
MN
592 int i;
593
594 if(v){
c26abfa5 595 const int a= FFABS(v);
791e7b83
MN
596 const int e= av_log2(a);
597#if 1
115329f1 598 const int el= FFMIN(e, 10);
28869757 599 put_rac(c, state+0, 0);
791e7b83
MN
600
601 for(i=0; i<el; i++){
28869757 602 put_rac(c, state+1+i, 1); //1..10
791e7b83
MN
603 }
604 for(; i<e; i++){
28869757 605 put_rac(c, state+1+9, 1); //1..10
791e7b83 606 }
28869757 607 put_rac(c, state+1+FFMIN(i,9), 0);
791e7b83
MN
608
609 for(i=e-1; i>=el; i--){
28869757 610 put_rac(c, state+22+9, (a>>i)&1); //22..31
791e7b83
MN
611 }
612 for(; i>=0; i--){
28869757 613 put_rac(c, state+22+i, (a>>i)&1); //22..31
791e7b83
MN
614 }
615
616 if(is_signed)
28869757 617 put_rac(c, state+11 + el, v < 0); //11..21
791e7b83 618#else
115329f1 619
28869757 620 put_rac(c, state+0, 0);
791e7b83
MN
621 if(e<=9){
622 for(i=0; i<e; i++){
28869757 623 put_rac(c, state+1+i, 1); //1..10
791e7b83 624 }
28869757 625 put_rac(c, state+1+i, 0);
791e7b83
MN
626
627 for(i=e-1; i>=0; i--){
28869757 628 put_rac(c, state+22+i, (a>>i)&1); //22..31
791e7b83
MN
629 }
630
631 if(is_signed)
28869757 632 put_rac(c, state+11 + e, v < 0); //11..21
791e7b83
MN
633 }else{
634 for(i=0; i<e; i++){
28869757 635 put_rac(c, state+1+FFMIN(i,9), 1); //1..10
791e7b83 636 }
e1b3d272 637 put_rac(c, state+1+9, 0);
791e7b83
MN
638
639 for(i=e-1; i>=0; i--){
28869757 640 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
791e7b83
MN
641 }
642
643 if(is_signed)
e1b3d272 644 put_rac(c, state+11 + 10, v < 0); //11..21
791e7b83 645 }
96e2fbf2 646#endif /* 1 */
791e7b83 647 }else{
28869757 648 put_rac(c, state+0, 1);
791e7b83
MN
649 }
650}
651
28869757
MN
652static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
653 if(get_rac(c, state+0))
791e7b83
MN
654 return 0;
655 else{
7c2425d2
LM
656 int i, e, a;
657 e= 0;
28869757 658 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
7c2425d2 659 e++;
791e7b83 660 }
7c2425d2 661
791e7b83 662 a= 1;
7c2425d2 663 for(i=e-1; i>=0; i--){
28869757 664 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
791e7b83
MN
665 }
666
3788e661
MN
667 e= -(is_signed && get_rac(c, state+11 + FFMIN(e,10))); //11..21
668 return (a^e)-e;
791e7b83
MN
669 }
670}
671
28869757 672static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
4f4e9633 673 int i;
0635cbfc 674 int r= log2>=0 ? 1<<log2 : 1;
4f4e9633
MN
675
676 assert(v>=0);
0635cbfc
MN
677 assert(log2>=-4);
678
679 while(v >= r){
28869757 680 put_rac(c, state+4+log2, 1);
0635cbfc 681 v -= r;
4f4e9633 682 log2++;
0635cbfc 683 if(log2>0) r+=r;
4f4e9633 684 }
28869757 685 put_rac(c, state+4+log2, 0);
115329f1 686
4f4e9633 687 for(i=log2-1; i>=0; i--){
28869757 688 put_rac(c, state+31-i, (v>>i)&1);
4f4e9633 689 }
4f4e9633
MN
690}
691
28869757 692static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
4f4e9633 693 int i;
0635cbfc 694 int r= log2>=0 ? 1<<log2 : 1;
4f4e9633
MN
695 int v=0;
696
0635cbfc
MN
697 assert(log2>=-4);
698
28869757 699 while(get_rac(c, state+4+log2)){
0635cbfc 700 v+= r;
4f4e9633 701 log2++;
0635cbfc 702 if(log2>0) r+=r;
4f4e9633 703 }
115329f1 704
4f4e9633 705 for(i=log2-1; i>=0; i--){
28869757 706 v+= get_rac(c, state+31-i)<<i;
4f4e9633
MN
707 }
708
709 return v;
710}
711
9d14ffbc
LB
712static av_always_inline void
713lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
714 int dst_step, int src_step, int ref_step,
715 int width, int mul, int add, int shift,
716 int highpass, int inverse){
791e7b83
MN
717 const int mirror_left= !highpass;
718 const int mirror_right= (width&1) ^ highpass;
719 const int w= (width>>1) - 1 + (highpass & width);
720 int i;
721
722#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
723 if(mirror_left){
724 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
725 dst += dst_step;
726 src += src_step;
727 }
115329f1 728
791e7b83 729 for(i=0; i<w; i++){
9d14ffbc
LB
730 dst[i*dst_step] =
731 LIFT(src[i*src_step],
732 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
733 inverse);
791e7b83 734 }
115329f1 735
791e7b83 736 if(mirror_right){
9d14ffbc
LB
737 dst[w*dst_step] =
738 LIFT(src[w*src_step],
739 ((mul*2*ref[w*ref_step]+add)>>shift),
740 inverse);
791e7b83
MN
741 }
742}
743
9d14ffbc
LB
744static av_always_inline void
745inv_lift(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
746 int dst_step, int src_step, int ref_step,
747 int width, int mul, int add, int shift,
748 int highpass, int inverse){
d593e329
MN
749 const int mirror_left= !highpass;
750 const int mirror_right= (width&1) ^ highpass;
751 const int w= (width>>1) - 1 + (highpass & width);
752 int i;
753
754#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
755 if(mirror_left){
756 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
757 dst += dst_step;
758 src += src_step;
759 }
760
761 for(i=0; i<w; i++){
9d14ffbc
LB
762 dst[i*dst_step] =
763 LIFT(src[i*src_step],
764 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
765 inverse);
d593e329
MN
766 }
767
768 if(mirror_right){
9d14ffbc
LB
769 dst[w*dst_step] =
770 LIFT(src[w*src_step],
771 ((mul*2*ref[w*ref_step]+add)>>shift),
772 inverse);
d593e329
MN
773 }
774}
775
059715a4 776#ifndef liftS
9d14ffbc
LB
777static av_always_inline void
778liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
779 int dst_step, int src_step, int ref_step,
780 int width, int mul, int add, int shift,
781 int highpass, int inverse){
f5a71928
MN
782 const int mirror_left= !highpass;
783 const int mirror_right= (width&1) ^ highpass;
784 const int w= (width>>1) - 1 + (highpass & width);
785 int i;
786
787 assert(shift == 4);
9d14ffbc
LB
788#define LIFTS(src, ref, inv) \
789 ((inv) ? \
790 (src) + (((ref) + 4*(src))>>shift): \
791 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
f5a71928
MN
792 if(mirror_left){
793 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
794 dst += dst_step;
795 src += src_step;
796 }
115329f1 797
f5a71928 798 for(i=0; i<w; i++){
9d14ffbc
LB
799 dst[i*dst_step] =
800 LIFTS(src[i*src_step],
801 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
802 inverse);
f5a71928 803 }
115329f1 804
f5a71928 805 if(mirror_right){
9d14ffbc
LB
806 dst[w*dst_step] =
807 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
f5a71928
MN
808 }
809}
9d14ffbc
LB
810static av_always_inline void
811inv_liftS(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
812 int dst_step, int src_step, int ref_step,
813 int width, int mul, int add, int shift,
814 int highpass, int inverse){
d593e329
MN
815 const int mirror_left= !highpass;
816 const int mirror_right= (width&1) ^ highpass;
817 const int w= (width>>1) - 1 + (highpass & width);
818 int i;
819
820 assert(shift == 4);
9d14ffbc
LB
821#define LIFTS(src, ref, inv) \
822 ((inv) ? \
823 (src) + (((ref) + 4*(src))>>shift): \
824 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
d593e329
MN
825 if(mirror_left){
826 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
827 dst += dst_step;
828 src += src_step;
829 }
830
831 for(i=0; i<w; i++){
9d14ffbc
LB
832 dst[i*dst_step] =
833 LIFTS(src[i*src_step],
834 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
835 inverse);
d593e329
MN
836 }
837
838 if(mirror_right){
9d14ffbc
LB
839 dst[w*dst_step] =
840 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
d593e329
MN
841 }
842}
301df480 843#endif /* ! liftS */
f5a71928 844
aa25a462
RFI
845static void horizontal_decompose53i(DWTELEM *b, int width){
846 DWTELEM temp[width];
791e7b83 847 const int width2= width>>1;
62ab0b78 848 int x;
791e7b83
MN
849 const int w2= (width+1)>>1;
850
851 for(x=0; x<width2; x++){
852 temp[x ]= b[2*x ];
853 temp[x+w2]= b[2*x + 1];
854 }
855 if(width&1)
856 temp[x ]= b[2*x ];
857#if 0
62ab0b78
AJ
858 {
859 int A1,A2,A3,A4;
791e7b83
MN
860 A2= temp[1 ];
861 A4= temp[0 ];
862 A1= temp[0+width2];
863 A1 -= (A2 + A4)>>1;
864 A4 += (A1 + 1)>>1;
865 b[0+width2] = A1;
866 b[0 ] = A4;
867 for(x=1; x+1<width2; x+=2){
868 A3= temp[x+width2];
869 A4= temp[x+1 ];
870 A3 -= (A2 + A4)>>1;
871 A2 += (A1 + A3 + 2)>>2;
872 b[x+width2] = A3;
873 b[x ] = A2;
874
875 A1= temp[x+1+width2];
876 A2= temp[x+2 ];
877 A1 -= (A2 + A4)>>1;
878 A4 += (A1 + A3 + 2)>>2;
879 b[x+1+width2] = A1;
880 b[x+1 ] = A4;
881 }
882 A3= temp[width-1];
883 A3 -= A2;
884 A2 += (A1 + A3 + 2)>>2;
885 b[width -1] = A3;
886 b[width2-1] = A2;
62ab0b78 887 }
115329f1 888#else
791e7b83
MN
889 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
890 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0);
301df480 891#endif /* 0 */
791e7b83
MN
892}
893
aa25a462 894static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 895 int i;
115329f1 896
791e7b83
MN
897 for(i=0; i<width; i++){
898 b1[i] -= (b0[i] + b2[i])>>1;
899 }
900}
901
aa25a462 902static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 903 int i;
115329f1 904
791e7b83
MN
905 for(i=0; i<width; i++){
906 b1[i] += (b0[i] + b2[i] + 2)>>2;
907 }
908}
909
aa25a462 910static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
39c61bbb 911 int y;
791e7b83
MN
912 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
913 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride;
115329f1 914
791e7b83
MN
915 for(y=-2; y<height; y+=2){
916 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
917 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
918
13705b69
MN
919 if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
920 if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
115329f1 921
13705b69
MN
922 if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
923 if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
115329f1 924
791e7b83
MN
925 b0=b2;
926 b1=b3;
927 }
928}
929
aa25a462
RFI
930static void horizontal_decompose97i(DWTELEM *b, int width){
931 DWTELEM temp[width];
791e7b83
MN
932 const int w2= (width+1)>>1;
933
ce611a27
MN
934 lift (temp+w2, b +1, b , 1, 2, 2, width, W_AM, W_AO, W_AS, 1, 1);
935 liftS(temp , b , temp+w2, 1, 2, 1, width, W_BM, W_BO, W_BS, 0, 0);
ff06e067 936 lift (b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
791e7b83
MN
937 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
938}
939
940
aa25a462 941static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 942 int i;
115329f1 943
791e7b83
MN
944 for(i=0; i<width; i++){
945 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
946 }
947}
948
aa25a462 949static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 950 int i;
115329f1 951
791e7b83 952 for(i=0; i<width; i++){
791e7b83 953 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
791e7b83
MN
954 }
955}
956
aa25a462 957static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 958 int i;
115329f1 959
791e7b83 960 for(i=0; i<width; i++){
f5a71928 961#ifdef liftS
791e7b83 962 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
f5a71928 963#else
ce611a27 964 b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + W_BO*5 + (5<<27)) / (5*16) - (1<<23);
f5a71928 965#endif
791e7b83
MN
966 }
967}
968
aa25a462 969static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 970 int i;
115329f1 971
791e7b83
MN
972 for(i=0; i<width; i++){
973 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
974 }
975}
976
aa25a462 977static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
39c61bbb 978 int y;
791e7b83
MN
979 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
980 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride;
981 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
982 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
115329f1 983
791e7b83
MN
984 for(y=-4; y<height; y+=2){
985 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
986 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
987
13705b69
MN
988 if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
989 if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
115329f1 990
13705b69
MN
991 if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
992 if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
993 if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
994 if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
791e7b83 995
791e7b83
MN
996 b0=b2;
997 b1=b3;
998 b2=b4;
999 b3=b5;
1000 }
1001}
1002
aa25a462 1003void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
791e7b83 1004 int level;
115329f1 1005
46c281e8
MN
1006 for(level=0; level<decomposition_count; level++){
1007 switch(type){
d4b287ed
LM
1008 case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1009 case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
791e7b83
MN
1010 }
1011 }
1012}
1013
d593e329
MN
1014static void horizontal_compose53i(IDWTELEM *b, int width){
1015 IDWTELEM temp[width];
791e7b83
MN
1016 const int width2= width>>1;
1017 const int w2= (width+1)>>1;
62ab0b78 1018 int x;
791e7b83 1019
791e7b83 1020 for(x=0; x<width2; x++){
2a84512a
MN
1021 temp[2*x ]= b[x ];
1022 temp[2*x + 1]= b[x+w2];
791e7b83
MN
1023 }
1024 if(width&1)
2a84512a
MN
1025 temp[2*x ]= b[x ];
1026
1027 b[0] = temp[0] - ((temp[1]+1)>>1);
1028 for(x=2; x<width-1; x+=2){
1029 b[x ] = temp[x ] - ((temp[x-1] + temp[x+1]+2)>>2);
1030 b[x-1] = temp[x-1] + ((b [x-2] + b [x ]+1)>>1);
1031 }
1032 if(width&1){
1033 b[x ] = temp[x ] - ((temp[x-1]+1)>>1);
1034 b[x-1] = temp[x-1] + ((b [x-2] + b [x ]+1)>>1);
1035 }else
1036 b[x-1] = temp[x-1] + b[x-2];
791e7b83
MN
1037}
1038
d593e329 1039static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
791e7b83 1040 int i;
115329f1 1041
791e7b83
MN
1042 for(i=0; i<width; i++){
1043 b1[i] += (b0[i] + b2[i])>>1;
1044 }
1045}
1046
d593e329 1047static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
791e7b83 1048 int i;
115329f1 1049
791e7b83
MN
1050 for(i=0; i<width; i++){
1051 b1[i] -= (b0[i] + b2[i] + 2)>>2;
1052 }
1053}
1054
fe5c7e58 1055static void spatial_compose53i_buffered_init(DWTCompose *cs, slice_buffer * sb, int height, int stride_line){
a0d1931c
Y
1056 cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1057 cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line);
1058 cs->y = -1;
1059}
1060
fe5c7e58 1061static void spatial_compose53i_init(DWTCompose *cs, IDWTELEM *buffer, int height, int stride){
f9e6ebf7
LM
1062 cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1063 cs->b1 = buffer + mirror(-1 , height-1)*stride;
1064 cs->y = -1;
1065}
1066
fe5c7e58 1067static void spatial_compose53i_dy_buffered(DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line){
a0d1931c 1068 int y= cs->y;
115329f1 1069
d593e329
MN
1070 IDWTELEM *b0= cs->b0;
1071 IDWTELEM *b1= cs->b1;
1072 IDWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
1073 IDWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
a0d1931c 1074
233a8b3d
MN
1075 if(y+1<(unsigned)height && y<(unsigned)height){
1076 int x;
1077
1078 for(x=0; x<width; x++){
1079 b2[x] -= (b1[x] + b3[x] + 2)>>2;
1080 b1[x] += (b0[x] + b2[x])>>1;
1081 }
1082 }else{
13705b69
MN
1083 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1084 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
233a8b3d 1085 }
a0d1931c 1086
13705b69
MN
1087 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1088 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
a0d1931c
Y
1089
1090 cs->b0 = b2;
1091 cs->b1 = b3;
1092 cs->y += 2;
1093}
1094
fe5c7e58 1095static void spatial_compose53i_dy(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride){
f9e6ebf7 1096 int y= cs->y;
d593e329
MN
1097 IDWTELEM *b0= cs->b0;
1098 IDWTELEM *b1= cs->b1;
1099 IDWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1100 IDWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
791e7b83 1101
13705b69
MN
1102 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1103 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
791e7b83 1104
13705b69
MN
1105 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1106 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
791e7b83 1107
f9e6ebf7
LM
1108 cs->b0 = b2;
1109 cs->b1 = b3;
1110 cs->y += 2;
1111}
1112
1918057c 1113static void av_unused spatial_compose53i(IDWTELEM *buffer, int width, int height, int stride){
fe5c7e58 1114 DWTCompose cs;
9a3bb2b8
MN
1115 spatial_compose53i_init(&cs, buffer, height, stride);
1116 while(cs.y <= height)
1117 spatial_compose53i_dy(&cs, buffer, width, height, stride);
1118}
1119
791e7b83 1120
d593e329
MN
1121void ff_snow_horizontal_compose97i(IDWTELEM *b, int width){
1122 IDWTELEM temp[width];
791e7b83
MN
1123 const int w2= (width+1)>>1;
1124
2c807762
MN
1125#if 0 //maybe more understadable but slower
1126 inv_lift (temp , b , b +w2, 2, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
1127 inv_lift (temp+1 , b +w2, temp , 2, 1, 2, width, W_CM, W_CO, W_CS, 1, 1);
1128
1129 inv_liftS(b , temp , temp+1 , 2, 2, 2, width, W_BM, W_BO, W_BS, 0, 1);
1130 inv_lift (b+1 , temp+1 , b , 2, 2, 2, width, W_AM, W_AO, W_AS, 1, 0);
1131#else
1132 int x;
1133 temp[0] = b[0] - ((3*b[w2]+2)>>2);
1134 for(x=1; x<(width>>1); x++){
1135 temp[2*x ] = b[x ] - ((3*(b [x+w2-1] + b[x+w2])+4)>>3);
1136 temp[2*x-1] = b[x+w2-1] - temp[2*x-2] - temp[2*x];
1137 }
1138 if(width&1){
1139 temp[2*x ] = b[x ] - ((3*b [x+w2-1]+2)>>2);
1140 temp[2*x-1] = b[x+w2-1] - temp[2*x-2] - temp[2*x];
1141 }else
1142 temp[2*x-1] = b[x+w2-1] - 2*temp[2*x-2];
1143
1144 b[0] = temp[0] + ((2*temp[0] + temp[1]+4)>>3);
1145 for(x=2; x<width-1; x+=2){
1146 b[x ] = temp[x ] + ((4*temp[x ] + temp[x-1] + temp[x+1]+8)>>4);
1147 b[x-1] = temp[x-1] + ((3*(b [x-2] + b [x ] ))>>1);
1148 }
1149 if(width&1){
1150 b[x ] = temp[x ] + ((2*temp[x ] + temp[x-1]+4)>>3);
1151 b[x-1] = temp[x-1] + ((3*(b [x-2] + b [x ] ))>>1);
1152 }else
1153 b[x-1] = temp[x-1] + 3*b [x-2];
1154#endif
791e7b83
MN
1155}
1156
d593e329 1157static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
791e7b83 1158 int i;
115329f1 1159
791e7b83
MN
1160 for(i=0; i<width; i++){
1161 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1162 }
1163}
1164
d593e329 1165static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
791e7b83 1166 int i;
115329f1 1167
791e7b83 1168 for(i=0; i<width; i++){
791e7b83 1169 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
791e7b83
MN
1170 }
1171}
1172
d593e329 1173static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
791e7b83 1174 int i;
115329f1 1175
791e7b83 1176 for(i=0; i<width; i++){
f5a71928 1177#ifdef liftS
791e7b83 1178 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
f5a71928
MN
1179#else
1180 b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
1181#endif
791e7b83
MN
1182 }
1183}
1184
d593e329 1185static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
791e7b83 1186 int i;
115329f1 1187
791e7b83
MN
1188 for(i=0; i<width; i++){
1189 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1190 }
1191}
1192
d593e329 1193void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
565a45ac 1194 int i;
115329f1 1195
565a45ac 1196 for(i=0; i<width; i++){
565a45ac 1197 b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
565a45ac 1198 b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
f5a71928 1199#ifdef liftS
565a45ac 1200 b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
f5a71928
MN
1201#else
1202 b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
1203#endif
565a45ac
MN
1204 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1205 }
1206}
1207
fe5c7e58 1208static void spatial_compose97i_buffered_init(DWTCompose *cs, slice_buffer * sb, int height, int stride_line){
a0d1931c
Y
1209 cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1210 cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line);
1211 cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1212 cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
1213 cs->y = -3;
1214}
1215
fe5c7e58 1216static void spatial_compose97i_init(DWTCompose *cs, IDWTELEM *buffer, int height, int stride){
f9e6ebf7
LM
1217 cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1218 cs->b1 = buffer + mirror(-3 , height-1)*stride;
1219 cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1220 cs->b3 = buffer + mirror(-3+2, height-1)*stride;
1221 cs->y = -3;
1222}
791e7b83 1223
fe5c7e58 1224static void spatial_compose97i_dy_buffered(DSPContext *dsp, DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line){
a0d1931c 1225 int y = cs->y;
115329f1 1226
d593e329
MN
1227 IDWTELEM *b0= cs->b0;
1228 IDWTELEM *b1= cs->b1;
1229 IDWTELEM *b2= cs->b2;
1230 IDWTELEM *b3= cs->b3;
1231 IDWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
1232 IDWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
115329f1 1233
565a45ac 1234 if(y>0 && y+4<height){
059715a4 1235 dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
565a45ac 1236 }else{
13705b69
MN
1237 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1238 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1239 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1240 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
565a45ac 1241 }
a0d1931c 1242
a4873f7d
LT
1243 if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width);
1244 if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width);
a0d1931c
Y
1245
1246 cs->b0=b2;
1247 cs->b1=b3;
1248 cs->b2=b4;
1249 cs->b3=b5;
1250 cs->y += 2;
1251}
1252
fe5c7e58 1253static void spatial_compose97i_dy(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride){
f9e6ebf7 1254 int y = cs->y;
d593e329
MN
1255 IDWTELEM *b0= cs->b0;
1256 IDWTELEM *b1= cs->b1;
1257 IDWTELEM *b2= cs->b2;
1258 IDWTELEM *b3= cs->b3;
1259 IDWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1260 IDWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
791e7b83 1261
a4873f7d
LT
1262 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1263 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1264 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1265 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
791e7b83 1266
a4873f7d
LT
1267 if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width);
1268 if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width);
f9e6ebf7
LM
1269
1270 cs->b0=b2;
1271 cs->b1=b3;
1272 cs->b2=b4;
1273 cs->b3=b5;
1274 cs->y += 2;
1275}
1276
1918057c 1277static void av_unused spatial_compose97i(IDWTELEM *buffer, int width, int height, int stride){
fe5c7e58 1278 DWTCompose cs;
9a3bb2b8
MN
1279 spatial_compose97i_init(&cs, buffer, height, stride);
1280 while(cs.y <= height)
1281 spatial_compose97i_dy(&cs, buffer, width, height, stride);
1282}
1283
fe5c7e58 1284static void ff_spatial_idwt_buffered_init(DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
a0d1931c
Y
1285 int level;
1286 for(level=decomposition_count-1; level>=0; level--){
1287 switch(type){
d4b287ed
LM
1288 case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1289 case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
a0d1931c
Y
1290 }
1291 }
1292}
1293
fe5c7e58 1294static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, DWTCompose *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
a0d1931c
Y
1295 const int support = type==1 ? 3 : 5;
1296 int level;
1297 if(type==2) return;
1298
1299 for(level=decomposition_count-1; level>=0; level--){
1300 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1301 switch(type){
d4b287ed 1302 case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
94ae6788 1303 break;
d4b287ed 1304 case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
94ae6788 1305 break;
a0d1931c
Y
1306 }
1307 }
1308 }
1309}
1310
a0d1931c 1311static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
791e7b83
MN
1312 const int w= b->width;
1313 const int h= b->height;
1314 int x,y;
115329f1 1315
4536c8e6
JM
1316 int run, runs;
1317 x_and_coeff *xc= b->x_coeff;
1318 x_and_coeff *prev_xc= NULL;
1319 x_and_coeff *prev2_xc= xc;
1320 x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
1321 x_and_coeff *prev_parent_xc= parent_xc;
1322
1323 runs= get_symbol2(&s->c, b->state[30], 0);
1324 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1325 else run= INT_MAX;
1326
1327 for(y=0; y<h; y++){
1328 int v=0;
1329 int lt=0, t=0, rt=0;
1330
1331 if(y && prev_xc->x == 0){
1332 rt= prev_xc->coeff;
1333 }
1334 for(x=0; x<w; x++){
1335 int p=0;
1336 const int l= v;
1337
1338 lt= t; t= rt;
1339
1340 if(y){
1341 if(prev_xc->x <= x)
1342 prev_xc++;
1343 if(prev_xc->x == x + 1)
1344 rt= prev_xc->coeff;
1345 else
1346 rt=0;
0cea8a03 1347 }
4536c8e6
JM
1348 if(parent_xc){
1349 if(x>>1 > parent_xc->x){
1350 parent_xc++;
ff765159 1351 }
4536c8e6
JM
1352 if(x>>1 == parent_xc->x){
1353 p= parent_xc->coeff;
78486403 1354 }
4536c8e6
JM
1355 }
1356 if(/*ll|*/l|lt|t|rt|p){
1357 int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
6b2f6646 1358
4536c8e6
JM
1359 v=get_rac(&s->c, &b->state[0][context]);
1360 if(v){
1361 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
1362 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
115329f1 1363
4536c8e6
JM
1364 xc->x=x;
1365 (xc++)->coeff= v;
1366 }
1367 }else{
1368 if(!run){
1369 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1370 else run= INT_MAX;
1371 v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
1372 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
1373
1374 xc->x=x;
1375 (xc++)->coeff= v;
791e7b83 1376 }else{
4536c8e6
JM
1377 int max_run;
1378 run--;
1379 v=0;
1380
1381 if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
1382 else max_run= FFMIN(run, w-x-1);
1383 if(parent_xc)
1384 max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
1385 x+= max_run;
1386 run-= max_run;
791e7b83 1387 }
7b49c309 1388 }
4536c8e6
JM
1389 }
1390 (xc++)->x= w+1; //end marker
1391 prev_xc= prev2_xc;
1392 prev2_xc= xc;
115329f1 1393
4536c8e6
JM
1394 if(parent_xc){
1395 if(y&1){
1396 while(parent_xc->x != parent->width+1)
cbb1d2b1 1397 parent_xc++;
4536c8e6
JM
1398 parent_xc++;
1399 prev_parent_xc= parent_xc;
1400 }else{
1401 parent_xc= prev_parent_xc;
791e7b83
MN
1402 }
1403 }
4536c8e6 1404 }
a0d1931c 1405
4536c8e6 1406 (xc++)->x= w+1; //end marker
a0d1931c
Y
1407}
1408
1409static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1410 const int w= b->width;
62ab0b78 1411 int y;
f66e4f5f 1412 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
c97de57c 1413 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
a0d1931c
Y
1414 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1415 int new_index = 0;
115329f1 1416
d593e329 1417 if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){
a0d1931c
Y
1418 qadd= 0;
1419 qmul= 1<<QEXPSHIFT;
1420 }
1421
1422 /* If we are on the second or later slice, restore our index. */
1423 if (start_y != 0)
1424 new_index = save_state[0];
1425
115329f1 1426
a0d1931c
Y
1427 for(y=start_y; y<h; y++){
1428 int x = 0;
1429 int v;
d593e329
MN
1430 IDWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1431 memset(line, 0, b->width*sizeof(IDWTELEM));
a0d1931c
Y
1432 v = b->x_coeff[new_index].coeff;
1433 x = b->x_coeff[new_index++].x;
ef3dfbd4 1434 while(x < w){
538a3841
MN
1435 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1436 register int u= -(v&1);
1437 line[x] = (t^u) - u;
1438
a0d1931c
Y
1439 v = b->x_coeff[new_index].coeff;
1440 x = b->x_coeff[new_index++].x;
1441 }
791e7b83 1442 }
115329f1 1443
a0d1931c
Y
1444 /* Save our variables for the next slice. */
1445 save_state[0] = new_index;
115329f1 1446
a0d1931c 1447 return;
791e7b83
MN
1448}
1449
396a5e68 1450static void reset_contexts(SnowContext *s){ //FIXME better initial contexts
791e7b83
MN
1451 int plane_index, level, orientation;
1452
19aa028d 1453 for(plane_index=0; plane_index<3; plane_index++){
4f90f33a 1454 for(level=0; level<MAX_DECOMPOSITIONS; level++){
791e7b83 1455 for(orientation=level ? 1:0; orientation<4; orientation++){
28869757 1456 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
791e7b83
MN
1457 }
1458 }
1459 }
28869757
MN
1460 memset(s->header_state, MID_STATE, sizeof(s->header_state));
1461 memset(s->block_state, MID_STATE, sizeof(s->block_state));
155ec6ed
MN
1462}
1463
1464static int alloc_blocks(SnowContext *s){
1465 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1466 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
115329f1 1467
155ec6ed
MN
1468 s->b_width = w;
1469 s->b_height= h;
115329f1 1470
dc7f45a0 1471 av_free(s->block);
155ec6ed
MN
1472 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
1473 return 0;
1474}
1475
28869757
MN
1476static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1477 uint8_t *bytestream= d->bytestream;
1478 uint8_t *bytestream_start= d->bytestream_start;
155ec6ed 1479 *d= *s;
28869757
MN
1480 d->bytestream= bytestream;
1481 d->bytestream_start= bytestream_start;
155ec6ed
MN
1482}
1483
8c36eaaa 1484static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
155ec6ed
MN
1485 const int w= s->b_width << s->block_max_depth;
1486 const int rem_depth= s->block_max_depth - level;
1487 const int index= (x + y*w) << rem_depth;
1488 const int block_w= 1<<rem_depth;
1489 BlockNode block;
1490 int i,j;
115329f1 1491
155ec6ed
MN
1492 block.color[0]= l;
1493 block.color[1]= cb;
1494 block.color[2]= cr;
1495 block.mx= mx;
1496 block.my= my;
8c36eaaa 1497 block.ref= ref;
155ec6ed
MN
1498 block.type= type;
1499 block.level= level;
1500
1501 for(j=0; j<block_w; j++){
1502 for(i=0; i<block_w; i++){
1503 s->block[index + i + j*w]= block;
1504 }
1505 }
1506}
1507
1508static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
1509 const int offset[3]= {
1510 y*c-> stride + x,
1511 ((y*c->uvstride + x)>>1),
1512 ((y*c->uvstride + x)>>1),
1513 };
1514 int i;
1515 for(i=0; i<3; i++){
1516 c->src[0][i]= src [i];
1517 c->ref[0][i]= ref [i] + offset[i];
1518 }
1519 assert(!ref_index);
1520}
1521
85fc0e75 1522static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
aadcc5ce 1523 const BlockNode *left, const BlockNode *top, const BlockNode *tr){
85fc0e75
LM
1524 if(s->ref_frames == 1){
1525 *mx = mid_pred(left->mx, top->mx, tr->mx);
1526 *my = mid_pred(left->my, top->my, tr->my);
1527 }else{
1528 const int *scale = scale_mv_ref[ref];
6884c36c
PI
1529 *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8,
1530 (top ->mx * scale[top ->ref] + 128) >>8,
1531 (tr ->mx * scale[tr ->ref] + 128) >>8);
1532 *my = mid_pred((left->my * scale[left->ref] + 128) >>8,
1533 (top ->my * scale[top ->ref] + 128) >>8,
1534 (tr ->my * scale[tr ->ref] + 128) >>8);
85fc0e75
LM
1535 }
1536}
1537
d773d855
DB
1538static av_always_inline int same_block(BlockNode *a, BlockNode *b){
1539 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
1540 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
1541 }else{
1542 return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
1543 }
1544}
155ec6ed 1545
d773d855
DB
1546static void decode_q_branch(SnowContext *s, int level, int x, int y){
1547 const int w= s->b_width << s->block_max_depth;
155ec6ed
MN
1548 const int rem_depth= s->block_max_depth - level;
1549 const int index= (x + y*w) << rem_depth;
155ec6ed 1550 int trx= (x+1)<<rem_depth;
aadcc5ce
PI
1551 const BlockNode *left = x ? &s->block[index-1] : &null_block;
1552 const BlockNode *top = y ? &s->block[index-w] : &null_block;
aadcc5ce
PI
1553 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
1554 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
155ec6ed
MN
1555 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1556
155ec6ed 1557 if(s->keyframe){
d773d855
DB
1558 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
1559 return;
155ec6ed
MN
1560 }
1561
28869757 1562 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
1e90b34f 1563 int type, mx, my;
155ec6ed
MN
1564 int l = left->color[0];
1565 int cb= left->color[1];
1566 int cr= left->color[2];
8c36eaaa
LM
1567 int ref = 0;
1568 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
c26abfa5
DB
1569 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
1570 int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
115329f1 1571
28869757 1572 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
155ec6ed
MN
1573
1574 if(type){
85fc0e75 1575 pred_mv(s, &mx, &my, 0, left, top, tr);
155ec6ed
MN
1576 l += get_symbol(&s->c, &s->block_state[32], 1);
1577 cb+= get_symbol(&s->c, &s->block_state[64], 1);
1578 cr+= get_symbol(&s->c, &s->block_state[96], 1);
1579 }else{
8c36eaaa
LM
1580 if(s->ref_frames > 1)
1581 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
85fc0e75 1582 pred_mv(s, &mx, &my, ref, left, top, tr);
8c36eaaa
LM
1583 mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
1584 my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
155ec6ed 1585 }
8c36eaaa 1586 set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
155ec6ed
MN
1587 }else{
1588 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
1589 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
1590 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
1591 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
1592 }
1593}
1594
d773d855 1595static void decode_blocks(SnowContext *s){
155ec6ed
MN
1596 int x, y;
1597 int w= s->b_width;
1598 int h= s->b_height;
1599
1600 for(y=0; y<h; y++){
1601 for(x=0; x<w; x++){
1602 decode_q_branch(s, 0, x, y);
1603 }
1604 }
791e7b83
MN
1605}
1606
7d7f57d9 1607static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
7942269b 1608 static const uint8_t weight[64]={
a68ca08e
MN
1609 8,7,6,5,4,3,2,1,
1610 7,7,0,0,0,0,0,1,
1611 6,0,6,0,0,0,2,0,
1612 5,0,0,5,0,3,0,0,
1613 4,0,0,0,4,0,0,0,
1614 3,0,0,5,0,3,0,0,
1615 2,0,6,0,0,0,2,0,
1616 1,7,0,0,0,0,0,1,
1617 };
1618
7942269b 1619 static const uint8_t brane[256]={
a68ca08e
MN
1620 0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
1621 0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
1622 0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
1623 0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
1624 0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
1625 0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
1626 0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
1627 0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
1628 0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
1629 0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
1630 0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
1631 0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
1632 0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
1633 0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
1634 0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
1635 0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
1636 };
1637
7942269b 1638 static const uint8_t needs[16]={
a68ca08e
MN
1639 0,1,0,0,
1640 2,4,2,0,
1641 0,1,0,0,
1642 15
1643 };
1644
1645 int x, y, b, r, l;
61d6e445
MN
1646 int16_t tmpIt [64*(32+HTAPS_MAX)];
1647 uint8_t tmp2t[3][stride*(32+HTAPS_MAX)];
a68ca08e
MN
1648 int16_t *tmpI= tmpIt;
1649 uint8_t *tmp2= tmp2t[0];
f0a70840 1650 const uint8_t *hpel[11];
a68ca08e
MN
1651 assert(dx<16 && dy<16);
1652 r= brane[dx + 16*dy]&15;
1653 l= brane[dx + 16*dy]>>4;
1654
1655 b= needs[l] | needs[r];
7d7f57d9
MN
1656 if(p && !p->diag_mc)
1657 b= 15;
a68ca08e
MN
1658
1659 if(b&5){
61d6e445 1660 for(y=0; y < b_h+HTAPS_MAX-1; y++){
65dc0f53 1661 for(x=0; x < b_w; x++){
61d6e445
MN
1662 int a_1=src[x + HTAPS_MAX/2-4];
1663 int a0= src[x + HTAPS_MAX/2-3];
1664 int a1= src[x + HTAPS_MAX/2-2];
1665 int a2= src[x + HTAPS_MAX/2-1];
1666 int a3= src[x + HTAPS_MAX/2+0];
1667 int a4= src[x + HTAPS_MAX/2+1];
1668 int a5= src[x + HTAPS_MAX/2+2];
1669 int a6= src[x + HTAPS_MAX/2+3];
7d7f57d9
MN
1670 int am=0;
1671 if(!p || p->fast_mc){
1672 am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
1673 tmpI[x]= am;
1674 am= (am+16)>>5;
1675 }else{
1676 am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
1677 tmpI[x]= am;
1678 am= (am+32)>>6;
1679 }
791e7b83 1680
65dc0f53
MN
1681 if(am&(~255)) am= ~(am>>31);
1682 tmp2[x]= am;
1683 }
1684 tmpI+= 64;
1685 tmp2+= stride;
1686 src += stride;
791e7b83 1687 }
65dc0f53 1688 src -= stride*y;
a68ca08e 1689 }
61d6e445 1690 src += HTAPS_MAX/2 - 1;
a68ca08e 1691 tmp2= tmp2t[1];
115329f1 1692
a68ca08e 1693 if(b&2){
65dc0f53
MN
1694 for(y=0; y < b_h; y++){
1695 for(x=0; x < b_w+1; x++){
61d6e445
MN
1696 int a_1=src[x + (HTAPS_MAX/2-4)*stride];
1697 int a0= src[x + (HTAPS_MAX/2-3)*stride];
1698 int a1= src[x + (HTAPS_MAX/2-2)*stride];
1699 int a2= src[x + (HTAPS_MAX/2-1)*stride];
1700 int a3= src[x + (HTAPS_MAX/2+0)*stride];
1701 int a4= src[x + (HTAPS_MAX/2+1)*stride];
1702 int a5= src[x + (HTAPS_MAX/2+2)*stride];
1703 int a6= src[x + (HTAPS_MAX/2+3)*stride];
7d7f57d9
MN
1704 int am=0;
1705 if(!p || p->fast_mc)
1706 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
1707 else
1708 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
791e7b83 1709
65dc0f53
MN
1710 if(am&(~255)) am= ~(am>>31);
1711 tmp2[x]= am;
1712 }
1713 src += stride;
1714 tmp2+= stride;
a68ca08e 1715 }
65dc0f53 1716 src -= stride*y;
a68ca08e 1717 }
61d6e445 1718 src += stride*(HTAPS_MAX/2 - 1);
a68ca08e
MN
1719 tmp2= tmp2t[2];
1720 tmpI= tmpIt;
1721 if(b&4){
1722 for(y=0; y < b_h; y++){
1723 for(x=0; x < b_w; x++){
61d6e445
MN
1724 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
1725 int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
1726 int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
1727 int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
1728 int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
1729 int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
1730 int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
1731 int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
7d7f57d9
MN
1732 int am=0;
1733 if(!p || p->fast_mc)
1734 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
1735 else
1736 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
a68ca08e
MN
1737 if(am&(~255)) am= ~(am>>31);
1738 tmp2[x]= am;
1739 }
1740 tmpI+= 64;
1741 tmp2+= stride;
1742 }
1743 }
115329f1 1744
a68ca08e 1745 hpel[ 0]= src;
61d6e445 1746 hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1);
a68ca08e
MN
1747 hpel[ 2]= src + 1;
1748
1749 hpel[ 4]= tmp2t[1];
1750 hpel[ 5]= tmp2t[2];
1751 hpel[ 6]= tmp2t[1] + 1;
1752
1753 hpel[ 8]= src + stride;
1754 hpel[ 9]= hpel[1] + stride;
1755 hpel[10]= hpel[8] + 1;
1756
1757 if(b==15){
f0a70840
BC
1758 const uint8_t *src1= hpel[dx/8 + dy/8*4 ];
1759 const uint8_t *src2= hpel[dx/8 + dy/8*4+1];
1760 const uint8_t *src3= hpel[dx/8 + dy/8*4+4];
1761 const uint8_t *src4= hpel[dx/8 + dy/8*4+5];
a68ca08e
MN
1762 dx&=7;
1763 dy&=7;
1764 for(y=0; y < b_h; y++){
1765 for(x=0; x < b_w; x++){
1766 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
1767 (8-dx)* dy *src3[x] + dx* dy *src4[x]+32)>>6;
1768 }
1769 src1+=stride;
1770 src2+=stride;
1771 src3+=stride;
1772 src4+=stride;
1773 dst +=stride;
1774 }
1775 }else{
f0a70840
BC
1776 const uint8_t *src1= hpel[l];
1777 const uint8_t *src2= hpel[r];
a68ca08e
MN
1778 int a= weight[((dx&7) + (8*(dy&7)))];
1779 int b= 8-a;
1780 for(y=0; y < b_h; y++){
1781 for(x=0; x < b_w; x++){
1782 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
1783 }
1784 src1+=stride;
1785 src2+=stride;
1786 dst +=stride;
791e7b83
MN
1787 }
1788 }
1789}
1790
791e7b83 1791#define mca(dx,dy,b_w)\
bad700e3 1792static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
61d6e445 1793 uint8_t tmp[stride*(b_w+HTAPS_MAX-1)];\
791e7b83 1794 assert(h==b_w);\
61d6e445 1795 mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, tmp, stride, b_w, b_w, dx, dy);\
791e7b83
MN
1796}
1797
1798mca( 0, 0,16)
1799mca( 8, 0,16)
1800mca( 0, 8,16)
1801mca( 8, 8,16)
d92b5807
MN
1802mca( 0, 0,8)
1803mca( 8, 0,8)
1804mca( 0, 8,8)
1805mca( 8, 8,8)
791e7b83 1806
8c36eaaa 1807static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
51d6a3cf 1808 if(block->type & BLOCK_INTRA){
ff158dc9 1809 int x, y;
2692ceab
MN
1810 const int color = block->color[plane_index];
1811 const int color4= color*0x01010101;
1015631b
LM
1812 if(b_w==32){
1813 for(y=0; y < b_h; y++){
1814 *(uint32_t*)&dst[0 + y*stride]= color4;
1815 *(uint32_t*)&dst[4 + y*stride]= color4;
1816 *(uint32_t*)&dst[8 + y*stride]= color4;
1817 *(uint32_t*)&dst[12+ y*stride]= color4;
1818 *(uint32_t*)&dst[16+ y*stride]= color4;
1819 *(uint32_t*)&dst[20+ y*stride]= color4;
1820 *(uint32_t*)&dst[24+ y*stride]= color4;
1821 *(uint32_t*)&dst[28+ y*stride]= color4;
1822 }
1823 }else if(b_w==16){
2692ceab
MN
1824 for(y=0; y < b_h; y++){
1825 *(uint32_t*)&dst[0 + y*stride]= color4;
1826 *(uint32_t*)&dst[4 + y*stride]= color4;
1827 *(uint32_t*)&dst[8 + y*stride]= color4;
1828 *(uint32_t*)&dst[12+ y*stride]= color4;
1829 }
1830 }else if(b_w==8){
1831 for(y=0; y < b_h; y++){
1832 *(uint32_t*)&dst[0 + y*stride]= color4;
1833 *(uint32_t*)&dst[4 + y*stride]= color4;
1834 }
1835 }else if(b_w==4){
1836 for(y=0; y < b_h; y++){
1837 *(uint32_t*)&dst[0 + y*stride]= color4;
1838 }
1839 }else{
1840 for(y=0; y < b_h; y++){
1841 for(x=0; x < b_w; x++){
1842 dst[x + y*stride]= color;
1843 }
ff158dc9
MN
1844 }
1845 }
1846 }else{
8c36eaaa 1847 uint8_t *src= s->last_picture[block->ref].data[plane_index];
ff158dc9
MN
1848 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
1849 int mx= block->mx*scale;
1850 int my= block->my*scale;
ec697587
MN
1851 const int dx= mx&15;
1852 const int dy= my&15;
80e44bc3 1853 const int tab_index= 3 - (b_w>>2) + (b_w>>4);
61d6e445
MN
1854 sx += (mx>>4) - (HTAPS_MAX/2-1);
1855 sy += (my>>4) - (HTAPS_MAX/2-1);
ff158dc9 1856 src += sx + sy*stride;
61d6e445
MN
1857 if( (unsigned)sx >= w - b_w - (HTAPS_MAX-2)
1858 || (unsigned)sy >= h - b_h - (HTAPS_MAX-2)){
1859 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h);
ff158dc9
MN
1860 src= tmp + MB_SIZE;
1861 }
87f20c2f
MN
1862// assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
1863// assert(!(b_w&(b_w-1)));
2692ceab 1864 assert(b_w>1 && b_h>1);
89438028 1865 assert((tab_index>=0 && tab_index<4) || b_w==32);
7d7f57d9
MN
1866 if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc )
1867 mc_block(&s->plane[plane_index], dst, src, tmp, stride, b_w, b_h, dx, dy);
1015631b
LM
1868 else if(b_w==32){
1869 int y;
1870 for(y=0; y<b_h; y+=16){
7d7f57d9
MN
1871 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
1872 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
1015631b
LM
1873 }
1874 }else if(b_w==b_h)
7d7f57d9 1875 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
2692ceab 1876 else if(b_w==2*b_h){
7d7f57d9
MN
1877 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 3 + 3*stride,stride);
1878 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
2692ceab
MN
1879 }else{
1880 assert(2*b_w==b_h);
7d7f57d9
MN
1881 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 3 + 3*stride ,stride);
1882 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
2692ceab 1883 }
ff158dc9
MN
1884 }
1885}
1886
9dd6c804 1887void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
059715a4
RE
1888 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
1889 int y, x;
d593e329 1890 IDWTELEM * dst;
059715a4 1891 for(y=0; y<b_h; y++){
19032450 1892 //FIXME ugly misuse of obmc_stride
9dd6c804
PI
1893 const uint8_t *obmc1= obmc + y*obmc_stride;
1894 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
1895 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
1896 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
059715a4
RE
1897 dst = slice_buffer_get_line(sb, src_y + y);
1898 for(x=0; x<b_w; x++){
1899 int v= obmc1[x] * block[3][x + y*src_stride]
1900 +obmc2[x] * block[2][x + y*src_stride]
1901 +obmc3[x] * block[1][x + y*src_stride]
1902 +obmc4[x] * block[0][x + y*src_stride];
1903
1904 v <<= 8 - LOG2_OBMC_MAX;
1905 if(FRAC_BITS != 8){
059715a4
RE
1906 v >>= 8 - FRAC_BITS;
1907 }
1908 if(add){
1909 v += dst[x + src_x];
1910 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
1911 if(v&(~255)) v= ~(v>>31);
1912 dst8[x + y*src_stride] = v;
1913 }else{
1914 dst[x + src_x] -= v;
1915 }
1916 }
1917 }
1918}
1919
e6464f8b 1920//FIXME name cleanup (b_w, block_w, b_width stuff)
d593e329 1921static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
a0d1931c
Y
1922 const int b_width = s->b_width << s->block_max_depth;
1923 const int b_height= s->b_height << s->block_max_depth;
1924 const int b_stride= b_width;
1925 BlockNode *lt= &s->block[b_x + b_y*b_stride];
1926 BlockNode *rt= lt+1;
1927 BlockNode *lb= lt+b_stride;
1928 BlockNode *rb= lb+1;
115329f1 1929 uint8_t *block[4];
cc884a35 1930 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
bd2b6b33 1931 uint8_t *tmp = s->scratchbuf;
cc884a35 1932 uint8_t *ptmp;
a0d1931c
Y
1933 int x,y;
1934
1935 if(b_x<0){
1936 lt= rt;
1937 lb= rb;
1938 }else if(b_x + 1 >= b_width){
1939 rt= lt;
1940 rb= lb;
1941 }
1942 if(b_y<0){
1943 lt= lb;
1944 rt= rb;
1945 }else if(b_y + 1 >= b_height){
1946 lb= lt;
1947 rb= rt;
1948 }
115329f1 1949
e6464f8b 1950 if(src_x<0){ //FIXME merge with prev & always round internal width up to *16
a0d1931c
Y
1951 obmc -= src_x;
1952 b_w += src_x;
f7e89c73 1953 if(!sliced && !offset_dst)
1015631b 1954 dst -= src_x;
ff158dc9
MN
1955 src_x=0;
1956 }else if(src_x + b_w > w){
1957 b_w = w - src_x;
1958 }
1959 if(src_y<0){
1960 obmc -= src_y*obmc_stride;
1961 b_h += src_y;
f7e89c73 1962 if(!sliced && !offset_dst)
1015631b 1963 dst -= src_y*dst_stride;
ff158dc9
MN
1964 src_y=0;
1965 }else if(src_y + b_h> h){
1966 b_h = h - src_y;
791e7b83 1967 }
115329f1 1968
ff158dc9 1969 if(b_w<=0 || b_h<=0) return;
155ec6ed 1970
94ae6788
DB
1971 assert(src_stride > 2*MB_SIZE + 5);
1972
f7e89c73 1973 if(!sliced && offset_dst)
1015631b 1974 dst += src_x + src_y*dst_stride;
715a97f0 1975 dst8+= src_x + src_y*src_stride;
ff158dc9
MN
1976// src += src_x + src_y*src_stride;
1977
cc884a35
MN
1978 ptmp= tmp + 3*tmp_step;
1979 block[0]= ptmp;
1980 ptmp+=tmp_step;
8c36eaaa 1981 pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
ff158dc9
MN
1982
1983 if(same_block(lt, rt)){
1984 block[1]= block[0];
791e7b83 1985 }else{
cc884a35
MN
1986 block[1]= ptmp;
1987 ptmp+=tmp_step;
8c36eaaa 1988 pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
ff158dc9 1989 }
115329f1 1990
ff158dc9
MN
1991 if(same_block(lt, lb)){
1992 block[2]= block[0];
1993 }else if(same_block(rt, lb)){
1994 block[2]= block[1];
1995 }else{
cc884a35
MN
1996 block[2]= ptmp;
1997 ptmp+=tmp_step;
8c36eaaa 1998 pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
ff158dc9 1999 }
791e7b83 2000
ff158dc9
MN
2001 if(same_block(lt, rb) ){
2002 block[3]= block[0];
2003 }else if(same_block(rt, rb)){
2004 block[3]= block[1];
2005 }else if(same_block(lb, rb)){
2006 block[3]= block[2];
2007 }else{
cc884a35 2008 block[3]= ptmp;
8c36eaaa 2009 pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
ff158dc9
MN
2010 }
2011#if 0
2012 for(y=0; y<b_h; y++){
2013 for(x=0; x<b_w; x++){
2014 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2015 if(add) dst[x + y*dst_stride] += v;
2016 else dst[x + y*dst_stride] -= v;
2017 }
2018 }
2019 for(y=0; y<b_h; y++){
2020 uint8_t *obmc2= obmc + (obmc_stride>>1);
2021 for(x=0; x<b_w; x++){
2022 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2023 if(add) dst[x + y*dst_stride] += v;
2024 else dst[x + y*dst_stride] -= v;
2025 }
2026 }
2027 for(y=0; y<b_h; y++){
2028 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2029 for(x=0; x<b_w; x++){
2030 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2031 if(add) dst[x + y*dst_stride] += v;
2032 else dst[x + y*dst_stride] -= v;
2033 }
2034 }
2035 for(y=0; y<b_h; y++){
2036 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2037 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2038 for(x=0; x<b_w; x++){
2039 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2040 if(add) dst[x + y*dst_stride] += v;
2041 else dst[x + y*dst_stride] -= v;
2042 }
2043 }
2044#else
f7e89c73 2045 if(sliced){
f7e89c73 2046 s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
e73e4e75
DB
2047 }else{
2048 for(y=0; y<b_h; y++){
2049 //FIXME ugly misuse of obmc_stride
2050 const uint8_t *obmc1= obmc + y*obmc_stride;
2051 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2052 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2053 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2054 for(x=0; x<b_w; x++){
2055 int v= obmc1[x] * block[3][x + y*src_stride]
2056 +obmc2[x] * block[2][x + y*src_stride]
2057 +obmc3[x] * block[1][x + y*src_stride]
2058 +obmc4[x] * block[0][x + y*src_stride];
2059
2060 v <<= 8 - LOG2_OBMC_MAX;
2061 if(FRAC_BITS != 8){
2062 v >>= 8 - FRAC_BITS;
2063 }
2064 if(add){
2065 v += dst[x + y*dst_stride];
2066 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2067 if(v&(~255)) v= ~(v>>31);
2068 dst8[x + y*src_stride] = v;
2069 }else{
2070 dst[x + y*dst_stride] -= v;
2071 }
715a97f0 2072 }
791e7b83
MN
2073 }
2074 }
96e2fbf2 2075#endif /* 0 */
791e7b83
MN
2076}
2077
d593e329 2078static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){
a0d1931c
Y
2079 Plane *p= &s->plane[plane_index];
2080 const int mb_w= s->b_width << s->block_max_depth;
2081 const int mb_h= s->b_height << s->block_max_depth;
2082 int x, y, mb_x;
2083 int block_size = MB_SIZE >> s->block_max_depth;
2084 int block_w = plane_index ? block_size/2 : block_size;
2085 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2086 int obmc_stride= plane_index ? block_size : 2*block_size;
2087 int ref_stride= s->current_picture.linesize[plane_index];
a0d1931c
Y
2088 uint8_t *dst8= s->current_picture.data[plane_index];
2089 int w= p->width;
2090 int h= p->height;
115329f1 2091
a0d1931c
Y
2092 if(s->keyframe || (s->avctx->debug&512)){
2093 if(mb_y==mb_h)
2094 return;
2095
2096 if(add){
ef3dfbd4 2097 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
a0d1931c 2098// DWTELEM * line = slice_buffer_get_line(sb, y);
d593e329 2099 IDWTELEM * line = sb->line[y];
ef3dfbd4 2100 for(x=0; x<w; x++){
a0d1931c
Y
2101// int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2102 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2103 v >>= FRAC_BITS;
2104 if(v&(~255)) v= ~(v>>31);
2105 dst8[x + y*ref_stride]= v;
2106 }
2107 }
2108 }else{
ef3dfbd4 2109 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
a0d1931c 2110// DWTELEM * line = slice_buffer_get_line(sb, y);
d593e329 2111 IDWTELEM * line = sb->line[y];
ef3dfbd4 2112 for(x=0; x<w; x++){
a0d1931c
Y
2113 line[x] -= 128 << FRAC_BITS;
2114// buf[x + y*w]-= 128<<FRAC_BITS;
2115 }
2116 }
2117 }
2118
2119 return;
2120 }
115329f1 2121
e73e4e75
DB
2122 for(mb_x=0; mb_x<=mb_w; mb_x++){
2123 add_yblock(s, 1, sb, old_buffer, dst8, obmc,
2124 block_w*mb_x - block_w/2,
2125 block_w*mb_y - block_w/2,
2126 block_w, block_w,
2127 w, h,
2128 w, ref_stride, obmc_stride,
2129 mb_x - 1, mb_y - 1,
2130 add, 0, plane_index);
2131 }
a0d1931c
Y
2132}
2133
d593e329 2134static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){
791e7b83 2135 Plane *p= &s->plane[plane_index];
155ec6ed
MN
2136 const int mb_w= s->b_width << s->block_max_depth;
2137 const int mb_h= s->b_height << s->block_max_depth;
f9e6ebf7 2138 int x, y, mb_x;
155ec6ed
MN
2139 int block_size = MB_SIZE >> s->block_max_depth;
2140 int block_w = plane_index ? block_size/2 : block_size;
ff158dc9 2141 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
51d6a3cf 2142 const int obmc_stride= plane_index ? block_size : 2*block_size;
715a97f0 2143 int ref_stride= s->current_picture.linesize[plane_index];
715a97f0 2144 uint8_t *dst8= s->current_picture.data[plane_index];
791e7b83
MN
2145 int w= p->width;
2146 int h= p->height;
115329f1 2147
ff158dc9 2148 if(s->keyframe || (s->avctx->debug&512)){
f9e6ebf7
LM
2149 if(mb_y==mb_h)
2150 return;
2151
715a97f0 2152 if(add){
86e59cc0 2153 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
715a97f0
MN
2154 for(x=0; x<w; x++){
2155 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2156 v >>= FRAC_BITS;
2157 if(v&(~255)) v= ~(v>>31);
2158 dst8[x + y*ref_stride]= v;
2159 }
2160 }
2161 }else{
86e59cc0 2162 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
715a97f0
MN
2163 for(x=0; x<w; x++){
2164 buf[x + y*w]-= 128<<FRAC_BITS;
2165 }
ff158dc9 2166 }
791e7b83 2167 }
ff158dc9
MN
2168
2169 return;
791e7b83 2170 }
115329f1 2171
94ae6788
DB
2172 for(mb_x=0; mb_x<=mb_w; mb_x++){
2173 add_yblock(s, 0, NULL, buf, dst8, obmc,
2174 block_w*mb_x - block_w/2,
2175 block_w*mb_y - block_w/2,
2176 block_w, block_w,
2177 w, h,
2178 w, ref_stride, obmc_stride,
2179 mb_x - 1, mb_y - 1,
2180 add, 1, plane_index);
2181 }
f9e6ebf7
LM
2182}
2183
d593e329 2184static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){
f9e6ebf7
LM
2185 const int mb_h= s->b_height << s->block_max_depth;
2186 int mb_y;
2187 for(mb_y=0; mb_y<=mb_h; mb_y++)
2188 predict_slice(s, buf, plane_index, add, mb_y);
791e7b83
MN
2189}
2190
d773d855
DB
2191static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){
2192 const int w= b->width;
2193 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
2194 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
2195 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
2196 int x,y;
51d6a3cf 2197
d773d855 2198 if(s->qlog == LOSSLESS_QLOG) return;
51d6a3cf 2199
d773d855
DB
2200 for(y=start_y; y<end_y; y++){
2201// DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
2202 IDWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
2203 for(x=0; x<w; x++){
2204 int i= line[x];
2205 if(i<0){
2206 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
2207 }else if(i>0){
2208 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT));
2209 }
2210 }
2211 }
2212}
51d6a3cf 2213
d773d855
DB
2214static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
2215 const int w= b->width;
2216 int x,y;
51d6a3cf 2217
d773d855
DB
2218 IDWTELEM * line=0; // silence silly "could be used without having been initialized" warning
2219 IDWTELEM * prev;
51d6a3cf 2220
d773d855
DB
2221 if (start_y != 0)
2222 line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
2223
2224 for(y=start_y; y<end_y; y++){
2225 prev = line;
2226// line = slice_buffer_get_line_from_address(sb, src + (y * stride));
2227 line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
2228 for(x=0; x<w; x++){
2229 if(x){
2230 if(use_median){
2231 if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
2232 else line[x] += line[x - 1];
2233 }else{
2234 if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
2235 else line[x] += line[x - 1];
2236 }
2237 }else{
2238 if(y) line[x] += prev[x];
51d6a3cf
MN
2239 }
2240 }
2241 }
51d6a3cf
MN
2242}
2243
d773d855
DB
2244static void decode_qlogs(SnowContext *s){
2245 int plane_index, level, orientation;
b104969f 2246
d773d855
DB
2247 for(plane_index=0; plane_index<3; plane_index++){
2248 for(level=0; level<s->spatial_decomposition_count; level++){
2249 for(orientation=level ? 1:0; orientation<4; orientation++){
2250 int q;
2251 if (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
2252 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
2253 else q= get_symbol(&s->c, s->header_state, 1);
2254 s->plane[plane_index].band[level][orientation].qlog= q;
2255 }
2256 }
85fc0e75 2257 }
b104969f
LM
2258}
2259
d773d855
DB
/*
 * Read an unsigned header symbol into the local `tmp`, validate it with
 * `check` (an expression over tmp) and store it in dst. On failure an
 * error is logged and the enclosing function returns -1.
 * Requires `s` and `tmp` in scope. The expansion ends with a complete
 * statement, so callers may omit the trailing semicolon.
 */
#define GET_S(dst, check)                                                   \
    tmp= get_symbol(&s->c, s->header_state, 0);                             \
    if(!(check)){                                                           \
        av_log(s->avctx, AV_LOG_ERROR, "Error " #dst " is %d\n", tmp);      \
        return -1;                                                          \
    }                                                                       \
    dst= tmp;
1015631b 2267
d773d855
DB
2268static int decode_header(SnowContext *s){
2269 int plane_index, tmp;
2270 uint8_t kstate[32];
1015631b 2271
d773d855
DB
2272 memset(kstate, MID_STATE, sizeof(kstate));
2273
2274 s->keyframe= get_rac(&s->c, kstate);
2275 if(s->keyframe || s->always_reset){
2276 reset_contexts(s);
2277 s->spatial_decomposition_type=
2278 s->qlog=
2279 s->qbias=
2280 s->mv_scale=
2281 s->block_max_depth= 0;
1015631b 2282 }
d773d855
DB
2283 if(s->keyframe){
2284 GET_S(s->version, tmp <= 0U)
2285 s->always_reset= get_rac(&s->c, s->header_state);
2286 s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
2287 s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
2288 GET_S(s->spatial_decomposition_count, 0 < tmp && tmp <= MAX_DECOMPOSITIONS)
2289 s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
2290 s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
2291 s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
2292 s->spatial_scalability= get_rac(&s->c, s->header_state);
2293// s->rate_scalability= get_rac(&s->c, s->header_state);
2294 GET_S(s->max_ref_frames, tmp < (unsigned)MAX_REF_FRAMES)
2295 s->max_ref_frames++;
51d6a3cf 2296
d773d855 2297 decode_qlogs(s);
561a18d3
RE
2298 }
2299
d773d855
DB
2300 if(!s->keyframe){
2301 if(get_rac(&s->c, s->header_state)){
2302 for(plane_index=0; plane_index<2; plane_index++){
2303 int htaps, i, sum=0;
2304 Plane *p= &s->plane[plane_index];
2305 p->diag_mc= get_rac(&s->c, s->header_state);
2306 htaps= get_symbol(&s->c, s->header_state, 0)*2 + 2;
2307 if((unsigned)htaps > HTAPS_MAX || htaps==0)
2308 return -1;
2309 p->htaps= htaps;
2310 for(i= htaps/2; i; i--){
2311 p->hcoeff[i]= get_symbol(&s->c, s->header_state, 0) * (1-2*(i&1));
2312 sum += p->hcoeff[i];
2313 }
2314 p->hcoeff[0]= 32-sum;
2315 }
2316 s->plane[2].diag_mc= s->plane[1].diag_mc;
2317 s->plane[2].htaps = s->plane[1].htaps;
2318 memcpy(s->plane[2].hcoeff, s->plane[1].hcoeff, sizeof(s->plane[1].hcoeff));
1015631b 2319 }
d773d855
DB
2320 if(get_rac(&s->c, s->header_state)){
2321 GET_S(s->spatial_decomposition_count, 0 < tmp && tmp <= MAX_DECOMPOSITIONS)
2322 decode_qlogs(s);
b104969f
LM
2323 }
2324 }
b104969f 2325
d773d855
DB
2326 s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1);
2327 if(s->spatial_decomposition_type > 1U){
2328 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
2329 return -1;
2330 }
2331 if(FFMIN(s->avctx-> width>>s->chroma_h_shift,
2332 s->avctx->height>>s->chroma_v_shift) >> (s->spatial_decomposition_count-1) <= 0){
2333 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_count %d too large for size", s->spatial_decomposition_count);
2334 return -1;
2335 }
b104969f 2336
d773d855
DB
2337 s->qlog += get_symbol(&s->c, s->header_state, 1);
2338 s->mv_scale += get_symbol(&s->c, s->header_state, 1);
2339 s->qbias += get_symbol(&s->c, s->header_state, 1);
2340 s->block_max_depth+= get_symbol(&s->c, s->header_state, 1);
2341 if(s->block_max_depth > 1 || s->block_max_depth < 0){
2342 av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
2343 s->block_max_depth= 0;
2344 return -1;
51d6a3cf
MN
2345 }
2346
d773d855
DB
2347 return 0;
2348}
b104969f 2349
d773d855
DB
2350static void init_qexp(void){
2351 int i;
2352 double v=128;
2353
2354 for(i=0; i<QROOT; i++){
2355 qexp[i]= lrintf(v);
2356 v *= pow(2, 1.0 / QROOT);
b104969f 2357 }
51d6a3cf
MN
2358}
2359
d773d855
DB
2360static av_cold int common_init(AVCodecContext *avctx){
2361 SnowContext *s = avctx->priv_data;
2362 int width, height;
2363 int i, j;
51d6a3cf 2364
d773d855
DB
2365 s->avctx= avctx;
2366 s->max_ref_frames=1; //just make sure its not an invalid value in case of no initial keyframe
51d6a3cf 2367
d773d855 2368 dsputil_init(&s->dsp, avctx);
51d6a3cf 2369
d773d855
DB
2370#define mcf(dx,dy)\
2371 s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\
2372 s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
2373 s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
2374 s->dsp.put_qpel_pixels_tab [1][dy+dx/4]=\
2375 s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
2376 s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
51d6a3cf 2377
d773d855
DB
2378 mcf( 0, 0)
2379 mcf( 4, 0)
2380 mcf( 8, 0)
2381 mcf(12, 0)
2382 mcf( 0, 4)
2383 mcf( 4, 4)
2384 mcf( 8, 4)
2385 mcf(12, 4)
2386 mcf( 0, 8)
2387 mcf( 4, 8)
2388 mcf( 8, 8)
2389 mcf(12, 8)
2390 mcf( 0,12)
2391 mcf( 4,12)
2392 mcf( 8,12)
2393 mcf(12,12)
51d6a3cf 2394
d773d855
DB
2395#define mcfh(dx,dy)\
2396 s->dsp.put_pixels_tab [0][dy/4+dx/8]=\
2397 s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
2398 mc_block_hpel ## dx ## dy ## 16;\
2399 s->dsp.put_pixels_tab [1][dy/4+dx/8]=\
2400 s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
2401 mc_block_hpel ## dx ## dy ## 8;
51d6a3cf 2402
d773d855
DB
2403 mcfh(0, 0)
2404 mcfh(8, 0)
2405 mcfh(0, 8)
2406 mcfh(8, 8)
52137f2f 2407
d773d855
DB
2408 if(!qexp[0])
2409 init_qexp();
b104969f 2410
d773d855 2411// dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
b104969f 2412
d773d855
DB
2413 width= s->avctx->width;
2414 height= s->avctx->height;
b104969f 2415
d773d855
DB
2416 s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM));
2417 s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this does not belong here
b104969f 2418
d773d855
DB
2419 for(i=0; i<MAX_REF_FRAMES; i++)
2420 for(j=0; j<MAX_REF_FRAMES; j++)
2421 scale_mv_ref[i][j] = 256*(i+1)/(j+1);
b104969f 2422
d773d855
DB
2423 s->avctx->get_buffer(s->avctx, &s->mconly_picture);
2424 s->scratchbuf = av_malloc(s->mconly_picture.linesize[0]*7*MB_SIZE);
2425
2426 return 0;
b104969f
LM
2427}
2428
d773d855
DB
2429static int common_init_after_header(AVCodecContext *avctx){
2430 SnowContext *s = avctx->priv_data;
2431 int plane_index, level, orientation;
51d6a3cf 2432
d773d855
DB
2433 for(plane_index=0; plane_index<3; plane_index++){
2434 int w= s->avctx->width;
2435 int h= s->avctx->height;
2436
2437 if(plane_index){
2438 w>>= s->chroma_h_shift;
2439 h>>= s->chroma_v_shift;
2440 }
2441 s->plane[plane_index].width = w;
2442 s->plane[plane_index].height= h;
2443
2444 for(level=s->spatial_decomposition_count-1; level>=0; level--){
2445 for(orientation=level ? 1 : 0; orientation<4; orientation++){
2446 SubBand *b= &s->plane[plane_index].band[level][orientation];
2447
2448 b->buf= s->spatial_dwt_buffer;
2449 b->level= level;
2450 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
2451 b->width = (w + !(orientation&1))>>1;
2452 b->height= (h + !(orientation>1))>>1;
2453
2454 b->stride_line = 1 << (s->spatial_decomposition_count - level);
2455 b->buf_x_offset = 0;
2456 b->buf_y_offset = 0;
2457
2458 if(orientation&1){
2459 b->buf += (w+1)>>1;
2460 b->buf_x_offset = (w+1)>>1;
2461 }
2462 if(orientation>1){
2463 b->buf += b->stride>>1;
2464 b->buf_y_offset = b->stride_line >> 1;
2465 }
2466 b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);
2467
2468 if(level)
2469 b->parent= &s->plane[plane_index].band[level-1][orientation];
2470 //FIXME avoid this realloc
2471 av_freep(&b->x_coeff);
2472 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
2473 }
2474 w= (w+1)>>1;
2475 h= (h+1)>>1;
2476 }
2477 }
2478
2479 return 0;
2480}
2481
/* Compile-time switch: the code inside the QUANTIZE2==1 conditional is only built when this is 1. */
#define QUANTIZE2 0

#if QUANTIZE2==1
/* Step used by the QUANTIZE2 code for its score grid and for striding over coefficients. */
#define Q2_STEP 8
2486
2487static void find_sse(SnowContext *s, Plane *p, int *score, int score_stride, IDWTELEM *r0, IDWTELEM *r1, int level, int orientation){
2488 SubBand *b= &p->band[level][orientation];
2489 int x, y;
2490 int xo=0;
2491 int yo=0;
2492 int step= 1 << (s->spatial_decomposition_count - level);
2493
2494 if(orientation&1)
2495 xo= step>>1;
2496 if(orientation&2)
2497 yo= step>>1;
2498
2499 //FIXME bias for nonzero ?
2500 //FIXME optimize
2501 memset(score, 0, sizeof(*score)*score_stride*((p->height + Q2_STEP-1)/Q2_STEP));
2502 for(y=0; y<p->height; y++){
2503 for(x=0; x<p->width; x++){
2504 int sx= (x-xo + step/2) / step / Q2_STEP;
2505 int sy= (y-yo + step/2) / step / Q2_STEP;
2506 int v= r0[x + y*p->width] - r1[x + y*p->width];
2507 assert(sx>=0 && sy>=0 && sx < score_stride);
2508 v= ((v+8)>>4)<<4;
2509 score[sx + sy*score_stride] += v*v;
2510 assert(score[sx + sy*score_stride] >= 0);
2511 }
2512 }
2513}
2514
2515static void dequantize_all(SnowContext *s, Plane *p, IDWTELEM *buffer, int width, int height){
2516 int level, orientation;
2517
2518 for(level=0; level<s->spatial_decomposition_count; level++){
2519 for(orientation=level ? 1 : 0; orientation<4; orientation++){
2520 SubBand *b= &p->band[level][orientation];
2521 IDWTELEM *dst= buffer + (b->ibuf - s->spatial_idwt_buffer);
2522
2523 dequantize(s, b, dst, b->stride);
2524 }
2525 }
2526}
2527
2528static void dwt_quantize(SnowContext *s, Plane *p, DWTELEM *buffer, int width, int height, int stride, int type){
2529 int level, orientation, ys, xs, x, y, pass;
2530 IDWTELEM best_dequant[height * stride];
2531 IDWTELEM idwt2_buffer[height * stride];
2532 const int score_stride= (width + 10)/Q2_STEP;
2533 int best_score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
2534 int score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
2535 int threshold= (s->m.lambda * s->m.lambda) >> 6;
2536
2537 //FIXME pass the copy cleanly ?
2538
2539// memcpy(dwt_buffer, buffer, height * stride * sizeof(DWTELEM));
2540 ff_spatial_dwt(buffer, width, height, stride, type, s->spatial_decomposition_count);
2541
2542 for(level=0; level<s->spatial_decomposition_count; level++){
2543 for(orientation=level ? 1 : 0; orientation<4; orientation++){
2544 SubBand *b= &p->band[level][orientation];
2545 IDWTELEM *dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
2546 DWTELEM *src= buffer + (b-> buf - s->spatial_dwt_buffer);
2547 assert(src == b->buf); // code does not depend on this but it is true currently
2548
2549 quantize(s, b, dst, src, b->stride, s->qbias);
2550 }
2551 }
2552 for(pass=0; pass<1; pass++){
2553 if(s->qbias == 0) //keyframe
2554 continue;
2555 for(level=0; level<s->spatial_decomposition_count; level++){
2556 for(orientation=level ? 1 : 0; orientation<4; orientation++){
2557 SubBand *b= &p->band[level][orientation];
2558 IDWTELEM *dst= idwt2_buffer + (b->ibuf - s->spatial_idwt_buffer);
2559 IDWTELEM *best_dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
2560
2561 for(ys= 0; ys<Q2_STEP; ys++){
2562 for(xs= 0; xs<Q2_STEP; xs++){
2563 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
2564 dequantize_all(s, p, idwt2_buffer, width, height);
2565 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
2566 find_sse(s, p, best_score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
2567 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
2568 for(y=ys; y<b->height; y+= Q2_STEP){
2569 for(x=xs; x<b->width; x+= Q2_STEP){
2570 if(dst[x + y*b->stride]<0) dst[x + y*b->stride]++;
2571 if(dst[x + y*b->stride]>0) dst[x + y*b->stride]--;
2572 //FIXME try more than just --
2573 }
2574 }
2575 dequantize_all(s, p, idwt2_buffer, width, height);
2576 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
2577 find_sse(s, p, score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
2578 for(y=ys; y<b->height; y+= Q2_STEP){
2579 for(x=xs; x<b->width; x+= Q2_STEP){
2580 int score_idx= x/Q2_STEP + (y/Q2_STEP)*score_stride;
2581 if(score[score_idx] <= best_score[score_idx] + threshold){
2582 best_score[score_idx]= score[score_idx];
2583 if(best_dst[x + y*b->stride]<0) best_dst[x + y*b->stride]++;
2584 if(best_dst[x + y*b->stride]>0) best_dst[x + y*b->stride]--;
2585 //FIXME copy instead
2586 }
2587 }
2588 }
2589 }
2590 }
2591 }
2592 }
2593 }
2594 memcpy(s->spatial_idwt_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); //FIXME work with that directly instead of copy at the end
2595}
2596
2597#endif /* QUANTIZE2==1 */
2598
/* When nonzero, frame_start() calls halfpel_interpol() to build half-pel planes for the current picture. */
#define USE_HALFPEL_PLANE 0
2600
2601static void halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){
2602 int p,x,y;
2603
2604 assert(!(s->avctx->flags & CODEC_FLAG_EMU_EDGE));
2605
2606 for(p=0; p<3; p++){
2607 int is_chroma= !!p;
2608 int w= s->avctx->width >>is_chroma;
2609 int h= s->avctx->height >>is_chroma;
2610 int ls= frame->linesize[p];
2611 uint8_t *src= frame->data[p];
2612
2613 halfpel[1][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
2614 halfpel[2][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
2615 halfpel[3][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
2616
2617 halfpel[0][p]= src;
2618 for(y=0; y<h; y++){
2619 for(x=0; x<w; x++){
2620 int i= y*ls + x;
2621
2622 halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5;
2623 }
2624 }
2625 for(y=0; y<h; y++){
2626 for(x=0; x<w; x++){
2627 int i= y*ls + x;
2628
2629 halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
2630 }
2631 }
2632 src= halfpel[1][p];
2633 for(y=0; y<h; y++){
2634 for(x=0; x<w; x++){
2635 int i= y*ls + x;
2636
2637 halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
2638 }
2639 }
2640
2641//FIXME border!
2642 }
2643}
2644
2645static void release_buffer(AVCodecContext *avctx){
2646 SnowContext *s = avctx->priv_data;
2647 int i;
2648
2649 if(s->last_picture[s->max_ref_frames-1].data[0]){
2650 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
2651 for(i=0; i<9; i++)
2652 if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3])
2653 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3]));
2654 }
2655}
2656
2657static int frame_start(SnowContext *s){
2658 AVFrame tmp;
2659 int w= s->avctx->width; //FIXME round up to x16 ?
2660 int h= s->avctx->height;
2661
2662 if(s->current_picture.data[0]){
2663 s->dsp.draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w , h , EDGE_WIDTH );
2664 s->dsp.draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
2665 s->dsp.draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
2666 }
2667
2668 release_buffer(s->avctx);
2669
2670 tmp= s->last_picture[s->max_ref_frames-1];
2671 memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame));
2672 memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4);
2673 if(USE_HALFPEL_PLANE && s->current_picture.data[0])
2674 halfpel_interpol(s, s->halfpel_plane[0], &s->current_picture);
2675 s->last_picture[0]= s->current_picture;
2676 s->current_picture= tmp;
2677
2678 if(s->keyframe){
2679 s->ref_frames= 0;
2680 }else{
2681 int i;
2682 for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++)
2683 if(i && s->last_picture[i-1].key_frame)
2684 break;
2685 s->ref_frames= i;
2686 if(s->ref_frames==0){
2687 av_log(s->avctx,AV_LOG_ERROR, "No reference frames\n");
2688 return -1;
2689 }
2690 }
2691
2692 s->current_picture.reference= 1;
2693 if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
2694 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
2695 return -1;
2696 }
2697
2698 s->current_picture.key_frame= s->keyframe;
2699
2700 return 0;
2701}
2702
2703static av_cold void common_end(SnowContext *s){
2704 int plane_index, level, orientation, i;
2705
2706 av_freep(&s->spatial_dwt_buffer);
2707 av_freep(&s->spatial_idwt_buffer);
2708
2709 s->m.me.temp= NULL;
2710 av_freep(&s->m.me.scratchpad);
2711 av_freep(&s->m.me.map);
2712 av_freep(&s->m.me.score_map);
2713 av_freep(&s->m.obmc_scratchpad);
2714
2715 av_freep(&s->block);
2716 av_freep(&s->scratchbuf);
2717
2718 for(i=0; i<MAX_REF_FRAMES; i++){
2719 av_freep(&s->ref_mvs[i]);
2720 av_freep(&s->ref_scores[i]);
2721 if(s->last_picture[i].data[0])
2722 s->avctx->release_buffer(s->avctx, &s->last_picture[i]);
2723 }
2724
2725 for(plane_index=0; plane_index<3; plane_index++){
2726 for(level=s->spatial_decomposition_count-1; level>=0; level--){
2727 for(orientation=level ? 1 : 0; orientation<4; orientation++){
2728 SubBand *b= &s->plane[plane_index].band[level][orientation];
2729
2730 av_freep(&b->x_coeff);
2731 }
2732 }
2733 }
e8c6411c
JM
2734 if (s->mconly_picture.data[0])
2735 s->avctx->release_buffer(s->avctx, &s->mconly_picture);
2736 if (s->current_picture.data[0])
2737 s->avctx->release_buffer(s->avctx, &s->current_picture);
d773d855
DB
2738}
2739
2740static av_cold int decode_init(AVCodecContext *avctx)
2741{
2742 avctx->pix_fmt= PIX_FMT_YUV420P;
2743
2744 common_init(avctx);
2745
2746 return 0;
2747}
2748
2749static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPacket *avpkt){
2750 const uint8_t *buf = avpkt->data;
2751 int buf_size = avpkt->size;
2752 SnowContext *s = avctx->priv_data;
2753 RangeCoder * const c= &s->c;
2754 int bytes_read;
2755 AVFrame *picture = data;
2756 int level, orientation, plane_index;
2757
2758 ff_init_range_decoder(c, buf, buf_size);
2759 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
2760
2761 s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
2762 if(decode_header(s)<0)
2763 return -1;
2764 common_init_after_header(avctx);
2765
2766 // realloc slice buffer for the case that spatial_decomposition_count changed
2767 slice_buffer_destroy(&s->sb);
2768 slice_buffer_init(&s->sb, s->plane[0].height, (MB_SIZE >> s->block_max_depth) + s->spatial_decomposition_count * 8 + 1, s->plane[0].width, s->spatial_idwt_buffer);
2769
2770 for(plane_index=0; plane_index<3; plane_index++){
2771 Plane *p= &s->plane[plane_index];
2772 p->fast_mc= p->diag_mc && p->htaps==6 && p->hcoeff[0]==40
2773 && p->hcoeff[1]==-10
2774 && p->hcoeff[2]==2;
2775 }
2776
2777 alloc_blocks(s);
2778
2779 if(frame_start(s) < 0)
2780 return -1;
2781 //keyframe flag duplication mess FIXME
2782 if(avctx->debug&FF_DEBUG_PICT_INFO)
2783 av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
2784
2785 decode_blocks(s);
2786
2787 for(plane_index=0; plane_index<3; plane_index++){
2788 Plane *p= &s->plane[plane_index];
2789 int w= p->width;
2790 int h= p->height;
2791 int x, y;
2792 int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
2793
2794 if(s->avctx->debug&2048){
2795 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
2796 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
2797
2798 for(y=0; y<h; y++){
2799 for(x=0; x<w; x++){
2800 int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
2801 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
2802 }
2803 }
2804 }
2805
2806 {
2807 for(level=0; level<s->spatial_decomposition_count; level++){
2808 for(orientation=level ? 1 : 0; orientation<4; orientation++){
2809 SubBand *b= &p->band[level][orientation];
2810 unpack_coeffs(s, b, b->parent, orientation);
2811 }
2812 }
2813 }
2814
2815 {
2816 const int mb_h= s->b_height << s->block_max_depth;
2817 const int block_size = MB_SIZE >> s->block_max_depth;
2818 const int block_w = plane_index ? block_size/2 : block_size;
2819 int mb_y;
2820 DWTCompose cs[MAX_DECOMPOSITIONS];
2821 int yd=0, yq=0;
2822 int y;
2823 int end_y;
2824
2825 ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
2826 for(mb_y=0; mb_y<=mb_h; mb_y++){
2827
2828 int slice_starty = block_w*mb_y;
2829 int slice_h = block_w*(mb_y+1);
2830 if (!(s->keyframe || s->avctx->debug&512)){
2831 slice_starty = FFMAX(0, slice_starty - (block_w >> 1));
2832 slice_h -= (block_w >> 1);
2833 }
2834
2835 for(level=0; level<s->spatial_decomposition_count; level++){
2836 for(orientation=level ? 1 : 0; orientation<4; orientation++){
2837 SubBand *b= &p->band[level][orientation];
2838 int start_y;
2839 int end_y;
2840 int our_mb_start = mb_y;
2841 int our_mb_end = (mb_y + 1);
2842 const int extra= 3;
2843 start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0);
2844 end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra);
2845 if (!(s->keyframe || s->avctx->debug&512)){
2846 start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level)));
2847 end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level)));
2848 }
2849 start_y = FFMIN(b->height, start_y);
2850 end_y = FFMIN(b->height, end_y);
2851
2852 if (start_y != end_y){
2853 if (orientation == 0){
2854 SubBand * correlate_band = &p->band[0][0];
2855 int correlate_end_y = FFMIN(b->height, end_y + 1);
2856 int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0));
2857 decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]);
2858 correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y);
2859 dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, start_y, end_y);
2860 }
2861 else
2862 decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
2863 }
2864 }
2865 }
2866
2867 for(; yd<slice_h; yd+=4){
2868 ff_spatial_idwt_buffered_slice(&s->dsp, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
2869 }
2870
2871 if(s->qlog == LOSSLESS_QLOG){
2872 for(; yq<slice_h && yq<h; yq++){
2873 IDWTELEM * line = slice_buffer_get_line(&s->sb, yq);
2874 for(x=0; x<w; x++){
2875 line[x] <<= FRAC_BITS;
2876 }
2877 }
2878 }
2879
2880 predict_slice_buffered(s, &s->sb, s->spatial_idwt_buffer, plane_index, 1, mb_y);
2881
2882 y = FFMIN(p->height, slice_starty);
2883 end_y = FFMIN(p->height, slice_h);
2884 while(y < end_y)
2885 slice_buffer_release(&s->sb, y++);
2886 }
2887
2888 slice_buffer_flush(&s->sb);
2889 }
2890
2891 }
2892
2893 emms_c();
2894
2895 release_buffer(avctx);
2896
2897 if(!(s->avctx->debug&2048))
2898 *picture= s->current_picture;
2899 else
2900 *picture= s->mconly_picture;
2901
2902 *data_size = sizeof(AVFrame);
2903
2904 bytes_read= c->bytestream - c->bytestream_start;
2905 if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
2906
2907 return bytes_read;
2908}
2909
2910static av_cold int decode_end(AVCodecContext *avctx)
2911{
2912 SnowContext *s = avctx->priv_data;
2913
2914 slice_buffer_destroy(&s->sb);
2915
2916 common_end(s);
2917
2918 return 0;
2919}
2920
2921AVCodec snow_decoder = {
2922 "snow",
2923 CODEC_TYPE_VIDEO,
2924 CODEC_ID_SNOW,
2925 sizeof(SnowContext),
2926 decode_init,
2927 NULL,
2928 decode_end,
2929 decode_frame,
2930 CODEC_CAP_DR1 /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
2931 NULL,
2932 .long_name = NULL_IF_CONFIG_SMALL("Snow"),
2933};
2934
2935#if CONFIG_SNOW_ENCODER
2936static av_cold int encode_init(AVCodecContext *avctx)
2937{
2938 SnowContext *s = avctx->priv_data;
2939 int plane_index;
2940
2941 if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
2942 av_log(avctx, AV_LOG_ERROR, "This codec is under development, files encoded with it may not be decodable with future versions!!!\n"
2943 "Use vstrict=-2 / -strict -2 to use it anyway.\n");
2944 return -1;
2945 }
2946
2947 if(avctx->prediction_method == DWT_97
2948 && (avctx->flags & CODEC_FLAG_QSCALE)
2949 && avctx->global_quality == 0){
2950 av_log(avctx, AV_LOG_ERROR, "The 9/7 wavelet is incompatible with lossless mode.\n");
2951 return -1;
2952 }
2953
2954 s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
2955
2956 s->mv_scale = (avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
2957 s->block_max_depth= (avctx->flags & CODEC_FLAG_4MV ) ? 1 : 0;
2958
2959 for(plane_index=0; plane_index<3; plane_index++){
2960 s->plane[plane_index].diag_mc= 1;
2961 s->plane[plane_index].htaps= 6;
2962 s->plane[plane_index].hcoeff[0]= 40;
2963 s->plane[plane_index].hcoeff[1]= -10;
2964 s->plane[plane_index].hcoeff[2]= 2;
2965 s->plane[plane_index].fast_mc= 1;
2966 }
2967
2968 common_init(avctx);
2969 alloc_blocks(s);
2970
2971 s->version=0;
2972
2973 s->m.avctx = avctx;
2974 s->m.flags = avctx->flags;
2975 s->m.bit_rate= avctx->bit_rate;
2976
2977 s->m.me.temp =
2978 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
2979 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
2980 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
2981 s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
2982 h263_encode_init(&s->m); //mv_penalty
2983
2984 s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1);
2985
2986 if(avctx->flags&CODEC_FLAG_PASS1){
2987 if(!avctx->stats_out)
2988 avctx->stats_out = av_mallocz(256);
2989 }
2990 if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){
2991 if(ff_rate_control_init(&s->m) < 0)
2992 return -1;
2993 }
2994 s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2));
2995
2996 avctx->coded_frame= &s->current_picture;
2997 switch(avctx->pix_fmt){
2998// case PIX_FMT_YUV444P:
2999// case PIX_FMT_YUV422P:
3000 case PIX_FMT_YUV420P:
3001 case PIX_FMT_GRAY8:
3002// case PIX_FMT_YUV411P:
3003// case PIX_FMT_YUV410P:
3004 s->colorspace_type= 0;
3005 break;
3006/* case PIX_FMT_RGB32:
3007 s->colorspace= 1;
3008 break;*/
3009 default:
3010 av_log(avctx, AV_LOG_ERROR, "pixel format not supported\n");
3011 return -1;
3012 }
3013// avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
3014 s->chroma_h_shift= 1;
3015 s->chroma_v_shift= 1;
3016
3017 ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
3018 ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
3019
3020 s->avctx->get_buffer(s->avctx, &s->input_picture);
3021
3022 if(s->avctx->me_method == ME_ITER){
3023 int i;
3024 int size= s->b_width * s->b_height << 2*s->block_max_depth;
3025 for(i=0; i<s->max_ref_frames; i++){
3026 s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2]));
3027 s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t));
3028 }
3029 }
3030
3031 return 0;
3032}
3033
3034//near copy & paste from dsputil, FIXME
/**
 * Sum the samples of a w x w block.
 *
 * @param pix        top-left sample of the block
 * @param line_size  distance in bytes between the starts of consecutive rows
 * @param w          block width and height
 * @return sum of all w*w samples
 */
static int pix_sum(uint8_t * pix, int line_size, int w)
{
    const uint8_t *row = pix;
    int total = 0;
    int x, y;

    for (y = 0; y < w; y++, row += line_size)
        for (x = 0; x < w; x++)
            total += row[x];

    return total;
}
3049
3050//near copy & paste from dsputil, FIXME
3051static int pix_norm1(uint8_t * pix, int line_size, int w)
3052{
3053 int s, i, j;
3054 uint32_t *sq = ff_squareTbl + 256;
3055
3056 s = 0;
3057 for (i = 0; i < w; i++) {
3058 for (j = 0; j < w; j ++) {
3059 s += sq[pix[0]];
3060 pix ++;
3061 }
3062 pix += line_size - w;
3063 }
3064 return s;
3065}
3066
//FIXME copy&paste
/* Named rows of the local predictor array P[][2] filled in by encode_q_branch(). */
#define P_LEFT P[1]
#define P_TOP P[2]
#define P_TOPRIGHT P[3]
#define P_MEDIAN P[4]
#define P_MV1 P[9]
#define FLAG_QPEL   1 //must be 1
3074
3075static int encode_q_branch(SnowContext *s, int level, int x, int y){
3076 uint8_t p_buffer[1024];
3077 uint8_t i_buffer[1024];
3078 uint8_t p_state[sizeof(s->block_state)];
3079 uint8_t i_state[sizeof(s->block_state)];
3080 RangeCoder pc, ic;
3081 uint8_t *pbbak= s->c.bytestream;
3082 uint8_t *pbbak_start= s->c.bytestream_start;
3083 int score, score2, iscore, i_len, p_len, block_s, sum, base_bits;
3084 const int w= s->b_width << s->block_max_depth;
3085 const int h= s->b_height << s->block_max_depth;
3086 const int rem_depth= s->block_max_depth - level;
3087 const int index= (x + y*w) << rem_depth;
3088 const int block_w= 1<<(LOG2_MB_SIZE - level);
3089 int trx= (x+1)<<rem_depth;
3090 int try= (y+1)<<rem_depth;
3091 const BlockNode *left = x ? &s->block[index-1] : &null_block;
3092 const BlockNode *top = y ? &s->block[index-w] : &null_block;
3093 const BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
3094 const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
3095 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
3096 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
3097 int pl = left->color[0];
3098 int pcb= left->color[1];
3099 int pcr= left->color[2];
3100 int pmx, pmy;
3101 int mx=0, my=0;
3102 int l,cr,cb;
3103 const int stride= s->current_picture.linesize[0];
3104 const int uvstride= s->current_picture.linesize[1];
3105 uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w,
3106 s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
3107 s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
3108 int P[10][2];
3109 int16_t last_mv[3][2];
3110 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
3111 const int shift= 1+qpel;
3112 MotionEstContext *c= &s->m.me;
3113 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
3114 int mx_context= av_log2(2*FFABS(left->mx - top->mx));
3115 int my_context= av_log2(2*FFABS(left->my - top->my));
3116 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
3117 int ref, best_ref, ref_score, ref_mx, ref_my;
3118
3119 assert(sizeof(s->block_state) >= 256);
3120 if(s->keyframe){
3121 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
3122 return 0;
8f8ae495
LM
3123 }
3124
d773d855 3125// clip predictors / edge ?
51d6a3cf 3126
d773d855
DB
3127 P_LEFT[0]= left->mx;
3128 P_LEFT[1]= left->my;
3129 P_TOP [0]= top->mx;
3130 P_TOP [1]= top->my;
3131 P_TOPRIGHT[0]= tr->mx;
3132 P_TOPRIGHT[1]= tr->my;
51d6a3cf 3133
d773d855
DB
3134 last_mv[0][0]= s->block[index].mx;
3135 last_mv[0][1]= s->block[index].my;
3136 last_mv[1][0]= right->mx;
3137 last_mv[1][1]= right->my;
3138 last_mv[2][0]= bottom->mx;
3139 last_mv[2][1]= bottom->my;
51d6a3cf 3140
d773d855
DB
3141 s->m.mb_stride=2;
3142 s->m.mb_x=
3143 s->m.mb_y= 0;
3144 c->skip= 0;
51d6a3cf 3145
d773d855
DB
3146 assert(c-> stride == stride);
3147 assert(c->uvstride == uvstride);
51d6a3cf 3148
d773d855
DB
3149 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
3150 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
3151 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
3152 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
1015631b 3153
d773d855
DB
3154 c->xmin = - x*block_w - 16+3;
3155 c->ymin = - y*block_w - 16+3;
3156 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-3;
3157 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-3;
1015631b 3158
d773d855
DB
3159 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
3160 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
3161 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
3162 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
3163 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
3164 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
3165 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
1015631b 3166
d773d855
DB
3167 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
3168 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
1015631b 3169
d773d855
DB
3170 if (!y) {
3171 c->pred_x= P_LEFT[0];
3172 c->pred_y= P_LEFT[1];
3173 } else {
3174 c->pred_x = P_MEDIAN[0];
3175 c->pred_y = P_MEDIAN[1];
3176 }
48d1b9a1 3177
d773d855
DB
3178 score= INT_MAX;
3179 best_ref= 0;
3180 for(ref=0; ref<s->ref_frames; ref++){
3181 init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);
8c36eaaa 3182
d773d855
DB
3183 ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
3184 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
8c36eaaa 3185
d773d855
DB
3186 assert(ref_mx >= c->xmin);
3187 assert(ref_mx <= c->xmax);
3188 assert(ref_my >= c->ymin);
3189 assert(ref_my <= c->ymax);
8c36eaaa 3190
d773d855
DB
3191 ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
3192 ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
3193 ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
3194 if(s->ref_mvs[ref]){
3195 s->ref_mvs[ref][index][0]= ref_mx;
3196 s->ref_mvs[ref][index][1]= ref_my;
3197 s->ref_scores[ref][index]= ref_score;
3198 }
3199 if(score > ref_score){
3200 score= ref_score;
3201 best_ref= ref;
3202 mx= ref_mx;
3203 my= ref_my;
51d6a3cf 3204 }
51d6a3cf 3205 }
d773d855 3206 //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2
b104969f 3207
d773d855
DB
3208 // subpel search
3209 base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start);
3210 pc= s->c;
3211 pc.bytestream_start=
3212 pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
3213 memcpy(p_state, s->block_state, sizeof(s->block_state));
b104969f 3214
d773d855
DB
3215 if(level!=s->block_max_depth)
3216 put_rac(&pc, &p_state[4 + s_context], 1);
3217 put_rac(&pc, &p_state[1 + left->type + top->type], 0);
3218 if(s->ref_frames > 1)
3219 put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
3220 pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
3221 put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
3222 put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
3223 p_len= pc.bytestream - pc.bytestream_start;
3224 score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT;
b104969f 3225
d773d855
DB
3226 block_s= block_w*block_w;
3227 sum = pix_sum(current_data[0], stride, block_w);
3228 l= (sum + block_s/2)/block_s;
3229 iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
b104969f 3230
d773d855
DB
3231 block_s= block_w*block_w>>2;
3232 sum = pix_sum(current_data[1], uvstride, block_w>>1);
3233 cb= (sum + block_s/2)/block_s;
3234// iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
3235 sum = pix_sum(current_data[2], uvstride, block_w>>1);
3236 cr= (sum + block_s/2)/block_s;
3237// iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
b104969f 3238
d773d855
DB
3239 ic= s->c;
3240 ic.bytestream_start=
3241 ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
3242 memcpy(i_state, s->block_state, sizeof(s->block_state));
3243 if(level!=s->block_max_depth)
3244 put_rac(&ic, &i_state[4 + s_context], 1);
3245 put_rac(&ic, &i_state[1 + left->type + top->type], 1);
3246 put_symbol(&ic, &i_state[32], l-pl , 1);
3247 put_symbol(&ic, &i_state[64], cb-pcb, 1);
3248 put_symbol(&ic, &i_state[96], cr-pcr, 1);
3249 i_len= ic.bytestream - ic.bytestream_start;
3250 iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT;
b104969f 3251
d773d855
DB
3252// assert(score==256*256*256*64-1);
3253 assert(iscore < 255*255*256 + s->lambda2*10);
3254 assert(iscore >= 0);
3255 assert(l>=0 && l<=255);
3256 assert(pl>=0 && pl<=255);
b104969f 3257
d773d855
DB
3258 if(level==0){
3259 int varc= iscore >> 8;
3260 int vard= score >> 8;
3261 if (vard <= 64 || vard < varc)
3262 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
3263 else
3264 c->scene_change_score+= s->m.qscale;
b104969f 3265 }
51d6a3cf 3266
d773d855
DB
3267 if(level!=s->block_max_depth){
3268 put_rac(&s->c, &s->block_state[4 + s_context], 0);
3269 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
3270 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
3271 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
3272 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
3273 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
791e7b83 3274
d773d855
DB
3275 if(score2 < score && score2 < iscore)
3276 return score2;
d593e329 3277 }
115329f1 3278
d773d855
DB
3279 if(iscore < score){
3280 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
3281 memcpy(pbbak, i_buffer, i_len);
3282 s->c= ic;
3283 s->c.bytestream_start= pbbak_start;
3284 s->c.bytestream= pbbak + i_len;
3285 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
3286 memcpy(s->block_state, i_state, sizeof(s->block_state));
3287 return iscore;
3288 }else{
3289 memcpy(pbbak, p_buffer, p_len);
3290 s->c= pc;
3291 s->c.bytestream_start= pbbak_start;
3292 s->c.bytestream= pbbak + p_len;
3293 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
3294 memcpy(s->block_state, p_state, sizeof(s->block_state));
3295 return score;
3296 }
3297}
115329f1 3298
d773d855
DB
3299static void encode_q_branch2(SnowContext *s, int level, int x, int y){
3300 const int w= s->b_width << s->block_max_depth;
3301 const int rem_depth= s->block_max_depth - level;
3302 const int index= (x + y*w) << rem_depth;
3303 int trx= (x+1)<<rem_depth;
3304 BlockNode *b= &s->block[index];
3305 const BlockNode *left = x ? &s->block[index-1] : &null_block;
3306 const BlockNode *top = y ? &s->block[index-w] : &null_block;
3307 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
3308 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
3309 int pl = left->color[0];
3310 int pcb= left->color[1];
3311 int pcr= left->color[2];
3312 int pmx, pmy;
3313 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
3314 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref;
3315 int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref;
3316 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
115329f1 3317
d773d855
DB
3318 if(s->keyframe){
3319 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
3320 return;
3321 }
115329f1 3322
d773d855
DB
3323 if(level!=s->block_max_depth){
3324 if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
3325 put_rac(&s->c, &s->block_state[4 + s_context], 1);
3326 }else{
3327 put_rac(&s->c, &s->block_state[4 + s_context], 0);
3328 encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
3329 encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
3330 encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
3331 encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
3332 return;
791e7b83
MN
3333 }
3334 }
d773d855
DB
3335 if(b->type & BLOCK_INTRA){
3336 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
3337 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
3338 put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
3339 put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
3340 put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
3341 set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
3342 }else{
3343 pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
3344 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
3345 if(s->ref_frames > 1)
3346 put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
3347 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
3348 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
3349 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
3350 }
791e7b83
MN
3351}
3352
d773d855
DB
3353static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
3354 int i, x2, y2;
3355 Plane *p= &s->plane[plane_index];
3356 const int block_size = MB_SIZE >> s->block_max_depth;
3357 const int block_w = plane_index ? block_size/2 : block_size;
3358 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
3359 const int obmc_stride= plane_index ? block_size : 2*block_size;
3360 const int ref_stride= s->current_picture.linesize[plane_index];
3361 uint8_t *src= s-> input_picture.data[plane_index];
3362 IDWTELEM *dst= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; //FIXME change to unsigned
3363 const int b_stride = s->b_width << s->block_max_depth;
3364 const int w= p->width;
3365 const int h= p->height;
3366 int index= mb_x + mb_y*b_stride;
3367 BlockNode *b= &s->block[index];
3368 BlockNode backup= *b;
3369 int ab=0;
3370 int aa=0;
115329f1 3371
d773d855
DB
3372 b->type|= BLOCK_INTRA;
3373 b->color[plane_index]= 0;
3374 memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM));
3375
3376 for(i=0; i<4; i++){
3377 int mb_x2= mb_x + (i &1) - 1;
3378 int mb_y2= mb_y + (i>>1) - 1;
3379 int x= block_w*mb_x2 + block_w/2;
3380 int y= block_w*mb_y2 + block_w/2;
3381
3382 add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
3383 x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
3384
3385 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
3386 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
3387 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
3388 int obmc_v= obmc[index];
3389 int d;
3390 if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
3391 if(x<0) obmc_v += obmc[index + block_w];
3392 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
3393 if(x+block_w>w) obmc_v += obmc[index - block_w];
3394 //FIXME precalculate this or simplify it somehow else
115329f1 3395
d773d855
DB
3396 d = -dst[index] + (1<<(FRAC_BITS-1));
3397 dst[index] = d;
3398 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
3399 aa += obmc_v * obmc_v; //FIXME precalculate this
a0d1931c
Y
3400 }
3401 }
3402 }
d773d855 3403 *b= backup;
a0d1931c 3404
d773d855
DB
3405 return av_clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we should not need clipping
3406}
115329f1 3407
d773d855
DB
3408static inline int get_block_bits(SnowContext *s, int x, int y, int w){
3409 const int b_stride = s->b_width << s->block_max_depth;
3410 const int b_height = s->b_height<< s->block_max_depth;
3411 int index= x + y*b_stride;
3412 const BlockNode *b = &s->block[index];
3413 const BlockNode *left = x ? &s->block[index-1] : &null_block;
3414 const BlockNode *top = y ? &s->block[index-b_stride] : &null_block;
3415 const BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left;
3416 const BlockNode *tr = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
3417 int dmx, dmy;
3418// int mx_context= av_log2(2*FFABS(left->mx - top->mx));
3419// int my_context= av_log2(2*FFABS(left->my - top->my));
115329f1 3420
d773d855
DB
3421 if(x<0 || x>=b_stride || y>=b_height)
3422 return 0;
3423/*
34241 0 0
342501X 1-2 1
3426001XX 3-6 2-3
34270001XXX 7-14 4-7
342800001XXXX 15-30 8-15
3429*/
3430//FIXME try accurate rate
3431//FIXME intra and inter predictors if surrounding blocks are not the same type
3432 if(b->type & BLOCK_INTRA){
3433 return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0]))
3434 + av_log2(2*FFABS(left->color[1] - b->color[1]))
3435 + av_log2(2*FFABS(left->color[2] - b->color[2])));
3436 }else{
3437 pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
3438 dmx-= b->mx;
3439 dmy-= b->my;
3440 return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda
3441 + av_log2(2*FFABS(dmy))
3442 + av_log2(2*b->ref));
791e7b83
MN
3443 }
3444}
3445
d773d855
DB
3446static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
3447 Plane *p= &s->plane[plane_index];
3448 const int block_size = MB_SIZE >> s->block_max_depth;
3449 const int block_w = plane_index ? block_size/2 : block_size;
3450 const int obmc_stride= plane_index ? block_size : 2*block_size;
3451 const int ref_stride= s->current_picture.linesize[plane_index];
3452 uint8_t *dst= s->current_picture.data[plane_index];
3453 uint8_t *src= s-> input_picture.data[plane_index];
3454 IDWTELEM *pred= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
3455 uint8_t *cur = s->scratchbuf;
3456 uint8_t tmp[ref_stride*(2*MB_SIZE+HTAPS_MAX-1)];
3457 const int b_stride = s->b_width << s->block_max_depth;
3458 const int b_height = s->b_height<< s->block_max_depth;
3459 const int w= p->width;
3460 const int h= p->height;
3461 int distortion;
3462 int rate= 0;
3463 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
3464 int sx= block_w*mb_x - block_w/2;
3465 int sy= block_w*mb_y - block_w/2;
3466 int x0= FFMAX(0,-sx);
3467 int y0= FFMAX(0,-sy);
3468 int x1= FFMIN(block_w*2, w-sx);
3469 int y1= FFMIN(block_w*2, h-sy);
3470 int i,x,y;
115329f1 3471
d773d855 3472 pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
115329f1 3473
d773d855
DB
3474 for(y=y0; y<y1; y++){
3475 const uint8_t *obmc1= obmc_edged + y*obmc_stride;
3476 const IDWTELEM *pred1 = pred + y*obmc_stride;
3477 uint8_t *cur1 = cur + y*ref_stride;
3478 uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
3479 for(x=x0; x<x1; x++){
3480#if FRAC_BITS >= LOG2_OBMC_MAX
3481 int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
3482#else
3483 int v = (cur1[x] * obmc1[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS);
3484#endif
3485 v = (v + pred1[x]) >> FRAC_BITS;
3486 if(v&(~255)) v= ~(v>>31);
3487 dst1[x] = v;
791e7b83
MN
3488 }
3489 }
115329f1 3490
d773d855
DB
3491 /* copy the regions where obmc[] = (uint8_t)256 */
3492 if(LOG2_OBMC_MAX == 8
3493 && (mb_x == 0 || mb_x == b_stride-1)
3494 && (mb_y == 0 || mb_y == b_height-1)){
3495 if(mb_x == 0)
3496 x1 = block_w;
3497 else
3498 x0 = block_w;
3499 if(mb_y == 0)
3500 y1 = block_w;
3501 else
3502 y0 = block_w;
3503 for(y=y0; y<y1; y++)
3504 memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
3505 }
115329f1 3506
d773d855
DB
3507 if(block_w==16){
3508 /* FIXME rearrange dsputil to fit 32x32 cmp functions */
350