Indentation
[libav.git] / libavcodec / snow.c
CommitLineData
791e7b83
MN
/*
 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
20
21#include "avcodec.h"
791e7b83 22#include "dsputil.h"
059715a4 23#include "snow.h"
28869757
MN
24
25#include "rangecoder.h"
791e7b83
MN
26
27#include "mpegvideo.h"
28
29#undef NDEBUG
30#include <assert.h>
31
791e7b83
MN
/**
 * 256-entry lookup table with three output levels (-1, 0, +1).
 * Indices 0 and 1 map to 0, 2..127 map to +1, 128..254 map to -1 and
 * 255 maps to 0.
 * NOTE(review): the negative half starting at index 128 suggests the index
 * is a signed value reduced to 8 bits -- confirm at the use sites.
 */
static const int8_t quant3[256]={
 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
};
/**
 * 256-entry three-level lookup table without a dead zone around zero:
 * index 0 maps to 0, 1..127 map to +1 and 128..255 map to -1.
 * NOTE(review): presumably indexed like quant3 (signed value reduced to
 * 8 bits) -- confirm at the use sites.
 */
static const int8_t quant3b[256]={
 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
};
538a3841
MN
/**
 * 256-entry three-level lookup table keyed on index parity: indices 0 and 1
 * map to 0; from index 2 on, even indices map to +1 and odd indices to -1.
 * NOTE(review): this layout would fit an index that carries the sign in its
 * lowest bit -- confirm at the use sites.
 */
static const int8_t quant3bA[256]={
 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
};
791e7b83
MN
/**
 * 256-entry lookup table with five output levels (-2..+2).
 * Index 0 maps to 0, 1..3 to +1, 4..127 to +2, 128..252 to -2 and
 * 253..255 to -1, i.e. entry 256-i is the negation of entry i for i in 1..127.
 * NOTE(review): presumably indexed by a signed value reduced to 8 bits --
 * confirm at the use sites.
 */
static const int8_t quant5[256]={
 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
};
/**
 * 256-entry lookup table with seven output levels (-3..+3).
 * Index 0 maps to 0, 1..2 to +1, 3..39 to +2, 40..127 to +3; the upper half
 * (128..255) holds the negated values in mirrored order.
 * NOTE(review): presumably indexed by a signed value reduced to 8 bits --
 * confirm at the use sites.
 */
static const int8_t quant7[256]={
 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
};
/**
 * 256-entry lookup table with nine output levels (-4..+4).
 * Index 0 maps to 0, 1..2 to +1, 3..6 to +2, 7..20 to +3, 21..127 to +4;
 * the upper half (128..255) holds the negated values in mirrored order.
 * NOTE(review): presumably indexed by a signed value reduced to 8 bits --
 * confirm at the use sites.
 */
static const int8_t quant9[256]={
 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
};
/**
 * 256-entry lookup table with eleven output levels (-5..+5).
 * Index 0 maps to 0, 1 to +1, 2..4 to +2, 5..11 to +3, 12..34 to +4,
 * 35..127 to +5; the upper half (128..255) holds the negated values in
 * mirrored order.
 * NOTE(review): presumably indexed by a signed value reduced to 8 bits --
 * confirm at the use sites.
 */
static const int8_t quant11[256]={
 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
-4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
};
/**
 * 256-entry lookup table with thirteen output levels (-6..+6).
 * Index 0 maps to 0, 1 to +1, 2..3 to +2, 4..7 to +3, 8..16 to +4,
 * 17..49 to +5, 50..127 to +6; the upper half (128..255) holds the negated
 * values in mirrored order.
 * NOTE(review): presumably indexed by a signed value reduced to 8 bits --
 * confirm at the use sites.
 */
static const int8_t quant13[256]={
 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
};
176
791e7b83
MN
/*
 * Window tables obmc32 (32x32, 1024 entries) and obmc16 (16x16, 256 entries).
 * Three alternative variants are kept in the source; exactly one is compiled:
 * "#if 0" disables the cubic variant and "#elif 1" selects the linear one, so
 * the cos variant under "#else" is never reached.
 * NOTE(review): "obmc" presumably stands for overlapped block motion
 * compensation -- confirm. The active (linear) tables peak at 240 / 224 while
 * the disabled variants peak at 64, so the "64*" labels do not describe the
 * scale of the active tables.
 */
#if 0 //64*cubic
static const uint8_t obmc32[1024]={
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
//error:0.000022
};
static const uint8_t obmc16[256]={
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
//error:0.000033
};
#elif 1 // 64*linear
static const uint8_t obmc32[1024]={
 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
 //error:0.000020
};
static const uint8_t obmc16[256]={
 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
//error:0.000015
};
#else //64*cos
static const uint8_t obmc32[1024]={
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
//error:0.000022
};
static const uint8_t obmc16[256]={
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
//error:0.000022
};
#endif
343
155ec6ed
MN
//linear *64
// 8x8 window table (64 entries), symmetric both horizontally and vertically.
// NOTE(review): the peak value is 196, so the "*64" label above may predate a
// rescale of the table -- confirm against the code that consumes obmc_tab.
static const uint8_t obmc8[64]={
 4, 12, 20, 28, 28, 20, 12, 4,
 12, 36, 60, 84, 84, 60, 36, 12,
 20, 60,100,140,140,100, 60, 20,
 28, 84,140,196,196,140, 84, 28,
 28, 84,140,196,196,140, 84, 28,
 20, 60,100,140,140,100, 60, 20,
 12, 36, 60, 84, 84, 60, 36, 12,
 4, 12, 20, 28, 28, 20, 12, 4,
//error:0.000000
};
356
//linear *64
// 4x4 window table (16 entries), symmetric both horizontally and vertically.
// NOTE(review): the peak value is 144, so the "*64" label above may predate a
// rescale of the table -- confirm against the code that consumes obmc_tab.
static const uint8_t obmc4[16]={
 16, 48, 48, 16,
 48,144,144, 48,
 48,144,144, 48,
 16, 48, 48, 16,
//error:0.000000
};
365
/**
 * Window tables ordered from the largest to the smallest block:
 * index 0 = 32x32, 1 = 16x16, 2 = 8x8, 3 = 4x4.
 */
static const uint8_t *obmc_tab[4]={
    obmc32, obmc16, obmc8, obmc4
};
369
85fc0e75
LM
/// File-scope MAX_REF_FRAMES x MAX_REF_FRAMES table, zero-initialised (static storage) and filled elsewhere.
/// NOTE(review): presumably factors for rescaling motion vectors between reference frames -- confirm where it is written.
static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
371
155ec6ed
MN
/**
 * Per-block record; SnowContext keeps a pointer to these in its block field.
 * NOTE(review): mx/my are presumably the motion vector components and ref the
 * reference-frame index -- confirm against the code that fills them in.
 */
typedef struct BlockNode{
    int16_t mx;
    int16_t my;
    uint8_t ref;
    uint8_t color[3];       ///< three 8-bit values; null_block sets each to 128
    uint8_t type;           ///< holds the BLOCK_* values defined below
//#define TYPE_SPLIT    1
#define BLOCK_INTRA   1
#define BLOCK_OPT     2
//#define TYPE_NOCOLOR  4
    uint8_t level; //FIXME merge into type?
}BlockNode;
384
51d6a3cf
MN
/**
 * Constant default block: zero mx/my, ref 0, type 0, level 0 and all three
 * colour components set to 128.
 */
static const BlockNode null_block= { //FIXME add border maybe
    .color= {128,128,128},
    .mx= 0,
    .my= 0,
    .ref= 0,
    .type= 0,
    .level= 0,
};
393
155ec6ed
MN
#define LOG2_MB_SIZE 4                  // log2 of MB_SIZE
#define MB_SIZE (1<<LOG2_MB_SIZE)       // evaluates to 16
#define ENCODER_EXTRA_BITS 4
#define HTAPS 6                         // NOTE(review): presumably the tap count of the half-pel interpolation filter -- confirm
155ec6ed 398
a0d1931c
Y
/**
 * Pair of a signed 16-bit x value and an unsigned 16-bit coefficient.
 * SubBand keeps a pointer to these in its x_coeff field.
 * NOTE(review): presumably one entry per non-zero coefficient of a line --
 * confirm against the code that fills x_coeff.
 */
typedef struct x_and_coeff{
    int16_t x;
    uint16_t coeff;
} x_and_coeff;
403
791e7b83
MN
/**
 * Description of one subband; each Plane holds MAX_DECOMPOSITIONS x 4 of them.
 */
typedef struct SubBand{
    int level;
    int stride;
    int width;
    int height;
    int qlog;                                   ///< log(qscale)/log[2^(1/6)]
    DWTELEM *buf;
    IDWTELEM *ibuf;
    int buf_x_offset;
    int buf_y_offset;
    int stride_line; ///< Stride measured in lines, not pixels.
    x_and_coeff * x_coeff;
    struct SubBand *parent;
    /* Rows of 32 bytes each. NOTE(review): 32 matches the highest offset (31)
     * that put_symbol()/get_symbol() apply to their state argument, so rows
     * are presumably passed to them as context state -- confirm. */
    uint8_t state[/*7*2*/ 7 + 512][32];
}SubBand;
419
/**
 * One plane: its dimensions plus four subbands for each of up to
 * MAX_DECOMPOSITIONS decomposition levels.
 */
typedef struct Plane{
    int width;
    int height;
    SubBand band[MAX_DECOMPOSITIONS][4];
}Plane;
425
/**
 * Codec state. Several settings come in pairs (X and last_X).
 * NOTE(review): the last_* members presumably hold the values used for the
 * previous frame so that changes can be detected -- confirm in the header
 * encode/decode code.
 */
typedef struct SnowContext{
//    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)

    AVCodecContext *avctx;
    RangeCoder c;
    DSPContext dsp;
    AVFrame new_picture;
    AVFrame input_picture;              ///< new_picture with the internal linesizes
    AVFrame current_picture;
    AVFrame last_picture[MAX_REF_FRAMES];
    uint8_t *halfpel_plane[MAX_REF_FRAMES][4][4];
    AVFrame mconly_picture;
//     uint8_t q_context[16];
    uint8_t header_state[32];           // 32 bytes, the same size as one SubBand state row
    uint8_t block_state[128 + 32*128];
    int keyframe;
    int always_reset;
    int version;
    int spatial_decomposition_type;
    int last_spatial_decomposition_type;
    int temporal_decomposition_type;
    int spatial_decomposition_count;
    int temporal_decomposition_count;
    int max_ref_frames;
    int ref_frames;
    int16_t (*ref_mvs[MAX_REF_FRAMES])[2];  // per reference frame: pointer to an array of int16_t pairs
    uint32_t *ref_scores[MAX_REF_FRAMES];
    DWTELEM *spatial_dwt_buffer;
    IDWTELEM *spatial_idwt_buffer;
    int colorspace_type;
    int chroma_h_shift;
    int chroma_v_shift;
    int spatial_scalability;
    int qlog;
    int last_qlog;
    int lambda;
    int lambda2;
    int pass1_rc;
    int mv_scale;
    int last_mv_scale;
    int qbias;
    int last_qbias;
#define QBIAS_SHIFT 3
    int b_width;
    int b_height;
    int block_max_depth;
    int last_block_max_depth;
    Plane plane[MAX_PLANES];
    BlockNode *block;
#define ME_CACHE_SIZE 1024
    int me_cache[ME_CACHE_SIZE];
    int me_cache_generation;
    slice_buffer sb;

    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
}SnowContext;
482
/**
 * Four IDWTELEM line pointers plus a row counter.
 * NOTE(review): presumably the sliding window of lines kept by the
 * line-by-line inverse DWT -- confirm against the compose functions.
 */
typedef struct {
    IDWTELEM *b0;
    IDWTELEM *b1;
    IDWTELEM *b2;
    IDWTELEM *b3;
    int y;
} dwt_compose_t;
490
a0d1931c
Y
/*
 * Return the buffer mapped to line_num, calling slice_buffer_load_line() only
 * when the line is not mapped yet.
 * Both arguments are expanded more than once, so they must be free of side
 * effects.
 */
#define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
//#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
493
51d6a3cf
MN
// Forward declaration so iterative_me() can be referenced before its definition.
static void iterative_me(SnowContext *s);
495
d593e329 496static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM * base_buffer)
a0d1931c
Y
497{
498 int i;
115329f1 499
a0d1931c
Y
500 buf->base_buffer = base_buffer;
501 buf->line_count = line_count;
502 buf->line_width = line_width;
503 buf->data_count = max_allocated_lines;
d593e329
MN
504 buf->line = av_mallocz (sizeof(IDWTELEM *) * line_count);
505 buf->data_stack = av_malloc (sizeof(IDWTELEM *) * max_allocated_lines);
115329f1 506
a0d1931c
Y
507 for (i = 0; i < max_allocated_lines; i++)
508 {
d593e329 509 buf->data_stack[i] = av_malloc (sizeof(IDWTELEM) * line_width);
a0d1931c 510 }
115329f1 511
a0d1931c
Y
512 buf->data_stack_top = max_allocated_lines - 1;
513}
514
d593e329 515static IDWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
a0d1931c 516{
a0d1931c 517 int offset;
d593e329 518 IDWTELEM * buffer;
115329f1
DB
519
520// av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line);
521
a0d1931c
Y
522 assert(buf->data_stack_top >= 0);
523// assert(!buf->line[line]);
524 if (buf->line[line])
525 return buf->line[line];
115329f1 526
a0d1931c
Y
527 offset = buf->line_width * line;
528 buffer = buf->data_stack[buf->data_stack_top];
529 buf->data_stack_top--;
530 buf->line[line] = buffer;
115329f1 531
a0d1931c 532// av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
115329f1 533
a0d1931c
Y
534 return buffer;
535}
536
537static void slice_buffer_release(slice_buffer * buf, int line)
538{
a0d1931c 539 int offset;
d593e329 540 IDWTELEM * buffer;
a0d1931c
Y
541
542 assert(line >= 0 && line < buf->line_count);
543 assert(buf->line[line]);
544
545 offset = buf->line_width * line;
546 buffer = buf->line[line];
547 buf->data_stack_top++;
548 buf->data_stack[buf->data_stack_top] = buffer;
549 buf->line[line] = NULL;
115329f1 550
a0d1931c
Y
551// av_log(NULL, AV_LOG_DEBUG, "slice_buffer_release: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
552}
553
554static void slice_buffer_flush(slice_buffer * buf)
555{
556 int i;
557 for (i = 0; i < buf->line_count; i++)
558 {
559 if (buf->line[i])
560 {
561// av_log(NULL, AV_LOG_DEBUG, "slice_buffer_flush: line: %d \n", i);
562 slice_buffer_release(buf, i);
563 }
564 }
565}
566
567static void slice_buffer_destroy(slice_buffer * buf)
568{
569 int i;
570 slice_buffer_flush(buf);
115329f1 571
a0d1931c
Y
572 for (i = buf->data_count - 1; i >= 0; i--)
573 {
574 assert(buf->data_stack[i]);
e7c8206e 575 av_freep(&buf->data_stack[i]);
a0d1931c
Y
576 }
577 assert(buf->data_stack);
e7c8206e 578 av_freep(&buf->data_stack);
a0d1931c 579 assert(buf->line);
e7c8206e 580 av_freep(&buf->line);
a0d1931c
Y
581}
582
#ifdef __sgi
// Avoid a name clash on SGI IRIX
#undef qexp
#endif
#define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
// QROOT-entry byte table, zero-initialised (static storage) and filled elsewhere.
// NOTE(review): presumably entries are scaled by 1<<QEXPSHIFT -- confirm where the table is initialised.
static uint8_t qexp[QROOT];
791e7b83
MN
589
/**
 * Reflect v into the range [0, m] by mirroring at both ends
 * (..., -1 -> 1, m+1 -> m-1, ...), repeating until the value is inside.
 *
 * @param v value to fold
 * @param m upper bound of the range
 * @return v folded into [0, m]; 0 when m is 0
 *
 * For m == 0 the only valid result is 0. The reflection loop cannot reach it
 * for v != 0 (v merely alternates between v and -v because 2*m adds nothing),
 * so that case is answered directly instead of looping forever.
 * A negative m is left as before: the unsigned comparison rarely triggers,
 * so v is normally returned unchanged.
 */
static inline int mirror(int v, int m){
    if(m == 0)
        return 0;

    while((unsigned)v > (unsigned)m){
        v=-v;
        if(v<0) v+= 2*m;
    }
    return v;
}
597
28869757 598static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
791e7b83
MN
599 int i;
600
601 if(v){
c26abfa5 602 const int a= FFABS(v);
791e7b83
MN
603 const int e= av_log2(a);
604#if 1
115329f1 605 const int el= FFMIN(e, 10);
28869757 606 put_rac(c, state+0, 0);
791e7b83
MN
607
608 for(i=0; i<el; i++){
28869757 609 put_rac(c, state+1+i, 1); //1..10
791e7b83
MN
610 }
611 for(; i<e; i++){
28869757 612 put_rac(c, state+1+9, 1); //1..10
791e7b83 613 }
28869757 614 put_rac(c, state+1+FFMIN(i,9), 0);
791e7b83
MN
615
616 for(i=e-1; i>=el; i--){
28869757 617 put_rac(c, state+22+9, (a>>i)&1); //22..31
791e7b83
MN
618 }
619 for(; i>=0; i--){
28869757 620 put_rac(c, state+22+i, (a>>i)&1); //22..31
791e7b83
MN
621 }
622
623 if(is_signed)
28869757 624 put_rac(c, state+11 + el, v < 0); //11..21
791e7b83 625#else
115329f1 626
28869757 627 put_rac(c, state+0, 0);
791e7b83
MN
628 if(e<=9){
629 for(i=0; i<e; i++){
28869757 630 put_rac(c, state+1+i, 1); //1..10
791e7b83 631 }
28869757 632 put_rac(c, state+1+i, 0);
791e7b83
MN
633
634 for(i=e-1; i>=0; i--){
28869757 635 put_rac(c, state+22+i, (a>>i)&1); //22..31
791e7b83
MN
636 }
637
638 if(is_signed)
28869757 639 put_rac(c, state+11 + e, v < 0); //11..21
791e7b83
MN
640 }else{
641 for(i=0; i<e; i++){
28869757 642 put_rac(c, state+1+FFMIN(i,9), 1); //1..10
791e7b83 643 }
28869757 644 put_rac(c, state+1+FFMIN(i,9), 0);
791e7b83
MN
645
646 for(i=e-1; i>=0; i--){
28869757 647 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
791e7b83
MN
648 }
649
650 if(is_signed)
28869757 651 put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
791e7b83
MN
652 }
653#endif
654 }else{
28869757 655 put_rac(c, state+0, 1);
791e7b83
MN
656 }
657}
658
28869757
MN
659static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
660 if(get_rac(c, state+0))
791e7b83
MN
661 return 0;
662 else{
7c2425d2
LM
663 int i, e, a;
664 e= 0;
28869757 665 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
7c2425d2 666 e++;
791e7b83 667 }
7c2425d2 668
791e7b83 669 a= 1;
7c2425d2 670 for(i=e-1; i>=0; i--){
28869757 671 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
791e7b83
MN
672 }
673
28869757 674 if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
791e7b83
MN
675 return -a;
676 else
677 return a;
678 }
679}
680
28869757 681static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
4f4e9633 682 int i;
0635cbfc 683 int r= log2>=0 ? 1<<log2 : 1;
4f4e9633
MN
684
685 assert(v>=0);
0635cbfc
MN
686 assert(log2>=-4);
687
688 while(v >= r){
28869757 689 put_rac(c, state+4+log2, 1);
0635cbfc 690 v -= r;
4f4e9633 691 log2++;
0635cbfc 692 if(log2>0) r+=r;
4f4e9633 693 }
28869757 694 put_rac(c, state+4+log2, 0);
115329f1 695
4f4e9633 696 for(i=log2-1; i>=0; i--){
28869757 697 put_rac(c, state+31-i, (v>>i)&1);
4f4e9633 698 }
4f4e9633
MN
699}
700
28869757 701static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
4f4e9633 702 int i;
0635cbfc 703 int r= log2>=0 ? 1<<log2 : 1;
4f4e9633
MN
704 int v=0;
705
0635cbfc
MN
706 assert(log2>=-4);
707
28869757 708 while(get_rac(c, state+4+log2)){
0635cbfc 709 v+= r;
4f4e9633 710 log2++;
0635cbfc 711 if(log2>0) r+=r;
4f4e9633 712 }
115329f1 713
4f4e9633 714 for(i=log2-1; i>=0; i--){
28869757 715 v+= get_rac(c, state+31-i)<<i;
4f4e9633
MN
716 }
717
718 return v;
719}
720
9d14ffbc
LB
721static av_always_inline void
722lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
723 int dst_step, int src_step, int ref_step,
724 int width, int mul, int add, int shift,
725 int highpass, int inverse){
791e7b83
MN
726 const int mirror_left= !highpass;
727 const int mirror_right= (width&1) ^ highpass;
728 const int w= (width>>1) - 1 + (highpass & width);
729 int i;
730
731#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
732 if(mirror_left){
733 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
734 dst += dst_step;
735 src += src_step;
736 }
115329f1 737
791e7b83 738 for(i=0; i<w; i++){
9d14ffbc
LB
739 dst[i*dst_step] =
740 LIFT(src[i*src_step],
741 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
742 inverse);
791e7b83 743 }
115329f1 744
791e7b83 745 if(mirror_right){
9d14ffbc
LB
746 dst[w*dst_step] =
747 LIFT(src[w*src_step],
748 ((mul*2*ref[w*ref_step]+add)>>shift),
749 inverse);
791e7b83
MN
750 }
751}
752
9d14ffbc
LB
753static av_always_inline void
754inv_lift(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
755 int dst_step, int src_step, int ref_step,
756 int width, int mul, int add, int shift,
757 int highpass, int inverse){
d593e329
MN
758 const int mirror_left= !highpass;
759 const int mirror_right= (width&1) ^ highpass;
760 const int w= (width>>1) - 1 + (highpass & width);
761 int i;
762
763#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
764 if(mirror_left){
765 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
766 dst += dst_step;
767 src += src_step;
768 }
769
770 for(i=0; i<w; i++){
9d14ffbc
LB
771 dst[i*dst_step] =
772 LIFT(src[i*src_step],
773 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
774 inverse);
d593e329
MN
775 }
776
777 if(mirror_right){
9d14ffbc
LB
778 dst[w*dst_step] =
779 LIFT(src[w*src_step],
780 ((mul*2*ref[w*ref_step]+add)>>shift),
781 inverse);
d593e329
MN
782 }
783}
784
059715a4 785#ifndef liftS
9d14ffbc
LB
786static av_always_inline void
787liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
788 int dst_step, int src_step, int ref_step,
789 int width, int mul, int add, int shift,
790 int highpass, int inverse){
f5a71928
MN
791 const int mirror_left= !highpass;
792 const int mirror_right= (width&1) ^ highpass;
793 const int w= (width>>1) - 1 + (highpass & width);
794 int i;
795
796 assert(shift == 4);
9d14ffbc
LB
797#define LIFTS(src, ref, inv) \
798 ((inv) ? \
799 (src) + (((ref) + 4*(src))>>shift): \
800 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
f5a71928
MN
801 if(mirror_left){
802 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
803 dst += dst_step;
804 src += src_step;
805 }
115329f1 806
f5a71928 807 for(i=0; i<w; i++){
9d14ffbc
LB
808 dst[i*dst_step] =
809 LIFTS(src[i*src_step],
810 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
811 inverse);
f5a71928 812 }
115329f1 813
f5a71928 814 if(mirror_right){
9d14ffbc
LB
815 dst[w*dst_step] =
816 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
f5a71928
MN
817 }
818}
9d14ffbc
LB
819static av_always_inline void
820inv_liftS(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
821 int dst_step, int src_step, int ref_step,
822 int width, int mul, int add, int shift,
823 int highpass, int inverse){
d593e329
MN
824 const int mirror_left= !highpass;
825 const int mirror_right= (width&1) ^ highpass;
826 const int w= (width>>1) - 1 + (highpass & width);
827 int i;
828
829 assert(shift == 4);
9d14ffbc
LB
830#define LIFTS(src, ref, inv) \
831 ((inv) ? \
832 (src) + (((ref) + 4*(src))>>shift): \
833 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
d593e329
MN
834 if(mirror_left){
835 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
836 dst += dst_step;
837 src += src_step;
838 }
839
840 for(i=0; i<w; i++){
9d14ffbc
LB
841 dst[i*dst_step] =
842 LIFTS(src[i*src_step],
843 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
844 inverse);
d593e329
MN
845 }
846
847 if(mirror_right){
9d14ffbc
LB
848 dst[w*dst_step] =
849 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
d593e329
MN
850 }
851}
059715a4 852#endif
f5a71928 853
aa25a462
RFI
854static void horizontal_decompose53i(DWTELEM *b, int width){
855 DWTELEM temp[width];
791e7b83 856 const int width2= width>>1;
62ab0b78 857 int x;
791e7b83
MN
858 const int w2= (width+1)>>1;
859
860 for(x=0; x<width2; x++){
861 temp[x ]= b[2*x ];
862 temp[x+w2]= b[2*x + 1];
863 }
864 if(width&1)
865 temp[x ]= b[2*x ];
866#if 0
62ab0b78
AJ
867 {
868 int A1,A2,A3,A4;
791e7b83
MN
869 A2= temp[1 ];
870 A4= temp[0 ];
871 A1= temp[0+width2];
872 A1 -= (A2 + A4)>>1;
873 A4 += (A1 + 1)>>1;
874 b[0+width2] = A1;
875 b[0 ] = A4;
876 for(x=1; x+1<width2; x+=2){
877 A3= temp[x+width2];
878 A4= temp[x+1 ];
879 A3 -= (A2 + A4)>>1;
880 A2 += (A1 + A3 + 2)>>2;
881 b[x+width2] = A3;
882 b[x ] = A2;
883
884 A1= temp[x+1+width2];
885 A2= temp[x+2 ];
886 A1 -= (A2 + A4)>>1;
887 A4 += (A1 + A3 + 2)>>2;
888 b[x+1+width2] = A1;
889 b[x+1 ] = A4;
890 }
891 A3= temp[width-1];
892 A3 -= A2;
893 A2 += (A1 + A3 + 2)>>2;
894 b[width -1] = A3;
895 b[width2-1] = A2;
62ab0b78 896 }
115329f1 897#else
791e7b83
MN
898 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
899 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0);
900#endif
901}
902
aa25a462 903static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 904 int i;
115329f1 905
791e7b83
MN
906 for(i=0; i<width; i++){
907 b1[i] -= (b0[i] + b2[i])>>1;
908 }
909}
910
aa25a462 911static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 912 int i;
115329f1 913
791e7b83
MN
914 for(i=0; i<width; i++){
915 b1[i] += (b0[i] + b2[i] + 2)>>2;
916 }
917}
918
aa25a462 919static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
39c61bbb 920 int y;
791e7b83
MN
921 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
922 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride;
115329f1 923
791e7b83
MN
924 for(y=-2; y<height; y+=2){
925 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
926 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
927
928{START_TIMER
13705b69
MN
929 if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
930 if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
791e7b83 931STOP_TIMER("horizontal_decompose53i")}
115329f1 932
791e7b83 933{START_TIMER
13705b69
MN
934 if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
935 if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
791e7b83 936STOP_TIMER("vertical_decompose53i*")}
115329f1 937
791e7b83
MN
938 b0=b2;
939 b1=b3;
940 }
941}
942
aa25a462
RFI
943static void horizontal_decompose97i(DWTELEM *b, int width){
944 DWTELEM temp[width];
791e7b83
MN
945 const int w2= (width+1)>>1;
946
ce611a27
MN
947 lift (temp+w2, b +1, b , 1, 2, 2, width, W_AM, W_AO, W_AS, 1, 1);
948 liftS(temp , b , temp+w2, 1, 2, 1, width, W_BM, W_BO, W_BS, 0, 0);
ff06e067 949 lift (b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
791e7b83
MN
950 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
951}
952
953
aa25a462 954static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 955 int i;
115329f1 956
791e7b83
MN
957 for(i=0; i<width; i++){
958 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
959 }
960}
961
aa25a462 962static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 963 int i;
115329f1 964
791e7b83 965 for(i=0; i<width; i++){
791e7b83 966 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
791e7b83
MN
967 }
968}
969
aa25a462 970static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 971 int i;
115329f1 972
791e7b83 973 for(i=0; i<width; i++){
f5a71928 974#ifdef liftS
791e7b83 975 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
f5a71928 976#else
ce611a27 977 b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + W_BO*5 + (5<<27)) / (5*16) - (1<<23);
f5a71928 978#endif
791e7b83
MN
979 }
980}
981
aa25a462 982static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 983 int i;
115329f1 984
791e7b83
MN
985 for(i=0; i<width; i++){
986 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
987 }
988}
989
aa25a462 990static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
39c61bbb 991 int y;
791e7b83
MN
992 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
993 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride;
994 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
995 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
115329f1 996
791e7b83
MN
997 for(y=-4; y<height; y+=2){
998 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
999 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1000
1001{START_TIMER
13705b69
MN
1002 if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
1003 if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
791e7b83
MN
1004if(width>400){
1005STOP_TIMER("horizontal_decompose97i")
1006}}
115329f1 1007
791e7b83 1008{START_TIMER
13705b69
MN
1009 if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
1010 if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
1011 if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
1012 if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
791e7b83
MN
1013
1014if(width>400){
1015STOP_TIMER("vertical_decompose97i")
1016}}
115329f1 1017
791e7b83
MN
1018 b0=b2;
1019 b1=b3;
1020 b2=b4;
1021 b3=b5;
1022 }
1023}
1024
aa25a462 1025void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
791e7b83 1026 int level;
115329f1 1027
46c281e8
MN
1028 for(level=0; level<decomposition_count; level++){
1029 switch(type){
d4b287ed
LM
1030 case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1031 case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
791e7b83
MN
1032 }
1033 }
1034}
1035
d593e329
MN
1036static void horizontal_compose53i(IDWTELEM *b, int width){
1037 IDWTELEM temp[width];
791e7b83
MN
1038 const int width2= width>>1;
1039 const int w2= (width+1)>>1;
62ab0b78 1040 int x;
791e7b83
MN
1041
1042#if 0
62ab0b78 1043 int A1,A2,A3,A4;
791e7b83
MN
1044 A2= temp[1 ];
1045 A4= temp[0 ];
1046 A1= temp[0+width2];
1047 A1 -= (A2 + A4)>>1;
1048 A4 += (A1 + 1)>>1;
1049 b[0+width2] = A1;
1050 b[0 ] = A4;
1051 for(x=1; x+1<width2; x+=2){
1052 A3= temp[x+width2];
1053 A4= temp[x+1 ];
1054 A3 -= (A2 + A4)>>1;
1055 A2 += (A1 + A3 + 2)>>2;
1056 b[x+width2] = A3;
1057 b[x ] = A2;
1058
1059 A1= temp[x+1+width2];
1060 A2= temp[x+2 ];
1061 A1 -= (A2 + A4)>>1;
1062 A4 += (A1 + A3 + 2)>>2;
1063 b[x+1+width2] = A1;
1064 b[x+1 ] = A4;
1065 }
1066 A3= temp[width-1];
1067 A3 -= A2;
1068 A2 += (A1 + A3 + 2)>>2;
1069 b[width -1] = A3;
1070 b[width2-1] = A2;
115329f1 1071#else
d593e329
MN
1072 inv_lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1);
1073 inv_lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
791e7b83
MN
1074#endif
1075 for(x=0; x<width2; x++){
1076 b[2*x ]= temp[x ];
1077 b[2*x + 1]= temp[x+w2];
1078 }
1079 if(width&1)
1080 b[2*x ]= temp[x ];
1081}
1082
d593e329 1083static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
791e7b83 1084 int i;
115329f1 1085
791e7b83
MN
1086 for(i=0; i<width; i++){
1087 b1[i] += (b0[i] + b2[i])>>1;
1088 }
1089}
1090
d593e329 1091static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
791e7b83 1092 int i;
115329f1 1093
791e7b83
MN
1094 for(i=0; i<width; i++){
1095 b1[i] -= (b0[i] + b2[i] + 2)>>2;
1096 }
1097}
1098
a0d1931c
Y
1099static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1100 cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1101 cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line);
1102 cs->y = -1;
1103}
1104
d593e329 1105static void spatial_compose53i_init(dwt_compose_t *cs, IDWTELEM *buffer, int height, int stride){
f9e6ebf7
LM
1106 cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1107 cs->b1 = buffer + mirror(-1 , height-1)*stride;
1108 cs->y = -1;
1109}
1110
a0d1931c
Y
1111static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1112 int y= cs->y;
115329f1 1113
d593e329
MN
1114 IDWTELEM *b0= cs->b0;
1115 IDWTELEM *b1= cs->b1;
1116 IDWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
1117 IDWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
a0d1931c
Y
1118
1119{START_TIMER
13705b69
MN
1120 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1121 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
a0d1931c
Y
1122STOP_TIMER("vertical_compose53i*")}
1123
1124{START_TIMER
13705b69
MN
1125 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1126 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
a0d1931c
Y
1127STOP_TIMER("horizontal_compose53i")}
1128
1129 cs->b0 = b2;
1130 cs->b1 = b3;
1131 cs->y += 2;
1132}
1133
d593e329 1134static void spatial_compose53i_dy(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride){
f9e6ebf7 1135 int y= cs->y;
d593e329
MN
1136 IDWTELEM *b0= cs->b0;
1137 IDWTELEM *b1= cs->b1;
1138 IDWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1139 IDWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
791e7b83
MN
1140
1141{START_TIMER
13705b69
MN
1142 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1143 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
791e7b83
MN
1144STOP_TIMER("vertical_compose53i*")}
1145
1146{START_TIMER
13705b69
MN
1147 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1148 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
791e7b83
MN
1149STOP_TIMER("horizontal_compose53i")}
1150
f9e6ebf7
LM
1151 cs->b0 = b2;
1152 cs->b1 = b3;
1153 cs->y += 2;
1154}
1155
d593e329 1156static void spatial_compose53i(IDWTELEM *buffer, int width, int height, int stride){
f9e6ebf7
LM
1157 dwt_compose_t cs;
1158 spatial_compose53i_init(&cs, buffer, height, stride);
1159 while(cs.y <= height)
1160 spatial_compose53i_dy(&cs, buffer, width, height, stride);
115329f1
DB
1161}
1162
791e7b83 1163
d593e329
MN
1164void ff_snow_horizontal_compose97i(IDWTELEM *b, int width){
1165 IDWTELEM temp[width];
791e7b83
MN
1166 const int w2= (width+1)>>1;
1167
d593e329 1168 inv_lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
ff06e067 1169 inv_lift (temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
d593e329
MN
1170 inv_liftS(b , temp , temp+w2, 2, 1, 1, width, W_BM, W_BO, W_BS, 0, 1);
1171 inv_lift (b+1 , temp+w2, b , 2, 1, 2, width, W_AM, W_AO, W_AS, 1, 0);
791e7b83
MN
1172}
1173
d593e329 1174static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
791e7b83 1175 int i;
115329f1 1176
791e7b83
MN
1177 for(i=0; i<width; i++){
1178 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1179 }
1180}
1181
d593e329 1182static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
791e7b83 1183 int i;
115329f1 1184
791e7b83 1185 for(i=0; i<width; i++){
791e7b83 1186 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
791e7b83
MN
1187 }
1188}
1189
d593e329 1190static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
791e7b83 1191 int i;
115329f1 1192
791e7b83 1193 for(i=0; i<width; i++){
f5a71928 1194#ifdef liftS
791e7b83 1195 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
f5a71928
MN
1196#else
1197 b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
1198#endif
791e7b83
MN
1199 }
1200}
1201
d593e329 1202static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
791e7b83 1203 int i;
115329f1 1204
791e7b83
MN
1205 for(i=0; i<width; i++){
1206 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1207 }
1208}
1209
d593e329 1210void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
565a45ac 1211 int i;
115329f1 1212
565a45ac 1213 for(i=0; i<width; i++){
565a45ac 1214 b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
565a45ac 1215 b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
f5a71928 1216#ifdef liftS
565a45ac 1217 b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
f5a71928
MN
1218#else
1219 b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
1220#endif
565a45ac
MN
1221 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1222 }
1223}
1224
a0d1931c
Y
1225static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1226 cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1227 cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line);
1228 cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1229 cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
1230 cs->y = -3;
1231}
1232
d593e329 1233static void spatial_compose97i_init(dwt_compose_t *cs, IDWTELEM *buffer, int height, int stride){
f9e6ebf7
LM
1234 cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1235 cs->b1 = buffer + mirror(-3 , height-1)*stride;
1236 cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1237 cs->b3 = buffer + mirror(-3+2, height-1)*stride;
1238 cs->y = -3;
1239}
791e7b83 1240
059715a4 1241static void spatial_compose97i_dy_buffered(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
a0d1931c 1242 int y = cs->y;
115329f1 1243
d593e329
MN
1244 IDWTELEM *b0= cs->b0;
1245 IDWTELEM *b1= cs->b1;
1246 IDWTELEM *b2= cs->b2;
1247 IDWTELEM *b3= cs->b3;
1248 IDWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
1249 IDWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
115329f1 1250
a0d1931c 1251{START_TIMER
565a45ac 1252 if(y>0 && y+4<height){
059715a4 1253 dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
565a45ac 1254 }else{
13705b69
MN
1255 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1256 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1257 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1258 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
565a45ac 1259 }
a0d1931c
Y
1260if(width>400){
1261STOP_TIMER("vertical_compose97i")}}
a0d1931c
Y
1262
1263{START_TIMER
059715a4
RE
1264 if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width);
1265 if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width);
3b6ab26c 1266if(width>400 && y+0<(unsigned)height){
a0d1931c
Y
1267STOP_TIMER("horizontal_compose97i")}}
1268
1269 cs->b0=b2;
1270 cs->b1=b3;
1271 cs->b2=b4;
1272 cs->b3=b5;
1273 cs->y += 2;
1274}
1275
d593e329 1276static void spatial_compose97i_dy(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride){
f9e6ebf7 1277 int y = cs->y;
d593e329
MN
1278 IDWTELEM *b0= cs->b0;
1279 IDWTELEM *b1= cs->b1;
1280 IDWTELEM *b2= cs->b2;
1281 IDWTELEM *b3= cs->b3;
1282 IDWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1283 IDWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
791e7b83 1284
791e7b83 1285{START_TIMER
13705b69
MN
1286 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1287 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1288 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1289 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
791e7b83
MN
1290if(width>400){
1291STOP_TIMER("vertical_compose97i")}}
1292
1293{START_TIMER
059715a4
RE
1294 if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width);
1295 if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width);
791e7b83
MN
1296if(width>400 && b0 <= b2){
1297STOP_TIMER("horizontal_compose97i")}}
f9e6ebf7
LM
1298
1299 cs->b0=b2;
1300 cs->b1=b3;
1301 cs->b2=b4;
1302 cs->b3=b5;
1303 cs->y += 2;
1304}
1305
d593e329 1306static void spatial_compose97i(IDWTELEM *buffer, int width, int height, int stride){
f9e6ebf7
LM
1307 dwt_compose_t cs;
1308 spatial_compose97i_init(&cs, buffer, height, stride);
1309 while(cs.y <= height)
1310 spatial_compose97i_dy(&cs, buffer, width, height, stride);
1311}
1312
ceaf1909 1313static void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
a0d1931c
Y
1314 int level;
1315 for(level=decomposition_count-1; level>=0; level--){
1316 switch(type){
d4b287ed
LM
1317 case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1318 case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
a0d1931c
Y
1319 }
1320 }
1321}
1322
d593e329 1323static void ff_spatial_idwt_init(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
f9e6ebf7
LM
1324 int level;
1325 for(level=decomposition_count-1; level>=0; level--){
1326 switch(type){
d4b287ed
LM
1327 case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1328 case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
f9e6ebf7 1329 }
791e7b83
MN
1330 }
1331}
1332
d593e329 1333static void ff_spatial_idwt_slice(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
f9e6ebf7 1334 const int support = type==1 ? 3 : 5;
791e7b83 1335 int level;
f9e6ebf7 1336 if(type==2) return;
791e7b83 1337
46c281e8 1338 for(level=decomposition_count-1; level>=0; level--){
f9e6ebf7
LM
1339 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1340 switch(type){
d4b287ed 1341 case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
f9e6ebf7 1342 break;
d4b287ed 1343 case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
f9e6ebf7 1344 break;
f9e6ebf7 1345 }
791e7b83
MN
1346 }
1347 }
1348}
1349
059715a4 1350static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
a0d1931c
Y
1351 const int support = type==1 ? 3 : 5;
1352 int level;
1353 if(type==2) return;
1354
1355 for(level=decomposition_count-1; level>=0; level--){
1356 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1357 switch(type){
d4b287ed 1358 case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
a0d1931c 1359 break;
d4b287ed 1360 case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
a0d1931c 1361 break;
a0d1931c
Y
1362 }
1363 }
1364 }
1365}
1366
d593e329 1367static void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
f9e6ebf7
LM
1368 dwt_compose_t cs[MAX_DECOMPOSITIONS];
1369 int y;
1370 ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1371 for(y=0; y<height; y+=4)
1372 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
f9e6ebf7
LM
1373}
1374
d593e329 1375static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
4f4e9633
MN
1376 const int w= b->width;
1377 const int h= b->height;
1378 int x, y;
1379
791e7b83 1380 if(1){
791e7b83 1381 int run=0;
a8d73e56 1382 int runs[w*h];
791e7b83 1383 int run_index=0;
b44985ba 1384 int max_index;
115329f1 1385
791e7b83
MN
1386 for(y=0; y<h; y++){
1387 for(x=0; x<w; x++){
78486403 1388 int v, p=0;
6b2f6646 1389 int /*ll=0, */l=0, lt=0, t=0, rt=0;
a8d73e56 1390 v= src[x + y*stride];
791e7b83
MN
1391
1392 if(y){
a8d73e56 1393 t= src[x + (y-1)*stride];
791e7b83 1394 if(x){
a8d73e56 1395 lt= src[x - 1 + (y-1)*stride];
791e7b83
MN
1396 }
1397 if(x + 1 < w){
a8d73e56 1398 rt= src[x + 1 + (y-1)*stride];
791e7b83
MN
1399 }
1400 }
1401 if(x){
a8d73e56 1402 l= src[x - 1 + y*stride];
6b2f6646
MN
1403 /*if(x > 1){
1404 if(orientation==1) ll= src[y + (x-2)*stride];
1405 else ll= src[x - 2 + y*stride];
791e7b83
MN
1406 }*/
1407 }
78486403 1408 if(parent){
a8d73e56
MN
1409 int px= x>>1;
1410 int py= y>>1;
115329f1 1411 if(px<b->parent->width && py<b->parent->height)
78486403
MN
1412 p= parent[px + py*2*stride];
1413 }
1414 if(!(/*ll|*/l|lt|t|rt|p)){
791e7b83
MN
1415 if(v){
1416 runs[run_index++]= run;
1417 run=0;
1418 }else{
1419 run++;
1420 }
1421 }
1422 }
1423 }
b44985ba 1424 max_index= run_index;
791e7b83
MN
1425 runs[run_index++]= run;
1426 run_index=0;
1427 run= runs[run_index++];
1428
b44985ba
MN
1429 put_symbol2(&s->c, b->state[30], max_index, 0);
1430 if(run_index <= max_index)
1431 put_symbol2(&s->c, b->state[1], run, 3);
115329f1 1432
791e7b83 1433 for(y=0; y<h; y++){
d06c75a8 1434 if(s->c.bytestream_end - s->c.bytestream < w*40){
0ecca7a4
MN
1435 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1436 return -1;
1437 }
791e7b83 1438 for(x=0; x<w; x++){
78486403 1439 int v, p=0;
6b2f6646 1440 int /*ll=0, */l=0, lt=0, t=0, rt=0;
a8d73e56 1441 v= src[x + y*stride];
791e7b83
MN
1442
1443 if(y){
a8d73e56 1444 t= src[x + (y-1)*stride];
791e7b83 1445 if(x){
a8d73e56 1446 lt= src[x - 1 + (y-1)*stride];
791e7b83
MN
1447 }
1448 if(x + 1 < w){
a8d73e56 1449 rt= src[x + 1 + (y-1)*stride];
791e7b83
MN
1450 }
1451 }
1452 if(x){
a8d73e56 1453 l= src[x - 1 + y*stride];
6b2f6646
MN
1454 /*if(x > 1){
1455 if(orientation==1) ll= src[y + (x-2)*stride];
1456 else ll= src[x - 2 + y*stride];
791e7b83
MN
1457 }*/
1458 }
78486403 1459 if(parent){
a8d73e56
MN
1460 int px= x>>1;
1461 int py= y>>1;
115329f1 1462 if(px<b->parent->width && py<b->parent->height)
78486403
MN
1463 p= parent[px + py*2*stride];
1464 }
1465 if(/*ll|*/l|lt|t|rt|p){
c26abfa5 1466 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
6b2f6646 1467
28869757 1468 put_rac(&s->c, &b->state[0][context], !!v);
791e7b83
MN
1469 }else{
1470 if(!run){
1471 run= runs[run_index++];
4f4e9633 1472
b44985ba
MN
1473 if(run_index <= max_index)
1474 put_symbol2(&s->c, b->state[1], run, 3);
791e7b83
MN
1475 assert(v);
1476 }else{
1477 run--;
1478 assert(!v);
1479 }
1480 }
1481 if(v){
c26abfa5
DB
1482 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1483 int l2= 2*FFABS(l) + (l<0);
1484 int t2= 2*FFABS(t) + (t<0);
6b2f6646 1485
c26abfa5 1486 put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
538a3841 1487 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
791e7b83
MN
1488 }
1489 }
1490 }
791e7b83 1491 }
0ecca7a4 1492 return 0;
791e7b83
MN
1493}
1494
d593e329 1495static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
4f4e9633
MN
1496// encode_subband_qtree(s, b, src, parent, stride, orientation);
1497// encode_subband_z0run(s, b, src, parent, stride, orientation);
0ecca7a4 1498 return encode_subband_c0run(s, b, src, parent, stride, orientation);
4f4e9633
MN
1499// encode_subband_dzr(s, b, src, parent, stride, orientation);
1500}
1501
a0d1931c 1502static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
791e7b83
MN
1503 const int w= b->width;
1504 const int h= b->height;
1505 int x,y;
115329f1 1506
791e7b83 1507 if(1){
b44985ba 1508 int run, runs;
cbb1d2b1
MN
1509 x_and_coeff *xc= b->x_coeff;
1510 x_and_coeff *prev_xc= NULL;
1511 x_and_coeff *prev2_xc= xc;
1512 x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
1513 x_and_coeff *prev_parent_xc= parent_xc;
791e7b83 1514
b44985ba
MN
1515 runs= get_symbol2(&s->c, b->state[30], 0);
1516 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1517 else run= INT_MAX;
1518
791e7b83 1519 for(y=0; y<h; y++){
0cea8a03
MN
1520 int v=0;
1521 int lt=0, t=0, rt=0;
1522
cbb1d2b1
MN
1523 if(y && prev_xc->x == 0){
1524 rt= prev_xc->coeff;
0cea8a03 1525 }
791e7b83 1526 for(x=0; x<w; x++){
0cea8a03
MN
1527 int p=0;
1528 const int l= v;
115329f1 1529
0cea8a03 1530 lt= t; t= rt;
791e7b83 1531
ff765159 1532 if(y){
cbb1d2b1
MN
1533 if(prev_xc->x <= x)
1534 prev_xc++;
1535 if(prev_xc->x == x + 1)
1536 rt= prev_xc->coeff;
ff765159
MN
1537 else
1538 rt=0;
1539 }
cbb1d2b1
MN
1540 if(parent_xc){
1541 if(x>>1 > parent_xc->x){
1542 parent_xc++;
7b49c309 1543 }
cbb1d2b1
MN
1544 if(x>>1 == parent_xc->x){
1545 p= parent_xc->coeff;
ff765159 1546 }
78486403
MN
1547 }
1548 if(/*ll|*/l|lt|t|rt|p){
c26abfa5 1549 int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
6b2f6646 1550
28869757 1551 v=get_rac(&s->c, &b->state[0][context]);
3c096ac7
MN
1552 if(v){
1553 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
1554 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
115329f1 1555
cbb1d2b1
MN
1556 xc->x=x;
1557 (xc++)->coeff= v;
3c096ac7 1558 }
791e7b83
MN
1559 }else{
1560 if(!run){
b44985ba
MN
1561 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1562 else run= INT_MAX;
3c096ac7
MN
1563 v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
1564 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
115329f1 1565
cbb1d2b1
MN
1566 xc->x=x;
1567 (xc++)->coeff= v;
791e7b83 1568 }else{
99cd59e5 1569 int max_run;
791e7b83
MN
1570 run--;
1571 v=0;
3c1adccd 1572
cbb1d2b1 1573 if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
99cd59e5 1574 else max_run= FFMIN(run, w-x-1);
cbb1d2b1
MN
1575 if(parent_xc)
1576 max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
99cd59e5
MN
1577 x+= max_run;
1578 run-= max_run;
791e7b83
MN
1579 }
1580 }
7b49c309 1581 }
cbb1d2b1
MN
1582 (xc++)->x= w+1; //end marker
1583 prev_xc= prev2_xc;
1584 prev2_xc= xc;
115329f1 1585
cbb1d2b1 1586 if(parent_xc){
7b49c309 1587 if(y&1){
cbb1d2b1
MN
1588 while(parent_xc->x != parent->width+1)
1589 parent_xc++;
1590 parent_xc++;
1591 prev_parent_xc= parent_xc;
7b49c309 1592 }else{
cbb1d2b1 1593 parent_xc= prev_parent_xc;
791e7b83
MN
1594 }
1595 }
1596 }
a0d1931c 1597
cbb1d2b1 1598 (xc++)->x= w+1; //end marker
a0d1931c
Y
1599 }
1600}
1601
1602static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1603 const int w= b->width;
62ab0b78 1604 int y;
f66e4f5f 1605 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
c97de57c 1606 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
a0d1931c
Y
1607 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1608 int new_index = 0;
115329f1 1609
a0d1931c
Y
1610 START_TIMER
1611
d593e329 1612 if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){
a0d1931c
Y
1613 qadd= 0;
1614 qmul= 1<<QEXPSHIFT;
1615 }
1616
1617 /* If we are on the second or later slice, restore our index. */
1618 if (start_y != 0)
1619 new_index = save_state[0];
1620
115329f1 1621
a0d1931c
Y
1622 for(y=start_y; y<h; y++){
1623 int x = 0;
1624 int v;
d593e329
MN
1625 IDWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1626 memset(line, 0, b->width*sizeof(IDWTELEM));
a0d1931c
Y
1627 v = b->x_coeff[new_index].coeff;
1628 x = b->x_coeff[new_index++].x;
1629 while(x < w)
1630 {
538a3841
MN
1631 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1632 register int u= -(v&1);
1633 line[x] = (t^u) - u;
1634
a0d1931c
Y
1635 v = b->x_coeff[new_index].coeff;
1636 x = b->x_coeff[new_index++].x;
1637 }
791e7b83 1638 }
a0d1931c
Y
1639 if(w > 200 && start_y != 0/*level+1 == s->spatial_decomposition_count*/){
1640 STOP_TIMER("decode_subband")
1641 }
115329f1 1642
a0d1931c
Y
1643 /* Save our variables for the next slice. */
1644 save_state[0] = new_index;
115329f1 1645
a0d1931c 1646 return;
791e7b83
MN
1647}
1648
396a5e68 1649static void reset_contexts(SnowContext *s){ //FIXME better initial contexts
791e7b83
MN
1650 int plane_index, level, orientation;
1651
19aa028d 1652 for(plane_index=0; plane_index<3; plane_index++){
791e7b83
MN
1653 for(level=0; level<s->spatial_decomposition_count; level++){
1654 for(orientation=level ? 1:0; orientation<4; orientation++){
28869757 1655 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
791e7b83
MN
1656 }
1657 }
1658 }
28869757
MN
1659 memset(s->header_state, MID_STATE, sizeof(s->header_state));
1660 memset(s->block_state, MID_STATE, sizeof(s->block_state));
155ec6ed
MN
1661}
1662
1663static int alloc_blocks(SnowContext *s){
1664 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1665 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
115329f1 1666
155ec6ed
MN
1667 s->b_width = w;
1668 s->b_height= h;
115329f1 1669
155ec6ed
MN
1670 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
1671 return 0;
1672}
1673
28869757
MN
1674static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1675 uint8_t *bytestream= d->bytestream;
1676 uint8_t *bytestream_start= d->bytestream_start;
155ec6ed 1677 *d= *s;
28869757
MN
1678 d->bytestream= bytestream;
1679 d->bytestream_start= bytestream_start;
155ec6ed
MN
1680}
1681
1682//near copy & paste from dsputil, FIXME
/**
 * Sums the samples of a w x w square.
 *
 * @param pix       top-left sample of the square
 * @param line_size distance between the starts of two consecutive rows
 * @param w         width and height of the square
 * @return the sum of all w*w samples
 */
static int pix_sum(uint8_t * pix, int line_size, int w)
{
    int total = 0;
    int row, col;

    for (row = 0; row < w; row++) {
        const uint8_t *p = pix + row * line_size;

        for (col = 0; col < w; col++)
            total += p[col];
    }
    return total;
}
1697
1698//near copy & paste from dsputil, FIXME
1699static int pix_norm1(uint8_t * pix, int line_size, int w)
1700{
1701 int s, i, j;
1d503957 1702 uint32_t *sq = ff_squareTbl + 256;
155ec6ed
MN
1703
1704 s = 0;
1705 for (i = 0; i < w; i++) {
1706 for (j = 0; j < w; j ++) {
1707 s += sq[pix[0]];
1708 pix ++;
1709 }
1710 pix += line_size - w;
1711 }
1712 return s;
1713}
1714
8c36eaaa 1715static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
155ec6ed
MN
1716 const int w= s->b_width << s->block_max_depth;
1717 const int rem_depth= s->block_max_depth - level;
1718 const int index= (x + y*w) << rem_depth;
1719 const int block_w= 1<<rem_depth;
1720 BlockNode block;
1721 int i,j;
115329f1 1722
155ec6ed
MN
1723 block.color[0]= l;
1724 block.color[1]= cb;
1725 block.color[2]= cr;
1726 block.mx= mx;
1727 block.my= my;
8c36eaaa 1728 block.ref= ref;
155ec6ed
MN
1729 block.type= type;
1730 block.level= level;
1731
1732 for(j=0; j<block_w; j++){
1733 for(i=0; i<block_w; i++){
1734 s->block[index + i + j*w]= block;
1735 }
1736 }
1737}
1738
1739static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
1740 const int offset[3]= {
1741 y*c-> stride + x,
1742 ((y*c->uvstride + x)>>1),
1743 ((y*c->uvstride + x)>>1),
1744 };
1745 int i;
1746 for(i=0; i<3; i++){
1747 c->src[0][i]= src [i];
1748 c->ref[0][i]= ref [i] + offset[i];
1749 }
1750 assert(!ref_index);
1751}
1752
85fc0e75 1753static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
aadcc5ce 1754 const BlockNode *left, const BlockNode *top, const BlockNode *tr){
85fc0e75
LM
1755 if(s->ref_frames == 1){
1756 *mx = mid_pred(left->mx, top->mx, tr->mx);
1757 *my = mid_pred(left->my, top->my, tr->my);
1758 }else{
1759 const int *scale = scale_mv_ref[ref];
6884c36c
PI
1760 *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8,
1761 (top ->mx * scale[top ->ref] + 128) >>8,
1762 (tr ->mx * scale[tr ->ref] + 128) >>8);
1763 *my = mid_pred((left->my * scale[left->ref] + 128) >>8,
1764 (top ->my * scale[top ->ref] + 128) >>8,
1765 (tr ->my * scale[tr ->ref] + 128) >>8);
85fc0e75
LM
1766 }
1767}
1768
155ec6ed
MN
1769//FIXME copy&paste
/* Named rows of the predictor table "int P[10][2]" that encode_q_branch() fills before the motion search; element [0] holds the x and [1] the y component. */
1770#define P_LEFT P[1] /* motion vector of the left neighbour */
1771#define P_TOP P[2] /* motion vector of the top neighbour */
1772#define P_TOPRIGHT P[3] /* motion vector of the top-right neighbour */
1773#define P_MEDIAN P[4] /* mid_pred() of P_LEFT, P_TOP and P_TOPRIGHT */
1774#define P_MV1 P[9]
1775#define FLAG_QPEL 1 //must be 1
1776
/**
 * Chooses and writes the coding of the block at (x,y) of quadtree level `level`.
 *
 * On keyframes the block is only marked intra with the left neighbour's colours and 0 is returned.
 * Otherwise a motion search is run for every reference frame and the lowest scoring vector is kept.
 * An inter version and an intra (mean colour) version are then trial-encoded into p_buffer / i_buffer
 * with private copies of the range coder and of block_state. Unless the deepest level is reached, the
 * four child blocks are encoded recursively into the real coder; their summed score is returned if it
 * beats both trial versions. Otherwise the cheaper trial version is copied back over the output
 * position saved on entry and its score is returned.
 *
 * @return the score of the coding that was kept (0 on keyframes)
 */
1777static int encode_q_branch(SnowContext *s, int level, int x, int y){
1778 uint8_t p_buffer[1024];
1779 uint8_t i_buffer[1024];
1780 uint8_t p_state[sizeof(s->block_state)];
1781 uint8_t i_state[sizeof(s->block_state)];
28869757
MN
1782 RangeCoder pc, ic;
/* output position on entry; the chosen trial encoding is copied back to it at the end */
1783 uint8_t *pbbak= s->c.bytestream;
1784 uint8_t *pbbak_start= s->c.bytestream_start;
1e6b5700 1785 int score, score2, iscore, i_len, p_len, block_s, sum, base_bits;
155ec6ed
MN
1786 const int w= s->b_width << s->block_max_depth;
1787 const int h= s->b_height << s->block_max_depth;
1788 const int rem_depth= s->block_max_depth - level;
1789 const int index= (x + y*w) << rem_depth;
1790 const int block_w= 1<<(LOG2_MB_SIZE - level);
155ec6ed
MN
1791 int trx= (x+1)<<rem_depth;
1792 int try= (y+1)<<rem_depth;
aadcc5ce
PI
/* neighbouring blocks; null_block stands in for neighbours outside the grid */
1793 const BlockNode *left = x ? &s->block[index-1] : &null_block;
1794 const BlockNode *top = y ? &s->block[index-w] : &null_block;
1795 const BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
1796 const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
1797 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
1798 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
155ec6ed
MN
/* colour predictors are taken from the left neighbour */
1799 int pl = left->color[0];
1800 int pcb= left->color[1];
1801 int pcr= left->color[2];
85fc0e75 1802 int pmx, pmy;
155ec6ed 1803 int mx=0, my=0;
51d6a3cf 1804 int l,cr,cb;
155ec6ed
MN
1805 const int stride= s->current_picture.linesize[0];
1806 const int uvstride= s->current_picture.linesize[1];
51d6a3cf
MN
1807 uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w,
1808 s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
1809 s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
155ec6ed
MN
1810 int P[10][2];
1811 int16_t last_mv[3][2];
1812 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
1813 const int shift= 1+qpel;
1814 MotionEstContext *c= &s->m.me;
/* entropy coding contexts derived from the neighbours */
8c36eaaa 1815 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
c26abfa5
DB
1816 int mx_context= av_log2(2*FFABS(left->mx - top->mx));
1817 int my_context= av_log2(2*FFABS(left->my - top->my));
155ec6ed 1818 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
8c36eaaa 1819 int ref, best_ref, ref_score, ref_mx, ref_my;
155ec6ed
MN
1820
1821 assert(sizeof(s->block_state) >= 256);
1822 if(s->keyframe){
85fc0e75 1823 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
155ec6ed
MN
1824 return 0;
1825 }
1826
155ec6ed
MN
1827// clip predictors / edge ?
1828
1829 P_LEFT[0]= left->mx;
1830 P_LEFT[1]= left->my;
1831 P_TOP [0]= top->mx;
1832 P_TOP [1]= top->my;
1833 P_TOPRIGHT[0]= tr->mx;
1834 P_TOPRIGHT[1]= tr->my;
115329f1 1835
155ec6ed
MN
/* search candidates: the vector currently stored for this block and those of the right and bottom neighbours */
1836 last_mv[0][0]= s->block[index].mx;
1837 last_mv[0][1]= s->block[index].my;
1838 last_mv[1][0]= right->mx;
1839 last_mv[1][1]= right->my;
1840 last_mv[2][0]= bottom->mx;
1841 last_mv[2][1]= bottom->my;
115329f1 1842
155ec6ed 1843 s->m.mb_stride=2;
115329f1 1844 s->m.mb_x=
155ec6ed 1845 s->m.mb_y= 0;
e2158da8 1846 c->skip= 0;
155ec6ed 1847
e2158da8
PI
1848 assert(c-> stride == stride);
1849 assert(c->uvstride == uvstride);
115329f1 1850
155ec6ed
MN
1851 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
1852 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
1853 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
1854 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
115329f1 1855
ff158dc9
MN
/* search window relative to the block position */
1856 c->xmin = - x*block_w - 16+2;
1857 c->ymin = - y*block_w - 16+2;
1858 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
1859 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
155ec6ed
MN
1860
1861 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
115329f1 1862 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
155ec6ed
MN
1863 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
1864 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
1865 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
1866 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
1867 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
1868
1869 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
1870 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
1871
1872 if (!y) {
1873 c->pred_x= P_LEFT[0];
1874 c->pred_y= P_LEFT[1];
1875 } else {
1876 c->pred_x = P_MEDIAN[0];
1877 c->pred_y = P_MEDIAN[1];
1878 }
1879
8c36eaaa
LM
/* try every reference frame and keep the vector with the lowest score */
1880 score= INT_MAX;
1881 best_ref= 0;
1882 for(ref=0; ref<s->ref_frames; ref++){
1883 init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);
1884
1885 ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
1886 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
155ec6ed 1887
8c36eaaa
LM
1888 assert(ref_mx >= c->xmin);
1889 assert(ref_mx <= c->xmax);
1890 assert(ref_my >= c->ymin);
1891 assert(ref_my <= c->ymax);
115329f1 1892
e2158da8 1893 ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
8c36eaaa
LM
1894 ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
1895 ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
1896 if(s->ref_mvs[ref]){
1897 s->ref_mvs[ref][index][0]= ref_mx;
1898 s->ref_mvs[ref][index][1]= ref_my;
1899 s->ref_scores[ref][index]= ref_score;
1900 }
1901 if(score > ref_score){
1902 score= ref_score;
1903 best_ref= ref;
1904 mx= ref_mx;
1905 my= ref_my;
1906 }
1907 }
755bfeab 1908 //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2
115329f1 1909
155ec6ed 1910 // subpel search
/* trial-encode the inter version into p_buffer with a private coder and state copy */
61d49d12 1911 base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start);
155ec6ed 1912 pc= s->c;
28869757
MN
1913 pc.bytestream_start=
1914 pc.bytestream= p_buffer; //FIXME end/start? and at the others too
155ec6ed
MN
1915 memcpy(p_state, s->block_state, sizeof(s->block_state));
1916
1917 if(level!=s->block_max_depth)
28869757
MN
1918 put_rac(&pc, &p_state[4 + s_context], 1);
1919 put_rac(&pc, &p_state[1 + left->type + top->type], 0);
8c36eaaa
LM
1920 if(s->ref_frames > 1)
1921 put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
85fc0e75 1922 pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
8c36eaaa
LM
1923 put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
1924 put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
28869757 1925 p_len= pc.bytestream - pc.bytestream_start;
1e6b5700 1926 score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT;
155ec6ed
MN
1927
/* intra candidate: rounded mean of each plane; only the luma error enters iscore */
1928 block_s= block_w*block_w;
51d6a3cf 1929 sum = pix_sum(current_data[0], stride, block_w);
155ec6ed 1930 l= (sum + block_s/2)/block_s;
51d6a3cf 1931 iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
115329f1 1932
155ec6ed 1933 block_s= block_w*block_w>>2;
51d6a3cf 1934 sum = pix_sum(current_data[1], uvstride, block_w>>1);
155ec6ed
MN
1935 cb= (sum + block_s/2)/block_s;
1936// iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
51d6a3cf 1937 sum = pix_sum(current_data[2], uvstride, block_w>>1);
155ec6ed
MN
1938 cr= (sum + block_s/2)/block_s;
1939// iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
1940
/* trial-encode the intra version into i_buffer, again with private copies */
1941 ic= s->c;
28869757
MN
1942 ic.bytestream_start=
1943 ic.bytestream= i_buffer; //FIXME end/start? and at the others too
155ec6ed
MN
1944 memcpy(i_state, s->block_state, sizeof(s->block_state));
1945 if(level!=s->block_max_depth)
28869757
MN
1946 put_rac(&ic, &i_state[4 + s_context], 1);
1947 put_rac(&ic, &i_state[1 + left->type + top->type], 1);
155ec6ed
MN
1948 put_symbol(&ic, &i_state[32], l-pl , 1);
1949 put_symbol(&ic, &i_state[64], cb-pcb, 1);
1950 put_symbol(&ic, &i_state[96], cr-pcr, 1);
28869757 1951 i_len= ic.bytestream - ic.bytestream_start;
1e6b5700 1952 iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT;
155ec6ed
MN
1953
1954// assert(score==256*256*256*64-1);
1955 assert(iscore < 255*255*256 + s->lambda2*10);
1956 assert(iscore >= 0);
1957 assert(l>=0 && l<=255);
1958 assert(pl>=0 && pl<=255);
1959
1960 if(level==0){
1961 int varc= iscore >> 8;
1962 int vard= score >> 8;
1963 if (vard <= 64 || vard < varc)
1964 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
1965 else
1966 c->scene_change_score+= s->m.qscale;
1967 }
115329f1 1968
/* third option: split; the four children are written to the real coder s->c */
155ec6ed 1969 if(level!=s->block_max_depth){
28869757 1970 put_rac(&s->c, &s->block_state[4 + s_context], 0);
155ec6ed
MN
1971 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
1972 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
1973 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
1974 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
1975 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
115329f1 1976
155ec6ed
MN
1977 if(score2 < score && score2 < iscore)
1978 return score2;
1979 }
115329f1 1980
/* not split: overwrite the output from the saved position with the cheaper trial encoding */
155ec6ed 1981 if(iscore < score){
85fc0e75 1982 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
28869757 1983 memcpy(pbbak, i_buffer, i_len);
155ec6ed 1984 s->c= ic;
28869757
MN
1985 s->c.bytestream_start= pbbak_start;
1986 s->c.bytestream= pbbak + i_len;
8c36eaaa 1987 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
155ec6ed
MN
1988 memcpy(s->block_state, i_state, sizeof(s->block_state));
1989 return iscore;
1990 }else{
28869757 1991 memcpy(pbbak, p_buffer, p_len);
155ec6ed 1992 s->c= pc;
28869757
MN
1993 s->c.bytestream_start= pbbak_start;
1994 s->c.bytestream= pbbak + p_len;
8c36eaaa 1995 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
155ec6ed
MN
1996 memcpy(s->block_state, p_state, sizeof(s->block_state));
1997 return score;
1998 }
1999}
2000
849f1035 2001static av_always_inline int same_block(BlockNode *a, BlockNode *b){
51d6a3cf
MN
2002 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
2003 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
2004 }else{
8c36eaaa 2005 return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
51d6a3cf
MN
2006 }
2007}
2008
2009static void encode_q_branch2(SnowContext *s, int level, int x, int y){
2010 const int w= s->b_width << s->block_max_depth;
2011 const int rem_depth= s->block_max_depth - level;
2012 const int index= (x + y*w) << rem_depth;
2013 int trx= (x+1)<<rem_depth;
2014 BlockNode *b= &s->block[index];
aadcc5ce
PI
2015 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2016 const BlockNode *top = y ? &s->block[index-w] : &null_block;
2017 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2018 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
51d6a3cf
MN
2019 int pl = left->color[0];
2020 int pcb= left->color[1];
2021 int pcr= left->color[2];
85fc0e75 2022 int pmx, pmy;
8c36eaaa 2023 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
c26abfa5
DB
2024 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref;
2025 int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref;
51d6a3cf
MN
2026 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2027
2028 if(s->keyframe){
85fc0e75 2029 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
51d6a3cf
MN
2030 return;
2031 }
2032
2033 if(level!=s->block_max_depth){
2034 if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
d6f41eed
MN
2035 put_rac(&s->c, &s->block_state[4 + s_context], 1);
2036 }else{
51d6a3cf
MN
2037 put_rac(&s->c, &s->block_state[4 + s_context], 0);
2038 encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
2039 encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
2040 encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
2041 encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
2042 return;
51d6a3cf
MN
2043 }
2044 }
2045 if(b->type & BLOCK_INTRA){
85fc0e75 2046 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
51d6a3cf
MN
2047 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
2048 put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
2049 put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
2050 put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
8c36eaaa 2051 set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
51d6a3cf 2052 }else{
85fc0e75 2053 pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
51d6a3cf 2054 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
8c36eaaa
LM
2055 if(s->ref_frames > 1)
2056 put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
51d6a3cf
MN
2057 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
2058 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
8c36eaaa 2059 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
51d6a3cf
MN
2060 }
2061}
2062
155ec6ed
MN
2063static void decode_q_branch(SnowContext *s, int level, int x, int y){
2064 const int w= s->b_width << s->block_max_depth;
155ec6ed
MN
2065 const int rem_depth= s->block_max_depth - level;
2066 const int index= (x + y*w) << rem_depth;
155ec6ed 2067 int trx= (x+1)<<rem_depth;
aadcc5ce
PI
2068 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2069 const BlockNode *top = y ? &s->block[index-w] : &null_block;
2070 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2071 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
155ec6ed 2072 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
115329f1 2073
155ec6ed 2074 if(s->keyframe){
8c36eaaa 2075 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
155ec6ed
MN
2076 return;
2077 }
2078
28869757 2079 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
1e90b34f 2080 int type, mx, my;
155ec6ed
MN
2081 int l = left->color[0];
2082 int cb= left->color[1];
2083 int cr= left->color[2];
8c36eaaa
LM
2084 int ref = 0;
2085 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
c26abfa5
DB
2086 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
2087 int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
115329f1 2088
28869757 2089 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
155ec6ed
MN
2090
2091 if(type){
85fc0e75 2092 pred_mv(s, &mx, &my, 0, left, top, tr);
155ec6ed
MN
2093 l += get_symbol(&s->c, &s->block_state[32], 1);
2094 cb+= get_symbol(&s->c, &s->block_state[64], 1);
2095 cr+= get_symbol(&s->c, &s->block_state[96], 1);
2096 }else{
8c36eaaa
LM
2097 if(s->ref_frames > 1)
2098 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
85fc0e75 2099 pred_mv(s, &mx, &my, ref, left, top, tr);
8c36eaaa
LM
2100 mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
2101 my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
155ec6ed 2102 }
8c36eaaa 2103 set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
155ec6ed
MN
2104 }else{
2105 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
2106 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
2107 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
2108 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
2109 }
2110}
2111
74e6a8aa 2112static void encode_blocks(SnowContext *s, int search){
155ec6ed
MN
2113 int x, y;
2114 int w= s->b_width;
2115 int h= s->b_height;
2116
74e6a8aa 2117 if(s->avctx->me_method == ME_ITER && !s->keyframe && search)
51d6a3cf
MN
2118 iterative_me(s);
2119
155ec6ed 2120 for(y=0; y<h; y++){
d06c75a8 2121 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
0ecca7a4
MN
2122 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2123 return;
2124 }
155ec6ed 2125 for(x=0; x<w; x++){
74e6a8aa 2126 if(s->avctx->me_method == ME_ITER || !search)
51d6a3cf
MN
2127 encode_q_branch2(s, 0, x, y);
2128 else
2129 encode_q_branch (s, 0, x, y);
155ec6ed
MN
2130 }
2131 }
2132}
2133
2134static void decode_blocks(SnowContext *s){
2135 int x, y;
2136 int w= s->b_width;
2137 int h= s->b_height;
2138
2139 for(y=0; y<h; y++){
2140 for(x=0; x<w; x++){
2141 decode_q_branch(s, 0, x, y);
2142 }
2143 }
791e7b83
MN
2144}
2145
bad700e3 2146static void mc_block(uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
791e7b83 2147 int x, y;
3924dac4 2148START_TIMER
71dce72b 2149 for(y=0; y < b_h+HTAPS-1; y++){
791e7b83 2150 for(x=0; x < b_w; x++){
71dce72b
MN
2151 int a_2=src[x + HTAPS/2-5];
2152 int a_1=src[x + HTAPS/2-4];
2153 int a0= src[x + HTAPS/2-3];
2154 int a1= src[x + HTAPS/2-2];
2155 int a2= src[x + HTAPS/2-1];
2156 int a3= src[x + HTAPS/2+0];
2157 int a4= src[x + HTAPS/2+1];
2158 int a5= src[x + HTAPS/2+2];
2159 int a6= src[x + HTAPS/2+3];
2160 int a7= src[x + HTAPS/2+4];
791e7b83 2161// int am= 9*(a1+a2) - (a0+a3);
2b48d8ba 2162#if HTAPS==6
791e7b83 2163 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2b48d8ba
MN
2164#else
2165 int am= 21*(a2+a3) - 7*(a1+a4) + 3*(a0+a5) - (a_1+a6);
2166#endif
791e7b83
MN
2167// int am= 18*(a2+a3) - 2*(a1+a4);
2168// int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2169// int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;
2170
2171// if(b_w==16) am= 8*(a1+a2);
2172
8c2515bb
Y
2173 if(dx<8) am = (32*a2*( 8-dx) + am* dx + 128)>>8;
2174 else am = ( am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
115329f1 2175
8c2515bb
Y
2176 /* FIXME Try increasing tmp buffer to 16 bits and not clipping here. Should give marginally better results. - Robert*/
2177 if(am&(~255)) am= ~(am>>31);
115329f1 2178
8c2515bb 2179 tmp[x] = am;
791e7b83
MN
2180
2181/* if (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) + aL* dx + 32)>>6;
2182 else if(dx< 8) tmp[x + y*stride]= ( aL*( 8-dx) + am*(dx- 4) + 32)>>6;
2183 else if(dx<12) tmp[x + y*stride]= ( am*(12-dx) + aR*(dx- 8) + 32)>>6;
2184 else tmp[x + y*stride]= ( aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/
2185 }
3924dac4
MN
2186 tmp += stride;
2187 src += stride;
791e7b83 2188 }
71dce72b 2189 tmp -= (b_h+HTAPS-1)*stride;
115329f1 2190
791e7b83
MN
2191 for(y=0; y < b_h; y++){
2192 for(x=0; x < b_w; x++){
71dce72b
MN
2193 int a_2=tmp[x + (HTAPS/2-5)*stride];
2194 int a_1=tmp[x + (HTAPS/2-4)*stride];
2195 int a0= tmp[x + (HTAPS/2-3)*stride];
2196 int a1= tmp[x + (HTAPS/2-2)*stride];
2197 int a2= tmp[x + (HTAPS/2-1)*stride];
2198 int a3= tmp[x + (HTAPS/2+0)*stride];
2199 int a4= tmp[x + (HTAPS/2+1)*stride];
2200 int a5= tmp[x + (HTAPS/2+2)*stride];
2201 int a6= tmp[x + (HTAPS/2+3)*stride];
2202 int a7= tmp[x + (HTAPS/2+4)*stride];
2b48d8ba 2203#if HTAPS==6
791e7b83 2204 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2b48d8ba
MN
2205#else
2206 int am= 21*(a2+a3) - 7*(a1+a4) + 3*(a0+a5) - (a_1+a6);
2207#endif
791e7b83
MN
2208// int am= 18*(a2+a3) - 2*(a1+a4);
2209/* int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2210 int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/
115329f1 2211
791e7b83
MN
2212// if(b_w==16) am= 8*(a1+a2);
2213
8c2515bb
Y
2214 if(dy<8) am = (32*a2*( 8-dy) + am* dy + 128)>>8;
2215 else am = ( am*(16-dy) + 32*a3*(dy-8) + 128)>>8;
791e7b83 2216
8c2515bb 2217 if(am&(~255)) am= ~(am>>31);
115329f1 2218
8c2515bb 2219 dst[x] = am;
791e7b83
MN
2220/* if (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) + aL* dy + 32)>>6;
2221 else if(dy< 8) tmp[x + y*stride]= ( aL*( 8-dy) + am*(dy- 4) + 32)>>6;
2222 else if(dy<12) tmp[x + y*stride]= ( am*(12-dy) + aR*(dy- 8) + 32)>>6;
2223 else tmp[x + y*stride]= ( aR*(16-dy) + 16*a2*(dy-12) + 32)>>6;*/
2224 }
3924dac4
MN
2225 dst += stride;
2226 tmp += stride;
791e7b83 2227 }
3924dac4 2228STOP_TIMER("mc_block")
791e7b83
MN
2229}
2230
791e7b83 2231#define mca(dx,dy,b_w)\
bad700e3 2232static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
71dce72b 2233 uint8_t tmp[stride*(b_w+HTAPS-1)];\
791e7b83 2234 assert(h==b_w);\
71dce72b 2235 mc_block(dst, src-(HTAPS/2-1)-(HTAPS/2-1)*stride, tmp, stride, b_w, b_w, dx, dy);\
791e7b83
MN
2236}
2237
2238mca( 0, 0,16)
2239mca( 8, 0,16)
2240mca( 0, 8,16)
2241mca( 8, 8,16)
d92b5807
MN
2242mca( 0, 0,8)
2243mca( 8, 0,8)
2244mca( 0, 8,8)
2245mca( 8, 8,8)
791e7b83 2246
8c36eaaa 2247static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
51d6a3cf 2248 if(block->type & BLOCK_INTRA){
ff158dc9 2249 int x, y;
2692ceab
MN
2250 const int color = block->color[plane_index];
2251 const int color4= color*0x01010101;
1015631b
LM
2252 if(b_w==32){
2253 for(y=0; y < b_h; y++){
2254 *(uint32_t*)&dst[0 + y*stride]= color4;
2255 *(uint32_t*)&dst[4 + y*stride]= color4;
2256 *(uint32_t*)&dst[8 + y*stride]= color4;
2257 *(uint32_t*)&dst[12+ y*stride]= color4;
2258 *(uint32_t*)&dst[16+ y*stride]= color4;
2259 *(uint32_t*)&dst[20+ y*stride]= color4;
2260 *(uint32_t*)&dst[24+ y*stride]= color4;
2261 *(uint32_t*)&dst[28+ y*stride]= color4;
2262 }
2263 }else if(b_w==16){
2692ceab
MN
2264 for(y=0; y < b_h; y++){
2265 *(uint32_t*)&dst[0 + y*stride]= color4;
2266 *(uint32_t*)&dst[4 + y*stride]= color4;
2267 *(uint32_t*)&dst[8 + y*stride]= color4;
2268 *(uint32_t*)&dst[12+ y*stride]= color4;
2269 }
2270 }else if(b_w==8){
2271 for(y=0; y < b_h; y++){
2272 *(uint32_t*)&dst[0 + y*stride]= color4;
2273 *(uint32_t*)&dst[4 + y*stride]= color4;
2274 }
2275 }else if(b_w==4){
2276 for(y=0; y < b_h; y++){
2277 *(uint32_t*)&dst[0 + y*stride]= color4;
2278 }
2279 }else{
2280 for(y=0; y < b_h; y++){
2281 for(x=0; x < b_w; x++){
2282 dst[x + y*stride]= color;
2283 }
ff158dc9
MN
2284 }
2285 }
2286 }else{
8c36eaaa 2287 uint8_t *src= s->last_picture[block->ref].data[plane_index];
ff158dc9
MN
2288 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
2289 int mx= block->mx*scale;
2290 int my= block->my*scale;
ec697587
MN
2291 const int dx= mx&15;
2292 const int dy= my&15;
80e44bc3 2293 const int tab_index= 3 - (b_w>>2) + (b_w>>4);
71dce72b
MN
2294 sx += (mx>>4) - (HTAPS/2-1);
2295 sy += (my>>4) - (HTAPS/2-1);
ff158dc9 2296 src += sx + sy*stride;
71dce72b
MN
2297 if( (unsigned)sx >= w - b_w - (HTAPS-2)
2298 || (unsigned)sy >= h - b_h - (HTAPS-2)){
2299 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS-1, b_h+HTAPS-1, sx, sy, w, h);
ff158dc9
MN
2300 src= tmp + MB_SIZE;
2301 }
87f20c2f
MN
2302// assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
2303// assert(!(b_w&(b_w-1)));
2692ceab 2304 assert(b_w>1 && b_h>1);
1015631b 2305 assert(tab_index>=0 && tab_index<4 || b_w==32);
71dce72b 2306 if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || HTAPS != 6)
ec697587 2307 mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy);
1015631b
LM
2308 else if(b_w==32){
2309 int y;
2310 for(y=0; y<b_h; y+=16){
2311 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 2 + (y+2)*stride,stride);
2312 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 18 + (y+2)*stride,stride);
2313 }
2314 }else if(b_w==b_h)
80e44bc3 2315 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 2 + 2*stride,stride);
2692ceab 2316 else if(b_w==2*b_h){
80e44bc3
MN
2317 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 2 + 2*stride,stride);
2318 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 2 + b_h + 2*stride,stride);
2692ceab
MN
2319 }else{
2320 assert(2*b_w==b_h);
80e44bc3
MN
2321 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 2 + 2*stride ,stride);
2322 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 2 + 2*stride+b_w*stride,stride);
2692ceab 2323 }
ff158dc9
MN
2324 }
2325}
2326
9dd6c804 2327void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
059715a4
RE
2328 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
2329 int y, x;
d593e329 2330 IDWTELEM * dst;
059715a4 2331 for(y=0; y<b_h; y++){
19032450 2332 //FIXME ugly misuse of obmc_stride
9dd6c804
PI
2333 const uint8_t *obmc1= obmc + y*obmc_stride;
2334 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2335 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2336 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
059715a4
RE
2337 dst = slice_buffer_get_line(sb, src_y + y);
2338 for(x=0; x<b_w; x++){
2339 int v= obmc1[x] * block[3][x + y*src_stride]
2340 +obmc2[x] * block[2][x + y*src_stride]
2341 +obmc3[x] * block[1][x + y*src_stride]
2342 +obmc4[x] * block[0][x + y*src_stride];
2343
2344 v <<= 8 - LOG2_OBMC_MAX;
2345 if(FRAC_BITS != 8){
059715a4
RE
2346 v >>= 8 - FRAC_BITS;
2347 }
2348 if(add){
2349 v += dst[x + src_x];
2350 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2351 if(v&(~255)) v= ~(v>>31);
2352 dst8[x + y*src_stride] = v;
2353 }else{
2354 dst[x + src_x] -= v;
2355 }
2356 }
2357 }
2358}
2359
ff158dc9 2360//FIXME name cleanup (b_w, block_w, b_width stuff)
d593e329 2361static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
a0d1931c
Y
2362 const int b_width = s->b_width << s->block_max_depth;
2363 const int b_height= s->b_height << s->block_max_depth;
2364 const int b_stride= b_width;
2365 BlockNode *lt= &s->block[b_x + b_y*b_stride];
2366 BlockNode *rt= lt+1;
2367 BlockNode *lb= lt+b_stride;
2368 BlockNode *rb= lb+1;
115329f1 2369 uint8_t *block[4];
cc884a35
MN
2370 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
2371 uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
2372 uint8_t *ptmp;
a0d1931c
Y
2373 int x,y;
2374
2375 if(b_x<0){
2376 lt= rt;
2377 lb= rb;
2378 }else if(b_x + 1 >= b_width){
2379 rt= lt;
2380 rb= lb;
2381 }
2382 if(b_y<0){
2383 lt= lb;
2384 rt= rb;
2385 }else if(b_y + 1 >= b_height){
2386 lb= lt;
2387 rb= rt;
2388 }
115329f1 2389
a0d1931c
Y
2390 if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
2391 obmc -= src_x;
2392 b_w += src_x;
f7e89c73 2393 if(!sliced && !offset_dst)
1015631b 2394 dst -= src_x;
ff158dc9
MN
2395 src_x=0;
2396 }else if(src_x + b_w > w){
2397 b_w = w - src_x;
2398 }
2399 if(src_y<0){
2400 obmc -= src_y*obmc_stride;
2401 b_h += src_y;
f7e89c73 2402 if(!sliced && !offset_dst)
1015631b 2403 dst -= src_y*dst_stride;
ff158dc9
MN
2404 src_y=0;
2405 }else if(src_y + b_h> h){
2406 b_h = h - src_y;
791e7b83 2407 }
115329f1 2408
ff158dc9 2409 if(b_w<=0 || b_h<=0) return;
155ec6ed 2410
cc884a35 2411assert(src_stride > 2*MB_SIZE + 5);
f7e89c73 2412 if(!sliced && offset_dst)
1015631b 2413 dst += src_x + src_y*dst_stride;
715a97f0 2414 dst8+= src_x + src_y*src_stride;
ff158dc9
MN
2415// src += src_x + src_y*src_stride;
2416
cc884a35
MN
2417 ptmp= tmp + 3*tmp_step;
2418 block[0]= ptmp;
2419 ptmp+=tmp_step;
8c36eaaa 2420 pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
ff158dc9
MN
2421
2422 if(same_block(lt, rt)){
2423 block[1]= block[0];
791e7b83 2424 }else{
cc884a35
MN
2425 block[1]= ptmp;
2426 ptmp+=tmp_step;
8c36eaaa 2427 pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
ff158dc9 2428 }
115329f1 2429
ff158dc9
MN
2430 if(same_block(lt, lb)){
2431 block[2]= block[0];
2432 }else if(same_block(rt, lb)){
2433 block[2]= block[1];
2434 }else{
cc884a35
MN
2435 block[2]= ptmp;
2436 ptmp+=tmp_step;
8c36eaaa 2437 pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
ff158dc9 2438 }
791e7b83 2439
ff158dc9
MN
2440 if(same_block(lt, rb) ){
2441 block[3]= block[0];
2442 }else if(same_block(rt, rb)){
2443 block[3]= block[1];
2444 }else if(same_block(lb, rb)){
2445 block[3]= block[2];
2446 }else{
cc884a35 2447 block[3]= ptmp;
8c36eaaa 2448 pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
ff158dc9
MN
2449 }
2450#if 0
2451 for(y=0; y<b_h; y++){
2452 for(x=0; x<b_w; x++){
2453 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2454 if(add) dst[x + y*dst_stride] += v;
2455 else dst[x + y*dst_stride] -= v;
2456 }
2457 }
2458 for(y=0; y<b_h; y++){
2459 uint8_t *obmc2= obmc + (obmc_stride>>1);
2460 for(x=0; x<b_w; x++){
2461 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2462 if(add) dst[x + y*dst_stride] += v;
2463 else dst[x + y*dst_stride] -= v;
2464 }
2465 }
2466 for(y=0; y<b_h; y++){
2467 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2468 for(x=0; x<b_w; x++){
2469 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2470 if(add) dst[x + y*dst_stride] += v;
2471 else dst[x + y*dst_stride] -= v;
2472 }
2473 }
2474 for(y=0; y<b_h; y++){
2475 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2476 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2477 for(x=0; x<b_w; x++){
2478 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2479 if(add) dst[x + y*dst_stride] += v;
2480 else dst[x + y*dst_stride] -= v;
2481 }
2482 }
2483#else
f7e89c73
LM
2484 if(sliced){
2485 START_TIMER
2486
2487 s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
2488 STOP_TIMER("inner_add_yblock")
2489 }else
ff158dc9 2490 for(y=0; y<b_h; y++){
19032450 2491 //FIXME ugly misuse of obmc_stride
9dd6c804
PI
2492 const uint8_t *obmc1= obmc + y*obmc_stride;
2493 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2494 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2495 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
ff158dc9
MN
2496 for(x=0; x<b_w; x++){
2497 int v= obmc1[x] * block[3][x + y*src_stride]
2498 +obmc2[x] * block[2][x + y*src_stride]
2499 +obmc3[x] * block[1][x + y*src_stride]
2500 +obmc4[x] * block[0][x + y*src_stride];
115329f1 2501
715a97f0 2502 v <<= 8 - LOG2_OBMC_MAX;
034aff03 2503 if(FRAC_BITS != 8){
034aff03
MN
2504 v >>= 8 - FRAC_BITS;
2505 }
715a97f0
MN
2506 if(add){
2507 v += dst[x + y*dst_stride];
2508 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2509 if(v&(~255)) v= ~(v>>31);
2510 dst8[x + y*src_stride] = v;
2511 }else{
2512 dst[x + y*dst_stride] -= v;
2513 }
791e7b83
MN
2514 }
2515 }
ff158dc9 2516#endif
791e7b83
MN
2517}
2518
d593e329 2519static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){
a0d1931c
Y
2520 Plane *p= &s->plane[plane_index];
2521 const int mb_w= s->b_width << s->block_max_depth;
2522 const int mb_h= s->b_height << s->block_max_depth;
2523 int x, y, mb_x;
2524 int block_size = MB_SIZE >> s->block_max_depth;
2525 int block_w = plane_index ? block_size/2 : block_size;
2526 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2527 int obmc_stride= plane_index ? block_size : 2*block_size;
2528 int ref_stride= s->current_picture.linesize[plane_index];
a0d1931c
Y
2529 uint8_t *dst8= s->current_picture.data[plane_index];
2530 int w= p->width;
2531 int h= p->height;
2532 START_TIMER
115329f1 2533
a0d1931c
Y
2534 if(s->keyframe || (s->avctx->debug&512)){
2535 if(mb_y==mb_h)
2536 return;
2537
2538 if(add){
86e59cc0 2539 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
a0d1931c
Y
2540 {
2541// DWTELEM * line = slice_buffer_get_line(sb, y);
d593e329 2542 IDWTELEM * line = sb->line[y];
a0d1931c
Y
2543 for(x=0; x<w; x++)
2544 {
2545// int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2546 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2547 v >>= FRAC_BITS;
2548 if(v&(~255)) v= ~(v>>31);
2549 dst8[x + y*ref_stride]= v;
2550 }
2551 }
2552 }else{
86e59cc0 2553 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
a0d1931c
Y
2554 {
2555// DWTELEM * line = slice_buffer_get_line(sb, y);
d593e329 2556 IDWTELEM * line = sb->line[y];
a0d1931c
Y
2557 for(x=0; x<w; x++)
2558 {
2559 line[x] -= 128 << FRAC_BITS;
2560// buf[x + y*w]-= 128<<FRAC_BITS;
2561 }
2562 }
2563 }
2564
2565 return;
2566 }
115329f1 2567
a0d1931c
Y
2568 for(mb_x=0; mb_x<=mb_w; mb_x++){
2569 START_TIMER
2570
f7e89c73 2571 add_yblock(s, 1, sb, old_buffer, dst8, obmc,
a0d1931c
Y
2572 block_w*mb_x - block_w/2,
2573 block_w*mb_y - block_w/2,
2574 block_w, block_w,
2575 w, h,
2576 w, ref_stride, obmc_stride,
2577 mb_x - 1, mb_y - 1,
f7e89c73 2578 add, 0, plane_index);
115329f1 2579
a0d1931c
Y
2580 STOP_TIMER("add_yblock")
2581 }
115329f1 2582
a0d1931c
Y
2583 STOP_TIMER("predict_slice")
2584}
2585
d593e329 2586static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){
791e7b83 2587 Plane *p= &s->plane[plane_index];
155ec6ed
MN
2588 const int mb_w= s->b_width << s->block_max_depth;
2589 const int mb_h= s->b_height << s->block_max_depth;
f9e6ebf7 2590 int x, y, mb_x;
155ec6ed
MN
2591 int block_size = MB_SIZE >> s->block_max_depth;
2592 int block_w = plane_index ? block_size/2 : block_size;
ff158dc9 2593 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
51d6a3cf 2594 const int obmc_stride= plane_index ? block_size : 2*block_size;
715a97f0 2595 int ref_stride= s->current_picture.linesize[plane_index];
715a97f0 2596 uint8_t *dst8= s->current_picture.data[plane_index];
791e7b83
MN
2597 int w= p->width;
2598 int h= p->height;
fff6d4ea 2599 START_TIMER
115329f1 2600
ff158dc9 2601 if(s->keyframe || (s->avctx->debug&512)){
f9e6ebf7
LM
2602 if(mb_y==mb_h)
2603 return;
2604
715a97f0 2605 if(add){
86e59cc0 2606 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
715a97f0
MN
2607 for(x=0; x<w; x++){
2608 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2609 v >>= FRAC_BITS;
2610 if(v&(~255)) v= ~(v>>31);
2611 dst8[x + y*ref_stride]= v;
2612 }
2613 }
2614 }else{
86e59cc0 2615 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
715a97f0
MN
2616 for(x=0; x<w; x++){
2617 buf[x + y*w]-= 128<<FRAC_BITS;
2618 }
ff158dc9 2619 }
791e7b83 2620 }
ff158dc9
MN
2621
2622 return;
791e7b83 2623 }
115329f1 2624
ff158dc9 2625 for(mb_x=0; mb_x<=mb_w; mb_x++){
fff6d4ea 2626 START_TIMER
ff158dc9 2627
f7e89c73 2628 add_yblock(s, 0, NULL, buf, dst8, obmc,
ff158dc9 2629 block_w*mb_x - block_w/2,
791e7b83 2630 block_w*mb_y - block_w/2,
ff158dc9 2631 block_w, block_w,
791e7b83 2632 w, h,
ff158dc9
MN
2633 w, ref_stride, obmc_stride,
2634 mb_x - 1, mb_y - 1,
1015631b 2635 add, 1, plane_index);
115329f1 2636
ff158dc9 2637 STOP_TIMER("add_yblock")
791e7b83 2638 }
115329f1 2639
f9e6ebf7
LM
2640 STOP_TIMER("predict_slice")
2641}
2642
d593e329 2643static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){
f9e6ebf7
LM
2644 const int mb_h= s->b_height << s->block_max_depth;
2645 int mb_y;
2646 for(mb_y=0; mb_y<=mb_h; mb_y++)
2647 predict_slice(s, buf, plane_index, add, mb_y);
791e7b83
MN
2648}
2649
51d6a3cf
MN
2650static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
2651 int i, x2, y2;
2652 Plane *p= &s->plane[plane_index];
2653 const int block_size = MB_SIZE >> s->block_max_depth;
2654 const int block_w = plane_index ? block_size/2 : block_size;
2655 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2656 const int obmc_stride= plane_index ? block_size : 2*block_size;
2657 const int ref_stride= s->current_picture.linesize[plane_index];
51d6a3cf 2658 uint8_t *src= s-> input_picture.data[plane_index];
d593e329 2659 IDWTELEM *dst= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; //FIXME change to unsigned
51d6a3cf
MN
2660 const int b_stride = s->b_width << s->block_max_depth;
2661 const int w= p->width;
2662 const int h= p->height;
2663 int index= mb_x + mb_y*b_stride;
2664 BlockNode *b= &s->block[index];
2665 BlockNode backup= *b;
2666 int ab=0;
2667 int aa=0;
2668
2669 b->type|= BLOCK_INTRA;
2670 b->color[plane_index]= 0;
d593e329 2671 memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM));
51d6a3cf
MN
2672
2673 for(i=0; i<4; i++){
2674 int mb_x2= mb_x + (i &1) - 1;
2675 int mb_y2= mb_y + (i>>1) - 1;
2676 int x= block_w*mb_x2 + block_w/2;
2677 int y= block_w*mb_y2 + block_w/2;
2678
f7e89c73 2679 add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
1015631b 2680 x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
51d6a3cf
MN
2681
2682 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
2683 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
2684 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
2685 int obmc_v= obmc[index];
1015631b 2686 int d;
51d6a3cf
MN
2687 if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
2688 if(x<0) obmc_v += obmc[index + block_w];
2689 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
2690 if(x+block_w>w) obmc_v += obmc[index - block_w];
2691 //FIXME precalc this or simplify it somehow else
2692
1015631b
LM
2693 d = -dst[index] + (1<<(FRAC_BITS-1));
2694 dst[index] = d;
2695 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
51d6a3cf
MN
2696 aa += obmc_v * obmc_v; //FIXME precalclate this
2697 }
2698 }
2699 }
2700 *b= backup;
2701
755bfeab 2702 return av_clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we should not need clipping
51d6a3cf
MN
2703}
2704
b104969f
LM
2705static inline int get_block_bits(SnowContext *s, int x, int y, int w){
2706 const int b_stride = s->b_width << s->block_max_depth;
2707 const int b_height = s->b_height<< s->block_max_depth;
2708 int index= x + y*b_stride;
aadcc5ce
PI
2709 const BlockNode *b = &s->block[index];
2710 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2711 const BlockNode *top = y ? &s->block[index-b_stride] : &null_block;
2712 const BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left;
2713 const BlockNode *tr = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
b104969f 2714 int dmx, dmy;
c26abfa5
DB
2715// int mx_context= av_log2(2*FFABS(left->mx - top->mx));
2716// int my_context= av_log2(2*FFABS(left->my - top->my));
b104969f
LM
2717
2718 if(x<0 || x>=b_stride || y>=b_height)
2719 return 0;
b104969f
LM
2720/*
27211 0 0
272201X 1-2 1
2723001XX 3-6 2-3
27240001XXX 7-14 4-7
272500001XXXX 15-30 8-15
2726*/
2727//FIXME try accurate rate
2728//FIXME intra and inter predictors if surrounding blocks arent the same type
2729 if(b->type & BLOCK_INTRA){
c26abfa5
DB
2730 return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0]))
2731 + av_log2(2*FFABS(left->color[1] - b->color[1]))
2732 + av_log2(2*FFABS(left->color[2] - b->color[2])));
85fc0e75
LM
2733 }else{
2734 pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
2735 dmx-= b->mx;
2736 dmy-= b->my;
c26abfa5
DB
2737 return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda
2738 + av_log2(2*FFABS(dmy))
8c36eaaa 2739 + av_log2(2*b->ref));
85fc0e75 2740 }
b104969f
LM
2741}
2742
1015631b 2743static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
51d6a3cf
MN
2744 Plane *p= &s->plane[plane_index];
2745 const int block_size = MB_SIZE >> s->block_max_depth;
2746 const int block_w = plane_index ? block_size/2 : block_size;
51d6a3cf
MN
2747 const int obmc_stride= plane_index ? block_size : 2*block_size;
2748 const int ref_stride= s->current_picture.linesize[plane_index];
51d6a3cf 2749 uint8_t *dst= s->current_picture.data[plane_index];
1015631b 2750 uint8_t *src= s-> input_picture.data[plane_index];
d593e329 2751 IDWTELEM *pred= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
1015631b 2752 uint8_t cur[ref_stride*2*MB_SIZE]; //FIXME alignment
71dce72b 2753 uint8_t tmp[ref_stride*(2*MB_SIZE+HTAPS-1)];
51d6a3cf
MN
2754 const int b_stride = s->b_width << s->block_max_depth;
2755 const int b_height = s->b_height<< s->block_max_depth;
2756 const int w= p->width;
2757 const int h= p->height;
1015631b 2758 int distortion;
51d6a3cf
MN
2759 int rate= 0;
2760 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
1015631b
LM
2761 int sx= block_w*mb_x - block_w/2;
2762 int sy= block_w*mb_y - block_w/2;
561a18d3
RE
2763 int x0= FFMAX(0,-sx);
2764 int y0= FFMAX(0,-sy);
2765 int x1= FFMIN(block_w*2, w-sx);
2766 int y1= FFMIN(block_w*2, h-sy);
1015631b
LM
2767 int i,x,y;
2768
8c36eaaa 2769 pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
1015631b
LM
2770
2771 for(y=y0; y<y1; y++){
2772 const uint8_t *obmc1= obmc_edged + y*obmc_stride;
d593e329 2773 const IDWTELEM *pred1 = pred + y*obmc_stride;
1015631b
LM
2774 uint8_t *cur1 = cur + y*ref_stride;
2775 uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
2776 for(x=x0; x<x1; x++){
d593e329 2777#if FRAC_BITS >= LOG2_OBMC_MAX
1015631b 2778 int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
d593e329
MN
2779#else
2780 int v = (cur1[x] * obmc1[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS);
2781#endif
1015631b
LM
2782 v = (v + pred1[x]) >> FRAC_BITS;
2783 if(v&(~255)) v= ~(v>>31);
2784 dst1[x] = v;
51d6a3cf 2785 }
1015631b 2786 }
51d6a3cf 2787
561a18d3
RE
2788 /* copy the regions where obmc[] = (uint8_t)256 */
2789 if(LOG2_OBMC_MAX == 8
2790 && (mb_x == 0 || mb_x == b_stride-1)
2791 && (mb_y == 0 || mb_y == b_height-1)){
2792 if(mb_x == 0)
2793 x1 = block_w;
2794 else
2795 x0 = block_w;
2796 if(mb_y == 0)
2797 y1 = block_w;
2798 else
2799 y0 = block_w;
2800 for(y=y0; y<y1; y++)
2801 memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
2802 }
2803
1015631b 2804 if(block_w==16){
871371a7
LM
2805 /* FIXME rearrange dsputil to fit 32x32 cmp functions */
2806 /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
2807 /* FIXME cmps overlap but don't cover the wavelet's whole support,
2808 * so improving the score of one block is not strictly guaranteed to
2809 * improve the score of the whole frame, so iterative motion est
2810 * doesn't always converge. */
2811 if(s->avctx->me_cmp == FF_CMP_W97)
486497e0 2812 distortion = w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
871371a7 2813 else if(s->avctx->me_cmp == FF_CMP_W53)
486497e0 2814 distortion = w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
871371a7
LM
2815 else{
2816 distortion = 0;
2817 for(i=0; i<4; i++){
2818 int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
2819 distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
2820 }
1015631b
LM
2821 }
2822 }else{
2823 assert(block_w==8);
2824 distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
51d6a3cf
MN
2825 }
2826
2827 if(plane_index==0){
2828 for(i=0; i<4; i++){
2829/* ..RRr
2830 * .RXx.
2831 * rxx..
2832 */
b104969f
LM
2833 rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
2834 }
48d1b9a1
LM
2835 if(mb_x == b_stride-2)
2836 rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
b104969f
LM
2837 }
2838 return distortion + rate*penalty_factor;
2839}
2840
2841static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
2842 int i, y2;
2843 Plane *p= &s->plane[plane_index];
2844 const int block_size = MB_SIZE >> s->block_max_depth;
2845 const int block_w = plane_index ? block_size/2 : block_size;
2846 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2847 const int obmc_stride= plane_index ? block_size : 2*block_size;
2848 const int ref_stride= s->current_picture.linesize[plane_index];
b104969f
LM
2849 uint8_t *dst= s->current_picture.data[plane_index];
2850 uint8_t *src= s-> input_picture.data[plane_index];
d593e329 2851 static const IDWTELEM zero_dst[4096]; //FIXME
b104969f 2852 const int b_stride = s->b_width << s->block_max_depth;
b104969f
LM
2853 const int w= p->width;
2854 const int h= p->height;
2855 int distortion= 0;
2856 int rate= 0;
2857 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
2858
2859 for(i=0; i<9; i++){
2860 int mb_x2= mb_x + (i%3) - 1;
2861 int mb_y2= mb_y + (i/3) - 1;
2862 int x= block_w*mb_x2 + block_w/2;
2863 int y= block_w*mb_y2 + block_w/2;
2864
f7e89c73 2865 add_yblock(s, 0, NULL, zero_dst, dst, obmc,
b104969f
LM
2866 x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
2867
2868 //FIXME find a cleaner/simpler way to skip the outside stuff
2869 for(y2= y; y2<0; y2++)
2870 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2871 for(y2= h; y2<y+block_w; y2++)
2872 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2873 if(x<0){
2874 for(y2= y; y2<y+block_w; y2++)
2875 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
51d6a3cf 2876 }
b104969f
LM
2877 if(x+block_w > w){
2878 for(y2= y; y2<y+block_w; y2++)
2879 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
2880 }
2881
2882 assert(block_w== 8 || block_w==16);
2883 distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
51d6a3cf
MN
2884 }
2885
b104969f
LM
2886 if(plane_index==0){
2887 BlockNode *b= &s->block[mb_x+mb_y*b_stride];
2888 int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
2889
2890/* ..RRRr
2891 * .RXXx.
2892 * .RXXx.
2893 * rxxx.
2894 */
2895 if(merged)
2896 rate = get_block_bits(s, mb_x, mb_y, 2);
2897 for(i=merged?4:0; i<9; i++){
2898 static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
2899 rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
2900 }
2901 }
51d6a3cf
MN
2902 return distortion + rate*penalty_factor;
2903}
2904
849f1035 2905static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
51d6a3cf
MN
2906 const int b_stride= s->b_width << s->block_max_depth;
2907 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
2908 BlockNode backup= *block;
2909 int rd, index, value;
2910
2911 assert(mb_x>=0 && mb_y>=0);
735f9f34 2912 assert(mb_x<b_stride);
51d6a3cf
MN
2913
2914 if(intra){
2915 block->color[0] = p[0];
2916 block->color[1] = p[1];
2917 block->color[2] = p[2];
2918 block->type |= BLOCK_INTRA;
2919 }else{
2920 index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
8c36eaaa 2921 value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12);
51d6a3cf
MN
2922 if(s->me_cache[index] == value)
2923 return 0;
2924 s->me_cache[index]= value;
2925
2926 block->mx= p[0];
2927 block->my= p[1];
2928 block->type &= ~BLOCK_INTRA;
2929 }
2930
1015631b 2931 rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
51d6a3cf
MN
2932
2933//FIXME chroma
2934 if(rd < *best_rd){
2935 *best_rd= rd;
2936 return 1;
2937 }else{
2938 *block= backup;
2939 return 0;
2940 }
2941}
2942
52137f2f 2943/* special case for int[2] args we discard afterward, fixes compilation prob with gcc 2.95 */
849f1035 2944static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
52137f2f 2945 int p[2] = {p0, p1};
fc8c4992 2946 return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
52137f2f
FR
2947}
2948
849f1035 2949static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
b104969f
LM
2950 const int b_stride= s->b_width << s->block_max_depth;
2951 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
2952 BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
2953 int rd, index, value;
2954
2955 assert(mb_x>=0 && mb_y>=0);
2956 assert(mb_x<b_stride);
2957 assert(((mb_x|mb_y)&1) == 0);
2958
2959 index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
8c36eaaa 2960 value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12);
b104969f
LM
2961 if(s->me_cache[index] == value)
2962 return 0;
2963 s->me_cache[index]= value;
2964
2965 block->mx= p0;
2966 block->my= p1;
8c36eaaa 2967 block->ref= ref;
b104969f
LM
2968 block->type &= ~BLOCK_INTRA;
2969 block[1]= block[b_stride]= block[b_stride+1]= *block;
2970
2971 rd= get_4block_rd(s, mb_x, mb_y, 0);
2972
2973//FIXME chroma
2974 if(rd < *best_rd){
2975 *best_rd= rd;
2976 return 1;
2977 }else{
2978 block[0]= backup[0];
2979 block[1]= backup[1];
2980 block[b_stride]= backup[2];
2981 block[b_stride+1]= backup[3];
2982 return 0;
2983 }
2984}
2985
51d6a3cf
MN
2986static void iterative_me(SnowContext *s){
2987 int pass, mb_x, mb_y;
2988 const int b_width = s->b_width << s->block_max_depth;
2989 const int b_height= s->b_height << s->block_max_depth;
2990 const int b_stride= b_width;
2991 int color[3];
2992
8f8ae495
LM
2993 {
2994 RangeCoder r = s->c;
2995 uint8_t state[sizeof(s->block_state)];
2996 memcpy(state, s->block_state, sizeof(s->block_state));
2997 for(mb_y= 0; mb_y<s->b_height; mb_y++)
2998 for(mb_x= 0; mb_x<s->b_width; mb_x++)
2999 encode_q_branch(s, 0, mb_x, mb_y);
3000 s->c = r;
3001 memcpy(s->block_state, state, sizeof(s->block_state));
3002 }
3003
871371a7 3004 for(pass=0; pass<25; pass++){
51d6a3cf
MN
3005 int change= 0;
3006
3007 for(mb_y= 0; mb_y<b_height; mb_y++){
3008 for(mb_x= 0; mb_x<b_width; mb_x++){
8c36eaaa
LM
3009 int dia_change, i, j, ref;
3010 int best_rd= INT_MAX, ref_rd;
3011 BlockNode backup, ref_b;
51d6a3cf
MN
3012 const int index= mb_x + mb_y * b_stride;
3013 BlockNode *block= &s->block[index];
7f21a9a7
LM
3014 BlockNode *tb = mb_y ? &s->block[index-b_stride ] : NULL;
3015 BlockNode *lb = mb_x ? &s->block[index -1] : NULL;
3016 BlockNode *rb = mb_x+1<b_width ? &s->block[index +1] : NULL;
3017 BlockNode *bb = mb_y+1<b_height ? &s->block[index+b_stride ] : NULL;
3018 BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : NULL;
3019 BlockNode *trb= mb_x+1<b_width && mb_y ? &s->block[index-b_stride+1] : NULL;
3020 BlockNode *blb= mb_x && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
3021 BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
1015631b
LM
3022 const int b_w= (MB_SIZE >> s->block_max_depth);
3023 uint8_t obmc_edged[b_w*2][b_w*2];
51d6a3cf
MN
3024
3025 if(pass && (block->type & BLOCK_OPT))
3026 continue;
3027 block->type |= BLOCK_OPT;
3028
3029 backup= *block;
3030
3031 if(!s->me_cache_generation)
3032 memset(s->me_cache, 0, sizeof(s->me_cache));
3033 s->me_cache_generation += 1<<22;
3034
1015631b
LM
3035 //FIXME precalc
3036 {
3037 int x, y;
3038 memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
3039 if(mb_x==0)
3040 for(y=0; y<b_w*2; y++)
3041 memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
3042 if(mb_x==b_stride-1)
3043 for(y=0; y<b_w*2; y++)
3044 memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
3045 if(mb_y==0){
3046 for(x=0; x<b_w*2; x++)
3047 obmc_edged[0][x] += obmc_edged[b_w-1][x];
3048 for(y=1; y<b_w; y++)
3049 memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
3050 }
3051 if(mb_y==b_height-1){
3052 for(x=0; x<b_w*2; x++)
3053 obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
3054 for(y=b_w; y<b_w*2-1; y++)
3055 memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
3056 }
3057 }
3058
3059 //skip stuff outside the picture
3060 if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1)
3061 {
3062 uint8_t *src= s-> input_picture.data[0];
3063 uint8_t *dst= s->current_picture.data[0];
3064 const int stride= s->current_picture.linesize[0];
3065 const int block_w= MB_SIZE >> s->block_max_depth;
3066 const int sx= block_w*mb_x - block_w/2;
3067 const int sy= block_w*mb_y - block_w/2;
3068 const int w= s->plane[0].width;
3069 const int h= s->plane[0].height;
3070 int y;
3071
3072 for(y=sy; y<0; y++)
3073 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3074 for(y=h; y<sy+block_w*2; y++)
3075 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3076 if(sx<0){
3077 for(y=sy; y<sy+block_w*2; y++)
3078 memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
3079 }
3080 if(sx+block_w*2 > w){
3081 for(y=sy; y<sy+block_w*2; y++)
3082 memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
3083 }
3084 }
3085
3086 // intra(black) = neighbors' contribution to the current block
3087 for(i=0; i<3; i++)
3088 color[i]= get_dc(s, mb_x, mb_y, i);
3089
755bfeab 3090 // get previous score (cannot be cached due to OBMC)
48d1b9a1
LM
3091 if(pass > 0 && (block->type&BLOCK_INTRA)){
3092 int color0[3]= {block->color[0], block->color[1], block->color[2]};
3093 check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
3094 }else
fc8c4992 3095 check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);
48d1b9a1 3096
8c36eaaa
LM
3097 ref_b= *block;
3098 ref_rd= best_rd;
3099 for(ref=0; ref < s->ref_frames; ref++){
3100 int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
3101 if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
3102 continue;
3103 block->ref= ref;
3104 best_rd= INT_MAX;
3105
3106 check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd);
3107 check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
7f21a9a7 3108 if(tb)
8c36eaaa 3109 check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd);
7f21a9a7 3110 if(lb)
8c36eaaa 3111 check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd);
7f21a9a7 3112 if(rb)
8c36eaaa 3113 check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd);
7f21a9a7 3114 if(bb)
8c36eaaa
LM
3115 check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd);
3116
3117 /* fullpel ME */
3118 //FIXME avoid subpel interpol / round to nearest integer
3119 do{
3120 dia_change=0;
3121 for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
3122 for(j=0; j<i; j++){
3123 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3124 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3125 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3126 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3127 }
51d6a3cf 3128 }
8c36eaaa
LM
3129 }while(dia_change);
3130 /* subpel ME */
3131 do{
3132 static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
3133 dia_change=0;
3134 for(i=0; i<8; i++)
3135 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
3136 }while(dia_change);
3137 //FIXME or try the standard 2 pass qpel or similar
3138
3139 mvr[0][0]= block->mx;
3140 mvr[0][1]= block->my;
3141 if(ref_rd > best_rd){
3142 ref_rd= best_rd;
3143 ref_b= *block;
51d6a3cf 3144 }
8c36eaaa
LM
3145 }
3146 best_rd= ref_rd;
3147 *block= ref_b;
13705b69 3148#if 1
1015631b 3149 check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
51d6a3cf 3150 //FIXME RD style color selection
13705b69 3151#endif
51d6a3cf 3152 if(!same_block(block, &backup)){
7f21a9a7
LM
3153 if(tb ) tb ->type &= ~BLOCK_OPT;
3154 if(lb ) lb ->type &= ~BLOCK_OPT;
3155 if(rb ) rb ->type &= ~BLOCK_OPT;
3156 if(bb ) bb ->type &= ~BLOCK_OPT;
3157 if(tlb) tlb->type &= ~BLOCK_OPT;
3158 if(trb) trb->type &= ~BLOCK_OPT;
3159 if(blb) blb->type &= ~BLOCK_OPT;
3160 if(brb) brb->type &= ~BLOCK_OPT;
51d6a3cf
MN
3161 change ++;
3162 }
3163 }
3164 }
3165 av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
3166 if(!change)
3167 break;
3168 }
b104969f
LM
3169
3170 if(s->block_max_depth == 1){
3171 int change= 0;
3172 for(mb_y= 0; mb_y<b_height; mb_y+=2){
3173 for(mb_x= 0; mb_x<b_width; mb_x+=2){
7f21a9a7 3174 int i;
b104969f
LM
3175 int best_rd, init_rd;
3176 const int index= mb_x + mb_y * b_stride;
3177 BlockNode *b[4];
3178
3179 b[0]= &s->block[index];
3180 b[1]= b[0]+1;
3181 b[2]= b[0]+b_stride;
3182 b[3]= b[2]+1;
3183 if(same_block(b[0], b[1]) &&
3184 same_block(b[0], b[2]) &&
3185 same_block(b[0], b[3]))
3186 continue;
3187
3188 if(!s->me_cache_generation)
3189 memset(s->me_cache, 0, sizeof(s->me_cache));
3190 s->me_cache_generation += 1<<22;
3191
3192 init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
3193
8c36eaaa 3194 //FIXME more multiref search?
b104969f
LM
3195 check_4block_inter(s, mb_x, mb_y,
3196 (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
8c36eaaa 3197 (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);
b104969f
LM
3198
3199 for(i=0; i<4; i++)
3200 if(!(b[i]->type&BLOCK_INTRA))
8c36eaaa 3201 check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);
b104969f
LM
3202
3203 if(init_rd != best_rd)
3204 change++;
3205 }
3206 }
3207 av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
3208 }
51d6a3cf
MN
3209}
3210
d593e329 3211static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){
791e7b83
MN
3212 const int level= b->level;
3213 const int w= b->width;
3214 const int h= b->height;
f66e4f5f 3215 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
b538791b 3216 const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS);
da66b631 3217 int x,y, thres1, thres2;
62ab0b78 3218// START_TIMER
791e7b83 3219
d593e329
MN
3220 if(s->qlog == LOSSLESS_QLOG){
3221 for(y=0; y<h; y++)
3222 for(x=0; x<w; x++)
3223 dst[x + y*stride]= src[x + y*stride];
3224 return;
3225 }
115329f1 3226
791e7b83 3227 bias= bias ? 0 : (3*qmul)>>3;
da66b631
MN
3228 thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
3229 thres2= 2*thres1;
115329f1 3230
791e7b83
MN
3231 if(!bias){
3232 for(y=0; y<h; y++){
3233 for(x=0; x<w; x++){
da66b631 3234 int i= src[x + y*stride];
115329f1 3235
da66b631
MN
3236 if((unsigned)(i+thres1) > thres2){
3237 if(i>=0){
3238 i<<= QEXPSHIFT;
3239 i/= qmul; //FIXME optimize
d593e329 3240 dst[x + y*stride]= i;
da66b631
MN
3241 }else{
3242 i= -i;
3243 i<<= QEXPSHIFT;
3244 i/= qmul; //FIXME optimize
d593e329 3245 dst[x + y*stride]= -i;
da66b631
MN
3246 }
3247 }else
d593e329 3248 dst[x + y*stride]= 0;
791e7b83
MN
3249 }
3250 }
3251 }else{
3252 for(y=0; y<h; y++){
3253 for(x=0; x<w; x++){
115329f1
DB
3254 int i= src[x + y*stride];
3255
da66b631
MN
3256 if((unsigned)(i+thres1) > thres2){
3257 if(i>=0){
3258 i<<= QEXPSHIFT;
3259 i= (i + bias) / qmul; //FIXME optimize
d593e329 3260 dst[x + y*stride]= i;
da66b631
MN
3261 }else{
3262 i= -i;
3263 i<<= QEXPSHIFT;
3264 i= (i + bias) / qmul; //FIXME optimize
d593e329 3265 dst[x + y*stride]= -i;
da66b631
MN
3266 }
3267 }else
d593e329 3268 dst[x + y*stride]= 0;
791e7b83
MN
3269 }
3270 }
3271 }
da66b631
MN
3272 if(level+1 == s->spatial_decomposition_count){
3273// STOP_TIMER("quantize")
3274 }
791e7b83
MN
3275}
3276
d593e329 3277static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){
a0d1931c 3278 const int w= b->width;
f66e4f5f 3279 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
c97de57c 3280 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
a0d1931c
Y
3281 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3282 int x,y;
3283 START_TIMER
115329f1 3284
a0d1931c 3285 if(s->qlog == LOSSLESS_QLOG) return;
115329f1 3286
66b32bf2 3287 for(y=start_y; y<end_y; y++){
a0d1931c 3288// DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
d593e329 3289 IDWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
a0d1931c
Y
3290 for(x=0; x<w; x++){
3291 int i= line[x];
3292 if(i<0){
3293 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3294 }else if(i>0){
3295 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT));
3296 }
3297 }
3298 }
3299 if(w > 200 /*l