Snow: cosmetics
[libav.git] / libavcodec / snow.c
CommitLineData
791e7b83
MN
/*
 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
18
19#include "avcodec.h"
20#include "common.h"
21#include "dsputil.h"
059715a4 22#include "snow.h"
28869757
MN
23
24#include "rangecoder.h"
791e7b83
MN
25
26#include "mpegvideo.h"
27
28#undef NDEBUG
29#include <assert.h>
30
791e7b83
MN
31static const int8_t quant3[256]={
32 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
33 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
34 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
35 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
36 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
37 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
41-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
42-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
43-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
44-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
45-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
46-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
47-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
48};
49static const int8_t quant3b[256]={
50 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
51 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
52 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
53 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
54 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
55 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
57 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
59-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
60-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
61-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
62-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
63-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
64-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
65-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
66};
538a3841
MN
67static const int8_t quant3bA[256]={
68 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
69 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
70 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
71 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
72 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
73 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
74 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
75 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
76 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
77 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
78 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
79 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
80 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
81 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
82 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
83 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
84};
791e7b83
MN
85static const int8_t quant5[256]={
86 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
87 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
88 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
89 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
90 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
91 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
92 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
93 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
94-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
95-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
96-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
97-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
98-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
99-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
100-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
101-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
102};
103static const int8_t quant7[256]={
104 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
105 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
106 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
107 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
108 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
109 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
110 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
111 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
112-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
113-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
114-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
115-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
116-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
117-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
118-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
119-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
120};
121static const int8_t quant9[256]={
122 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
123 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
124 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
125 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
126 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
127 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
128 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
129 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
130-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
131-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
132-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
133-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
134-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
135-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
136-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
137-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
138};
139static const int8_t quant11[256]={
140 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
141 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
142 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
143 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
144 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
145 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
146 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
147 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
148-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
149-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
150-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
151-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
152-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
153-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
154-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
155-4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
156};
157static const int8_t quant13[256]={
158 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
159 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
160 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
161 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
162 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
163 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
164 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
165 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
166-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
167-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
168-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
169-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
170-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
171-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
172-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
173-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
174};
175
791e7b83
MN
176#if 0 //64*cubic
177static const uint8_t obmc32[1024]={
178 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
179 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
180 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
181 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
182 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
183 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
184 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
185 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
186 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
187 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
188 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
189 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
190 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
191 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
192 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
193 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
194 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
195 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
196 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
197 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
198 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
199 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
200 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
201 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
202 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
203 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
204 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
205 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
206 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
207 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
208 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
209 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
210//error:0.000022
211};
212static const uint8_t obmc16[256]={
213 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
214 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
215 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
216 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
217 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
218 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
219 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
220 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
221 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
222 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
223 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
224 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
225 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
226 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
227 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
228 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
229//error:0.000033
230};
231#elif 1 // 64*linear
232static const uint8_t obmc32[1024]={
561a18d3
RE
233 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
234 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
235 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
236 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
237 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
238 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
239 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
240 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
241 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
242 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
243 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
244 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
245 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
246 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
247 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
248 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
249 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
250 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
251 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
252 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
253 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
254 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
255 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
256 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
257 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
258 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
259 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
260 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
261 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
262 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
263 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
264 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
791e7b83
MN
265 //error:0.000020
266};
267static const uint8_t obmc16[256]={
561a18d3
RE
268 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
269 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
270 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
271 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
272 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
273 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
274 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
275 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
276 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
277 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
278 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
279 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
280 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
281 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
282 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
283 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
791e7b83
MN
284//error:0.000015
285};
286#else //64*cos
287static const uint8_t obmc32[1024]={
288 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
289 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
290 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
291 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
292 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
293 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
294 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
295 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
296 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
297 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
298 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
299 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
300 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
301 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
302 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
303 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
304 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
305 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
306 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
307 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
308 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
309 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
310 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
311 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
312 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
313 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
314 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
315 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
316 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
317 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
318 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
319 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
320//error:0.000022
321};
322static const uint8_t obmc16[256]={
323 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
324 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
325 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
326 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
327 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
328 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
329 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
330 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
331 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
332 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
333 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
334 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
335 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
336 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
337 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
338 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
339//error:0.000022
340};
341#endif
342
155ec6ed
MN
/**
 * 8x8 weight window, stored row-major.
 * The table is symmetric both horizontally and vertically and its
 * 64 entries sum to 4096.
 * NOTE(review): presumably the overlapped-block motion-compensation window
 * for 8x8 blocks (it is entry 2 of obmc_tab) -- confirm against the users.
 * NOTE(review): the legacy tag below says "*64" but the peak value is 196,
 * so the effective scale looks larger than 64 -- confirm.
 */
//linear *64
static const uint8_t obmc8[64]={
      4, 12, 20, 28, 28, 20, 12,  4,
     12, 36, 60, 84, 84, 60, 36, 12,
     20, 60,100,140,140,100, 60, 20,
     28, 84,140,196,196,140, 84, 28,
     28, 84,140,196,196,140, 84, 28,
     20, 60,100,140,140,100, 60, 20,
     12, 36, 60, 84, 84, 60, 36, 12,
      4, 12, 20, 28, 28, 20, 12,  4,
//error:0.000000
};
355
/**
 * 4x4 weight window, stored row-major.
 * Symmetric in both directions; the 16 entries sum to 1024.
 * NOTE(review): presumably the smallest overlapped-block window
 * (entry 3 of obmc_tab) -- confirm against the users.
 */
//linear *64
static const uint8_t obmc4[16]={
     16, 48, 48, 16,
     48,144,144, 48,
     48,144,144, 48,
     16, 48, 48, 16,
//error:0.000000
};
364
365static const uint8_t *obmc_tab[4]={
366 obmc32, obmc16, obmc8, obmc4
367};
368
/**
 * Per-block record: a motion vector, a reference index, a 3-component
 * colour and a type/level pair.
 * NOTE(review): units of mx/my and the meaning of `level` are not visible
 * in this part of the file -- confirm before relying on them.
 */
typedef struct BlockNode{
    int16_t mx;         ///< motion vector, x component
    int16_t my;         ///< motion vector, y component
    uint8_t ref;        ///< reference index (presumably into the reference-frame list -- confirm)
    uint8_t color[3];   ///< one value per colour component
    uint8_t type;       ///< built from the BLOCK_* values below
//#define TYPE_SPLIT    1
#define BLOCK_INTRA   1
#define BLOCK_OPT     2
//#define TYPE_NOCOLOR  4
    uint8_t level; //FIXME merge into type?
}BlockNode;
381
51d6a3cf
MN
382static const BlockNode null_block= { //FIXME add border maybe
383 .color= {128,128,128},
384 .mx= 0,
385 .my= 0,
8c36eaaa 386 .ref= 0,
51d6a3cf
MN
387 .type= 0,
388 .level= 0,
389};
390
155ec6ed
MN
/* MB_SIZE is always 1<<LOG2_MB_SIZE, i.e. 16 with the current value. */
#define LOG2_MB_SIZE 4
#define MB_SIZE (1<<LOG2_MB_SIZE)
393
a0d1931c
Y
/**
 * A signed 16-bit position paired with an unsigned 16-bit coefficient.
 * NOTE(review): presumably one entry of a sparse per-line coefficient
 * list (see SubBand.x_coeff) -- confirm against the users.
 */
typedef struct x_and_coeff{
    int16_t x;
    uint16_t coeff;
} x_and_coeff;
398
791e7b83
MN
399typedef struct SubBand{
400 int level;
401 int stride;
402 int width;
403 int height;
404 int qlog; ///< log(qscale)/log[2^(1/6)]
405 DWTELEM *buf;
a0d1931c
Y
406 int buf_x_offset;
407 int buf_y_offset;
408 int stride_line; ///< Stride measured in lines, not pixels.
409 x_and_coeff * x_coeff;
791e7b83
MN
410 struct SubBand *parent;
411 uint8_t state[/*7*2*/ 7 + 512][32];
412}SubBand;
413
414typedef struct Plane{
415 int width;
416 int height;
417 SubBand band[MAX_DECOMPOSITIONS][4];
418}Plane;
419
420typedef struct SnowContext{
421// MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
422
423 AVCodecContext *avctx;
28869757 424 RangeCoder c;
791e7b83 425 DSPContext dsp;
51d6a3cf
MN
426 AVFrame new_picture;
427 AVFrame input_picture; ///< new_picture with the internal linesizes
791e7b83 428 AVFrame current_picture;
8c36eaaa 429 AVFrame last_picture[MAX_REF_FRAMES];
791e7b83
MN
430 AVFrame mconly_picture;
431// uint8_t q_context[16];
432 uint8_t header_state[32];
155ec6ed 433 uint8_t block_state[128 + 32*128];
791e7b83 434 int keyframe;
19aa028d 435 int always_reset;
791e7b83
MN
436 int version;
437 int spatial_decomposition_type;
438 int temporal_decomposition_type;
439 int spatial_decomposition_count;
440 int temporal_decomposition_count;
8c36eaaa
LM
441 int max_ref_frames;
442 int ref_frames;
443 int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
444 uint32_t *ref_scores[MAX_REF_FRAMES];
791e7b83 445 DWTELEM *spatial_dwt_buffer;
791e7b83
MN
446 int colorspace_type;
447 int chroma_h_shift;
448 int chroma_v_shift;
449 int spatial_scalability;
450 int qlog;
155ec6ed
MN
451 int lambda;
452 int lambda2;
4e64bead 453 int pass1_rc;
791e7b83
MN
454 int mv_scale;
455 int qbias;
456#define QBIAS_SHIFT 3
155ec6ed
MN
457 int b_width;
458 int b_height;
459 int block_max_depth;
791e7b83 460 Plane plane[MAX_PLANES];
155ec6ed 461 BlockNode *block;
51d6a3cf
MN
462#define ME_CACHE_SIZE 1024
463 int me_cache[ME_CACHE_SIZE];
464 int me_cache_generation;
a0d1931c 465 slice_buffer sb;
155ec6ed 466
791e7b83
MN
467 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
468}SnowContext;
469
f9e6ebf7
LM
470typedef struct {
471 DWTELEM *b0;
472 DWTELEM *b1;
473 DWTELEM *b2;
474 DWTELEM *b3;
475 int y;
476} dwt_compose_t;
477
a0d1931c
Y
/*
 * Return the buffer for line_num, taking the fast path when the line is
 * already present and calling slice_buffer_load_line() otherwise.
 * Both arguments are evaluated more than once, so they must be free of
 * side effects.
 */
#define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
//#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
480
51d6a3cf
MN
481static void iterative_me(SnowContext *s);
482
a0d1931c
Y
483static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, DWTELEM * base_buffer)
484{
485 int i;
115329f1 486
a0d1931c
Y
487 buf->base_buffer = base_buffer;
488 buf->line_count = line_count;
489 buf->line_width = line_width;
490 buf->data_count = max_allocated_lines;
491 buf->line = (DWTELEM * *) av_mallocz (sizeof(DWTELEM *) * line_count);
492 buf->data_stack = (DWTELEM * *) av_malloc (sizeof(DWTELEM *) * max_allocated_lines);
115329f1 493
a0d1931c
Y
494 for (i = 0; i < max_allocated_lines; i++)
495 {
496 buf->data_stack[i] = (DWTELEM *) av_malloc (sizeof(DWTELEM) * line_width);
497 }
115329f1 498
a0d1931c
Y
499 buf->data_stack_top = max_allocated_lines - 1;
500}
501
502static DWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
503{
a0d1931c
Y
504 int offset;
505 DWTELEM * buffer;
115329f1
DB
506
507// av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line);
508
a0d1931c
Y
509 assert(buf->data_stack_top >= 0);
510// assert(!buf->line[line]);
511 if (buf->line[line])
512 return buf->line[line];
115329f1 513
a0d1931c
Y
514 offset = buf->line_width * line;
515 buffer = buf->data_stack[buf->data_stack_top];
516 buf->data_stack_top--;
517 buf->line[line] = buffer;
115329f1 518
a0d1931c 519// av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
115329f1 520
a0d1931c
Y
521 return buffer;
522}
523
524static void slice_buffer_release(slice_buffer * buf, int line)
525{
a0d1931c
Y
526 int offset;
527 DWTELEM * buffer;
528
529 assert(line >= 0 && line < buf->line_count);
530 assert(buf->line[line]);
531
532 offset = buf->line_width * line;
533 buffer = buf->line[line];
534 buf->data_stack_top++;
535 buf->data_stack[buf->data_stack_top] = buffer;
536 buf->line[line] = NULL;
115329f1 537
a0d1931c
Y
538// av_log(NULL, AV_LOG_DEBUG, "slice_buffer_release: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
539}
540
541static void slice_buffer_flush(slice_buffer * buf)
542{
543 int i;
544 for (i = 0; i < buf->line_count; i++)
545 {
546 if (buf->line[i])
547 {
548// av_log(NULL, AV_LOG_DEBUG, "slice_buffer_flush: line: %d \n", i);
549 slice_buffer_release(buf, i);
550 }
551 }
552}
553
554static void slice_buffer_destroy(slice_buffer * buf)
555{
556 int i;
557 slice_buffer_flush(buf);
115329f1 558
a0d1931c
Y
559 for (i = buf->data_count - 1; i >= 0; i--)
560 {
561 assert(buf->data_stack[i]);
e7c8206e 562 av_freep(&buf->data_stack[i]);
a0d1931c
Y
563 }
564 assert(buf->data_stack);
e7c8206e 565 av_freep(&buf->data_stack);
a0d1931c 566 assert(buf->line);
e7c8206e 567 av_freep(&buf->line);
a0d1931c
Y
568}
569
bb270c08 570#ifdef __sgi
2554db9b 571// Avoid a name clash on SGI IRIX
bb270c08 572#undef qexp
2554db9b 573#endif
034aff03 574#define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
c97de57c 575static uint8_t qexp[QROOT];
791e7b83
MN
576
/**
 * Reflect an index into the range [0, m] by mirroring at both ends
 * (..., 2, 1, 0, 1, ..., m-1, m, m-1, ...).
 *
 * @param v index, possibly outside the range
 * @param m largest valid index (>= 0)
 * @return the mirrored index in [0, m]
 *
 * For m == 0 the only valid index is 0; without the guard the reflection
 * loop never terminates for any v != 0, because negating v and adding
 * 2*m == 0 only flips its sign.
 */
static inline int mirror(int v, int m){
    if(!m)
        return 0;

    // The unsigned compare catches both v < 0 and v > m in one test.
    while((unsigned)v > (unsigned)m){
        v=-v;
        if(v<0) v+= 2*m;
    }
    return v;
}
584
28869757 585static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
791e7b83
MN
586 int i;
587
588 if(v){
589 const int a= ABS(v);
590 const int e= av_log2(a);
591#if 1
115329f1 592 const int el= FFMIN(e, 10);
28869757 593 put_rac(c, state+0, 0);
791e7b83
MN
594
595 for(i=0; i<el; i++){
28869757 596 put_rac(c, state+1+i, 1); //1..10
791e7b83
MN
597 }
598 for(; i<e; i++){
28869757 599 put_rac(c, state+1+9, 1); //1..10
791e7b83 600 }
28869757 601 put_rac(c, state+1+FFMIN(i,9), 0);
791e7b83
MN
602
603 for(i=e-1; i>=el; i--){
28869757 604 put_rac(c, state+22+9, (a>>i)&1); //22..31
791e7b83
MN
605 }
606 for(; i>=0; i--){
28869757 607 put_rac(c, state+22+i, (a>>i)&1); //22..31
791e7b83
MN
608 }
609
610 if(is_signed)
28869757 611 put_rac(c, state+11 + el, v < 0); //11..21
791e7b83 612#else
115329f1 613
28869757 614 put_rac(c, state+0, 0);
791e7b83
MN
615 if(e<=9){
616 for(i=0; i<e; i++){
28869757 617 put_rac(c, state+1+i, 1); //1..10
791e7b83 618 }
28869757 619 put_rac(c, state+1+i, 0);
791e7b83
MN
620
621 for(i=e-1; i>=0; i--){
28869757 622 put_rac(c, state+22+i, (a>>i)&1); //22..31
791e7b83
MN
623 }
624
625 if(is_signed)
28869757 626 put_rac(c, state+11 + e, v < 0); //11..21
791e7b83
MN
627 }else{
628 for(i=0; i<e; i++){
28869757 629 put_rac(c, state+1+FFMIN(i,9), 1); //1..10
791e7b83 630 }
28869757 631 put_rac(c, state+1+FFMIN(i,9), 0);
791e7b83
MN
632
633 for(i=e-1; i>=0; i--){
28869757 634 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
791e7b83
MN
635 }
636
637 if(is_signed)
28869757 638 put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
791e7b83
MN
639 }
640#endif
641 }else{
28869757 642 put_rac(c, state+0, 1);
791e7b83
MN
643 }
644}
645
28869757
MN
646static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
647 if(get_rac(c, state+0))
791e7b83
MN
648 return 0;
649 else{
7c2425d2
LM
650 int i, e, a;
651 e= 0;
28869757 652 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
7c2425d2 653 e++;
791e7b83 654 }
7c2425d2 655
791e7b83 656 a= 1;
7c2425d2 657 for(i=e-1; i>=0; i--){
28869757 658 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
791e7b83
MN
659 }
660
28869757 661 if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
791e7b83
MN
662 return -a;
663 else
664 return a;
665 }
666}
667
28869757 668static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
4f4e9633 669 int i;
0635cbfc 670 int r= log2>=0 ? 1<<log2 : 1;
4f4e9633
MN
671
672 assert(v>=0);
0635cbfc
MN
673 assert(log2>=-4);
674
675 while(v >= r){
28869757 676 put_rac(c, state+4+log2, 1);
0635cbfc 677 v -= r;
4f4e9633 678 log2++;
0635cbfc 679 if(log2>0) r+=r;
4f4e9633 680 }
28869757 681 put_rac(c, state+4+log2, 0);
115329f1 682
4f4e9633 683 for(i=log2-1; i>=0; i--){
28869757 684 put_rac(c, state+31-i, (v>>i)&1);
4f4e9633 685 }
4f4e9633
MN
686}
687
28869757 688static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
4f4e9633 689 int i;
0635cbfc 690 int r= log2>=0 ? 1<<log2 : 1;
4f4e9633
MN
691 int v=0;
692
0635cbfc
MN
693 assert(log2>=-4);
694
28869757 695 while(get_rac(c, state+4+log2)){
0635cbfc 696 v+= r;
4f4e9633 697 log2++;
0635cbfc 698 if(log2>0) r+=r;
4f4e9633 699 }
115329f1 700
4f4e9633 701 for(i=log2-1; i>=0; i--){
28869757 702 v+= get_rac(c, state+31-i)<<i;
4f4e9633
MN
703 }
704
705 return v;
706}
707
791e7b83
MN
708static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
709 const int mirror_left= !highpass;
710 const int mirror_right= (width&1) ^ highpass;
711 const int w= (width>>1) - 1 + (highpass & width);
712 int i;
713
714#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
715 if(mirror_left){
716 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
717 dst += dst_step;
718 src += src_step;
719 }
115329f1 720
791e7b83
MN
721 for(i=0; i<w; i++){
722 dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse);
723 }
115329f1 724
791e7b83
MN
725 if(mirror_right){
726 dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse);
727 }
728}
729
059715a4 730#ifndef lift5
791e7b83
MN
731static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
732 const int mirror_left= !highpass;
733 const int mirror_right= (width&1) ^ highpass;
734 const int w= (width>>1) - 1 + (highpass & width);
735 int i;
736
737 if(mirror_left){
738 int r= 3*2*ref[0];
739 r += r>>4;
740 r += r>>8;
741 dst[0] = LIFT(src[0], ((r+add)>>shift), inverse);
742 dst += dst_step;
743 src += src_step;
744 }
115329f1 745
791e7b83
MN
746 for(i=0; i<w; i++){
747 int r= 3*(ref[i*ref_step] + ref[(i+1)*ref_step]);
748 r += r>>4;
749 r += r>>8;
750 dst[i*dst_step] = LIFT(src[i*src_step], ((r+add)>>shift), inverse);
751 }
115329f1 752
791e7b83
MN
753 if(mirror_right){
754 int r= 3*2*ref[w*ref_step];
755 r += r>>4;
756 r += r>>8;
757 dst[w*dst_step] = LIFT(src[w*src_step], ((r+add)>>shift), inverse);
758 }
759}
059715a4 760#endif
791e7b83 761
059715a4 762#ifndef liftS
f5a71928
MN
763static always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
764 const int mirror_left= !highpass;
765 const int mirror_right= (width&1) ^ highpass;
766 const int w= (width>>1) - 1 + (highpass & width);
767 int i;
768
769 assert(shift == 4);
770#define LIFTS(src, ref, inv) ((inv) ? (src) - (((ref) - 4*(src))>>shift): (16*4*(src) + 4*(ref) + 8 + (5<<27))/(5*16) - (1<<23))
771 if(mirror_left){
772 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
773 dst += dst_step;
774 src += src_step;
775 }
115329f1 776
f5a71928
MN
777 for(i=0; i<w; i++){
778 dst[i*dst_step] = LIFTS(src[i*src_step], mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add, inverse);
779 }
115329f1 780
f5a71928
MN
781 if(mirror_right){
782 dst[w*dst_step] = LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
783 }
784}
059715a4 785#endif
f5a71928 786
791e7b83 787
aa25a462 788static void inplace_lift(DWTELEM *dst, int width, int *coeffs, int n, int shift, int start, int inverse){
791e7b83 789 int x, i;
115329f1 790
791e7b83
MN
791 for(x=start; x<width; x+=2){
792 int64_t sum=0;
793
794 for(i=0; i<n; i++){
795 int x2= x + 2*i - n + 1;
796 if (x2< 0) x2= -x2;
797 else if(x2>=width) x2= 2*width-x2-2;
798 sum += coeffs[i]*(int64_t)dst[x2];
799 }
800 if(inverse) dst[x] -= (sum + (1<<shift)/2)>>shift;
801 else dst[x] += (sum + (1<<shift)/2)>>shift;
802 }
803}
804
aa25a462 805static void inplace_liftV(DWTELEM *dst, int width, int height, int stride, int *coeffs, int n, int shift, int start, int inverse){
791e7b83
MN
806 int x, y, i;
807 for(y=start; y<height; y+=2){
808 for(x=0; x<width; x++){
809 int64_t sum=0;
115329f1 810
791e7b83
MN
811 for(i=0; i<n; i++){
812 int y2= y + 2*i - n + 1;
813 if (y2< 0) y2= -y2;
814 else if(y2>=height) y2= 2*height-y2-2;
815 sum += coeffs[i]*(int64_t)dst[x + y2*stride];
816 }
817 if(inverse) dst[x + y*stride] -= (sum + (1<<shift)/2)>>shift;
818 else dst[x + y*stride] += (sum + (1<<shift)/2)>>shift;
819 }
820 }
821}
822
/*
 * Configuration of the generic ("X") lifting transform.
 *
 * Up to four lifting steps are described by Nk (tap count), SHIFTk (right
 * shift) and COEFFSk (taps, NULL when Nk is 0). LX0/LX1 are the start
 * indices passed to the steps and SCALEX is the factor applied before the
 * forward and removed after the inverse transform.
 *
 * Exactly one branch of the #if chain below is compiled: the first one whose
 * condition is non-zero (currently "13/7 CRF"). Branches that need values
 * different from the defaults #undef them first, so that switching the
 * active branch does not produce an incompatible macro redefinition.
 */
#define SCALEX 1
#define LX0 0
#define LX1 1

#if 0 // more accurate 9/7
#define N1 2
#define SHIFT1 14
#define COEFFS1 (int[]){-25987,-25987}
#define N2 2
#define SHIFT2 19
#define COEFFS2 (int[]){-27777,-27777}
#define N3 2
#define SHIFT3 15
#define COEFFS3 (int[]){28931,28931}
#define N4 2
#define SHIFT4 15
#define COEFFS4 (int[]){14533,14533}
#elif 1 // 13/7 CRF
#define N1 4
#define SHIFT1 4
#define COEFFS1 (int[]){1,-9,-9,1}
#define N2 4
#define SHIFT2 4
#define COEFFS2 (int[]){-1,5,5,-1}
#define N3 0
#define SHIFT3 1
#define COEFFS3 NULL
#define N4 0
#define SHIFT4 1
#define COEFFS4 NULL
#elif 1 // 3/5
#undef LX0
#undef LX1
#undef SCALEX
#define LX0 1
#define LX1 0
#define SCALEX 0.5
#define N1 2
#define SHIFT1 1
#define COEFFS1 (int[]){1,1}
#define N2 2
#define SHIFT2 2
#define COEFFS2 (int[]){-1,-1}
#define N3 0
#define SHIFT3 0
#define COEFFS3 NULL
#define N4 0
#define SHIFT4 0
#define COEFFS4 NULL
#elif 1 // 11/5
#define N1 0
#define SHIFT1 1
#define COEFFS1 NULL
#define N2 2
#define SHIFT2 2
#define COEFFS2 (int[]){-1,-1}
#define N3 2
#define SHIFT3 0
#define COEFFS3 (int[]){-1,-1}
#define N4 4
#define SHIFT4 7
#define COEFFS4 (int[]){-5,29,29,-5}
#undef SCALEX
#define SCALEX 4
#elif 1 // 9/7 CDF
#define N1 2
#define SHIFT1 7
#define COEFFS1 (int[]){-203,-203}
#define N2 2
#define SHIFT2 12
#define COEFFS2 (int[]){-217,-217}
#define N3 2
#define SHIFT3 7
#define COEFFS3 (int[]){113,113}
#define N4 2
#define SHIFT4 9
#define COEFFS4 (int[]){227,227}
#undef SCALEX
#define SCALEX 1
#elif 1 // 7/5 CDF
#define N1 0
#define SHIFT1 1
#define COEFFS1 NULL
#define N2 2
#define SHIFT2 2
#define COEFFS2 (int[]){-1,-1}
#define N3 2
#define SHIFT3 0
#define COEFFS3 (int[]){-1,-1}
#define N4 2
#define SHIFT4 4
#define COEFFS4 (int[]){3,3}
#elif 1 // 9/7 MN
#define N1 4
#define SHIFT1 4
#define COEFFS1 (int[]){1,-9,-9,1}
#define N2 2
#define SHIFT2 2
#define COEFFS2 (int[]){1,1}
#define N3 0
#define SHIFT3 1
#define COEFFS3 NULL
#define N4 0
#define SHIFT4 1
#define COEFFS4 NULL
#else // 13/7 CRF
#define N1 4
#define SHIFT1 4
#define COEFFS1 (int[]){1,-9,-9,1}
#define N2 4
#define SHIFT2 4
#define COEFFS2 (int[]){-1,5,5,-1}
#define N3 0
#define SHIFT3 1
#define COEFFS3 NULL
#define N4 0
#define SHIFT4 1
#define COEFFS4 NULL
#endif
aa25a462
RFI
937static void horizontal_decomposeX(DWTELEM *b, int width){
938 DWTELEM temp[width];
791e7b83
MN
939 const int width2= width>>1;
940 const int w2= (width+1)>>1;
62ab0b78 941 int x;
791e7b83
MN
942
943 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 0);
944 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 0);
945 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 0);
946 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 0);
115329f1 947
791e7b83
MN
948 for(x=0; x<width2; x++){
949 temp[x ]= b[2*x ];
950 temp[x+w2]= b[2*x + 1];
951 }
952 if(width&1)
953 temp[x ]= b[2*x ];
954 memcpy(b, temp, width*sizeof(int));
955}
956
aa25a462
RFI
957static void horizontal_composeX(DWTELEM *b, int width){
958 DWTELEM temp[width];
791e7b83 959 const int width2= width>>1;
62ab0b78 960 int x;
791e7b83
MN
961 const int w2= (width+1)>>1;
962
963 memcpy(temp, b, width*sizeof(int));
964 for(x=0; x<width2; x++){
965 b[2*x ]= temp[x ];
966 b[2*x + 1]= temp[x+w2];
967 }
968 if(width&1)
969 b[2*x ]= temp[x ];
970
971 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 1);
972 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 1);
973 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 1);
974 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 1);
975}
976
aa25a462 977static void spatial_decomposeX(DWTELEM *buffer, int width, int height, int stride){
791e7b83 978 int x, y;
115329f1 979
791e7b83
MN
980 for(y=0; y<height; y++){
981 for(x=0; x<width; x++){
982 buffer[y*stride + x] *= SCALEX;
983 }
984 }
985
986 for(y=0; y<height; y++){
987 horizontal_decomposeX(buffer + y*stride, width);
988 }
115329f1 989
791e7b83
MN
990 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 0);
991 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0);
992 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0);
115329f1 993 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0);
791e7b83
MN
994}
995
aa25a462 996static void spatial_composeX(DWTELEM *buffer, int width, int height, int stride){
791e7b83 997 int x, y;
115329f1 998
791e7b83
MN
999 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1);
1000 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1);
1001 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1);
1002 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 1);
1003
1004 for(y=0; y<height; y++){
1005 horizontal_composeX(buffer + y*stride, width);
1006 }
1007
1008 for(y=0; y<height; y++){
1009 for(x=0; x<width; x++){
1010 buffer[y*stride + x] /= SCALEX;
1011 }
1012 }
1013}
1014
aa25a462
RFI
1015static void horizontal_decompose53i(DWTELEM *b, int width){
1016 DWTELEM temp[width];
791e7b83 1017 const int width2= width>>1;
62ab0b78 1018 int x;
791e7b83
MN
1019 const int w2= (width+1)>>1;
1020
1021 for(x=0; x<width2; x++){
1022 temp[x ]= b[2*x ];
1023 temp[x+w2]= b[2*x + 1];
1024 }
1025 if(width&1)
1026 temp[x ]= b[2*x ];
1027#if 0
62ab0b78
AJ
1028 {
1029 int A1,A2,A3,A4;
791e7b83
MN
1030 A2= temp[1 ];
1031 A4= temp[0 ];
1032 A1= temp[0+width2];
1033 A1 -= (A2 + A4)>>1;
1034 A4 += (A1 + 1)>>1;
1035 b[0+width2] = A1;
1036 b[0 ] = A4;
1037 for(x=1; x+1<width2; x+=2){
1038 A3= temp[x+width2];
1039 A4= temp[x+1 ];
1040 A3 -= (A2 + A4)>>1;
1041 A2 += (A1 + A3 + 2)>>2;
1042 b[x+width2] = A3;
1043 b[x ] = A2;
1044
1045 A1= temp[x+1+width2];
1046 A2= temp[x+2 ];
1047 A1 -= (A2 + A4)>>1;
1048 A4 += (A1 + A3 + 2)>>2;
1049 b[x+1+width2] = A1;
1050 b[x+1 ] = A4;
1051 }
1052 A3= temp[width-1];
1053 A3 -= A2;
1054 A2 += (A1 + A3 + 2)>>2;
1055 b[width -1] = A3;
1056 b[width2-1] = A2;
62ab0b78 1057 }
115329f1 1058#else
791e7b83
MN
1059 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
1060 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0);
1061#endif
1062}
1063
aa25a462 1064static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 1065 int i;
115329f1 1066
791e7b83
MN
1067 for(i=0; i<width; i++){
1068 b1[i] -= (b0[i] + b2[i])>>1;
1069 }
1070}
1071
aa25a462 1072static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 1073 int i;
115329f1 1074
791e7b83
MN
1075 for(i=0; i<width; i++){
1076 b1[i] += (b0[i] + b2[i] + 2)>>2;
1077 }
1078}
1079
aa25a462 1080static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
39c61bbb 1081 int y;
791e7b83
MN
1082 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
1083 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride;
115329f1 1084
791e7b83
MN
1085 for(y=-2; y<height; y+=2){
1086 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1087 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1088
1089{START_TIMER
13705b69
MN
1090 if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
1091 if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
791e7b83 1092STOP_TIMER("horizontal_decompose53i")}
115329f1 1093
791e7b83 1094{START_TIMER
13705b69
MN
1095 if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
1096 if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
791e7b83 1097STOP_TIMER("vertical_decompose53i*")}
115329f1 1098
791e7b83
MN
1099 b0=b2;
1100 b1=b3;
1101 }
1102}
1103
aa25a462
RFI
1104static void horizontal_decompose97i(DWTELEM *b, int width){
1105 DWTELEM temp[width];
791e7b83
MN
1106 const int w2= (width+1)>>1;
1107
1108 lift (temp+w2, b +1, b , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0);
f5a71928 1109 liftS(temp , b , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0);
791e7b83
MN
1110 lift5(b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
1111 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
1112}
1113
1114
aa25a462 1115static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 1116 int i;
115329f1 1117
791e7b83
MN
1118 for(i=0; i<width; i++){
1119 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1120 }
1121}
1122
aa25a462 1123static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 1124 int i;
115329f1 1125
791e7b83
MN
1126 for(i=0; i<width; i++){
1127#ifdef lift5
1128 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1129#else
1130 int r= 3*(b0[i] + b2[i]);
1131 r+= r>>4;
1132 r+= r>>8;
1133 b1[i] += (r+W_CO)>>W_CS;
1134#endif
1135 }
1136}
1137
aa25a462 1138static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 1139 int i;
115329f1 1140
791e7b83 1141 for(i=0; i<width; i++){
f5a71928 1142#ifdef liftS
791e7b83 1143 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
f5a71928
MN
1144#else
1145 b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + 8*5 + (5<<27)) / (5*16) - (1<<23);
1146#endif
791e7b83
MN
1147 }
1148}
1149
aa25a462 1150static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 1151 int i;
115329f1 1152
791e7b83
MN
1153 for(i=0; i<width; i++){
1154 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1155 }
1156}
1157
aa25a462 1158static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
39c61bbb 1159 int y;
791e7b83
MN
1160 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
1161 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride;
1162 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
1163 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
115329f1 1164
791e7b83
MN
1165 for(y=-4; y<height; y+=2){
1166 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1167 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1168
1169{START_TIMER
13705b69
MN
1170 if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
1171 if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
791e7b83
MN
1172if(width>400){
1173STOP_TIMER("horizontal_decompose97i")
1174}}
115329f1 1175
791e7b83 1176{START_TIMER
13705b69
MN
1177 if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
1178 if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
1179 if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
1180 if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
791e7b83
MN
1181
1182if(width>400){
1183STOP_TIMER("vertical_decompose97i")
1184}}
115329f1 1185
791e7b83
MN
1186 b0=b2;
1187 b1=b3;
1188 b2=b4;
1189 b3=b5;
1190 }
1191}
1192
aa25a462 1193void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
791e7b83 1194 int level;
115329f1 1195
46c281e8
MN
1196 for(level=0; level<decomposition_count; level++){
1197 switch(type){
791e7b83
MN
1198 case 0: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1199 case 1: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1200 case 2: spatial_decomposeX (buffer, width>>level, height>>level, stride<<level); break;
1201 }
1202 }
1203}
1204
aa25a462
RFI
1205static void horizontal_compose53i(DWTELEM *b, int width){
1206 DWTELEM temp[width];
791e7b83
MN
1207 const int width2= width>>1;
1208 const int w2= (width+1)>>1;
62ab0b78 1209 int x;
791e7b83
MN
1210
1211#if 0
62ab0b78 1212 int A1,A2,A3,A4;
791e7b83
MN
1213 A2= temp[1 ];
1214 A4= temp[0 ];
1215 A1= temp[0+width2];
1216 A1 -= (A2 + A4)>>1;
1217 A4 += (A1 + 1)>>1;
1218 b[0+width2] = A1;
1219 b[0 ] = A4;
1220 for(x=1; x+1<width2; x+=2){
1221 A3= temp[x+width2];
1222 A4= temp[x+1 ];
1223 A3 -= (A2 + A4)>>1;
1224 A2 += (A1 + A3 + 2)>>2;
1225 b[x+width2] = A3;
1226 b[x ] = A2;
1227
1228 A1= temp[x+1+width2];
1229 A2= temp[x+2 ];
1230 A1 -= (A2 + A4)>>1;
1231 A4 += (A1 + A3 + 2)>>2;
1232 b[x+1+width2] = A1;
1233 b[x+1 ] = A4;
1234 }
1235 A3= temp[width-1];
1236 A3 -= A2;
1237 A2 += (A1 + A3 + 2)>>2;
1238 b[width -1] = A3;
1239 b[width2-1] = A2;
115329f1 1240#else
791e7b83
MN
1241 lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1);
1242 lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1243#endif
1244 for(x=0; x<width2; x++){
1245 b[2*x ]= temp[x ];
1246 b[2*x + 1]= temp[x+w2];
1247 }
1248 if(width&1)
1249 b[2*x ]= temp[x ];
1250}
1251
aa25a462 1252static void vertical_compose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 1253 int i;
115329f1 1254
791e7b83
MN
1255 for(i=0; i<width; i++){
1256 b1[i] += (b0[i] + b2[i])>>1;
1257 }
1258}
1259
aa25a462 1260static void vertical_compose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 1261 int i;
115329f1 1262
791e7b83
MN
1263 for(i=0; i<width; i++){
1264 b1[i] -= (b0[i] + b2[i] + 2)>>2;
1265 }
1266}
1267
a0d1931c
Y
1268static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1269 cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1270 cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line);
1271 cs->y = -1;
1272}
1273
f9e6ebf7
LM
1274static void spatial_compose53i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1275 cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1276 cs->b1 = buffer + mirror(-1 , height-1)*stride;
1277 cs->y = -1;
1278}
1279
a0d1931c
Y
1280static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1281 int y= cs->y;
115329f1 1282
a0d1931c
Y
1283 DWTELEM *b0= cs->b0;
1284 DWTELEM *b1= cs->b1;
3b6ab26c
MN
1285 DWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
1286 DWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
a0d1931c
Y
1287
1288{START_TIMER
13705b69
MN
1289 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1290 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
a0d1931c
Y
1291STOP_TIMER("vertical_compose53i*")}
1292
1293{START_TIMER
13705b69
MN
1294 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1295 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
a0d1931c
Y
1296STOP_TIMER("horizontal_compose53i")}
1297
1298 cs->b0 = b2;
1299 cs->b1 = b3;
1300 cs->y += 2;
1301}
1302
f9e6ebf7
LM
1303static void spatial_compose53i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1304 int y= cs->y;
1305 DWTELEM *b0= cs->b0;
1306 DWTELEM *b1= cs->b1;
1307 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1308 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
791e7b83
MN
1309
1310{START_TIMER
13705b69
MN
1311 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1312 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
791e7b83
MN
1313STOP_TIMER("vertical_compose53i*")}
1314
1315{START_TIMER
13705b69
MN
1316 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1317 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
791e7b83
MN
1318STOP_TIMER("horizontal_compose53i")}
1319
f9e6ebf7
LM
1320 cs->b0 = b2;
1321 cs->b1 = b3;
1322 cs->y += 2;
1323}
1324
1325static void spatial_compose53i(DWTELEM *buffer, int width, int height, int stride){
1326 dwt_compose_t cs;
1327 spatial_compose53i_init(&cs, buffer, height, stride);
1328 while(cs.y <= height)
1329 spatial_compose53i_dy(&cs, buffer, width, height, stride);
115329f1
DB
1330}
1331
791e7b83 1332
059715a4 1333void ff_snow_horizontal_compose97i(DWTELEM *b, int width){
aa25a462 1334 DWTELEM temp[width];
791e7b83
MN
1335 const int w2= (width+1)>>1;
1336
1337 lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
1338 lift5(temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
f5a71928 1339 liftS(b , temp , temp+w2, 2, 1, 1, width, -W_BM, W_BO, W_BS, 0, 1);
791e7b83
MN
1340 lift (b+1 , temp+w2, b , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1);
1341}
1342
aa25a462 1343static void vertical_compose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 1344 int i;
115329f1 1345
791e7b83
MN
1346 for(i=0; i<width; i++){
1347 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1348 }
1349}
1350
aa25a462 1351static void vertical_compose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 1352 int i;
115329f1 1353
791e7b83
MN
1354 for(i=0; i<width; i++){
1355#ifdef lift5
1356 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1357#else
1358 int r= 3*(b0[i] + b2[i]);
1359 r+= r>>4;
1360 r+= r>>8;
1361 b1[i] -= (r+W_CO)>>W_CS;
1362#endif
1363 }
1364}
1365
aa25a462 1366static void vertical_compose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 1367 int i;
115329f1 1368
791e7b83 1369 for(i=0; i<width; i++){
f5a71928 1370#ifdef liftS
791e7b83 1371 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
f5a71928
MN
1372#else
1373 b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
1374#endif
791e7b83
MN
1375 }
1376}
1377
aa25a462 1378static void vertical_compose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 1379 int i;
115329f1 1380
791e7b83
MN
1381 for(i=0; i<width; i++){
1382 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1383 }
1384}
1385
059715a4 1386void ff_snow_vertical_compose97i(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width){
565a45ac 1387 int i;
115329f1 1388
565a45ac 1389 for(i=0; i<width; i++){
62ab0b78 1390#ifndef lift5
565a45ac 1391 int r;
62ab0b78 1392#endif
565a45ac
MN
1393 b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
1394#ifdef lift5
1395 b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
1396#else
1397 r= 3*(b2[i] + b4[i]);
1398 r+= r>>4;
1399 r+= r>>8;
1400 b3[i] -= (r+W_CO)>>W_CS;
1401#endif
f5a71928 1402#ifdef liftS
565a45ac 1403 b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
f5a71928
MN
1404#else
1405 b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
1406#endif
565a45ac
MN
1407 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1408 }
1409}
1410
a0d1931c
Y
1411static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1412 cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1413 cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line);
1414 cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1415 cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
1416 cs->y = -3;
1417}
1418
f9e6ebf7
LM
1419static void spatial_compose97i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1420 cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1421 cs->b1 = buffer + mirror(-3 , height-1)*stride;
1422 cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1423 cs->b3 = buffer + mirror(-3+2, height-1)*stride;
1424 cs->y = -3;
1425}
791e7b83 1426
059715a4 1427static void spatial_compose97i_dy_buffered(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
a0d1931c 1428 int y = cs->y;
115329f1 1429
a0d1931c
Y
1430 DWTELEM *b0= cs->b0;
1431 DWTELEM *b1= cs->b1;
1432 DWTELEM *b2= cs->b2;
1433 DWTELEM *b3= cs->b3;
3b6ab26c
MN
1434 DWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
1435 DWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
115329f1 1436
a0d1931c 1437{START_TIMER
565a45ac 1438 if(y>0 && y+4<height){
059715a4 1439 dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
565a45ac 1440 }else{
13705b69
MN
1441 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1442 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1443 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1444 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
565a45ac 1445 }
a0d1931c
Y
1446if(width>400){
1447STOP_TIMER("vertical_compose97i")}}
a0d1931c
Y
1448
1449{START_TIMER
059715a4
RE
1450 if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width);
1451 if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width);
3b6ab26c 1452if(width>400 && y+0<(unsigned)height){
a0d1931c
Y
1453STOP_TIMER("horizontal_compose97i")}}
1454
1455 cs->b0=b2;
1456 cs->b1=b3;
1457 cs->b2=b4;
1458 cs->b3=b5;
1459 cs->y += 2;
1460}
1461
f9e6ebf7
LM
1462static void spatial_compose97i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1463 int y = cs->y;
1464 DWTELEM *b0= cs->b0;
1465 DWTELEM *b1= cs->b1;
1466 DWTELEM *b2= cs->b2;
1467 DWTELEM *b3= cs->b3;
1468 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1469 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
791e7b83 1470
791e7b83 1471{START_TIMER
13705b69
MN
1472 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1473 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1474 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1475 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
791e7b83
MN
1476if(width>400){
1477STOP_TIMER("vertical_compose97i")}}
1478
1479{START_TIMER
059715a4
RE
1480 if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width);
1481 if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width);
791e7b83
MN
1482if(width>400 && b0 <= b2){
1483STOP_TIMER("horizontal_compose97i")}}
f9e6ebf7
LM
1484
1485 cs->b0=b2;
1486 cs->b1=b3;
1487 cs->b2=b4;
1488 cs->b3=b5;
1489 cs->y += 2;
1490}
1491
1492static void spatial_compose97i(DWTELEM *buffer, int width, int height, int stride){
1493 dwt_compose_t cs;
1494 spatial_compose97i_init(&cs, buffer, height, stride);
1495 while(cs.y <= height)
1496 spatial_compose97i_dy(&cs, buffer, width, height, stride);
1497}
1498
ceaf1909 1499static void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
a0d1931c
Y
1500 int level;
1501 for(level=decomposition_count-1; level>=0; level--){
1502 switch(type){
1503 case 0: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1504 case 1: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1505 /* not slicified yet */
1506 case 2: /*spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;*/
1507 av_log(NULL, AV_LOG_ERROR, "spatial_composeX neither buffered nor slicified yet.\n"); break;
1508 }
1509 }
1510}
1511
ceaf1909 1512static void ff_spatial_idwt_init(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
f9e6ebf7
LM
1513 int level;
1514 for(level=decomposition_count-1; level>=0; level--){
1515 switch(type){
1516 case 0: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1517 case 1: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
1518 /* not slicified yet */
1519 case 2: spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;
1520 }
791e7b83
MN
1521 }
1522}
1523
ceaf1909 1524static void ff_spatial_idwt_slice(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
f9e6ebf7 1525 const int support = type==1 ? 3 : 5;
791e7b83 1526 int level;
f9e6ebf7 1527 if(type==2) return;
791e7b83 1528
46c281e8 1529 for(level=decomposition_count-1; level>=0; level--){
f9e6ebf7
LM
1530 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1531 switch(type){
1532 case 0: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1533 break;
1534 case 1: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1535 break;
1536 case 2: break;
1537 }
791e7b83
MN
1538 }
1539 }
1540}
1541
059715a4 1542static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
a0d1931c
Y
1543 const int support = type==1 ? 3 : 5;
1544 int level;
1545 if(type==2) return;
1546
1547 for(level=decomposition_count-1; level>=0; level--){
1548 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1549 switch(type){
059715a4 1550 case 0: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
a0d1931c
Y
1551 break;
1552 case 1: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1553 break;
1554 case 2: break;
1555 }
1556 }
1557 }
1558}
1559
ceaf1909 1560static void ff_spatial_idwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
f9e6ebf7
LM
1561 if(type==2){
1562 int level;
1563 for(level=decomposition_count-1; level>=0; level--)
1564 spatial_composeX (buffer, width>>level, height>>level, stride<<level);
1565 }else{
1566 dwt_compose_t cs[MAX_DECOMPOSITIONS];
1567 int y;
1568 ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1569 for(y=0; y<height; y+=4)
1570 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
1571 }
1572}
1573
0ecca7a4 1574static int encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
4f4e9633
MN
1575 const int w= b->width;
1576 const int h= b->height;
1577 int x, y;
1578
791e7b83 1579 if(1){
791e7b83 1580 int run=0;
a8d73e56 1581 int runs[w*h];
791e7b83 1582 int run_index=0;
b44985ba 1583 int max_index;
115329f1 1584
791e7b83
MN
1585 for(y=0; y<h; y++){
1586 for(x=0; x<w; x++){
78486403 1587 int v, p=0;
6b2f6646 1588 int /*ll=0, */l=0, lt=0, t=0, rt=0;
a8d73e56 1589 v= src[x + y*stride];
791e7b83
MN
1590
1591 if(y){
a8d73e56 1592 t= src[x + (y-1)*stride];
791e7b83 1593 if(x){
a8d73e56 1594 lt= src[x - 1 + (y-1)*stride];
791e7b83
MN
1595 }
1596 if(x + 1 < w){
a8d73e56 1597 rt= src[x + 1 + (y-1)*stride];
791e7b83
MN
1598 }
1599 }
1600 if(x){
a8d73e56 1601 l= src[x - 1 + y*stride];
6b2f6646
MN
1602 /*if(x > 1){
1603 if(orientation==1) ll= src[y + (x-2)*stride];
1604 else ll= src[x - 2 + y*stride];
791e7b83
MN
1605 }*/
1606 }
78486403 1607 if(parent){
a8d73e56
MN
1608 int px= x>>1;
1609 int py= y>>1;
115329f1 1610 if(px<b->parent->width && py<b->parent->height)
78486403
MN
1611 p= parent[px + py*2*stride];
1612 }
1613 if(!(/*ll|*/l|lt|t|rt|p)){
791e7b83
MN
1614 if(v){
1615 runs[run_index++]= run;
1616 run=0;
1617 }else{
1618 run++;
1619 }
1620 }
1621 }
1622 }
b44985ba 1623 max_index= run_index;
791e7b83
MN
1624 runs[run_index++]= run;
1625 run_index=0;
1626 run= runs[run_index++];
1627
b44985ba
MN
1628 put_symbol2(&s->c, b->state[30], max_index, 0);
1629 if(run_index <= max_index)
1630 put_symbol2(&s->c, b->state[1], run, 3);
115329f1 1631
791e7b83 1632 for(y=0; y<h; y++){
d06c75a8 1633 if(s->c.bytestream_end - s->c.bytestream < w*40){
0ecca7a4
MN
1634 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1635 return -1;
1636 }
791e7b83 1637 for(x=0; x<w; x++){
78486403 1638 int v, p=0;
6b2f6646 1639 int /*ll=0, */l=0, lt=0, t=0, rt=0;
a8d73e56 1640 v= src[x + y*stride];
791e7b83
MN
1641
1642 if(y){
a8d73e56 1643 t= src[x + (y-1)*stride];
791e7b83 1644 if(x){
a8d73e56 1645 lt= src[x - 1 + (y-1)*stride];
791e7b83
MN
1646 }
1647 if(x + 1 < w){
a8d73e56 1648 rt= src[x + 1 + (y-1)*stride];
791e7b83
MN
1649 }
1650 }
1651 if(x){
a8d73e56 1652 l= src[x - 1 + y*stride];
6b2f6646
MN
1653 /*if(x > 1){
1654 if(orientation==1) ll= src[y + (x-2)*stride];
1655 else ll= src[x - 2 + y*stride];
791e7b83
MN
1656 }*/
1657 }
78486403 1658 if(parent){
a8d73e56
MN
1659 int px= x>>1;
1660 int py= y>>1;
115329f1 1661 if(px<b->parent->width && py<b->parent->height)
78486403
MN
1662 p= parent[px + py*2*stride];
1663 }
1664 if(/*ll|*/l|lt|t|rt|p){
1665 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
6b2f6646 1666
28869757 1667 put_rac(&s->c, &b->state[0][context], !!v);
791e7b83
MN
1668 }else{
1669 if(!run){
1670 run= runs[run_index++];
4f4e9633 1671
b44985ba
MN
1672 if(run_index <= max_index)
1673 put_symbol2(&s->c, b->state[1], run, 3);
791e7b83
MN
1674 assert(v);
1675 }else{
1676 run--;
1677 assert(!v);
1678 }
1679 }
1680 if(v){
78486403 1681 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
538a3841
MN
1682 int l2= 2*ABS(l) + (l<0);
1683 int t2= 2*ABS(t) + (t<0);
6b2f6646 1684
0635cbfc 1685 put_symbol2(&s->c, b->state[context + 2], ABS(v)-1, context-4);
538a3841 1686 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
791e7b83
MN
1687 }
1688 }
1689 }
791e7b83 1690 }
0ecca7a4 1691 return 0;
791e7b83
MN
1692}
1693
115329f1 1694static int encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
4f4e9633
MN
1695// encode_subband_qtree(s, b, src, parent, stride, orientation);
1696// encode_subband_z0run(s, b, src, parent, stride, orientation);
0ecca7a4 1697 return encode_subband_c0run(s, b, src, parent, stride, orientation);
4f4e9633
MN
1698// encode_subband_dzr(s, b, src, parent, stride, orientation);
1699}
1700
/*
 * Decodes the coefficients of subband b from the range coder s->c into the
 * sparse list b->x_coeff.
 *
 * Only nonzero coefficients are stored, as (x, coeff) entries in raster order.
 * Every row is closed by an entry whose x is w+1, and one more such entry is
 * written after the last row. coeff is stored as 2*(symbol+1) plus a sign bit
 * in bit 0, so neighbours are shifted/masked below to get their magnitude.
 *
 * parent may be NULL; when present its x_coeff list supplies the parent
 * coefficient p used for the context. orientation is not used in this body.
 */
a0d1931c 1701static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
791e7b83
MN
1702 const int w= b->width;
1703 const int h= b->height;
1704 int x,y;
115329f1 1705
791e7b83 1706 if(1){
b44985ba 1707 int run, runs;
cbb1d2b1
MN
/* xc: write position; prev_xc: read position in the previous row;
   prev2_xc: start of the row currently being written;
   parent_xc / prev_parent_xc: read position / row start in the parent list */
1708 x_and_coeff *xc= b->x_coeff;
1709 x_and_coeff *prev_xc= NULL;
1710 x_and_coeff *prev2_xc= xc;
1711 x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
1712 x_and_coeff *prev_parent_xc= parent_xc;
791e7b83 1713
b44985ba
MN
/* number of coded run lengths, then the first run; with none left the run
   is set to INT_MAX */
1714 runs= get_symbol2(&s->c, b->state[30], 0);
1715 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1716 else run= INT_MAX;
1717
791e7b83 1718 for(y=0; y<h; y++){
0cea8a03
MN
1719 int v=0;
1720 int lt=0, t=0, rt=0;
1721
cbb1d2b1
MN
/* prime the sliding window: rt becomes t for x=0 on the first iteration */
1722 if(y && prev_xc->x == 0){
1723 rt= prev_xc->coeff;
0cea8a03 1724 }
791e7b83 1725 for(x=0; x<w; x++){
0cea8a03
MN
1726 int p=0;
/* left neighbour is the value decoded for the previous x of this row */
1727 const int l= v;
115329f1 1728
0cea8a03 1729 lt= t; t= rt;
791e7b83 1730
ff765159 1731 if(y){
cbb1d2b1
MN
/* fetch the top-right neighbour (x+1 of the previous row) from the sparse list */
1732 if(prev_xc->x <= x)
1733 prev_xc++;
1734 if(prev_xc->x == x + 1)
1735 rt= prev_xc->coeff;
ff765159
MN
1736 else
1737 rt=0;
1738 }
cbb1d2b1
MN
/* parent coefficient sits at half the horizontal position */
1739 if(parent_xc){
1740 if(x>>1 > parent_xc->x){
1741 parent_xc++;
7b49c309 1742 }
cbb1d2b1
MN
1743 if(x>>1 == parent_xc->x){
1744 p= parent_xc->coeff;
ff765159 1745 }
78486403
MN
1746 }
/* some neighbour is nonzero: significance is coded with a context bit */
1747 if(/*ll|*/l|lt|t|rt|p){
538a3841 1748 int context= av_log2(/*ABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
6b2f6646 1749
28869757 1750 v=get_rac(&s->c, &b->state[0][context]);
3c096ac7
MN
1751 if(v){
/* magnitude symbol plus one, doubled, with the sign bit added in bit 0 */
1752 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
1753 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
115329f1 1754
cbb1d2b1
MN
1755 xc->x=x;
1756 (xc++)->coeff= v;
3c096ac7 1757 }
791e7b83
MN
1758 }else{
/* all neighbours zero: zeros are covered by the run length */
1759 if(!run){
b44985ba
MN
/* run exhausted: a nonzero coefficient follows; load the next run first */
1760 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1761 else run= INT_MAX;
3c096ac7
MN
1762 v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
1763 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
115329f1 1764
cbb1d2b1
MN
1765 xc->x=x;
1766 (xc++)->coeff= v;
791e7b83 1767 }else{
99cd59e5 1768 int max_run;
791e7b83
MN
1769 run--;
1770 v=0;
3c1adccd 1771
/* skip ahead inside the run, bounded by the next entry of the previous
   row (or the row end on the first row) and by the parent position */
cbb1d2b1 1772 if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
99cd59e5 1773 else max_run= FFMIN(run, w-x-1);
cbb1d2b1
MN
1774 if(parent_xc)
1775 max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
99cd59e5
MN
1776 x+= max_run;
1777 run-= max_run;
791e7b83
MN
1778 }
1779 }
7b49c309 1780 }
cbb1d2b1
MN
/* close the row; the row just written becomes the "previous row" */
1781 (xc++)->x= w+1; //end marker
1782 prev_xc= prev2_xc;
1783 prev2_xc= xc;
115329f1 1784
cbb1d2b1 1785 if(parent_xc){
/* after an odd row move on to the next parent row (past its end marker);
   after an even row rewind to the start of the same parent row */
7b49c309 1786 if(y&1){
cbb1d2b1
MN
1787 while(parent_xc->x != parent->width+1)
1788 parent_xc++;
1789 parent_xc++;
1790 prev_parent_xc= parent_xc;
7b49c309 1791 }else{
cbb1d2b1 1792 parent_xc= prev_parent_xc;
791e7b83
MN
1793 }
1794 }
1795 }
a0d1931c 1796
/* extra terminator after the last row */
cbb1d2b1 1797 (xc++)->x= w+1; //end marker
a0d1931c
Y
1798 }
1799}
1800
1801static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1802 const int w= b->width;
62ab0b78 1803 int y;
c97de57c
MN
1804 const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
1805 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
a0d1931c
Y
1806 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1807 int new_index = 0;
115329f1 1808
a0d1931c
Y
1809 START_TIMER
1810
1811 if(b->buf == s->spatial_dwt_buffer || s->qlog == LOSSLESS_QLOG){
1812 qadd= 0;
1813 qmul= 1<<QEXPSHIFT;
1814 }
1815
1816 /* If we are on the second or later slice, restore our index. */
1817 if (start_y != 0)
1818 new_index = save_state[0];
1819
115329f1 1820
a0d1931c
Y
1821 for(y=start_y; y<h; y++){
1822 int x = 0;
1823 int v;
1824 DWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1825 memset(line, 0, b->width*sizeof(DWTELEM));
1826 v = b->x_coeff[new_index].coeff;
1827 x = b->x_coeff[new_index++].x;
1828 while(x < w)
1829 {
538a3841
MN
1830 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1831 register int u= -(v&1);
1832 line[x] = (t^u) - u;
1833
a0d1931c
Y
1834 v = b->x_coeff[new_index].coeff;
1835 x = b->x_coeff[new_index++].x;
1836 }
791e7b83 1837 }
a0d1931c
Y
1838 if(w > 200 && start_y != 0/*level+1 == s->spatial_decomposition_count*/){
1839 STOP_TIMER("decode_subband")
1840 }
115329f1 1841
a0d1931c
Y
1842 /* Save our variables for the next slice. */
1843 save_state[0] = new_index;
115329f1 1844
a0d1931c 1845 return;
791e7b83
MN
1846}
1847
1848static void reset_contexts(SnowContext *s){
1849 int plane_index, level, orientation;
1850
19aa028d 1851 for(plane_index=0; plane_index<3; plane_index++){
791e7b83
MN
1852 for(level=0; level<s->spatial_decomposition_count; level++){
1853 for(orientation=level ? 1:0; orientation<4; orientation++){
28869757 1854 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
791e7b83
MN
1855 }
1856 }
1857 }
28869757
MN
1858 memset(s->header_state, MID_STATE, sizeof(s->header_state));
1859 memset(s->block_state, MID_STATE, sizeof(s->block_state));
155ec6ed
MN
1860}
1861
1862static int alloc_blocks(SnowContext *s){
1863 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1864 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
115329f1 1865
155ec6ed
MN
1866 s->b_width = w;
1867 s->b_height= h;
115329f1 1868
155ec6ed
MN
1869 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
1870 return 0;
1871}
1872
28869757
MN
1873static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1874 uint8_t *bytestream= d->bytestream;
1875 uint8_t *bytestream_start= d->bytestream_start;
155ec6ed 1876 *d= *s;
28869757
MN
1877 d->bytestream= bytestream;
1878 d->bytestream_start= bytestream_start;
155ec6ed
MN
1879}
1880
1881//near copy & paste from dsputil, FIXME
/**
 * Sums the samples of a w x w square.
 * @param pix        top-left sample of the square
 * @param line_size  distance in bytes between vertically adjacent samples
 * @param w          side length of the square
 * @return the sum of all w*w samples
 */
static int pix_sum(uint8_t * pix, int line_size, int w)
{
    int total = 0;
    int row, col;

    for (row = 0; row < w; row++) {
        const uint8_t *line = pix + row * line_size;

        for (col = 0; col < w; col++)
            total += line[col];
    }
    return total;
}
1896
1897//near copy & paste from dsputil, FIXME
1898static int pix_norm1(uint8_t * pix, int line_size, int w)
1899{
1900 int s, i, j;
1901 uint32_t *sq = squareTbl + 256;
1902
1903 s = 0;
1904 for (i = 0; i < w; i++) {
1905 for (j = 0; j < w; j ++) {
1906 s += sq[pix[0]];
1907 pix ++;
1908 }
1909 pix += line_size - w;
1910 }
1911 return s;
1912}
1913
8c36eaaa 1914static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
155ec6ed
MN
1915 const int w= s->b_width << s->block_max_depth;
1916 const int rem_depth= s->block_max_depth - level;
1917 const int index= (x + y*w) << rem_depth;
1918 const int block_w= 1<<rem_depth;
1919 BlockNode block;
1920 int i,j;
115329f1 1921
155ec6ed
MN
1922 block.color[0]= l;
1923 block.color[1]= cb;
1924 block.color[2]= cr;
1925 block.mx= mx;
1926 block.my= my;
8c36eaaa 1927 block.ref= ref;
155ec6ed
MN
1928 block.type= type;
1929 block.level= level;
1930
1931 for(j=0; j<block_w; j++){
1932 for(i=0; i<block_w; i++){
1933 s->block[index + i + j*w]= block;
1934 }
1935 }
1936}
1937
1938static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
1939 const int offset[3]= {
1940 y*c-> stride + x,
1941 ((y*c->uvstride + x)>>1),
1942 ((y*c->uvstride + x)>>1),
1943 };
1944 int i;
1945 for(i=0; i<3; i++){
1946 c->src[0][i]= src [i];
1947 c->ref[0][i]= ref [i] + offset[i];
1948 }
1949 assert(!ref_index);
1950}
1951
/* Shorthand names for rows of the local predictor array "int P[10][2]"
   declared in encode_q_branch(); element [0] holds x and [1] holds y. */
1952//FIXME copy&paste
1953#define P_LEFT P[1]
1954#define P_TOP P[2]
1955#define P_TOPRIGHT P[3]
1956#define P_MEDIAN P[4]
1957#define P_MV1 P[9]
1958#define FLAG_QPEL 1 //must be 1
1959
/*
 * Chooses and encodes the coding mode of one node of the block quadtree.
 *
 * For block (x, y) on tree level "level" three candidates are evaluated:
 *  - inter: motion search over all s->ref_frames references, trial-encoded
 *    into p_buffer/p_state;
 *  - intra: mean colour of the block, trial-encoded into i_buffer/i_state;
 *  - split (only when level != s->block_max_depth): the four children are
 *    encoded recursively straight into s->c.
 * If the split is cheapest its output is kept; otherwise the bytes of the
 * cheaper trial encode are copied back to the saved stream position,
 * replacing whatever the recursion wrote.
 *
 * On keyframes the block is only marked intra with predicted values and
 * nothing is written.
 *
 * Returns the score of the chosen candidate (0 on keyframes).
 */
1960static int encode_q_branch(SnowContext *s, int level, int x, int y){
/* scratch output and context state for the inter (p_) and intra (i_) trials */
1961 uint8_t p_buffer[1024];
1962 uint8_t i_buffer[1024];
1963 uint8_t p_state[sizeof(s->block_state)];
1964 uint8_t i_state[sizeof(s->block_state)];
28869757
MN
1965 RangeCoder pc, ic;
/* stream position on entry, so a trial result can replace the split output */
1966 uint8_t *pbbak= s->c.bytestream;
1967 uint8_t *pbbak_start= s->c.bytestream_start;
155ec6ed
MN
1968 int score, score2, iscore, i_len, p_len, block_s, sum;
1969 const int w= s->b_width << s->block_max_depth;
1970 const int h= s->b_height << s->block_max_depth;
1971 const int rem_depth= s->block_max_depth - level;
1972 const int index= (x + y*w) << rem_depth;
1973 const int block_w= 1<<(LOG2_MB_SIZE - level);
155ec6ed
MN
1974 int trx= (x+1)<<rem_depth;
1975 int try= (y+1)<<rem_depth;
/* neighbouring nodes; null_block stands in outside the picture */
1976 BlockNode *left = x ? &s->block[index-1] : &null_block;
1977 BlockNode *top = y ? &s->block[index-w] : &null_block;
1978 BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
1979 BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
1980 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
1981 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
/* predictors: colours from the left block, MV as median of left/top/top-right */
1982 int pl = left->color[0];
1983 int pcb= left->color[1];
1984 int pcr= left->color[2];
1985 int pmx= mid_pred(left->mx, top->mx, tr->mx);
1986 int pmy= mid_pred(left->my, top->my, tr->my);
1987 int mx=0, my=0;
51d6a3cf 1988 int l,cr,cb;
155ec6ed
MN
1989 const int stride= s->current_picture.linesize[0];
1990 const int uvstride= s->current_picture.linesize[1];
51d6a3cf
MN
1991 uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w,
1992 s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
1993 s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
155ec6ed
MN
1994 int P[10][2];
1995 int16_t last_mv[3][2];
1996 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
1997 const int shift= 1+qpel;
1998 MotionEstContext *c= &s->m.me;
/* context selectors for the adaptive states */
8c36eaaa 1999 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
155ec6ed
MN
2000 int mx_context= av_log2(2*ABS(left->mx - top->mx));
2001 int my_context= av_log2(2*ABS(left->my - top->my));
2002 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
8c36eaaa 2003 int ref, best_ref, ref_score, ref_mx, ref_my;
155ec6ed
MN
2004
2005 assert(sizeof(s->block_state) >= 256);
2006 if(s->keyframe){
8c36eaaa 2007 set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, 0, BLOCK_INTRA);
155ec6ed
MN
2008 return 0;
2009 }
2010
155ec6ed
MN
2011// clip predictors / edge ?
2012
/* candidate predictors for the motion search */
2013 P_LEFT[0]= left->mx;
2014 P_LEFT[1]= left->my;
2015 P_TOP [0]= top->mx;
2016 P_TOP [1]= top->my;
2017 P_TOPRIGHT[0]= tr->mx;
2018 P_TOPRIGHT[1]= tr->my;
115329f1 2019
155ec6ed
MN
/* vectors currently stored for this, the right and the bottom block */
2020 last_mv[0][0]= s->block[index].mx;
2021 last_mv[0][1]= s->block[index].my;
2022 last_mv[1][0]= right->mx;
2023 last_mv[1][1]= right->my;
2024 last_mv[2][0]= bottom->mx;
2025 last_mv[2][1]= bottom->my;
115329f1 2026
155ec6ed 2027 s->m.mb_stride=2;
115329f1 2028 s->m.mb_x=
155ec6ed
MN
2029 s->m.mb_y= 0;
2030 s->m.me.skip= 0;
2031
155ec6ed
MN
2032 assert(s->m.me. stride == stride);
2033 assert(s->m.me.uvstride == uvstride);
115329f1 2034
155ec6ed
MN
2035 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
2036 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
2037 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
/* note: also sets s->m.f_code to 1 as a side effect */
2038 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
115329f1 2039
ff158dc9
MN
/* search window relative to the block position, with a 16-2 margin on each side */
2040 c->xmin = - x*block_w - 16+2;
2041 c->ymin = - y*block_w - 16+2;
2042 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
2043 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
155ec6ed
MN
2044
/* clip the predictors to the search window */
2045 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
115329f1 2046 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
155ec6ed
MN
2047 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
2048 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
2049 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
2050 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
2051 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
2052
2053 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
2054 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
2055
/* top row has no top neighbours: predict from the left block only */
2056 if (!y) {
2057 c->pred_x= P_LEFT[0];
2058 c->pred_y= P_LEFT[1];
2059 } else {
2060 c->pred_x = P_MEDIAN[0];
2061 c->pred_y = P_MEDIAN[1];
2062 }
2063
8c36eaaa
LM
/* motion search in every reference frame; keep the best-scoring one */
2064 score= INT_MAX;
2065 best_ref= 0;
2066 for(ref=0; ref<s->ref_frames; ref++){
2067 init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);
2068
2069 ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
2070 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
155ec6ed 2071
8c36eaaa
LM
2072 assert(ref_mx >= c->xmin);
2073 assert(ref_mx <= c->xmax);
2074 assert(ref_my >= c->ymin);
2075 assert(ref_my <= c->ymax);
115329f1 2076
8c36eaaa
LM
/* sub-pel refinement updates ref_mx/ref_my; its score is replaced by the mb score below */
2077 ref_score= s->m.me.sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
2078 ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
/* penalty that grows with the reference index */
2079 ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
/* remember the per-reference result when the tables exist */
2080 if(s->ref_mvs[ref]){
2081 s->ref_mvs[ref][index][0]= ref_mx;
2082 s->ref_mvs[ref][index][1]= ref_my;
2083 s->ref_scores[ref][index]= ref_score;
2084 }
2085 if(score > ref_score){
2086 score= ref_score;
2087 best_ref= ref;
2088 mx= ref_mx;
2089 my= ref_my;
2090 }
2091 }
155ec6ed 2092 //FIXME if mb_cmp != SSE then intra cant be compared currently and mb_penalty vs. lambda2
115329f1 2093
155ec6ed
MN
2094 // subpel search
/* trial-encode the inter candidate into p_buffer with a copy of the coder */
2095 pc= s->c;
28869757
MN
2096 pc.bytestream_start=
2097 pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
155ec6ed
MN
2098 memcpy(p_state, s->block_state, sizeof(s->block_state));
2099
/* "not split" flag, only coded when a split would be possible */
2100 if(level!=s->block_max_depth)
28869757
MN
2101 put_rac(&pc, &p_state[4 + s_context], 1);
/* block type: 0 = inter */
2102 put_rac(&pc, &p_state[1 + left->type + top->type], 0);
8c36eaaa
LM
2103 if(s->ref_frames > 1)
2104 put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
2105 put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
2106 put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
28869757
MN
2107 p_len= pc.bytestream - pc.bytestream_start;
/* add the rate term: bytes written plus coder-state differences, weighted by lambda2 */
2108 score += (s->lambda2*(p_len*8
2109 + (pc.outstanding_count - s->c.outstanding_count)*8
2110 + (-av_log2(pc.range) + av_log2(s->c.range))
2111 ))>>FF_LAMBDA_SHIFT;
155ec6ed
MN
2112
/* intra candidate: rounded mean of each plane; distortion from luma only */
2113 block_s= block_w*block_w;
51d6a3cf 2114 sum = pix_sum(current_data[0], stride, block_w);
155ec6ed 2115 l= (sum + block_s/2)/block_s;
51d6a3cf 2116 iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
115329f1 2117
155ec6ed 2118 block_s= block_w*block_w>>2;
51d6a3cf 2119 sum = pix_sum(current_data[1], uvstride, block_w>>1);
155ec6ed
MN
2120 cb= (sum + block_s/2)/block_s;
2121// iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
51d6a3cf 2122 sum = pix_sum(current_data[2], uvstride, block_w>>1);
155ec6ed
MN
2123 cr= (sum + block_s/2)/block_s;
2124// iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
2125
/* trial-encode the intra candidate into i_buffer */
2126 ic= s->c;
28869757
MN
2127 ic.bytestream_start=
2128 ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
155ec6ed
MN
2129 memcpy(i_state, s->block_state, sizeof(s->block_state));
2130 if(level!=s->block_max_depth)
28869757
MN
2131 put_rac(&ic, &i_state[4 + s_context], 1);
/* block type: 1 = intra */
2132 put_rac(&ic, &i_state[1 + left->type + top->type], 1);
155ec6ed
MN
2133 put_symbol(&ic, &i_state[32], l-pl , 1);
2134 put_symbol(&ic, &i_state[64], cb-pcb, 1);
2135 put_symbol(&ic, &i_state[96], cr-pcr, 1);
28869757
MN
2136 i_len= ic.bytestream - ic.bytestream_start;
2137 iscore += (s->lambda2*(i_len*8
2138 + (ic.outstanding_count - s->c.outstanding_count)*8
2139 + (-av_log2(ic.range) + av_log2(s->c.range))
2140 ))>>FF_LAMBDA_SHIFT;
155ec6ed
MN
2141
2142// assert(score==256*256*256*64-1);
2143 assert(iscore < 255*255*256 + s->lambda2*10);
2144 assert(iscore >= 0);
2145 assert(l>=0 && l<=255);
2146 assert(pl>=0 && pl<=255);
2147
/* top-level blocks feed the scene change score */
2148 if(level==0){
2149 int varc= iscore >> 8;
2150 int vard= score >> 8;
2151 if (vard <= 64 || vard < varc)
2152 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
2153 else
2154 c->scene_change_score+= s->m.qscale;
2155 }
115329f1 2156
/* split candidate: write the split flag and the four children into s->c */
155ec6ed 2157 if(level!=s->block_max_depth){
28869757 2158 put_rac(&s->c, &s->block_state[4 + s_context], 0);
155ec6ed
MN
2159 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
2160 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
2161 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
2162 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
2163 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
115329f1 2164
155ec6ed
MN
/* split wins: its output is already in s->c */
2165 if(score2 < score && score2 < iscore)
2166 return score2;
2167 }
115329f1 2168
/* otherwise install the cheaper trial: copy its bytes to the saved position,
   adopt its coder and context state, and store the block description */
155ec6ed 2169 if(iscore < score){
28869757 2170 memcpy(pbbak, i_buffer, i_len);
155ec6ed 2171 s->c= ic;
28869757
MN
2172 s->c.bytestream_start= pbbak_start;
2173 s->c.bytestream= pbbak + i_len;
8c36eaaa 2174 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
155ec6ed
MN
2175 memcpy(s->block_state, i_state, sizeof(s->block_state));
2176 return iscore;
2177 }else{
28869757 2178 memcpy(pbbak, p_buffer, p_len);
155ec6ed 2179 s->c= pc;
28869757
MN
2180 s->c.bytestream_start= pbbak_start;
2181 s->c.bytestream= pbbak + p_len;
8c36eaaa 2182 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
155ec6ed
MN
2183 memcpy(s->block_state, p_state, sizeof(s->block_state));
2184 return score;
2185 }
2186}
2187
51d6a3cf
MN
2188static always_inline int same_block(BlockNode *a, BlockNode *b){
2189 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
2190 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
2191 }else{
8c36eaaa 2192 return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
51d6a3cf
MN
2193 }
2194}
2195
2196static void encode_q_branch2(SnowContext *s, int level, int x, int y){
2197 const int w= s->b_width << s->block_max_depth;
2198 const int rem_depth= s->block_max_depth - level;
2199 const int index= (x + y*w) << rem_depth;
2200 int trx= (x+1)<<rem_depth;
2201 BlockNode *b= &s->block[index];
2202 BlockNode *left = x ? &s->block[index-1] : &null_block;
2203 BlockNode *top = y ? &s->block[index-w] : &null_block;
2204 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2205 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2206 int pl = left->color[0];
2207 int pcb= left->color[1];
2208 int pcr= left->color[2];
2209 int pmx= mid_pred(left->mx, top->mx, tr->mx);
2210 int pmy= mid_pred(left->my, top->my, tr->my);
8c36eaaa
LM
2211 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2212 int mx_context= av_log2(2*ABS(left->mx - top->mx)) + 16*!!b->ref;
2213 int my_context= av_log2(2*ABS(left->my - top->my)) + 16*!!b->ref;
51d6a3cf
MN
2214 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2215
2216 if(s->keyframe){
8c36eaaa 2217 set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, 0, BLOCK_INTRA);
51d6a3cf
MN
2218 return;
2219 }
2220
2221 if(level!=s->block_max_depth){
2222 if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
d6f41eed
MN
2223 put_rac(&s->c, &s->block_state[4 + s_context], 1);
2224 }else{
51d6a3cf
MN
2225 put_rac(&s->c, &s->block_state[4 + s_context], 0);
2226 encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
2227 encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
2228 encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
2229 encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
2230 return;
51d6a3cf
MN
2231 }
2232 }
2233 if(b->type & BLOCK_INTRA){
2234 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
2235 put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
2236 put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
2237 put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
8c36eaaa 2238 set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
51d6a3cf
MN
2239 }else{
2240 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
8c36eaaa
LM
2241 if(s->ref_frames > 1)
2242 put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
51d6a3cf
MN
2243 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
2244 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
8c36eaaa 2245 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
51d6a3cf
MN
2246 }
2247}
2248
155ec6ed
MN
2249static void decode_q_branch(SnowContext *s, int level, int x, int y){
2250 const int w= s->b_width << s->block_max_depth;
155ec6ed
MN
2251 const int rem_depth= s->block_max_depth - level;
2252 const int index= (x + y*w) << rem_depth;
155ec6ed 2253 int trx= (x+1)<<rem_depth;
155ec6ed
MN
2254 BlockNode *left = x ? &s->block[index-1] : &null_block;
2255 BlockNode *top = y ? &s->block[index-w] : &null_block;
2256 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2257 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2258 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
115329f1 2259
155ec6ed 2260 if(s->keyframe){
8c36eaaa 2261 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
155ec6ed
MN
2262 return;
2263 }
2264
28869757 2265 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
155ec6ed
MN
2266 int type;
2267 int l = left->color[0];
2268 int cb= left->color[1];
2269 int cr= left->color[2];
2270 int mx= mid_pred(left->mx, top->mx, tr->mx);
2271 int my= mid_pred(left->my, top->my, tr->my);
8c36eaaa
LM
2272 int ref = 0;
2273 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
155ec6ed
MN
2274 int mx_context= av_log2(2*ABS(left->mx - top->mx)) + 0*av_log2(2*ABS(tr->mx - top->mx));
2275 int my_context= av_log2(2*ABS(left->my - top->my)) + 0*av_log2(2*ABS(tr->my - top->my));
115329f1 2276
28869757 2277 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
155ec6ed
MN
2278
2279 if(type){
2280 l += get_symbol(&s->c, &s->block_state[32], 1);
2281 cb+= get_symbol(&s->c, &s->block_state[64], 1);
2282 cr+= get_symbol(&s->c, &s->block_state[96], 1);
2283 }else{
8c36eaaa
LM
2284 if(s->ref_frames > 1)
2285 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
2286 mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
2287 my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
155ec6ed 2288 }
8c36eaaa 2289 set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
155ec6ed
MN
2290 }else{
2291 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
2292 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
2293 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
2294 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
2295 }
2296}
2297
2298static void encode_blocks(SnowContext *s){
2299 int x, y;
2300 int w= s->b_width;
2301 int h= s->b_height;
2302
51d6a3cf
MN
2303 if(s->avctx->me_method == ME_ITER && !s->keyframe)
2304 iterative_me(s);
2305
155ec6ed 2306 for(y=0; y<h; y++){
d06c75a8 2307 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
0ecca7a4
MN
2308 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2309 return;
2310 }
155ec6ed 2311 for(x=0; x<w; x++){
51d6a3cf
MN
2312 if(s->avctx->me_method == ME_ITER)
2313 encode_q_branch2(s, 0, x, y);
2314 else
2315 encode_q_branch (s, 0, x, y);
155ec6ed
MN
2316 }
2317 }
2318}
2319
2320static void decode_blocks(SnowContext *s){
2321 int x, y;
2322 int w= s->b_width;
2323 int h= s->b_height;
2324
2325 for(y=0; y<h; y++){
2326 for(x=0; x<w; x++){
2327 decode_q_branch(s, 0, x, y);
2328 }
2329 }
791e7b83
MN
2330}
2331
2332static void mc_block(uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
2333 int x, y;
3924dac4 2334START_TIMER
791e7b83
MN
2335 for(y=0; y < b_h+5; y++){
2336 for(x=0; x < b_w; x++){
3924dac4
MN
2337 int a0= src[x ];
2338 int a1= src[x + 1];
2339 int a2= src[x + 2];
2340 int a3= src[x + 3];
2341 int a4= src[x + 4];
2342 int a5= src[x + 5];
791e7b83
MN
2343// int am= 9*(a1+a2) - (a0+a3);
2344 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2345// int am= 18*(a2+a3) - 2*(a1+a4);
2346// int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2347// int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;
2348
2349// if(b_w==16) am= 8*(a1+a2);
2350
8c2515bb
Y
2351 if(dx<8) am = (32*a2*( 8-dx) + am* dx + 128)>>8;
2352 else am = ( am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
115329f1 2353
8c2515bb
Y
2354 /* FIXME Try increasing tmp buffer to 16 bits and not clipping here. Should give marginally better results. - Robert*/
2355 if(am&(~255)) am= ~(am>>31);
115329f1 2356
8c2515bb 2357 tmp[x] = am;
791e7b83
MN
2358
2359/* if (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) + aL* dx + 32)>>6;
2360 else if(dx< 8) tmp[x + y*stride]= ( aL*( 8-dx) + am*(dx- 4) + 32)>>6;
2361 else if(dx<12) tmp[x + y*stride]= ( am*(12-dx) + aR*(dx- 8) + 32)>>6;
2362 else tmp[x + y*stride]= ( aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/
2363 }
3924dac4
MN
2364 tmp += stride;
2365 src += stride;
791e7b83 2366 }
3924dac4 2367 tmp -= (b_h+5)*stride;
115329f1 2368
791e7b83
MN
2369 for(y=0; y < b_h; y++){
2370 for(x=0; x < b_w; x++){
3924dac4
MN
2371 int a0= tmp[x + 0*stride];
2372 int a1= tmp[x + 1*stride];
2373 int a2= tmp[x + 2*stride];
2374 int a3= tmp[x + 3*stride];
2375 int a4= tmp[x + 4*stride];
2376 int a5= tmp[x + 5*stride];
791e7b83
MN
2377 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2378// int am= 18*(a2+a3) - 2*(a1+a4);
2379/* int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2380 int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/
115329f1 2381
791e7b83
MN
2382// if(b_w==16) am= 8*(a1+a2);
2383
8c2515bb
Y
2384 if(dy<8) am = (32*a2*( 8-dy) + am* dy + 128)>>8;
2385 else am = ( am*(16-dy) + 32*a3*(dy-8) + 128)>>8;
791e7b83 2386
8c2515bb 2387 if(am&(~255)) am= ~(am>>31);
115329f1 2388
8c2515bb 2389 dst[x] = am;
791e7b83
MN
2390/* if (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) + aL* dy + 32)>>6;
2391 else if(dy< 8) tmp[x + y*stride]= ( aL*( 8-dy) + am*(dy- 4) + 32)>>6;
2392 else if(dy<12) tmp[x + y*stride]= ( am*(12-dy) + aR*(dy- 8) + 32)>>6;
2393 else tmp[x + y*stride]= ( aR*(16-dy) + 16*a2*(dy-12) + 32)>>6;*/
2394 }
3924dac4
MN
2395 dst += stride;
2396 tmp += stride;
791e7b83 2397 }
3924dac4 2398STOP_TIMER("mc_block")
791e7b83
MN
2399}
2400
/* mca(dx,dy,b_w) defines mc_block_hpel<dx><dy><b_w>(dst, src, stride, h):
   a wrapper that runs mc_block() on a square b_w x b_w block with the fixed
   offsets dx,dy, using an on-stack temporary of stride*(b_w+5) bytes.
   src is moved back by 2 columns and 2 rows before the call; h must equal b_w.
   Instantiated below for dx,dy in {0,8} and b_w in {16,8}. */
791e7b83 2401#define mca(dx,dy,b_w)\
d92b5807 2402static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, uint8_t *src, int stride, int h){\
791e7b83
MN
2403 uint8_t tmp[stride*(b_w+5)];\
2404 assert(h==b_w);\
2405 mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\
2406}
2407
2408mca( 0, 0,16)
2409mca( 8, 0,16)
2410mca( 0, 8,16)
2411mca( 8, 8,16)
d92b5807
MN
2412mca( 0, 0,8)
2413mca( 8, 0,8)
2414mca( 0, 8,8)
2415mca( 8, 8,8)
791e7b83 2416
8c36eaaa 2417static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
51d6a3cf 2418 if(block->type & BLOCK_INTRA){
ff158dc9 2419 int x, y;
2692ceab
MN
2420 const int color = block->color[plane_index];
2421 const int color4= color*0x01010101;
1015631b
LM
2422 if(b_w==32){
2423 for(y=0; y < b_h; y++){
2424 *(uint32_t*)&dst[0 + y*stride]= color4;
2425 *(uint32_t*)&dst[4 + y*stride]= color4;
2426 *(uint32_t*)&dst[8 + y*stride]= color4;
2427 *(uint32_t*)&dst[12+ y*stride]= color4;
2428 *(uint32_t*)&dst[16+ y*stride]= color4;
2429 *(uint32_t*)&dst[20+ y*stride]= color4;
2430 *(uint32_t*)&dst[24+ y*stride]= color4;
2431 *(uint32_t*)&dst[28+ y*stride]= color4;
2432 }
2433 }else if(b_w==16){
2692ceab
MN
2434 for(y=0; y < b_h; y++){
2435 *(uint32_t*)&dst[0 + y*stride]= color4;
2436 *(uint32_t*)&dst[4 + y*stride]= color4;
2437 *(uint32_t*)&dst[8 + y*stride]= color4;
2438 *(uint32_t*)&dst[12+ y*stride]= color4;
2439 }
2440 }else if(b_w==8){
2441 for(y=0; y < b_h; y++){
2442 *(uint32_t*)&dst[0 + y*stride]= color4;
2443 *(uint32_t*)&dst[4 + y*stride]= color4;
2444 }
2445 }else if(b_w==4){
2446 for(y=0; y < b_h; y++){
2447 *(uint32_t*)&dst[0 + y*stride]= color4;
2448 }
2449 }else{
2450 for(y=0; y < b_h; y++){
2451 for(x=0; x < b_w; x++){
2452 dst[x + y*stride]= color;
2453 }
ff158dc9
MN
2454 }
2455 }
2456 }else{
8c36eaaa 2457 uint8_t *src= s->last_picture[block->ref].data[plane_index];
ff158dc9
MN
2458 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
2459 int mx= block->mx*scale;
2460 int my= block->my*scale;
ec697587
MN
2461 const int dx= mx&15;
2462 const int dy= my&15;
80e44bc3 2463 const int tab_index= 3 - (b_w>>2) + (b_w>>4);
ff158dc9
MN
2464 sx += (mx>>4) - 2;
2465 sy += (my>>4) - 2;
2466 src += sx + sy*stride;
2467 if( (unsigned)sx >= w - b_w - 4
2468 || (unsigned)sy >= h - b_h - 4){
2469 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+5, b_h+5, sx, sy, w, h);
2470 src= tmp + MB_SIZE;
2471 }
87f20c2f
MN
2472// assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
2473// assert(!(b_w&(b_w-1)));
2692ceab 2474 assert(b_w>1 && b_h>1);
1015631b 2475 assert(tab_index>=0 && tab_index<4 || b_w==32);
87f20c2f 2476 if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)))
ec697587 2477 mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy);
1015631b
LM
2478 else if(b_w==32){
2479 int y;
2480 for(y=0; y<b_h; y+=16){
2481 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 2 + (y+2)*stride,stride);
2482 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 18 + (y+2)*stride,stride);
2483 }
2484 }else if(b_w==b_h)
80e44bc3 2485 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 2 + 2*stride,stride);
2692ceab 2486 else if(b_w==2*b_h){
80e44bc3
MN
2487 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 2 + 2*stride,stride);
2488 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 2 + b_h + 2*stride,stride);
2692ceab
MN
2489 }else{
2490 assert(2*b_w==b_h);
80e44bc3
MN
2491 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 2 + 2*stride ,stride);
2492 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 2 + 2*stride+b_w*stride,stride);
2692ceab 2493 }
ff158dc9
MN
2494 }
2495}
2496
059715a4
RE
2497void ff_snow_inner_add_yblock(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
2498 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
2499 int y, x;
2500 DWTELEM * dst;
2501 for(y=0; y<b_h; y++){
2502 //FIXME ugly missue of obmc_stride
2503 uint8_t *obmc1= obmc + y*obmc_stride;
2504 uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2505 uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2506 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2507 dst = slice_buffer_get_line(sb, src_y + y);
2508 for(x=0; x<b_w; x++){
2509 int v= obmc1[x] * block[3][x + y*src_stride]
2510 +obmc2[x] * block[2][x + y*src_stride]
2511 +obmc3[x] * block[1][x + y*src_stride]
2512 +obmc4[x] * block[0][x + y*src_stride];
2513
2514 v <<= 8 - LOG2_OBMC_MAX;
2515 if(FRAC_BITS != 8){
2516 v += 1<<(7 - FRAC_BITS);
2517 v >>= 8 - FRAC_BITS;
2518 }
2519 if(add){
2520 v += dst[x + src_x];
2521 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2522 if(v&(~255)) v= ~(v>>31);
2523 dst8[x + y*src_stride] = v;
2524 }else{
2525 dst[x + src_x] -= v;
2526 }
2527 }
2528 }
2529}
2530
//FIXME name cleanup (b_w, block_w, b_width stuff)
8c36eaaa 2532static always_inline void add_yblock_buffered(SnowContext *s, slice_buffer * sb, DWTELEM *old_dst, uint8_t *dst8, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){
a0d1931c
Y
2533 DWTELEM * dst = NULL;
2534 const int b_width = s->b_width << s->block_max_depth;
2535 const int b_height= s->b_height << s->block_max_depth;
2536 const int b_stride= b_width;
2537 BlockNode *lt= &s->block[b_x + b_y*b_stride];
2538 BlockNode *rt= lt+1;
2539 BlockNode *lb= lt+b_stride;
2540 BlockNode *rb= lb+1;
115329f1 2541 uint8_t *block[4];
cc884a35
MN
2542 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
2543 uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
2544 uint8_t *ptmp;
a0d1931c
Y
2545 int x,y;
2546
2547 if(b_x<0){
2548 lt= rt;
2549 lb= rb;
2550 }else if(b_x + 1 >= b_width){
2551 rt= lt;
2552 rb= lb;
2553 }
2554 if(b_y<0){
2555 lt= lb;
2556 rt= rb;
2557 }else if(b_y + 1 >= b_height){
2558 lb= lt;
2559 rb= rt;
2560 }
115329f1 2561
a0d1931c
Y
2562 if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
2563 obmc -= src_x;
2564 b_w += src_x;
2565 src_x=0;
2566 }else if(src_x + b_w > w){
2567 b_w = w - src_x;
2568 }
2569 if(src_y<0){
2570 obmc -= src_y*obmc_stride;
2571 b_h += src_y;
2572 src_y=0;
2573 }else if(src_y + b_h> h){
2574 b_h = h - src_y;
2575 }
115329f1 2576
a0d1931c
Y
2577 if(b_w<=0 || b_h<=0) return;
2578
cc884a35 2579assert(src_stride > 2*MB_SIZE + 5);
a0d1931c
Y
2580// old_dst += src_x + src_y*dst_stride;
2581 dst8+= src_x + src_y*src_stride;
2582// src += src_x + src_y*src_stride;
2583
cc884a35
MN
2584 ptmp= tmp + 3*tmp_step;
2585 block[0]= ptmp;
2586 ptmp+=tmp_step;
8c36eaaa 2587 pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
a0d1931c
Y
2588
2589 if(same_block(lt, rt)){
2590 block[1]= block[0];
2591 }else{
cc884a35
MN
2592 block[1]= ptmp;
2593 ptmp+=tmp_step;
8c36eaaa 2594 pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
a0d1931c 2595 }
115329f1 2596
a0d1931c
Y
2597 if(same_block(lt, lb)){
2598 block[2]= block[0];
2599 }else if(same_block(rt, lb)){
2600 block[2]= block[1];
2601 }else{
cc884a35
MN
2602 block[2]= ptmp;
2603 ptmp+=tmp_step;
8c36eaaa 2604 pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
a0d1931c
Y
2605 }
2606
2607 if(same_block(lt, rb) ){
2608 block[3]= block[0];
2609 }else if(same_block(rt, rb)){
2610 block[3]= block[1];
2611 }else if(same_block(lb, rb)){
2612 block[3]= block[2];
2613 }else{
cc884a35 2614 block[3]= ptmp;
8c36eaaa 2615 pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
a0d1931c
Y
2616 }
2617#if 0
2618 for(y=0; y<b_h; y++){
2619 for(x=0; x<b_w; x++){
2620 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2621 if(add) dst[x + y*dst_stride] += v;
2622 else dst[x + y*dst_stride] -= v;
2623 }
2624 }
2625 for(y=0; y<b_h; y++){
2626 uint8_t *obmc2= obmc + (obmc_stride>>1);
2627 for(x=0; x<b_w; x++){
2628 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2629 if(add) dst[x + y*dst_stride] += v;
2630 else dst[x + y*dst_stride] -= v;
2631 }
2632 }
2633 for(y=0; y<b_h; y++){
2634 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2635 for(x=0; x<b_w; x++){
2636 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2637 if(add) dst[x + y*dst_stride] += v;
2638 else dst[x + y*dst_stride] -= v;
2639 }
2640 }
2641 for(y=0; y<b_h; y++){
2642 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2643 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2644 for(x=0; x<b_w; x++){
2645 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2646 if(add) dst[x + y*dst_stride] += v;
2647 else dst[x + y*dst_stride] -= v;
2648 }
2649 }
2650#else
2651{
2652
2653 START_TIMER
115329f1 2654
059715a4 2655 s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
a0d1931c
Y
2656 STOP_TIMER("Inner add y block")
2657}
2658#endif
2659}
2660
//FIXME name cleanup (b_w, block_w, b_width stuff)
7f21a9a7 2662static always_inline void add_yblock(SnowContext *s, DWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
ff158dc9
MN
2663 const int b_width = s->b_width << s->block_max_depth;
2664 const int b_height= s->b_height << s->block_max_depth;
2665 const int b_stride= b_width;
2666 BlockNode *lt= &s->block[b_x + b_y*b_stride];
2667 BlockNode *rt= lt+1;
2668 BlockNode *lb= lt+b_stride;
2669 BlockNode *rb= lb+1;
115329f1 2670 uint8_t *block[4];
cc884a35
MN
2671 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
2672 uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
2673 uint8_t *ptmp;
791e7b83
MN
2674 int x,y;
2675
ff158dc9
MN
2676 if(b_x<0){
2677 lt= rt;
2678 lb= rb;
2679 }else if(b_x + 1 >= b_width){
2680 rt= lt;
2681 rb= lb;
791e7b83 2682 }
ff158dc9
MN
2683 if(b_y<0){
2684 lt= lb;
2685 rt= rb;
2686 }else if(b_y + 1 >= b_height){
2687 lb= lt;
2688 rb= rt;
2689 }
115329f1 2690
ff158dc9
MN
2691 if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
2692 obmc -= src_x;
2693 b_w += src_x;
1015631b
LM
2694 if(!offset_dst)
2695 dst -= src_x;
ff158dc9
MN
2696 src_x=0;
2697 }else if(src_x + b_w > w){
2698 b_w = w - src_x;
2699 }
2700 if(src_y<0){
2701 obmc -= src_y*obmc_stride;
2702 b_h += src_y;
1015631b
LM
2703 if(!offset_dst)
2704 dst -= src_y*dst_stride;
ff158dc9
MN
2705 src_y=0;
2706 }else if(src_y + b_h> h){
2707 b_h = h - src_y;
791e7b83 2708 }
115329f1 2709
ff158dc9 2710 if(b_w<=0 || b_h<=0) return;
155ec6ed 2711
cc884a35 2712assert(src_stride > 2*MB_SIZE + 5);
1015631b
LM
2713 if(offset_dst)
2714 dst += src_x + src_y*dst_stride;
715a97f0 2715 dst8+= src_x + src_y*src_stride;
ff158dc9
MN
2716// src += src_x + src_y*src_stride;
2717
cc884a35
MN
2718 ptmp= tmp + 3*tmp_step;
2719 block[0]= ptmp;
2720 ptmp+=tmp_step;
8c36eaaa 2721 pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
ff158dc9
MN
2722
2723 if(same_block(lt, rt)){
2724 block[1]= block[0];
791e7b83 2725 }else{
cc884a35
MN
2726 block[1]= ptmp;
2727 ptmp+=tmp_step;
8c36eaaa 2728 pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
ff158dc9 2729 }
115329f1 2730
ff158dc9
MN
2731 if(same_block(lt, lb)){
2732 block[2]= block[0];
2733 }else if(same_block(rt, lb)){
2734 block[2]= block[1];
2735 }else{
cc884a35
MN
2736 block[2]= ptmp;
2737 ptmp+=tmp_step;
8c36eaaa 2738 pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
ff158dc9 2739 }
791e7b83 2740
ff158dc9
MN
2741 if(same_block(lt, rb) ){
2742 block[3]= block[0];
2743 }else if(same_block(rt, rb)){
2744 block[3]= block[1];
2745 }else if(same_block(lb, rb)){
2746 block[3]= block[2];
2747 }else{
cc884a35 2748 block[3]= ptmp;
8c36eaaa 2749 pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
ff158dc9
MN
2750 }
2751#if 0
2752 for(y=0; y<b_h; y++){
2753 for(x=0; x<b_w; x++){
2754 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2755 if(add) dst[x + y*dst_stride] += v;
2756 else dst[x + y*dst_stride] -= v;
2757 }
2758 }
2759 for(y=0; y<b_h; y++){
2760 uint8_t *obmc2= obmc + (obmc_stride>>1);
2761 for(x=0; x<b_w; x++){
2762 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2763 if(add) dst[x + y*dst_stride] += v;
2764 else dst[x + y*dst_stride] -= v;
2765 }
2766 }
2767 for(y=0; y<b_h; y++){
2768 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2769 for(x=0; x<b_w; x++){
2770 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2771 if(add) dst[x + y*dst_stride] += v;
2772 else dst[x + y*dst_stride] -= v;
2773 }
2774 }
2775 for(y=0; y<b_h; y++){
2776 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2777 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2778 for(x=0; x<b_w; x++){
2779 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2780 if(add) dst[x + y*dst_stride] += v;
2781 else dst[x + y*dst_stride] -= v;
2782 }
2783 }
2784#else
2785 for(y=0; y<b_h; y++){
2786 //FIXME ugly missue of obmc_stride
2787 uint8_t *obmc1= obmc + y*obmc_stride;
2788 uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2789 uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2790 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2791 for(x=0; x<b_w; x++){
2792 int v= obmc1[x] * block[3][x + y*src_stride]
2793 +obmc2[x] * block[2][x + y*src_stride]
2794 +obmc3[x] * block[1][x + y*src_stride]
2795 +obmc4[x] * block[0][x + y*src_stride];
115329f1 2796
715a97f0 2797 v <<= 8 - LOG2_OBMC_MAX;
034aff03
MN
2798 if(FRAC_BITS != 8){
2799 v += 1<<(7 - FRAC_BITS);
2800 v >>= 8 - FRAC_BITS;
2801 }
715a97f0
MN
2802 if(add){
2803 v += dst[x + y*dst_stride];
2804 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2805 if(v&(~255)) v= ~(v>>31);
2806 dst8[x + y*src_stride] = v;
2807 }else{
2808 dst[x + y*dst_stride] -= v;
2809 }
791e7b83
MN
2810 }
2811 }
ff158dc9 2812#endif
791e7b83
MN
2813}
2814
a0d1931c
Y
2815static always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, DWTELEM * old_buffer, int plane_index, int add, int mb_y){
2816 Plane *p= &s->plane[plane_index];
2817 const int mb_w= s->b_width << s->block_max_depth;
2818 const int mb_h= s->b_height << s->block_max_depth;
2819 int x, y, mb_x;
2820 int block_size = MB_SIZE >> s->block_max_depth;
2821 int block_w = plane_index ? block_size/2 : block_size;
2822 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2823 int obmc_stride= plane_index ? block_size : 2*block_size;
2824 int ref_stride= s->current_picture.linesize[plane_index];
a0d1931c
Y
2825 uint8_t *dst8= s->current_picture.data[plane_index];
2826 int w= p->width;
2827 int h= p->height;
2828 START_TIMER
115329f1 2829
a0d1931c
Y
2830 if(s->keyframe || (s->avctx->debug&512)){
2831 if(mb_y==mb_h)
2832 return;
2833
2834 if(add){
86e59cc0 2835 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
a0d1931c
Y
2836 {
2837// DWTELEM * line = slice_buffer_get_line(sb, y);
2838 DWTELEM * line = sb->line[y];
2839 for(x=0; x<w; x++)
2840 {
2841// int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2842 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2843 v >>= FRAC_BITS;
2844 if(v&(~255)) v= ~(v>>31);
2845 dst8[x + y*ref_stride]= v;
2846 }
2847 }
2848 }else{
86e59cc0 2849 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
a0d1931c
Y
2850 {
2851// DWTELEM * line = slice_buffer_get_line(sb, y);
2852 DWTELEM * line = sb->line[y];
2853 for(x=0; x<w; x++)
2854 {
2855 line[x] -= 128 << FRAC_BITS;
2856// buf[x + y*w]-= 128<<FRAC_BITS;
2857 }
2858 }
2859 }
2860
2861 return;
2862 }
115329f1 2863
a0d1931c
Y
2864 for(mb_x=0; mb_x<=mb_w; mb_x++){
2865 START_TIMER
2866
8c36eaaa 2867 add_yblock_buffered(s, sb, old_buffer, dst8, obmc,
a0d1931c
Y
2868 block_w*mb_x - block_w/2,
2869 block_w*mb_y - block_w/2,
2870 block_w, block_w,
2871 w, h,
2872 w, ref_stride, obmc_stride,
2873 mb_x - 1, mb_y - 1,
2874 add, plane_index);
115329f1 2875
a0d1931c
Y
2876 STOP_TIMER("add_yblock")
2877 }
115329f1 2878
a0d1931c
Y
2879 STOP_TIMER("predict_slice")
2880}
2881
f9e6ebf7 2882static always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_index, int add, int mb_y){
791e7b83 2883 Plane *p= &s->plane[plane_index];
155ec6ed
MN
2884 const int mb_w= s->b_width << s->block_max_depth;
2885 const int mb_h= s->b_height << s->block_max_depth;
f9e6ebf7 2886 int x, y, mb_x;
155ec6ed
MN
2887 int block_size = MB_SIZE >> s->block_max_depth;
2888 int block_w = plane_index ? block_size/2 : block_size;
ff158dc9 2889 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
51d6a3cf 2890 const int obmc_stride= plane_index ? block_size : 2*block_size;
715a97f0 2891 int ref_stride= s->current_picture.linesize[plane_index];
715a97f0 2892 uint8_t *dst8= s->current_picture.data[plane_index];
791e7b83
MN
2893 int w= p->width;
2894 int h= p->height;
fff6d4ea 2895 START_TIMER
115329f1 2896
ff158dc9 2897 if(s->keyframe || (s->avctx->debug&512)){
f9e6ebf7
LM
2898 if(mb_y==mb_h)
2899 return;
2900
715a97f0 2901 if(add){
86e59cc0 2902 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
715a97f0
MN
2903 for(x=0; x<w; x++){
2904 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2905 v >>= FRAC_BITS;
2906 if(v&(~255)) v= ~(v>>31);
2907 dst8[x + y*ref_stride]= v;
2908 }
2909 }
2910 }else{
86e59cc0 2911 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
715a97f0
MN
2912 for(x=0; x<w; x++){
2913 buf[x + y*w]-= 128<<FRAC_BITS;
2914 }
ff158dc9 2915 }
791e7b83 2916 }
ff158dc9
MN
2917
2918 return;
791e7b83 2919 }
115329f1 2920
ff158dc9 2921 for(mb_x=0; mb_x<=mb_w; mb_x++){
fff6d4ea 2922 START_TIMER
ff158dc9 2923
8c36eaaa 2924 add_yblock(s, buf, dst8, obmc,
ff158dc9 2925 block_w*mb_x - block_w/2,
791e7b83 2926 block_w*mb_y - block_w/2,
ff158dc9 2927 block_w, block_w,
791e7b83 2928 w, h,
ff158dc9
MN
2929 w, ref_stride, obmc_stride,
2930 mb_x - 1, mb_y - 1,
1015631b 2931 add, 1, plane_index);
115329f1 2932
ff158dc9 2933 STOP_TIMER("add_yblock")
791e7b83 2934 }
115329f1 2935
f9e6ebf7
LM
2936 STOP_TIMER("predict_slice")
2937}
2938
2939static always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){
2940 const int mb_h= s->b_height << s->block_max_depth;
2941 int mb_y;
2942 for(mb_y=0; mb_y<=mb_h; mb_y++)
2943 predict_slice(s, buf, plane_index, add, mb_y);
791e7b83
MN
2944}
2945
51d6a3cf
MN
2946static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
2947 int i, x2, y2;
2948 Plane *p= &s->plane[plane_index];
2949 const int block_size = MB_SIZE >> s->block_max_depth;
2950 const int block_w = plane_index ? block_size/2 : block_size;
2951 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2952 const int obmc_stride= plane_index ? block_size : 2*block_size;
2953 const int ref_stride= s->current_picture.linesize[plane_index];
51d6a3cf 2954 uint8_t *src= s-> input_picture.data[plane_index];
1015631b 2955 DWTELEM *dst= (DWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
51d6a3cf
MN
2956 const int b_stride = s->b_width << s->block_max_depth;
2957 const int w= p->width;
2958 const int h= p->height;
2959 int index= mb_x + mb_y*b_stride;
2960 BlockNode *b= &s->block[index];
2961 BlockNode backup= *b;
2962 int ab=0;
2963 int aa=0;
2964
2965 b->type|= BLOCK_INTRA;
2966 b->color[plane_index]= 0;
1015631b 2967 memset(dst, 0, obmc_stride*obmc_stride*sizeof(DWTELEM));
51d6a3cf
MN
2968
2969 for(i=0; i<4; i++){
2970 int mb_x2= mb_x + (i &1) - 1;
2971 int mb_y2= mb_y + (i>>1) - 1;
2972 int x= block_w*mb_x2 + block_w/2;
2973 int y= block_w*mb_y2 + block_w/2;
2974
8c36eaaa 2975 add_yblock(s, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
1015631b 2976 x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
51d6a3cf
MN
2977
2978 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
2979 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
2980 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
2981 int obmc_v= obmc[index];
1015631b 2982 int d;
51d6a3cf
MN
2983 if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
2984 if(x<0) obmc_v += obmc[index + block_w];
2985 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
2986 if(x+block_w>w) obmc_v += obmc[index - block_w];
2987 //FIXME precalc this or simplify it somehow else
2988
1015631b
LM
2989 d = -dst[index] + (1<<(FRAC_BITS-1));
2990 dst[index] = d;
2991 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
51d6a3cf
MN
2992 aa += obmc_v * obmc_v; //FIXME precalclate this
2993 }
2994 }
2995 }
2996 *b= backup;
2997
561a18d3 2998 return clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we shouldnt need cliping
51d6a3cf
MN
2999}
3000
b104969f
LM
3001static inline int get_block_bits(SnowContext *s, int x, int y, int w){
3002 const int b_stride = s->b_width << s->block_max_depth;
3003 const int b_height = s->b_height<< s->block_max_depth;
3004 int index= x + y*b_stride;
3005 BlockNode *b = &s->block[index];
3006 BlockNode *left = x ? &s->block[index-1] : &null_block;
3007 BlockNode *top = y ? &s->block[index-b_stride] : &null_block;
3008 BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left;
3009 BlockNode *tr = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
3010 int dmx, dmy;
3011// int mx_context= av_log2(2*ABS(left->mx - top->mx));
3012// int my_context= av_log2(2*ABS(left->my - top->my));
3013
3014 if(x<0 || x>=b_stride || y>=b_height)
3015 return 0;
3016 dmx= b->mx - mid_pred(left->mx, top->mx, tr->mx);
3017 dmy= b->my - mid_pred(left->my, top->my, tr->my);
3018/*
30191 0 0
302001X 1-2 1
3021001XX 3-6 2-3
30220001XXX 7-14 4-7
302300001XXXX 15-30 8-15
3024*/
3025//FIXME try accurate rate
3026//FIXME intra and inter predictors if surrounding blocks arent the same type
3027 if(b->type & BLOCK_INTRA){
3028 return 3+2*( av_log2(2*ABS(left->color[0] - b->color[0]))
3029 + av_log2(2*ABS(left->color[1] - b->color[1]))
3030 + av_log2(2*ABS(left->color[2] - b->color[2])));
3031 }else
8c36eaaa
LM
3032 return 2*(1 + av_log2(2*ABS(dmx)) //FIXME kill the 2* can be merged in lambda
3033 + av_log2(2*ABS(dmy))
3034 + av_log2(2*b->ref));
b104969f
LM
3035}
3036
1015631b 3037static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
51d6a3cf
MN
3038 Plane *p= &s->plane[plane_index];
3039 const int block_size = MB_SIZE >> s->block_max_depth;
3040 const int block_w = plane_index ? block_size/2 : block_size;
3041 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
3042 const int obmc_stride= plane_index ? block_size : 2*block_size;
3043 const int ref_stride= s->current_picture.linesize[plane_index];
51d6a3cf 3044 uint8_t *dst= s->current_picture.data[plane_index];
1015631b
LM
3045 uint8_t *src= s-> input_picture.data[plane_index];
3046 DWTELEM *pred= (DWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
3047 uint8_t cur[ref_stride*2*MB_SIZE]; //FIXME alignment
3048 uint8_t tmp[ref_stride*(2*MB_SIZE+5)];
51d6a3cf
MN
3049 const int b_stride = s->b_width << s->block_max_depth;
3050 const int b_height = s->b_height<< s->block_max_depth;
3051 const int w= p->width;
3052 const int h= p->height;
1015631b 3053 int distortion;
51d6a3cf
MN
3054 int rate= 0;
3055 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
1015631b
LM
3056 int sx= block_w*mb_x - block_w/2;
3057 int sy= block_w*mb_y - block_w/2;
561a18d3
RE
3058 int x0= FFMAX(0,-sx);
3059 int y0= FFMAX(0,-sy);
3060 int x1= FFMIN(block_w*2, w-sx);
3061 int y1= FFMIN(block_w*2, h-sy);
1015631b
LM
3062 int i,x,y;
3063
8c36eaaa 3064 pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
1015631b
LM
3065
3066 for(y=y0; y<y1; y++){
3067 const uint8_t *obmc1= obmc_edged + y*obmc_stride;
3068 const DWTELEM *pred1 = pred + y*obmc_stride;
3069 uint8_t *cur1 = cur + y*ref_stride;
3070 uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
3071 for(x=x0; x<x1; x++){
3072 int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
3073 v = (v + pred1[x]) >> FRAC_BITS;
3074 if(v&(~255)) v= ~(v>>31);
3075 dst1[x] = v;
51d6a3cf 3076 }
1015631b 3077 }
51d6a3cf 3078
561a18d3
RE
3079 /* copy the regions where obmc[] = (uint8_t)256 */
3080 if(LOG2_OBMC_MAX == 8
3081 && (mb_x == 0 || mb_x == b_stride-1)
3082 && (mb_y == 0 || mb_y == b_height-1)){
3083 if(mb_x == 0)
3084 x1 = block_w;
3085 else
3086 x0 = block_w;
3087 if(mb_y == 0)
3088 y1 = block_w;
3089 else
3090 y0 = block_w;
3091 for(y=y0; y<y1; y++)
3092 memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
3093 }
3094
1015631b 3095 if(block_w==16){
871371a7
LM
3096 /* FIXME rearrange dsputil to fit 32x32 cmp functions */
3097 /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
3098 /* FIXME cmps overlap but don't cover the wavelet's whole support,
3099 * so improving the score of one block is not strictly guaranteed to
3100 * improve the score of the whole frame, so iterative motion est
3101 * doesn't always converge. */
3102 if(s->avctx->me_cmp == FF_CMP_W97)
3103 distortion = w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
3104 else if(s->avctx->me_cmp == FF_CMP_W53)
3105 distortion = w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
3106 else{
3107 distortion = 0;
3108 for(i=0; i<4; i++){
3109 int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
3110 distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
3111 }
1015631b
LM
3112 }
3113 }else{
3114 assert(block_w==8);
3115 distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
51d6a3cf
MN
3116 }
3117
3118 if(plane_index==0){
3119 for(i=0; i<4; i++){
3120/* ..RRr
3121 * .RXx.
3122 * rxx..
3123 */
b104969f
LM
3124 rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
3125 }
48d1b9a1
LM
3126 if(mb_x == b_stride-2)
3127 rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
b104969f
LM
3128 }
3129 return distortion + rate*penalty_factor;
3130}
3131
3132static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
3133 int i, y2;
3134 Plane *p= &s->plane[plane_index];
3135 const int block_size = MB_SIZE >> s->block_max_depth;
3136 const int block_w = plane_index ? block_size/2 : block_size;
3137 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
3138 const int obmc_stride= plane_index ? block_size : 2*block_size;
3139 const int ref_stride= s->current_picture.linesize[plane_index];
b104969f
LM
3140 uint8_t *dst= s->current_picture.data[plane_index];
3141 uint8_t *src= s-> input_picture.data[plane_index];
3142 const static DWTELEM zero_dst[4096]; //FIXME
3143 const int b_stride = s->b_width << s->block_max_depth;
3144 const int b_height = s->b_height<< s->block_max_depth;
3145 const int w= p->width;
3146 const int h= p->height;
3147 int distortion= 0;
3148 int rate= 0;
3149 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
3150
3151 for(i=0; i<9; i++){
3152 int mb_x2= mb_x + (i%3) - 1;
3153 int mb_y2= mb_y + (i/3) - 1;
3154 int x= block_w*mb_x2 + block_w/2;
3155 int y= block_w*mb_y2 + block_w/2;
3156
8c36eaaa 3157 add_yblock(s, zero_dst, dst, obmc,
b104969f
LM
3158 x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
3159
3160 //FIXME find a cleaner/simpler way to skip the outside stuff
3161 for(y2= y; y2<0; y2++)
3162 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
3163 for(y2= h; y2<y+block_w; y2++)
3164 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
3165 if(x<0){
3166 for(y2= y; y2<y+block_w; y2++)
3167 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
51d6a3cf 3168 }
b104969f
LM
3169 if(x+block_w > w){
3170 for(y2= y; y2<y+block_w; y2++)
3171 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
3172 }
3173
3174 assert(block_w== 8 || block_w==16);
3175 distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
51d6a3cf
MN
3176 }
3177
b104969f
LM
3178 if(plane_index==0){
3179 BlockNode *b= &s->block[mb_x+mb_y*b_stride];
3180 int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
3181
3182/* ..RRRr
3183 * .RXXx.
3184 * .RXXx.
3185 * rxxx.
3186 */
3187 if(merged)
3188 rate = get_block_bits(s, mb_x, mb_y, 2);
3189 for(i=merged?4:0; i<9; i++){
3190 static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
3191 rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
3192 }
3193 }
51d6a3cf
MN
3194 return distortion + rate*penalty_factor;
3195}
3196
1015631b 3197static always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
51d6a3cf
MN
3198 const int b_stride= s->b_width << s->block_max_depth;
3199 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
3200 BlockNode backup= *block;
3201 int rd, index, value;
3202
3203 assert(mb_x>=0 && mb_y>=0);
735f9f34 3204 assert(mb_x<b_stride);
51d6a3cf
MN
3205
3206 if(intra){
3207 block->color[0] = p[0];
3208 block->color[1] = p[1];
3209 block->color[2] = p[2];
3210 block->type |= BLOCK_INTRA;
3211 }else{
3212 index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
8c36eaaa 3213 value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12);
51d6a3cf
MN
3214 if(s->me_cache[index] == value)
3215 return 0;
3216 s->me_cache[index]= value;
3217
3218 block->mx= p[0];
3219 block->my= p[1];
3220 block->type &= ~BLOCK_INTRA;
3221 }
3222
1015631b 3223 rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
51d6a3cf
MN
3224
3225//FIXME chroma
3226 if(rd < *best_rd){
3227 *best_rd= rd;
3228 return 1;
3229 }else{
3230 *block= backup;
3231 return 0;
3232 }
3233}
3234
52137f2f 3235/* special case for int[2] args we discard afterward, fixes compilation prob with gcc 2.95 */
fc8c4992 3236static always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
52137f2f 3237 int p[2] = {p0, p1};
fc8c4992 3238 return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
52137f2f
FR
3239}
3240
8c36eaaa 3241static always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
b104969f
LM
3242 const int b_stride= s->b_width << s->block_max_depth;
3243 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
3244 BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
3245 int rd, index, value;
3246
3247 assert(mb_x>=0 && mb_y>=0);
3248 assert(mb_x<b_stride);
3249 assert(((mb_x|mb_y)&1) == 0);
3250
3251 index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
8c36eaaa 3252 value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12);
b104969f
LM
3253 if(s->me_cache[index] == value)
3254 return 0;
3255 s->me_cache[index]= value;
3256
3257 block->mx= p0;
3258 block->my= p1;
8c36eaaa 3259 block->ref= ref;
b104969f
LM
3260 block->type &= ~BLOCK_INTRA;
3261 block[1]= block[b_stride]= block[b_stride+1]= *block;
3262
3263 rd= get_4block_rd(s, mb_x, mb_y, 0);
3264
3265//FIXME chroma
3266 if(rd < *best_rd){
3267 *best_rd= rd;
3268 return 1;
3269 }else{
3270 block[0]= backup[0];
3271 block[1]= backup[1];
3272 block[b_stride]= backup[2];
3273 block[b_stride+1]= backup[3];
3274 return 0;
3275 }
3276}
3277
51d6a3cf
MN
3278static void iterative_me(SnowContext *s){
3279 int pass, mb_x, mb_y;
3280 const int b_width = s->b_width << s->block_max_depth;
3281 const int b_height= s->b_height << s->block_max_depth;
3282 const int b_stride= b_width;
3283 int color[3];
3284
8f8ae495
LM
3285 {
3286 RangeCoder r = s->c;
3287 uint8_t state[sizeof(s->block_state)];
3288 memcpy(state, s->block_state, sizeof(s->block_state));
3289 for(mb_y= 0; mb_y<s->b_height; mb_y++)
3290 for(mb_x= 0; mb_x<s->b_width; mb_x++)
3291 encode_q_branch(s, 0, mb_x, mb_y);
3292 s->c = r;
3293 memcpy(s->block_state, state, sizeof(s->block_state));
3294 }
3295
871371a7 3296 for(pass=0; pass<25; pass++){
51d6a3cf
MN
3297 int change= 0;
3298
3299 for(mb_y= 0; mb_y<b_height; mb_y++){
3300 for(mb_x= 0; mb_x<b_width; mb_x++){
8c36eaaa
LM
3301 int dia_change, i, j, ref;
3302 int best_rd= INT_MAX, ref_rd;
3303 BlockNode backup, ref_b;
51d6a3cf
MN
3304 const int index= mb_x + mb_y * b_stride;
3305 BlockNode *block= &s->block[index];
7f21a9a7
LM
3306 BlockNode *tb = mb_y ? &s->block[index-b_stride ] : NULL;
3307 BlockNode *lb = mb_x ? &s->block[index -1] : NULL;
3308 BlockNode *rb = mb_x+1<b_width ? &s->block[index +1] : NULL;
3309 BlockNode *bb = mb_y+1<b_height ? &s->block[index+b_stride ] : NULL;
3310 BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : NULL;
3311 BlockNode *trb= mb_x+1<b_width && mb_y ? &s->block[index-b_stride+1] : NULL;
3312 BlockNode *blb= mb_x && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
3313 BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
1015631b
LM
3314 const int b_w= (MB_SIZE >> s->block_max_depth);
3315 uint8_t obmc_edged[b_w*2][b_w*2];
51d6a3cf
MN
3316
3317 if(pass && (block->type & BLOCK_OPT))
3318 continue;
3319 block->type |= BLOCK_OPT;
3320
3321 backup= *block;
3322
3323 if(!s->me_cache_generation)
3324 memset(s->me_cache, 0, sizeof(s->me_cache));
3325 s->me_cache_generation += 1<<22;
3326
1015631b
LM
3327 //FIXME precalc
3328 {
3329 int x, y;
3330 memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
3331 if(mb_x==0)
3332 for(y=0; y<b_w*2; y++)
3333 memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
3334 if(mb_x==b_stride-1)
3335 for(y=0; y<b_w*2; y++)
3336 memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
3337 if(mb_y==0){
3338 for(x=0; x<b_w*2; x++)
3339 obmc_edged[0][x] += obmc_edged[b_w-1][x];
3340 for(y=1; y<b_w; y++)
3341 memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
3342 }
3343 if(mb_y==b_height-1){
3344 for(x=0; x<b_w*2; x++)
3345 obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
3346 for(y=b_w; y<b_w*2-1; y++)
3347 memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
3348 }
3349 }
3350
3351 //skip stuff outside the picture
3352 if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1)
3353 {
3354 uint8_t *src= s-> input_picture.data[0];
3355 uint8_t *dst= s->current_picture.data[0];
3356 const int stride= s->current_picture.linesize[0];
3357 const int block_w= MB_SIZE >> s->block_max_depth;
3358 const int sx= block_w*mb_x - block_w/2;
3359 const int sy= block_w*mb_y - block_w/2;
3360 const int w= s->plane[0].width;
3361 const int h= s->plane[0].height;
3362 int y;
3363
3364 for(y=sy; y<0; y++)
3365 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3366 for(y=h; y<sy+block_w*2; y++)
3367 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3368 if(sx<0){
3369 for(y=sy; y<sy+block_w*2; y++)
3370 memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
3371 }
3372 if(sx+block_w*2 > w){
3373 for(y=sy; y<sy+block_w*2; y++)
3374 memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
3375 }
3376 }
3377
3378 // intra(black) = neighbors' contribution to the current block
3379 for(i=0; i<3; i++)
3380 color[i]= get_dc(s, mb_x, mb_y, i);
3381
51d6a3cf 3382 // get previous score (can't be cached due to OBMC)
48d1b9a1
LM
3383 if(pass > 0 && (block->type&BLOCK_INTRA)){
3384 int color0[3]= {block->color[0], block->color[1], block->color[2]};
3385 check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
3386 }else
fc8c4992 3387 check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);
48d1b9a1 3388
8c36eaaa
LM
3389 ref_b= *block;
3390 ref_rd= best_rd;
3391 for(ref=0; ref < s->ref_frames; ref++){
3392 int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
3393 if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
3394 continue;
3395 block->ref= ref;
3396 best_rd= INT_MAX;
3397
3398 check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd);
3399 check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
7f21a9a7 3400 if(tb)
8c36eaaa 3401 check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd);
7f21a9a7 3402 if(lb)
8c36eaaa 3403 check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd);
7f21a9a7 3404 if(rb)
8c36eaaa 3405 check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd);
7f21a9a7 3406 if(bb)
8c36eaaa
LM
3407 check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd);
3408
3409 /* fullpel ME */
3410 //FIXME avoid subpel interpol / round to nearest integer
3411 do{
3412 dia_change=0;
3413 for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
3414 for(j=0; j<i; j++){
3415 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3416 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3417 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3418 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3419 }
51d6a3cf 3420 }
8c36eaaa
LM
3421 }while(dia_change);
3422 /* subpel ME */
3423 do{
3424 static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
3425 dia_change=0;
3426 for(i=0; i<8; i++)
3427 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
3428 }while(dia_change);
3429 //FIXME or try the standard 2 pass qpel or similar
3430
3431 mvr[0][0]= block->mx;
3432 mvr[0][1]= block->my;
3433 if(ref_rd > best_rd){
3434 ref_rd= best_rd;
3435 ref_b= *block;
51d6a3cf 3436 }
8c36eaaa
LM
3437 }
3438 best_rd= ref_rd;
3439 *block= ref_b;
13705b69 3440#if 1
1015631b 3441 check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
51d6a3cf 3442 //FIXME RD style color selection
13705b69 3443#endif
51d6a3cf 3444 if(!same_block(block, &backup)){
7f21a9a7
LM
3445 if(tb ) tb ->type &= ~BLOCK_OPT;
3446 if(lb ) lb ->type &= ~BLOCK_OPT;
3447 if(rb ) rb ->type &= ~BLOCK_OPT;
3448 if(bb ) bb ->type &= ~BLO