Make clean/distclean consistent throughout all the Makefiles.
[libav.git] / libavcodec / snow.c
<
CommitLineData
791e7b83
MN
1/*
2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
5509bffa 16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
791e7b83
MN
17 */
18
19#include "avcodec.h"
20#include "common.h"
21#include "dsputil.h"
28869757
MN
22
23#include "rangecoder.h"
24#define MID_STATE 128
791e7b83
MN
25
26#include "mpegvideo.h"
27
28#undef NDEBUG
29#include <assert.h>
30
31#define MAX_DECOMPOSITIONS 8
32#define MAX_PLANES 4
33#define DWTELEM int
a0a74ad9 34#define QSHIFT 5
c97de57c 35#define QROOT (1<<QSHIFT)
93fbdb5a 36#define LOSSLESS_QLOG -128
034aff03 37#define FRAC_BITS 8
791e7b83
MN
38
39static const int8_t quant3[256]={
40 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
43 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
44 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
45 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
46 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
47 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
48-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
49-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
50-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
51-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
52-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
53-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
54-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
55-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
56};
57static const int8_t quant3b[256]={
58 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
60 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
61 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
62 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
63 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
64 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
65 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
66-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
68-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
69-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
70-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
71-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
72-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
73-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
74};
538a3841
MN
75static const int8_t quant3bA[256]={
76 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
77 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
78 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
79 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
80 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
81 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
82 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
83 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
84 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
85 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
86 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
87 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
88 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
89 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
90 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
91 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
92};
791e7b83
MN
93static const int8_t quant5[256]={
94 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
95 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
96 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
97 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
98 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
99 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
100 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
101 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
102-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
103-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
104-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
105-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
106-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
107-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
108-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
109-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
110};
111static const int8_t quant7[256]={
112 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
113 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
114 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
115 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
116 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
117 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
118 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
119 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
120-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
121-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
122-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
123-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
124-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
125-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
126-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
127-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
128};
129static const int8_t quant9[256]={
130 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
131 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
132 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
133 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
134 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
135 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
136 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
137 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
138-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
139-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
140-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
141-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
142-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
143-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
144-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
145-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
146};
147static const int8_t quant11[256]={
148 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
149 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
150 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
151 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
152 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
153 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
154 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
155 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
156-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
157-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
158-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
159-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
160-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
161-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
162-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
163-4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
164};
165static const int8_t quant13[256]={
166 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
167 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
168 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
169 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
170 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
171 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
172 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
173 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
174-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
175-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
176-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
177-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
178-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
179-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
180-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
181-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
182};
183
715a97f0
MN
184#define LOG2_OBMC_MAX 6
185#define OBMC_MAX (1<<(LOG2_OBMC_MAX))
791e7b83
MN
186#if 0 //64*cubic
187static const uint8_t obmc32[1024]={
188 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
189 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
190 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
191 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
192 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
193 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
194 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
195 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
196 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
197 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
198 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
199 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
200 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
201 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
202 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
203 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
204 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
205 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
206 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
207 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
208 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
209 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
210 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
211 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
212 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
213 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
214 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
215 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
216 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
217 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
218 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
219 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
220//error:0.000022
221};
222static const uint8_t obmc16[256]={
223 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
224 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
225 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
226 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
227 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
228 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
229 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
230 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
231 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
232 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
233 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
234 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
235 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
236 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
237 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
238 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
239//error:0.000033
240};
241#elif 1 // 64*linear
242static const uint8_t obmc32[1024]={
243 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
244 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
245 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
246 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
247 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
248 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
249 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
250 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
251 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
252 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
253 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
254 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
255 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
256 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
257 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
258 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
259 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
260 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
261 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
262 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
263 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
264 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
265 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
266 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
267 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
268 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
269 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
270 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
271 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
272 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
273 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
274 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
275 //error:0.000020
276};
277static const uint8_t obmc16[256]={
278 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
279 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
280 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
281 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
282 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
283 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
284 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
285 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
286 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
287 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
288 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
289 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
290 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
291 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
292 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
293 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
294//error:0.000015
295};
296#else //64*cos
297static const uint8_t obmc32[1024]={
298 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
299 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
300 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
301 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
302 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
303 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
304 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
305 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
306 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
307 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
308 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
309 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
310 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
311 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
312 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
313 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
314 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
315 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
316 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
317 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
318 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
319 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
320 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
321 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
322 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
323 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
324 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
325 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
326 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
327 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
328 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
329 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
330//error:0.000022
331};
332static const uint8_t obmc16[256]={
333 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
334 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
335 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
336 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
337 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
338 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
339 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
340 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
341 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
342 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
343 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
344 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
345 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
346 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
347 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
348 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
349//error:0.000022
350};
351#endif
352
155ec6ed
MN
353//linear *64
354static const uint8_t obmc8[64]={
355 1, 3, 5, 7, 7, 5, 3, 1,
356 3, 9,15,21,21,15, 9, 3,
357 5,15,25,35,35,25,15, 5,
358 7,21,35,49,49,35,21, 7,
359 7,21,35,49,49,35,21, 7,
360 5,15,25,35,35,25,15, 5,
361 3, 9,15,21,21,15, 9, 3,
362 1, 3, 5, 7, 7, 5, 3, 1,
363//error:0.000000
364};
365
366//linear *64
367static const uint8_t obmc4[16]={
368 4,12,12, 4,
36912,36,36,12,
37012,36,36,12,
371 4,12,12, 4,
372//error:0.000000
373};
374
375static const uint8_t *obmc_tab[4]={
376 obmc32, obmc16, obmc8, obmc4
377};
378
379typedef struct BlockNode{
380 int16_t mx;
381 int16_t my;
382 uint8_t color[3];
383 uint8_t type;
384//#define TYPE_SPLIT 1
385#define BLOCK_INTRA 1
51d6a3cf 386#define BLOCK_OPT 2
155ec6ed
MN
387//#define TYPE_NOCOLOR 4
388 uint8_t level; //FIXME merge into type?
389}BlockNode;
390
51d6a3cf
MN
391static const BlockNode null_block= { //FIXME add border maybe
392 .color= {128,128,128},
393 .mx= 0,
394 .my= 0,
395 .type= 0,
396 .level= 0,
397};
398
155ec6ed
MN
399#define LOG2_MB_SIZE 4
400#define MB_SIZE (1<<LOG2_MB_SIZE)
401
a0d1931c
Y
402typedef struct x_and_coeff{
403 int16_t x;
538a3841 404 uint16_t coeff;
a0d1931c
Y
405} x_and_coeff;
406
791e7b83
MN
407typedef struct SubBand{
408 int level;
409 int stride;
410 int width;
411 int height;
412 int qlog; ///< log(qscale)/log[2^(1/6)]
413 DWTELEM *buf;
a0d1931c
Y
414 int buf_x_offset;
415 int buf_y_offset;
416 int stride_line; ///< Stride measured in lines, not pixels.
417 x_and_coeff * x_coeff;
791e7b83
MN
418 struct SubBand *parent;
419 uint8_t state[/*7*2*/ 7 + 512][32];
420}SubBand;
421
422typedef struct Plane{
423 int width;
424 int height;
425 SubBand band[MAX_DECOMPOSITIONS][4];
426}Plane;
427
a0d1931c
Y
428/** Used to minimize the amount of memory used in order to optimize cache performance. **/
429typedef struct {
430 DWTELEM * * line; ///< For use by idwt and predict_slices.
431 DWTELEM * * data_stack; ///< Used for internal purposes.
432 int data_stack_top;
433 int line_count;
434 int line_width;
435 int data_count;
436 DWTELEM * base_buffer; ///< Buffer that this structure is caching.
437} slice_buffer;
438
791e7b83
MN
439typedef struct SnowContext{
440// MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
441
442 AVCodecContext *avctx;
28869757 443 RangeCoder c;
791e7b83 444 DSPContext dsp;
51d6a3cf
MN
445 AVFrame new_picture;
446 AVFrame input_picture; ///< new_picture with the internal linesizes
791e7b83
MN
447 AVFrame current_picture;
448 AVFrame last_picture;
449 AVFrame mconly_picture;
450// uint8_t q_context[16];
451 uint8_t header_state[32];
155ec6ed 452 uint8_t block_state[128 + 32*128];
791e7b83 453 int keyframe;
19aa028d 454 int always_reset;
791e7b83
MN
455 int version;
456 int spatial_decomposition_type;
457 int temporal_decomposition_type;
458 int spatial_decomposition_count;
459 int temporal_decomposition_count;
460 DWTELEM *spatial_dwt_buffer;
791e7b83
MN
461 int colorspace_type;
462 int chroma_h_shift;
463 int chroma_v_shift;
464 int spatial_scalability;
465 int qlog;
155ec6ed
MN
466 int lambda;
467 int lambda2;
791e7b83
MN
468 int mv_scale;
469 int qbias;
470#define QBIAS_SHIFT 3
155ec6ed
MN
471 int b_width;
472 int b_height;
473 int block_max_depth;
791e7b83 474 Plane plane[MAX_PLANES];
155ec6ed 475 BlockNode *block;
51d6a3cf
MN
476#define ME_CACHE_SIZE 1024
477 int me_cache[ME_CACHE_SIZE];
478 int me_cache_generation;
a0d1931c 479 slice_buffer sb;
155ec6ed 480
791e7b83
MN
481 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
482}SnowContext;
483
f9e6ebf7
LM
484typedef struct {
485 DWTELEM *b0;
486 DWTELEM *b1;
487 DWTELEM *b2;
488 DWTELEM *b3;
489 int y;
490} dwt_compose_t;
491
a0d1931c
Y
492#define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
493//#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
494
51d6a3cf
MN
495static void iterative_me(SnowContext *s);
496
a0d1931c
Y
497static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, DWTELEM * base_buffer)
498{
499 int i;
115329f1 500
a0d1931c
Y
501 buf->base_buffer = base_buffer;
502 buf->line_count = line_count;
503 buf->line_width = line_width;
504 buf->data_count = max_allocated_lines;
505 buf->line = (DWTELEM * *) av_mallocz (sizeof(DWTELEM *) * line_count);
506 buf->data_stack = (DWTELEM * *) av_malloc (sizeof(DWTELEM *) * max_allocated_lines);
115329f1 507
a0d1931c
Y
508 for (i = 0; i < max_allocated_lines; i++)
509 {
510 buf->data_stack[i] = (DWTELEM *) av_malloc (sizeof(DWTELEM) * line_width);
511 }
115329f1 512
a0d1931c
Y
513 buf->data_stack_top = max_allocated_lines - 1;
514}
515
516static DWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
517{
a0d1931c
Y
518 int offset;
519 DWTELEM * buffer;
115329f1
DB
520
521// av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line);
522
a0d1931c
Y
523 assert(buf->data_stack_top >= 0);
524// assert(!buf->line[line]);
525 if (buf->line[line])
526 return buf->line[line];
115329f1 527
a0d1931c
Y
528 offset = buf->line_width * line;
529 buffer = buf->data_stack[buf->data_stack_top];
530 buf->data_stack_top--;
531 buf->line[line] = buffer;
115329f1 532
a0d1931c 533// av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
115329f1 534
a0d1931c
Y
535 return buffer;
536}
537
538static void slice_buffer_release(slice_buffer * buf, int line)
539{
a0d1931c
Y
540 int offset;
541 DWTELEM * buffer;
542
543 assert(line >= 0 && line < buf->line_count);
544 assert(buf->line[line]);
545
546 offset = buf->line_width * line;
547 buffer = buf->line[line];
548 buf->data_stack_top++;
549 buf->data_stack[buf->data_stack_top] = buffer;
550 buf->line[line] = NULL;
115329f1 551
a0d1931c
Y
552// av_log(NULL, AV_LOG_DEBUG, "slice_buffer_release: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
553}
554
555static void slice_buffer_flush(slice_buffer * buf)
556{
557 int i;
558 for (i = 0; i < buf->line_count; i++)
559 {
560 if (buf->line[i])
561 {
562// av_log(NULL, AV_LOG_DEBUG, "slice_buffer_flush: line: %d \n", i);
563 slice_buffer_release(buf, i);
564 }
565 }
566}
567
568static void slice_buffer_destroy(slice_buffer * buf)
569{
570 int i;
571 slice_buffer_flush(buf);
115329f1 572
a0d1931c
Y
573 for (i = buf->data_count - 1; i >= 0; i--)
574 {
575 assert(buf->data_stack[i]);
576 av_free(buf->data_stack[i]);
577 }
578 assert(buf->data_stack);
579 av_free(buf->data_stack);
580 assert(buf->line);
581 av_free(buf->line);
582}
583
bb270c08 584#ifdef __sgi
2554db9b 585// Avoid a name clash on SGI IRIX
bb270c08 586#undef qexp
2554db9b 587#endif
034aff03 588#define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
c97de57c 589static uint8_t qexp[QROOT];
791e7b83
MN
590
591static inline int mirror(int v, int m){
13705b69
MN
592 while((unsigned)v > (unsigned)m){
593 v=-v;
594 if(v<0) v+= 2*m;
595 }
596 return v;
791e7b83
MN
597}
598
28869757 599static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
791e7b83
MN
600 int i;
601
602 if(v){
603 const int a= ABS(v);
604 const int e= av_log2(a);
605#if 1
115329f1 606 const int el= FFMIN(e, 10);
28869757 607 put_rac(c, state+0, 0);
791e7b83
MN
608
609 for(i=0; i<el; i++){
28869757 610 put_rac(c, state+1+i, 1); //1..10
791e7b83
MN
611 }
612 for(; i<e; i++){
28869757 613 put_rac(c, state+1+9, 1); //1..10
791e7b83 614 }
28869757 615 put_rac(c, state+1+FFMIN(i,9), 0);
791e7b83
MN
616
617 for(i=e-1; i>=el; i--){
28869757 618 put_rac(c, state+22+9, (a>>i)&1); //22..31
791e7b83
MN
619 }
620 for(; i>=0; i--){
28869757 621 put_rac(c, state+22+i, (a>>i)&1); //22..31
791e7b83
MN
622 }
623
624 if(is_signed)
28869757 625 put_rac(c, state+11 + el, v < 0); //11..21
791e7b83 626#else
115329f1 627
28869757 628 put_rac(c, state+0, 0);
791e7b83
MN
629 if(e<=9){
630 for(i=0; i<e; i++){
28869757 631 put_rac(c, state+1+i, 1); //1..10
791e7b83 632 }
28869757 633 put_rac(c, state+1+i, 0);
791e7b83
MN
634
635 for(i=e-1; i>=0; i--){
28869757 636 put_rac(c, state+22+i, (a>>i)&1); //22..31
791e7b83
MN
637 }
638
639 if(is_signed)
28869757 640 put_rac(c, state+11 + e, v < 0); //11..21
791e7b83
MN
641 }else{
642 for(i=0; i<e; i++){
28869757 643 put_rac(c, state+1+FFMIN(i,9), 1); //1..10
791e7b83 644 }
28869757 645 put_rac(c, state+1+FFMIN(i,9), 0);
791e7b83
MN
646
647 for(i=e-1; i>=0; i--){
28869757 648 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
791e7b83
MN
649 }
650
651 if(is_signed)
28869757 652 put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
791e7b83
MN
653 }
654#endif
655 }else{
28869757 656 put_rac(c, state+0, 1);
791e7b83
MN
657 }
658}
659
28869757
MN
660static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
661 if(get_rac(c, state+0))
791e7b83
MN
662 return 0;
663 else{
7c2425d2
LM
664 int i, e, a;
665 e= 0;
28869757 666 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
7c2425d2 667 e++;
791e7b83 668 }
7c2425d2 669
791e7b83 670 a= 1;
7c2425d2 671 for(i=e-1; i>=0; i--){
28869757 672 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
791e7b83
MN
673 }
674
28869757 675 if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
791e7b83
MN
676 return -a;
677 else
678 return a;
679 }
680}
681
28869757 682static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
4f4e9633 683 int i;
0635cbfc 684 int r= log2>=0 ? 1<<log2 : 1;
4f4e9633
MN
685
686 assert(v>=0);
0635cbfc
MN
687 assert(log2>=-4);
688
689 while(v >= r){
28869757 690 put_rac(c, state+4+log2, 1);
0635cbfc 691 v -= r;
4f4e9633 692 log2++;
0635cbfc 693 if(log2>0) r+=r;
4f4e9633 694 }
28869757 695 put_rac(c, state+4+log2, 0);
115329f1 696
4f4e9633 697 for(i=log2-1; i>=0; i--){
28869757 698 put_rac(c, state+31-i, (v>>i)&1);
4f4e9633 699 }
4f4e9633
MN
700}
701
28869757 702static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
4f4e9633 703 int i;
0635cbfc 704 int r= log2>=0 ? 1<<log2 : 1;
4f4e9633
MN
705 int v=0;
706
0635cbfc
MN
707 assert(log2>=-4);
708
28869757 709 while(get_rac(c, state+4+log2)){
0635cbfc 710 v+= r;
4f4e9633 711 log2++;
0635cbfc 712 if(log2>0) r+=r;
4f4e9633 713 }
115329f1 714
4f4e9633 715 for(i=log2-1; i>=0; i--){
28869757 716 v+= get_rac(c, state+31-i)<<i;
4f4e9633
MN
717 }
718
719 return v;
720}
721
791e7b83
MN
722static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
723 const int mirror_left= !highpass;
724 const int mirror_right= (width&1) ^ highpass;
725 const int w= (width>>1) - 1 + (highpass & width);
726 int i;
727
728#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
729 if(mirror_left){
730 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
731 dst += dst_step;
732 src += src_step;
733 }
115329f1 734
791e7b83
MN
735 for(i=0; i<w; i++){
736 dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse);
737 }
115329f1 738
791e7b83
MN
739 if(mirror_right){
740 dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse);
741 }
742}
743
744static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
745 const int mirror_left= !highpass;
746 const int mirror_right= (width&1) ^ highpass;
747 const int w= (width>>1) - 1 + (highpass & width);
748 int i;
749
750 if(mirror_left){
751 int r= 3*2*ref[0];
752 r += r>>4;
753 r += r>>8;
754 dst[0] = LIFT(src[0], ((r+add)>>shift), inverse);
755 dst += dst_step;
756 src += src_step;
757 }
115329f1 758
791e7b83
MN
759 for(i=0; i<w; i++){
760 int r= 3*(ref[i*ref_step] + ref[(i+1)*ref_step]);
761 r += r>>4;
762 r += r>>8;
763 dst[i*dst_step] = LIFT(src[i*src_step], ((r+add)>>shift), inverse);
764 }
115329f1 765
791e7b83
MN
766 if(mirror_right){
767 int r= 3*2*ref[w*ref_step];
768 r += r>>4;
769 r += r>>8;
770 dst[w*dst_step] = LIFT(src[w*src_step], ((r+add)>>shift), inverse);
771 }
772}
773
f5a71928
MN
774static always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
775 const int mirror_left= !highpass;
776 const int mirror_right= (width&1) ^ highpass;
777 const int w= (width>>1) - 1 + (highpass & width);
778 int i;
779
780 assert(shift == 4);
781#define LIFTS(src, ref, inv) ((inv) ? (src) - (((ref) - 4*(src))>>shift): (16*4*(src) + 4*(ref) + 8 + (5<<27))/(5*16) - (1<<23))
782 if(mirror_left){
783 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
784 dst += dst_step;
785 src += src_step;
786 }
115329f1 787
f5a71928
MN
788 for(i=0; i<w; i++){
789 dst[i*dst_step] = LIFTS(src[i*src_step], mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add, inverse);
790 }
115329f1 791
f5a71928
MN
792 if(mirror_right){
793 dst[w*dst_step] = LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
794 }
795}
796
791e7b83 797
aa25a462 798static void inplace_lift(DWTELEM *dst, int width, int *coeffs, int n, int shift, int start, int inverse){
791e7b83 799 int x, i;
115329f1 800
791e7b83
MN
801 for(x=start; x<width; x+=2){
802 int64_t sum=0;
803
804 for(i=0; i<n; i++){
805 int x2= x + 2*i - n + 1;
806 if (x2< 0) x2= -x2;
807 else if(x2>=width) x2= 2*width-x2-2;
808 sum += coeffs[i]*(int64_t)dst[x2];
809 }
810 if(inverse) dst[x] -= (sum + (1<<shift)/2)>>shift;
811 else dst[x] += (sum + (1<<shift)/2)>>shift;
812 }
813}
814
aa25a462 815static void inplace_liftV(DWTELEM *dst, int width, int height, int stride, int *coeffs, int n, int shift, int start, int inverse){
791e7b83
MN
816 int x, y, i;
817 for(y=start; y<height; y+=2){
818 for(x=0; x<width; x++){
819 int64_t sum=0;
115329f1 820
791e7b83
MN
821 for(i=0; i<n; i++){
822 int y2= y + 2*i - n + 1;
823 if (y2< 0) y2= -y2;
824 else if(y2>=height) y2= 2*height-y2-2;
825 sum += coeffs[i]*(int64_t)dst[x + y2*stride];
826 }
827 if(inverse) dst[x + y*stride] -= (sum + (1<<shift)/2)>>shift;
828 else dst[x + y*stride] += (sum + (1<<shift)/2)>>shift;
829 }
830 }
831}
832
833#define SCALEX 1
834#define LX0 0
835#define LX1 1
836
de890c9b 837#if 0 // more accurate 9/7
791e7b83
MN
838#define N1 2
839#define SHIFT1 14
840#define COEFFS1 (int[]){-25987,-25987}
841#define N2 2
842#define SHIFT2 19
843#define COEFFS2 (int[]){-27777,-27777}
844#define N3 2
845#define SHIFT3 15
846#define COEFFS3 (int[]){28931,28931}
847#define N4 2
848#define SHIFT4 15
849#define COEFFS4 (int[]){14533,14533}
850#elif 1 // 13/7 CRF
851#define N1 4
852#define SHIFT1 4
853#define COEFFS1 (int[]){1,-9,-9,1}
de890c9b 854#define N2 4
791e7b83
MN
855#define SHIFT2 4
856#define COEFFS2 (int[]){-1,5,5,-1}
857#define N3 0
858#define SHIFT3 1
859#define COEFFS3 NULL
860#define N4 0
861#define SHIFT4 1
862#define COEFFS4 NULL
863#elif 1 // 3/5
864#define LX0 1
865#define LX1 0
866#define SCALEX 0.5
867#define N1 2
868#define SHIFT1 1
869#define COEFFS1 (int[]){1,1}
870#define N2 2
871#define SHIFT2 2
872#define COEFFS2 (int[]){-1,-1}
873#define N3 0
874#define SHIFT3 0
875#define COEFFS3 NULL
876#define N4 0
877#define SHIFT4 0
878#define COEFFS4 NULL
115329f1 879#elif 1 // 11/5
791e7b83
MN
880#define N1 0
881#define SHIFT1 1
882#define COEFFS1 NULL
883#define N2 2
884#define SHIFT2 2
885#define COEFFS2 (int[]){-1,-1}
886#define N3 2
887#define SHIFT3 0
888#define COEFFS3 (int[]){-1,-1}
889#define N4 4
890#define SHIFT4 7
891#define COEFFS4 (int[]){-5,29,29,-5}
892#define SCALEX 4
893#elif 1 // 9/7 CDF
894#define N1 2
895#define SHIFT1 7
896#define COEFFS1 (int[]){-203,-203}
897#define N2 2
898#define SHIFT2 12
899#define COEFFS2 (int[]){-217,-217}
900#define N3 2
901#define SHIFT3 7
902#define COEFFS3 (int[]){113,113}
903#define N4 2
904#define SHIFT4 9
905#define COEFFS4 (int[]){227,227}
906#define SCALEX 1
907#elif 1 // 7/5 CDF
908#define N1 0
909#define SHIFT1 1
910#define COEFFS1 NULL
911#define N2 2
912#define SHIFT2 2
913#define COEFFS2 (int[]){-1,-1}
914#define N3 2
915#define SHIFT3 0
916#define COEFFS3 (int[]){-1,-1}
917#define N4 2
918#define SHIFT4 4
919#define COEFFS4 (int[]){3,3}
920#elif 1 // 9/7 MN
921#define N1 4
922#define SHIFT1 4
923#define COEFFS1 (int[]){1,-9,-9,1}
924#define N2 2
925#define SHIFT2 2
926#define COEFFS2 (int[]){1,1}
927#define N3 0
928#define SHIFT3 1
929#define COEFFS3 NULL
930#define N4 0
931#define SHIFT4 1
932#define COEFFS4 NULL
933#else // 13/7 CRF
934#define N1 4
935#define SHIFT1 4
936#define COEFFS1 (int[]){1,-9,-9,1}
937#define N2 4
938#define SHIFT2 4
939#define COEFFS2 (int[]){-1,5,5,-1}
940#define N3 0
941#define SHIFT3 1
942#define COEFFS3 NULL
943#define N4 0
944#define SHIFT4 1
945#define COEFFS4 NULL
946#endif
aa25a462
RFI
947static void horizontal_decomposeX(DWTELEM *b, int width){
948 DWTELEM temp[width];
791e7b83
MN
949 const int width2= width>>1;
950 const int w2= (width+1)>>1;
62ab0b78 951 int x;
791e7b83
MN
952
953 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 0);
954 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 0);
955 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 0);
956 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 0);
115329f1 957
791e7b83
MN
958 for(x=0; x<width2; x++){
959 temp[x ]= b[2*x ];
960 temp[x+w2]= b[2*x + 1];
961 }
962 if(width&1)
963 temp[x ]= b[2*x ];
964 memcpy(b, temp, width*sizeof(int));
965}
966
aa25a462
RFI
967static void horizontal_composeX(DWTELEM *b, int width){
968 DWTELEM temp[width];
791e7b83 969 const int width2= width>>1;
62ab0b78 970 int x;
791e7b83
MN
971 const int w2= (width+1)>>1;
972
973 memcpy(temp, b, width*sizeof(int));
974 for(x=0; x<width2; x++){
975 b[2*x ]= temp[x ];
976 b[2*x + 1]= temp[x+w2];
977 }
978 if(width&1)
979 b[2*x ]= temp[x ];
980
981 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 1);
982 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 1);
983 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 1);
984 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 1);
985}
986
aa25a462 987static void spatial_decomposeX(DWTELEM *buffer, int width, int height, int stride){
791e7b83 988 int x, y;
115329f1 989
791e7b83
MN
990 for(y=0; y<height; y++){
991 for(x=0; x<width; x++){
992 buffer[y*stride + x] *= SCALEX;
993 }
994 }
995
996 for(y=0; y<height; y++){
997 horizontal_decomposeX(buffer + y*stride, width);
998 }
115329f1 999
791e7b83
MN
1000 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 0);
1001 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0);
1002 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0);
115329f1 1003 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0);
791e7b83
MN
1004}
1005
aa25a462 1006static void spatial_composeX(DWTELEM *buffer, int width, int height, int stride){
791e7b83 1007 int x, y;
115329f1 1008
791e7b83
MN
1009 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1);
1010 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1);
1011 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1);
1012 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 1);
1013
1014 for(y=0; y<height; y++){
1015 horizontal_composeX(buffer + y*stride, width);
1016 }
1017
1018 for(y=0; y<height; y++){
1019 for(x=0; x<width; x++){
1020 buffer[y*stride + x] /= SCALEX;
1021 }
1022 }
1023}
1024
aa25a462
RFI
1025static void horizontal_decompose53i(DWTELEM *b, int width){
1026 DWTELEM temp[width];
791e7b83 1027 const int width2= width>>1;
62ab0b78 1028 int x;
791e7b83
MN
1029 const int w2= (width+1)>>1;
1030
1031 for(x=0; x<width2; x++){
1032 temp[x ]= b[2*x ];
1033 temp[x+w2]= b[2*x + 1];
1034 }
1035 if(width&1)
1036 temp[x ]= b[2*x ];
1037#if 0
62ab0b78
AJ
1038 {
1039 int A1,A2,A3,A4;
791e7b83
MN
1040 A2= temp[1 ];
1041 A4= temp[0 ];
1042 A1= temp[0+width2];
1043 A1 -= (A2 + A4)>>1;
1044 A4 += (A1 + 1)>>1;
1045 b[0+width2] = A1;
1046 b[0 ] = A4;
1047 for(x=1; x+1<width2; x+=2){
1048 A3= temp[x+width2];
1049 A4= temp[x+1 ];
1050 A3 -= (A2 + A4)>>1;
1051 A2 += (A1 + A3 + 2)>>2;
1052 b[x+width2] = A3;
1053 b[x ] = A2;
1054
1055 A1= temp[x+1+width2];
1056 A2= temp[x+2 ];
1057 A1 -= (A2 + A4)>>1;
1058 A4 += (A1 + A3 + 2)>>2;
1059 b[x+1+width2] = A1;
1060 b[x+1 ] = A4;
1061 }
1062 A3= temp[width-1];
1063 A3 -= A2;
1064 A2 += (A1 + A3 + 2)>>2;
1065 b[width -1] = A3;
1066 b[width2-1] = A2;
62ab0b78 1067 }
115329f1 1068#else
791e7b83
MN
1069 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
1070 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0);
1071#endif
1072}
1073
aa25a462 1074static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 1075 int i;
115329f1 1076
791e7b83
MN
1077 for(i=0; i<width; i++){
1078 b1[i] -= (b0[i] + b2[i])>>1;
1079 }
1080}
1081
aa25a462 1082static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 1083 int i;
115329f1 1084
791e7b83
MN
1085 for(i=0; i<width; i++){
1086 b1[i] += (b0[i] + b2[i] + 2)>>2;
1087 }
1088}
1089
aa25a462 1090static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
39c61bbb 1091 int y;
791e7b83
MN
1092 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
1093 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride;
115329f1 1094
791e7b83
MN
1095 for(y=-2; y<height; y+=2){
1096 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1097 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1098
1099{START_TIMER
13705b69
MN
1100 if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
1101 if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
791e7b83 1102STOP_TIMER("horizontal_decompose53i")}
115329f1 1103
791e7b83 1104{START_TIMER
13705b69
MN
1105 if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
1106 if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
791e7b83 1107STOP_TIMER("vertical_decompose53i*")}
115329f1 1108
791e7b83
MN
1109 b0=b2;
1110 b1=b3;
1111 }
1112}
1113
f5a71928 1114#define liftS lift
791e7b83
MN
1115#define lift5 lift
1116#if 1
1117#define W_AM 3
1118#define W_AO 0
1119#define W_AS 1
1120
f5a71928 1121#undef liftS
791e7b83
MN
1122#define W_BM 1
1123#define W_BO 8
1124#define W_BS 4
1125
f5a71928
MN
1126#define W_CM 1
1127#define W_CO 0
1128#define W_CS 0
791e7b83 1129
f5a71928
MN
1130#define W_DM 3
1131#define W_DO 4
1132#define W_DS 3
791e7b83
MN
1133#elif 0
1134#define W_AM 55
1135#define W_AO 16
1136#define W_AS 5
1137
1138#define W_BM 3
1139#define W_BO 32
1140#define W_BS 6
1141
1142#define W_CM 127
1143#define W_CO 64
1144#define W_CS 7
1145
1146#define W_DM 7
1147#define W_DO 8
1148#define W_DS 4
1149#elif 0
1150#define W_AM 97
1151#define W_AO 32
1152#define W_AS 6
1153
1154#define W_BM 63
1155#define W_BO 512
1156#define W_BS 10
1157
1158#define W_CM 13
1159#define W_CO 8
1160#define W_CS 4
1161
1162#define W_DM 15
1163#define W_DO 16
1164#define W_DS 5
1165
1166#else
1167
1168#define W_AM 203
1169#define W_AO 64
1170#define W_AS 7
1171
1172#define W_BM 217
1173#define W_BO 2048
1174#define W_BS 12
1175
1176#define W_CM 113
1177#define W_CO 64
1178#define W_CS 7
1179
1180#define W_DM 227
1181#define W_DO 128
1182#define W_DS 9
1183#endif
aa25a462
RFI
1184static void horizontal_decompose97i(DWTELEM *b, int width){
1185 DWTELEM temp[width];
791e7b83
MN
1186 const int w2= (width+1)>>1;
1187
1188 lift (temp+w2, b +1, b , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0);
f5a71928 1189 liftS(temp , b , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0);
791e7b83
MN
1190 lift5(b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
1191 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
1192}
1193
1194
aa25a462 1195static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 1196 int i;
115329f1 1197
791e7b83
MN
1198 for(i=0; i<width; i++){
1199 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1200 }
1201}
1202
aa25a462 1203static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 1204 int i;
115329f1 1205
791e7b83
MN
1206 for(i=0; i<width; i++){
1207#ifdef lift5
1208 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1209#else
1210 int r= 3*(b0[i] + b2[i]);
1211 r+= r>>4;
1212 r+= r>>8;
1213 b1[i] += (r+W_CO)>>W_CS;
1214#endif
1215 }
1216}
1217
aa25a462 1218static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 1219 int i;
115329f1 1220
791e7b83 1221 for(i=0; i<width; i++){
f5a71928 1222#ifdef liftS
791e7b83 1223 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
f5a71928
MN
1224#else
1225 b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + 8*5 + (5<<27)) / (5*16) - (1<<23);
1226#endif
791e7b83
MN
1227 }
1228}
1229
aa25a462 1230static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 1231 int i;
115329f1 1232
791e7b83
MN
1233 for(i=0; i<width; i++){
1234 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1235 }
1236}
1237
aa25a462 1238static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
39c61bbb 1239 int y;
791e7b83
MN
1240 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
1241 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride;
1242 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
1243 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
115329f1 1244
791e7b83
MN
1245 for(y=-4; y<height; y+=2){
1246 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1247 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1248
1249{START_TIMER
13705b69
MN
1250 if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
1251 if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
791e7b83
MN
1252if(width>400){
1253STOP_TIMER("horizontal_decompose97i")
1254}}
115329f1 1255
791e7b83 1256{START_TIMER
13705b69
MN
1257 if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
1258 if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
1259 if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
1260 if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
791e7b83
MN
1261
1262if(width>400){
1263STOP_TIMER("vertical_decompose97i")
1264}}
115329f1 1265
791e7b83
MN
1266 b0=b2;
1267 b1=b3;
1268 b2=b4;
1269 b3=b5;
1270 }
1271}
1272
aa25a462 1273void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
791e7b83 1274 int level;
115329f1 1275
46c281e8
MN
1276 for(level=0; level<decomposition_count; level++){
1277 switch(type){
791e7b83
MN
1278 case 0: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1279 case 1: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1280 case 2: spatial_decomposeX (buffer, width>>level, height>>level, stride<<level); break;
1281 }
1282 }
1283}
1284
aa25a462
RFI
1285static void horizontal_compose53i(DWTELEM *b, int width){
1286 DWTELEM temp[width];
791e7b83
MN
1287 const int width2= width>>1;
1288 const int w2= (width+1)>>1;
62ab0b78 1289 int x;
791e7b83
MN
1290
1291#if 0
62ab0b78 1292 int A1,A2,A3,A4;
791e7b83
MN
1293 A2= temp[1 ];
1294 A4= temp[0 ];
1295 A1= temp[0+width2];
1296 A1 -= (A2 + A4)>>1;
1297 A4 += (A1 + 1)>>1;
1298 b[0+width2] = A1;
1299 b[0 ] = A4;
1300 for(x=1; x+1<width2; x+=2){
1301 A3= temp[x+width2];
1302 A4= temp[x+1 ];
1303 A3 -= (A2 + A4)>>1;
1304 A2 += (A1 + A3 + 2)>>2;
1305 b[x+width2] = A3;
1306 b[x ] = A2;
1307
1308 A1= temp[x+1+width2];
1309 A2= temp[x+2 ];
1310 A1 -= (A2 + A4)>>1;
1311 A4 += (A1 + A3 + 2)>>2;
1312 b[x+1+width2] = A1;
1313 b[x+1 ] = A4;
1314 }
1315 A3= temp[width-1];
1316 A3 -= A2;
1317 A2 += (A1 + A3 + 2)>>2;
1318 b[width -1] = A3;
1319 b[width2-1] = A2;
115329f1 1320#else
791e7b83
MN
1321 lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1);
1322 lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1323#endif
1324 for(x=0; x<width2; x++){
1325 b[2*x ]= temp[x ];
1326 b[2*x + 1]= temp[x+w2];
1327 }
1328 if(width&1)
1329 b[2*x ]= temp[x ];
1330}
1331
aa25a462 1332static void vertical_compose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 1333 int i;
115329f1 1334
791e7b83
MN
1335 for(i=0; i<width; i++){
1336 b1[i] += (b0[i] + b2[i])>>1;
1337 }
1338}
1339
aa25a462 1340static void vertical_compose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 1341 int i;
115329f1 1342
791e7b83
MN
1343 for(i=0; i<width; i++){
1344 b1[i] -= (b0[i] + b2[i] + 2)>>2;
1345 }
1346}
1347
a0d1931c
Y
1348static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1349 cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1350 cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line);
1351 cs->y = -1;
1352}
1353
f9e6ebf7
LM
1354static void spatial_compose53i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1355 cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1356 cs->b1 = buffer + mirror(-1 , height-1)*stride;
1357 cs->y = -1;
1358}
1359
a0d1931c
Y
1360static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1361 int y= cs->y;
115329f1 1362
a0d1931c
Y
1363 DWTELEM *b0= cs->b0;
1364 DWTELEM *b1= cs->b1;
3b6ab26c
MN
1365 DWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
1366 DWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
a0d1931c
Y
1367
1368{START_TIMER
13705b69
MN
1369 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1370 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
a0d1931c
Y
1371STOP_TIMER("vertical_compose53i*")}
1372
1373{START_TIMER
13705b69
MN
1374 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1375 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
a0d1931c
Y
1376STOP_TIMER("horizontal_compose53i")}
1377
1378 cs->b0 = b2;
1379 cs->b1 = b3;
1380 cs->y += 2;
1381}
1382
f9e6ebf7
LM
1383static void spatial_compose53i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1384 int y= cs->y;
1385 DWTELEM *b0= cs->b0;
1386 DWTELEM *b1= cs->b1;
1387 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1388 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
791e7b83
MN
1389
1390{START_TIMER
13705b69
MN
1391 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1392 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
791e7b83
MN
1393STOP_TIMER("vertical_compose53i*")}
1394
1395{START_TIMER
13705b69
MN
1396 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1397 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
791e7b83
MN
1398STOP_TIMER("horizontal_compose53i")}
1399
f9e6ebf7
LM
1400 cs->b0 = b2;
1401 cs->b1 = b3;
1402 cs->y += 2;
1403}
1404
1405static void spatial_compose53i(DWTELEM *buffer, int width, int height, int stride){
1406 dwt_compose_t cs;
1407 spatial_compose53i_init(&cs, buffer, height, stride);
1408 while(cs.y <= height)
1409 spatial_compose53i_dy(&cs, buffer, width, height, stride);
115329f1
DB
1410}
1411
791e7b83 1412
aa25a462
RFI
1413static void horizontal_compose97i(DWTELEM *b, int width){
1414 DWTELEM temp[width];
791e7b83
MN
1415 const int w2= (width+1)>>1;
1416
1417 lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
1418 lift5(temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
f5a71928 1419 liftS(b , temp , temp+w2, 2, 1, 1, width, -W_BM, W_BO, W_BS, 0, 1);
791e7b83
MN
1420 lift (b+1 , temp+w2, b , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1);
1421}
1422
aa25a462 1423static void vertical_compose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 1424 int i;
115329f1 1425
791e7b83
MN
1426 for(i=0; i<width; i++){
1427 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1428 }
1429}
1430
aa25a462 1431static void vertical_compose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 1432 int i;
115329f1 1433
791e7b83
MN
1434 for(i=0; i<width; i++){
1435#ifdef lift5
1436 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1437#else
1438 int r= 3*(b0[i] + b2[i]);
1439 r+= r>>4;
1440 r+= r>>8;
1441 b1[i] -= (r+W_CO)>>W_CS;
1442#endif
1443 }
1444}
1445
aa25a462 1446static void vertical_compose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 1447 int i;
115329f1 1448
791e7b83 1449 for(i=0; i<width; i++){
f5a71928 1450#ifdef liftS
791e7b83 1451 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
f5a71928
MN
1452#else
1453 b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
1454#endif
791e7b83
MN
1455 }
1456}
1457
aa25a462 1458static void vertical_compose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83 1459 int i;
115329f1 1460
791e7b83
MN
1461 for(i=0; i<width; i++){
1462 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1463 }
1464}
1465
565a45ac
MN
1466static void vertical_compose97i(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width){
1467 int i;
115329f1 1468
565a45ac 1469 for(i=0; i<width; i++){
62ab0b78 1470#ifndef lift5
565a45ac 1471 int r;
62ab0b78 1472#endif
565a45ac
MN
1473 b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
1474#ifdef lift5
1475 b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
1476#else
1477 r= 3*(b2[i] + b4[i]);
1478 r+= r>>4;
1479 r+= r>>8;
1480 b3[i] -= (r+W_CO)>>W_CS;
1481#endif
f5a71928 1482#ifdef liftS
565a45ac 1483 b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
f5a71928
MN
1484#else
1485 b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
1486#endif
565a45ac
MN
1487 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1488 }
1489}
1490
a0d1931c
Y
1491static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1492 cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1493 cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line);
1494 cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1495 cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
1496 cs->y = -3;
1497}
1498
f9e6ebf7
LM
1499static void spatial_compose97i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1500 cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1501 cs->b1 = buffer + mirror(-3 , height-1)*stride;
1502 cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1503 cs->b3 = buffer + mirror(-3+2, height-1)*stride;
1504 cs->y = -3;
1505}
791e7b83 1506
a0d1931c
Y
1507static void spatial_compose97i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1508 int y = cs->y;
115329f1 1509
a0d1931c
Y
1510 DWTELEM *b0= cs->b0;
1511 DWTELEM *b1= cs->b1;
1512 DWTELEM *b2= cs->b2;
1513 DWTELEM *b3= cs->b3;
3b6ab26c
MN
1514 DWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
1515 DWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
115329f1 1516
a0d1931c 1517{START_TIMER
565a45ac
MN
1518 if(y>0 && y+4<height){
1519 vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
1520 }else{
13705b69
MN
1521 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1522 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1523 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1524 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
565a45ac 1525 }
a0d1931c
Y
1526if(width>400){
1527STOP_TIMER("vertical_compose97i")}}
a0d1931c
Y
1528
1529{START_TIMER
13705b69
MN
1530 if(y-1<(unsigned)height) horizontal_compose97i(b0, width);
1531 if(y+0<(unsigned)height) horizontal_compose97i(b1, width);
3b6ab26c 1532if(width>400 && y+0<(unsigned)height){
a0d1931c
Y
1533STOP_TIMER("horizontal_compose97i")}}
1534
1535 cs->b0=b2;
1536 cs->b1=b3;
1537 cs->b2=b4;
1538 cs->b3=b5;
1539 cs->y += 2;
1540}
1541
f9e6ebf7
LM
1542static void spatial_compose97i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1543 int y = cs->y;
1544 DWTELEM *b0= cs->b0;
1545 DWTELEM *b1= cs->b1;
1546 DWTELEM *b2= cs->b2;
1547 DWTELEM *b3= cs->b3;
1548 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1549 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
791e7b83 1550
791e7b83 1551{START_TIMER
13705b69
MN
1552 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1553 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1554 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1555 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
791e7b83
MN
1556if(width>400){
1557STOP_TIMER("vertical_compose97i")}}
1558
1559{START_TIMER
13705b69
MN
1560 if(y-1<(unsigned)height) horizontal_compose97i(b0, width);
1561 if(y+0<(unsigned)height) horizontal_compose97i(b1, width);
791e7b83
MN
1562if(width>400 && b0 <= b2){
1563STOP_TIMER("horizontal_compose97i")}}
f9e6ebf7
LM
1564
1565 cs->b0=b2;
1566 cs->b1=b3;
1567 cs->b2=b4;
1568 cs->b3=b5;
1569 cs->y += 2;
1570}
1571
1572static void spatial_compose97i(DWTELEM *buffer, int width, int height, int stride){
1573 dwt_compose_t cs;
1574 spatial_compose97i_init(&cs, buffer, height, stride);
1575 while(cs.y <= height)
1576 spatial_compose97i_dy(&cs, buffer, width, height, stride);
1577}
1578
a0d1931c
Y
1579void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
1580 int level;
1581 for(level=decomposition_count-1; level>=0; level--){
1582 switch(type){
1583 case 0: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1584 case 1: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1585 /* not slicified yet */
1586 case 2: /*spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;*/
1587 av_log(NULL, AV_LOG_ERROR, "spatial_composeX neither buffered nor slicified yet.\n"); break;
1588 }
1589 }
1590}
1591
f9e6ebf7
LM
1592void ff_spatial_idwt_init(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1593 int level;
1594 for(level=decomposition_count-1; level>=0; level--){
1595 switch(type){
1596 case 0: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1597 case 1: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
1598 /* not slicified yet */
1599 case 2: spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;
1600 }
791e7b83
MN
1601 }
1602}
1603
f9e6ebf7
LM
1604void ff_spatial_idwt_slice(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
1605 const int support = type==1 ? 3 : 5;
791e7b83 1606 int level;
f9e6ebf7 1607 if(type==2) return;
791e7b83 1608
46c281e8 1609 for(level=decomposition_count-1; level>=0; level--){
f9e6ebf7
LM
1610 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1611 switch(type){
1612 case 0: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1613 break;
1614 case 1: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1615 break;
1616 case 2: break;
1617 }
791e7b83
MN
1618 }
1619 }
1620}
1621
a0d1931c
Y
1622void ff_spatial_idwt_buffered_slice(dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
1623 const int support = type==1 ? 3 : 5;
1624 int level;
1625 if(type==2) return;
1626
1627 for(level=decomposition_count-1; level>=0; level--){
1628 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1629 switch(type){
1630 case 0: spatial_compose97i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1631 break;
1632 case 1: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1633 break;
1634 case 2: break;
1635 }
1636 }
1637 }
1638}
1639
f9e6ebf7
LM
1640void ff_spatial_idwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1641 if(type==2){
1642 int level;
1643 for(level=decomposition_count-1; level>=0; level--)
1644 spatial_composeX (buffer, width>>level, height>>level, stride<<level);
1645 }else{
1646 dwt_compose_t cs[MAX_DECOMPOSITIONS];
1647 int y;
1648 ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1649 for(y=0; y<height; y+=4)
1650 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
1651 }
1652}
1653
0ecca7a4 1654static int encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
4f4e9633
MN
1655 const int w= b->width;
1656 const int h= b->height;
1657 int x, y;
1658
791e7b83 1659 if(1){
791e7b83 1660 int run=0;
a8d73e56 1661 int runs[w*h];
791e7b83 1662 int run_index=0;
b44985ba 1663 int max_index;
115329f1 1664
791e7b83
MN
1665 for(y=0; y<h; y++){
1666 for(x=0; x<w; x++){
78486403 1667 int v, p=0;
6b2f6646 1668 int /*ll=0, */l=0, lt=0, t=0, rt=0;
a8d73e56 1669 v= src[x + y*stride];
791e7b83
MN
1670
1671 if(y){
a8d73e56 1672 t= src[x + (y-1)*stride];
791e7b83 1673 if(x){
a8d73e56 1674 lt= src[x - 1 + (y-1)*stride];
791e7b83
MN
1675 }
1676 if(x + 1 < w){
a8d73e56 1677 rt= src[x + 1 + (y-1)*stride];
791e7b83
MN
1678 }
1679 }
1680 if(x){
a8d73e56 1681 l= src[x - 1 + y*stride];
6b2f6646
MN
1682 /*if(x > 1){
1683 if(orientation==1) ll= src[y + (x-2)*stride];
1684 else ll= src[x - 2 + y*stride];
791e7b83
MN
1685 }*/
1686 }
78486403 1687 if(parent){
a8d73e56
MN
1688 int px= x>>1;
1689 int py= y>>1;
115329f1 1690 if(px<b->parent->width && py<b->parent->height)
78486403
MN
1691 p= parent[px + py*2*stride];
1692 }
1693 if(!(/*ll|*/l|lt|t|rt|p)){
791e7b83
MN
1694 if(v){
1695 runs[run_index++]= run;
1696 run=0;
1697 }else{
1698 run++;
1699 }
1700 }
1701 }
1702 }
b44985ba 1703 max_index= run_index;
791e7b83
MN
1704 runs[run_index++]= run;
1705 run_index=0;
1706 run= runs[run_index++];
1707
b44985ba
MN
1708 put_symbol2(&s->c, b->state[30], max_index, 0);
1709 if(run_index <= max_index)
1710 put_symbol2(&s->c, b->state[1], run, 3);
115329f1 1711
791e7b83 1712 for(y=0; y<h; y++){
d06c75a8 1713 if(s->c.bytestream_end - s->c.bytestream < w*40){
0ecca7a4
MN
1714 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1715 return -1;
1716 }
791e7b83 1717 for(x=0; x<w; x++){
78486403 1718 int v, p=0;
6b2f6646 1719 int /*ll=0, */l=0, lt=0, t=0, rt=0;
a8d73e56 1720 v= src[x + y*stride];
791e7b83
MN
1721
1722 if(y){
a8d73e56 1723 t= src[x + (y-1)*stride];
791e7b83 1724 if(x){
a8d73e56 1725 lt= src[x - 1 + (y-1)*stride];
791e7b83
MN
1726 }
1727 if(x + 1 < w){
a8d73e56 1728 rt= src[x + 1 + (y-1)*stride];
791e7b83
MN
1729 }
1730 }
1731 if(x){
a8d73e56 1732 l= src[x - 1 + y*stride];
6b2f6646
MN
1733 /*if(x > 1){
1734 if(orientation==1) ll= src[y + (x-2)*stride];
1735 else ll= src[x - 2 + y*stride];
791e7b83
MN
1736 }*/
1737 }
78486403 1738 if(parent){
a8d73e56
MN
1739 int px= x>>1;
1740 int py= y>>1;
115329f1 1741 if(px<b->parent->width && py<b->parent->height)
78486403
MN
1742 p= parent[px + py*2*stride];
1743 }
1744 if(/*ll|*/l|lt|t|rt|p){
1745 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
6b2f6646 1746
28869757 1747 put_rac(&s->c, &b->state[0][context], !!v);
791e7b83
MN
1748 }else{
1749 if(!run){
1750 run= runs[run_index++];
4f4e9633 1751
b44985ba
MN
1752 if(run_index <= max_index)
1753 put_symbol2(&s->c, b->state[1], run, 3);
791e7b83
MN
1754 assert(v);
1755 }else{
1756 run--;
1757 assert(!v);
1758 }
1759 }
1760 if(v){
78486403 1761 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
538a3841
MN
1762 int l2= 2*ABS(l) + (l<0);
1763 int t2= 2*ABS(t) + (t<0);
6b2f6646 1764
0635cbfc 1765 put_symbol2(&s->c, b->state[context + 2], ABS(v)-1, context-4);
538a3841 1766 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
791e7b83
MN
1767 }
1768 }
1769 }
791e7b83 1770 }
0ecca7a4 1771 return 0;
791e7b83
MN
1772}
1773
115329f1 1774static int encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
4f4e9633
MN
1775// encode_subband_qtree(s, b, src, parent, stride, orientation);
1776// encode_subband_z0run(s, b, src, parent, stride, orientation);
0ecca7a4 1777 return encode_subband_c0run(s, b, src, parent, stride, orientation);
4f4e9633
MN
1778// encode_subband_dzr(s, b, src, parent, stride, orientation);
1779}
1780
a0d1931c 1781static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
791e7b83
MN
1782 const int w= b->width;
1783 const int h= b->height;
1784 int x,y;
115329f1 1785
791e7b83 1786 if(1){
b44985ba 1787 int run, runs;
cbb1d2b1
MN
1788 x_and_coeff *xc= b->x_coeff;
1789 x_and_coeff *prev_xc= NULL;
1790 x_and_coeff *prev2_xc= xc;
1791 x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
1792 x_and_coeff *prev_parent_xc= parent_xc;
791e7b83 1793
b44985ba
MN
1794 runs= get_symbol2(&s->c, b->state[30], 0);
1795 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1796 else run= INT_MAX;
1797
791e7b83 1798 for(y=0; y<h; y++){
0cea8a03
MN
1799 int v=0;
1800 int lt=0, t=0, rt=0;
1801
cbb1d2b1
MN
1802 if(y && prev_xc->x == 0){
1803 rt= prev_xc->coeff;
0cea8a03 1804 }
791e7b83 1805 for(x=0; x<w; x++){
0cea8a03
MN
1806 int p=0;
1807 const int l= v;
115329f1 1808
0cea8a03 1809 lt= t; t= rt;
791e7b83 1810
ff765159 1811 if(y){
cbb1d2b1
MN
1812 if(prev_xc->x <= x)
1813 prev_xc++;
1814 if(prev_xc->x == x + 1)
1815 rt= prev_xc->coeff;
ff765159
MN
1816 else
1817 rt=0;
1818 }
cbb1d2b1
MN
1819 if(parent_xc){
1820 if(x>>1 > parent_xc->x){
1821 parent_xc++;
7b49c309 1822 }
cbb1d2b1
MN
1823 if(x>>1 == parent_xc->x){
1824 p= parent_xc->coeff;
ff765159 1825 }
78486403
MN
1826 }
1827 if(/*ll|*/l|lt|t|rt|p){
538a3841 1828 int context= av_log2(/*ABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
6b2f6646 1829
28869757 1830 v=get_rac(&s->c, &b->state[0][context]);
3c096ac7
MN
1831 if(v){
1832 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
1833 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
115329f1 1834
cbb1d2b1
MN
1835 xc->x=x;
1836 (xc++)->coeff= v;
3c096ac7 1837 }
791e7b83
MN
1838 }else{
1839 if(!run){
b44985ba
MN
1840 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1841 else run= INT_MAX;
3c096ac7
MN
1842 v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
1843 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
115329f1 1844
cbb1d2b1
MN
1845 xc->x=x;
1846 (xc++)->coeff= v;
791e7b83 1847 }else{
99cd59e5 1848 int max_run;
791e7b83
MN
1849 run--;
1850 v=0;
3c1adccd 1851
cbb1d2b1 1852 if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
99cd59e5 1853 else max_run= FFMIN(run, w-x-1);
cbb1d2b1
MN
1854 if(parent_xc)
1855 max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
99cd59e5
MN
1856 x+= max_run;
1857 run-= max_run;
791e7b83
MN
1858 }
1859 }
7b49c309 1860 }
cbb1d2b1
MN
1861 (xc++)->x= w+1; //end marker
1862 prev_xc= prev2_xc;
1863 prev2_xc= xc;
115329f1 1864
cbb1d2b1 1865 if(parent_xc){
7b49c309 1866 if(y&1){
cbb1d2b1
MN
1867 while(parent_xc->x != parent->width+1)
1868 parent_xc++;
1869 parent_xc++;
1870 prev_parent_xc= parent_xc;
7b49c309 1871 }else{
cbb1d2b1 1872 parent_xc= prev_parent_xc;
791e7b83
MN
1873 }
1874 }
1875 }
a0d1931c 1876
cbb1d2b1 1877 (xc++)->x= w+1; //end marker
a0d1931c
Y
1878 }
1879}
1880
1881static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1882 const int w= b->width;
62ab0b78 1883 int y;
c97de57c
MN
1884 const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
1885 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
a0d1931c
Y
1886 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1887 int new_index = 0;
115329f1 1888
a0d1931c
Y
1889 START_TIMER
1890
1891 if(b->buf == s->spatial_dwt_buffer || s->qlog == LOSSLESS_QLOG){
1892 qadd= 0;
1893 qmul= 1<<QEXPSHIFT;
1894 }
1895
1896 /* If we are on the second or later slice, restore our index. */
1897 if (start_y != 0)
1898 new_index = save_state[0];
1899
115329f1 1900
a0d1931c
Y
1901 for(y=start_y; y<h; y++){
1902 int x = 0;
1903 int v;
1904 DWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1905 memset(line, 0, b->width*sizeof(DWTELEM));
1906 v = b->x_coeff[new_index].coeff;
1907 x = b->x_coeff[new_index++].x;
1908 while(x < w)
1909 {
538a3841
MN
1910 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1911 register int u= -(v&1);
1912 line[x] = (t^u) - u;
1913
a0d1931c
Y
1914 v = b->x_coeff[new_index].coeff;
1915 x = b->x_coeff[new_index++].x;
1916 }
791e7b83 1917 }
a0d1931c
Y
1918 if(w > 200 && start_y != 0/*level+1 == s->spatial_decomposition_count*/){
1919 STOP_TIMER("decode_subband")
1920 }
115329f1 1921
a0d1931c
Y
1922 /* Save our variables for the next slice. */
1923 save_state[0] = new_index;
115329f1 1924
a0d1931c 1925 return;
791e7b83
MN
1926}
1927
1928static void reset_contexts(SnowContext *s){
1929 int plane_index, level, orientation;
1930
19aa028d 1931 for(plane_index=0; plane_index<3; plane_index++){
791e7b83
MN
1932 for(level=0; level<s->spatial_decomposition_count; level++){
1933 for(orientation=level ? 1:0; orientation<4; orientation++){
28869757 1934 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
791e7b83
MN
1935 }
1936 }
1937 }
28869757
MN
1938 memset(s->header_state, MID_STATE, sizeof(s->header_state));
1939 memset(s->block_state, MID_STATE, sizeof(s->block_state));
155ec6ed
MN
1940}
1941
1942static int alloc_blocks(SnowContext *s){
1943 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1944 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
115329f1 1945
155ec6ed
MN
1946 s->b_width = w;
1947 s->b_height= h;
115329f1 1948
155ec6ed
MN
1949 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
1950 return 0;
1951}
1952
28869757
MN
1953static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1954 uint8_t *bytestream= d->bytestream;
1955 uint8_t *bytestream_start= d->bytestream_start;
155ec6ed 1956 *d= *s;
28869757
MN
1957 d->bytestream= bytestream;
1958 d->bytestream_start= bytestream_start;
155ec6ed
MN
1959}
1960
1961//near copy & paste from dsputil, FIXME
1962static int pix_sum(uint8_t * pix, int line_size, int w)
1963{
1964 int s, i, j;
1965
1966 s = 0;
1967 for (i = 0; i < w; i++) {
1968 for (j = 0; j < w; j++) {
1969 s += pix[0];
1970 pix ++;
1971 }
1972 pix += line_size - w;
1973 }
1974 return s;
1975}
1976
1977//near copy & paste from dsputil, FIXME
1978static int pix_norm1(uint8_t * pix, int line_size, int w)
1979{
1980 int s, i, j;
1981 uint32_t *sq = squareTbl + 256;
1982
1983 s = 0;
1984 for (i = 0; i < w; i++) {
1985 for (j = 0; j < w; j ++) {
1986 s += sq[pix[0]];
1987 pix ++;
1988 }
1989 pix += line_size - w;
1990 }
1991 return s;
1992}
1993
1994static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int type){
1995 const int w= s->b_width << s->block_max_depth;
1996 const int rem_depth= s->block_max_depth - level;
1997 const int index= (x + y*w) << rem_depth;
1998 const int block_w= 1<<rem_depth;
1999 BlockNode block;
2000 int i,j;
115329f1 2001
155ec6ed
MN
2002 block.color[0]= l;
2003 block.color[1]= cb;
2004 block.color[2]= cr;
2005 block.mx= mx;
2006 block.my= my;
2007 block.type= type;
2008 block.level= level;
2009
2010 for(j=0; j<block_w; j++){
2011 for(i=0; i<block_w; i++){
2012 s->block[index + i + j*w]= block;
2013 }
2014 }
2015}
2016
2017static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
2018 const int offset[3]= {
2019 y*c-> stride + x,
2020 ((y*c->uvstride + x)>>1),
2021 ((y*c->uvstride + x)>>1),
2022 };
2023 int i;
2024 for(i=0; i<3; i++){
2025 c->src[0][i]= src [i];
2026 c->ref[0][i]= ref [i] + offset[i];
2027 }
2028 assert(!ref_index);
2029}
2030
2031//FIXME copy&paste
2032#define P_LEFT P[1]
2033#define P_TOP P[2]
2034#define P_TOPRIGHT P[3]
2035#define P_MEDIAN P[4]
2036#define P_MV1 P[9]
2037#define FLAG_QPEL 1 //must be 1
2038
2039static int encode_q_branch(SnowContext *s, int level, int x, int y){
2040 uint8_t p_buffer[1024];
2041 uint8_t i_buffer[1024];
2042 uint8_t p_state[sizeof(s->block_state)];
2043 uint8_t i_state[sizeof(s->block_state)];
28869757
MN
2044 RangeCoder pc, ic;
2045 uint8_t *pbbak= s->c.bytestream;
2046 uint8_t *pbbak_start= s->c.bytestream_start;
155ec6ed
MN
2047 int score, score2, iscore, i_len, p_len, block_s, sum;
2048 const int w= s->b_width << s->block_max_depth;
2049 const int h= s->b_height << s->block_max_depth;
2050 const int rem_depth= s->block_max_depth - level;
2051 const int index= (x + y*w) << rem_depth;
2052 const int block_w= 1<<(LOG2_MB_SIZE - level);
155ec6ed
MN
2053 int trx= (x+1)<<rem_depth;
2054 int try= (y+1)<<rem_depth;
2055 BlockNode *left = x ? &s->block[index-1] : &null_block;
2056 BlockNode *top = y ? &s->block[index-w] : &null_block;
2057 BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
2058 BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
2059 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2060 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2061 int pl = left->color[0];
2062 int pcb= left->color[1];
2063 int pcr= left->color[2];
2064 int pmx= mid_pred(left->mx, top->mx, tr->mx);
2065 int pmy= mid_pred(left->my, top->my, tr->my);
2066 int mx=0, my=0;
51d6a3cf 2067 int l,cr,cb;
155ec6ed
MN
2068 const int stride= s->current_picture.linesize[0];
2069 const int uvstride= s->current_picture.linesize[1];
51d6a3cf
MN
2070 uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w,
2071 s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
2072 s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
155ec6ed
MN
2073 int P[10][2];
2074 int16_t last_mv[3][2];
2075 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
2076 const int shift= 1+qpel;
2077 MotionEstContext *c= &s->m.me;
2078 int mx_context= av_log2(2*ABS(left->mx - top->mx));
2079 int my_context= av_log2(2*ABS(left->my - top->my));
2080 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2081
2082 assert(sizeof(s->block_state) >= 256);
2083 if(s->keyframe){
2084 set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, BLOCK_INTRA);
2085 return 0;
2086 }
2087
155ec6ed
MN
2088// clip predictors / edge ?
2089
2090 P_LEFT[0]= left->mx;
2091 P_LEFT[1]= left->my;
2092 P_TOP [0]= top->mx;
2093 P_TOP [1]= top->my;
2094 P_TOPRIGHT[0]= tr->mx;
2095 P_TOPRIGHT[1]= tr->my;
115329f1 2096
155ec6ed
MN
2097 last_mv[0][0]= s->block[index].mx;
2098 last_mv[0][1]= s->block[index].my;
2099 last_mv[1][0]= right->mx;
2100 last_mv[1][1]= right->my;
2101 last_mv[2][0]= bottom->mx;
2102 last_mv[2][1]= bottom->my;
115329f1 2103
155ec6ed 2104 s->m.mb_stride=2;
115329f1 2105 s->m.mb_x=
155ec6ed
MN
2106 s->m.mb_y= 0;
2107 s->m.me.skip= 0;
2108
2109 init_ref(c, current_data, s->last_picture.data, NULL, block_w*x, block_w*y, 0);
115329f1 2110
155ec6ed
MN
2111 assert(s->m.me. stride == stride);
2112 assert(s->m.me.uvstride == uvstride);
115329f1 2113
155ec6ed
MN
2114 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
2115 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
2116 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
2117 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
115329f1 2118
ff158dc9
MN
2119 c->xmin = - x*block_w - 16+2;
2120 c->ymin = - y*block_w - 16+2;
2121 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
2122 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
155ec6ed
MN
2123
2124 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
115329f1 2125 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
155ec6ed
MN
2126 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
2127 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
2128 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
2129 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
2130 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
2131
2132 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
2133 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
2134
2135 if (!y) {
2136 c->pred_x= P_LEFT[0];
2137 c->pred_y= P_LEFT[1];
2138 } else {
2139 c->pred_x = P_MEDIAN[0];
2140 c->pred_y = P_MEDIAN[1];
2141 }
2142
115329f1 2143 score= ff_epzs_motion_search(&s->m, &mx, &my, P, 0, /*ref_index*/ 0, last_mv,
155ec6ed
MN
2144 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
2145
2146 assert(mx >= c->xmin);
2147 assert(mx <= c->xmax);
2148 assert(my >= c->ymin);
2149 assert(my <= c->ymax);
115329f1 2150
155ec6ed
MN
2151 score= s->m.me.sub_motion_search(&s->m, &mx, &my, score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
2152 score= ff_get_mb_score(&s->m, mx, my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
2153 //FIXME if mb_cmp != SSE then intra cant be compared currently and mb_penalty vs. lambda2
115329f1 2154
155ec6ed
MN
2155 // subpel search
2156 pc= s->c;
28869757
MN
2157 pc.bytestream_start=
2158 pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
155ec6ed
MN
2159 memcpy(p_state, s->block_state, sizeof(s->block_state));
2160
2161 if(level!=s->block_max_depth)
28869757
MN
2162 put_rac(&pc, &p_state[4 + s_context], 1);
2163 put_rac(&pc, &p_state[1 + left->type + top->type], 0);
155ec6ed
MN
2164 put_symbol(&pc, &p_state[128 + 32*mx_context], mx - pmx, 1);
2165 put_symbol(&pc, &p_state[128 + 32*my_context], my - pmy, 1);
28869757
MN
2166 p_len= pc.bytestream - pc.bytestream_start;
2167 score += (s->lambda2*(p_len*8
2168 + (pc.outstanding_count - s->c.outstanding_count)*8
2169 + (-av_log2(pc.range) + av_log2(s->c.range))
2170 ))>>FF_LAMBDA_SHIFT;
155ec6ed
MN
2171
2172 block_s= block_w*block_w;
51d6a3cf 2173 sum = pix_sum(current_data[0], stride, block_w);
155ec6ed 2174 l= (sum + block_s/2)/block_s;
51d6a3cf 2175 iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
115329f1 2176
155ec6ed 2177 block_s= block_w*block_w>>2;
51d6a3cf 2178 sum = pix_sum(current_data[1], uvstride, block_w>>1);
155ec6ed
MN
2179 cb= (sum + block_s/2)/block_s;
2180// iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
51d6a3cf 2181 sum = pix_sum(current_data[2], uvstride, block_w>>1);
155ec6ed
MN
2182 cr= (sum + block_s/2)/block_s;
2183// iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
2184
2185 ic= s->c;
28869757
MN
2186 ic.bytestream_start=
2187 ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
155ec6ed
MN
2188 memcpy(i_state, s->block_state, sizeof(s->block_state));
2189 if(level!=s->block_max_depth)
28869757
MN
2190 put_rac(&ic, &i_state[4 + s_context], 1);
2191 put_rac(&ic, &i_state[1 + left->type + top->type], 1);
155ec6ed
MN
2192 put_symbol(&ic, &i_state[32], l-pl , 1);
2193 put_symbol(&ic, &i_state[64], cb-pcb, 1);
2194 put_symbol(&ic, &i_state[96], cr-pcr, 1);
28869757
MN
2195 i_len= ic.bytestream - ic.bytestream_start;
2196 iscore += (s->lambda2*(i_len*8
2197 + (ic.outstanding_count - s->c.outstanding_count)*8
2198 + (-av_log2(ic.range) + av_log2(s->c.range))
2199 ))>>FF_LAMBDA_SHIFT;
155ec6ed
MN
2200
2201// assert(score==256*256*256*64-1);
2202 assert(iscore < 255*255*256 + s->lambda2*10);
2203 assert(iscore >= 0);
2204 assert(l>=0 && l<=255);
2205 assert(pl>=0 && pl<=255);
2206
2207 if(level==0){
2208 int varc= iscore >> 8;
2209 int vard= score >> 8;
2210 if (vard <= 64 || vard < varc)
2211 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
2212 else
2213 c->scene_change_score+= s->m.qscale;
2214 }
115329f1 2215
155ec6ed 2216 if(level!=s->block_max_depth){
28869757 2217 put_rac(&s->c, &s->block_state[4 + s_context], 0);
155ec6ed
MN
2218 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
2219 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
2220 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
2221 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
2222 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
115329f1 2223
155ec6ed
MN
2224 if(score2 < score && score2 < iscore)
2225 return score2;
2226 }
115329f1 2227
155ec6ed 2228 if(iscore < score){
28869757 2229 memcpy(pbbak, i_buffer, i_len);
155ec6ed 2230 s->c= ic;
28869757
MN
2231 s->c.bytestream_start= pbbak_start;
2232 s->c.bytestream= pbbak + i_len;
155ec6ed
MN
2233 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, BLOCK_INTRA);
2234 memcpy(s->block_state, i_state, sizeof(s->block_state));
2235 return iscore;
2236 }else{
28869757 2237 memcpy(pbbak, p_buffer, p_len);
155ec6ed 2238 s->c= pc;
28869757
MN
2239 s->c.bytestream_start= pbbak_start;
2240 s->c.bytestream= pbbak + p_len;
155ec6ed
MN
2241 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, 0);
2242 memcpy(s->block_state, p_state, sizeof(s->block_state));
2243 return score;
2244 }
2245}
2246
51d6a3cf
MN
2247static always_inline int same_block(BlockNode *a, BlockNode *b){
2248 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
2249 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
2250 }else{
2251 return !((a->mx - b->mx) | (a->my - b->my) | ((a->type ^ b->type)&BLOCK_INTRA));
2252 }
2253}
2254
2255static void encode_q_branch2(SnowContext *s, int level, int x, int y){
2256 const int w= s->b_width << s->block_max_depth;
2257 const int rem_depth= s->block_max_depth - level;
2258 const int index= (x + y*w) << rem_depth;
2259 int trx= (x+1)<<rem_depth;
2260 BlockNode *b= &s->block[index];
2261 BlockNode *left = x ? &s->block[index-1] : &null_block;
2262 BlockNode *top = y ? &s->block[index-w] : &null_block;
2263 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2264 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2265 int pl = left->color[0];
2266 int pcb= left->color[1];
2267 int pcr= left->color[2];
2268 int pmx= mid_pred(left->mx, top->mx, tr->mx);
2269 int pmy= mid_pred(left->my, top->my, tr->my);
2270 int mx_context= av_log2(2*ABS(left->mx - top->mx));
2271 int my_context= av_log2(2*ABS(left->my - top->my));
2272 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2273
2274 if(s->keyframe){
2275 set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, BLOCK_INTRA);
2276 return;
2277 }
2278
2279 if(level!=s->block_max_depth){
2280 if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
d6f41eed
MN
2281 put_rac(&s->c, &s->block_state[4 + s_context], 1);
2282 }else{
51d6a3cf
MN
2283 put_rac(&s->c, &s->block_state[4 + s_context], 0);
2284 encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
2285 encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
2286 encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
2287 encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
2288 return;
51d6a3cf
MN
2289 }
2290 }
2291 if(b->type & BLOCK_INTRA){
2292 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
2293 put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
2294 put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
2295 put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
2296 set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, BLOCK_INTRA);
2297 }else{
2298 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
2299 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
2300 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
2301 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, 0);
2302 }
2303}
2304
155ec6ed
MN
2305static void decode_q_branch(SnowContext *s, int level, int x, int y){
2306 const int w= s->b_width << s->block_max_depth;
155ec6ed
MN
2307 const int rem_depth= s->block_max_depth - level;
2308 const int index= (x + y*w) << rem_depth;
155ec6ed 2309 int trx= (x+1)<<rem_depth;
155ec6ed
MN
2310 BlockNode *left = x ? &s->block[index-1] : &null_block;
2311 BlockNode *top = y ? &s->block[index-w] : &null_block;
2312 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2313 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2314 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
115329f1 2315
155ec6ed
MN
2316 if(s->keyframe){
2317 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, BLOCK_INTRA);
2318 return;
2319 }
2320
28869757 2321 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
155ec6ed
MN
2322 int type;
2323 int l = left->color[0];
2324 int cb= left->color[1];
2325 int cr= left->color[2];
2326 int mx= mid_pred(left->mx, top->mx, tr->mx);
2327 int my= mid_pred(left->my, top->my, tr->my);
2328 int mx_context= av_log2(2*ABS(left->mx - top->mx)) + 0*av_log2(2*ABS(tr->mx - top->mx));
2329 int my_context= av_log2(2*ABS(left->my - top->my)) + 0*av_log2(2*ABS(tr->my - top->my));
115329f1 2330
28869757 2331 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
155ec6ed
MN
2332
2333 if(type){
2334 l += get_symbol(&s->c, &s->block_state[32], 1);
2335 cb+= get_symbol(&s->c, &s->block_state[64], 1);
2336 cr+= get_symbol(&s->c, &s->block_state[96], 1);
2337 }else{
2338 mx+= get_symbol(&s->c, &s->block_state[128 + 32*mx_context], 1);
2339 my+= get_symbol(&s->c, &s->block_state[128 + 32*my_context], 1);
2340 }
2341 set_blocks(s, level, x, y, l, cb, cr, mx, my, type);
2342 }else{
2343 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
2344 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
2345 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
2346 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
2347 }
2348}
2349
2350static void encode_blocks(SnowContext *s){
2351 int x, y;
2352 int w= s->b_width;
2353 int h= s->b_height;
2354
51d6a3cf
MN
2355 if(s->avctx->me_method == ME_ITER && !s->keyframe)
2356 iterative_me(s);
2357
155ec6ed 2358 for(y=0; y<h; y++){
d06c75a8 2359 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
0ecca7a4
MN
2360 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2361 return;
2362 }
155ec6ed 2363 for(x=0; x<w; x++){
51d6a3cf
MN
2364 if(s->avctx->me_method == ME_ITER)
2365 encode_q_branch2(s, 0, x, y);
2366 else
2367 encode_q_branch (s, 0, x, y);
155ec6ed
MN
2368 }
2369 }
2370}
2371
2372static void decode_blocks(SnowContext *s){
2373 int x, y;
2374 int w= s->b_width;
2375 int h= s->b_height;
2376
2377 for(y=0; y<h; y++){
2378 for(x=0; x<w; x++){
2379 decode_q_branch(s, 0, x, y);
2380 }
2381 }
791e7b83
MN
2382}
2383
2384static void mc_block(uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
2385 int x, y;
3924dac4 2386START_TIMER
791e7b83
MN
2387 for(y=0; y < b_h+5; y++){
2388 for(x=0; x < b_w; x++){
3924dac4
MN
2389 int a0= src[x ];
2390 int a1= src[x + 1];
2391 int a2= src[x + 2];
2392 int a3= src[x + 3];
2393 int a4= src[x + 4];
2394 int a5= src[x + 5];
791e7b83
MN
2395// int am= 9*(a1+a2) - (a0+a3);
2396 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2397// int am= 18*(a2+a3) - 2*(a1+a4);
2398// int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2399// int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;
2400
2401// if(b_w==16) am= 8*(a1+a2);
2402
8c2515bb
Y
2403 if(dx<8) am = (32*a2*( 8-dx) + am* dx + 128)>>8;
2404 else am = ( am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
115329f1 2405
8c2515bb
Y
2406 /* FIXME Try increasing tmp buffer to 16 bits and not clipping here. Should give marginally better results. - Robert*/
2407 if(am&(~255)) am= ~(am>>31);
115329f1 2408
8c2515bb 2409 tmp[x] = am;
791e7b83
MN
2410
2411/* if (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) + aL* dx + 32)>>6;
2412 else if(dx< 8) tmp[x + y*stride]= ( aL*( 8-dx) + am*(dx- 4) + 32)>>6;
2413 else if(dx<12) tmp[x + y*stride]= ( am*(12-dx) + aR*(dx- 8) + 32)>>6;
2414 else tmp[x + y*stride]= ( aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/
2415 }
3924dac4
MN
2416 tmp += stride;
2417 src += stride;
791e7b83 2418 }
3924dac4 2419 tmp -= (b_h+5)*stride;
115329f1 2420
791e7b83
MN
2421 for(y=0; y < b_h; y++){
2422 for(x=0; x < b_w; x++){
3924dac4
MN
2423 int a0= tmp[x + 0*stride];
2424 int a1= tmp[x + 1*stride];
2425 int a2= tmp[x + 2*stride];
2426 int a3= tmp[x + 3*stride];
2427 int a4= tmp[x + 4*stride];
2428 int a5= tmp[x + 5*stride];
791e7b83
MN
2429 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2430// int am= 18*(a2+a3) - 2*(a1+a4);
2431/* int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2432 int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/
115329f1 2433
791e7b83
MN
2434// if(b_w==16) am= 8*(a1+a2);
2435
8c2515bb
Y
2436 if(dy<8) am = (32*a2*( 8-dy) + am* dy + 128)>>8;
2437 else am = ( am*(16-dy) + 32*a3*(dy-8) + 128)>>8;
791e7b83 2438
8c2515bb 2439 if(am&(~255)) am= ~(am>>31);
115329f1 2440
8c2515bb 2441 dst[x] = am;
791e7b83
MN
2442/* if (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) + aL* dy + 32)>>6;
2443 else if(dy< 8) tmp[x + y*stride]= ( aL*( 8-dy) + am*(dy- 4) + 32)>>6;
2444 else if(dy<12) tmp[x + y*stride]= ( am*(12-dy) + aR*(dy- 8) + 32)>>6;
2445 else tmp[x + y*stride]= ( aR*(16-dy) + 16*a2*(dy-12) + 32)>>6;*/
2446 }
3924dac4
MN
2447 dst += stride;
2448 tmp += stride;
791e7b83 2449 }
3924dac4 2450STOP_TIMER("mc_block")
791e7b83
MN
2451}
2452
791e7b83 2453#define mca(dx,dy,b_w)\
d92b5807 2454static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, uint8_t *src, int stride, int h){\
791e7b83
MN
2455 uint8_t tmp[stride*(b_w+5)];\
2456 assert(h==b_w);\
2457 mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\
2458}
2459
2460mca( 0, 0,16)
2461mca( 8, 0,16)
2462mca( 0, 8,16)
2463mca( 8, 8,16)
d92b5807
MN
2464mca( 0, 0,8)
2465mca( 8, 0,8)
2466mca( 0, 8,8)
2467mca( 8, 8,8)
791e7b83 2468
ff158dc9 2469static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
51d6a3cf 2470 if(block->type & BLOCK_INTRA){
ff158dc9 2471 int x, y;
2692ceab
MN
2472 const int color = block->color[plane_index];
2473 const int color4= color*0x01010101;
1015631b
LM
2474 if(b_w==32){
2475 for(y=0; y < b_h; y++){
2476 *(uint32_t*)&dst[0 + y*stride]= color4;
2477 *(uint32_t*)&dst[4 + y*stride]= color4;
2478 *(uint32_t*)&dst[8 + y*stride]= color4;
2479 *(uint32_t*)&dst[12+ y*stride]= color4;
2480 *(uint32_t*)&dst[16+ y*stride]= color4;
2481 *(uint32_t*)&dst[20+ y*stride]= color4;
2482 *(uint32_t*)&dst[24+ y*stride]= color4;
2483 *(uint32_t*)&dst[28+ y*stride]= color4;
2484 }
2485 }else if(b_w==16){
2692ceab
MN
2486 for(y=0; y < b_h; y++){
2487 *(uint32_t*)&dst[0 + y*stride]= color4;
2488 *(uint32_t*)&dst[4 + y*stride]= color4;
2489 *(uint32_t*)&dst[8 + y*stride]= color4;
2490 *(uint32_t*)&dst[12+ y*stride]= color4;
2491 }
2492 }else if(b_w==8){
2493 for(y=0; y < b_h; y++){
2494 *(uint32_t*)&dst[0 + y*stride]= color4;
2495 *(uint32_t*)&dst[4 + y*stride]= color4;
2496 }
2497 }else if(b_w==4){
2498 for(y=0; y < b_h; y++){
2499 *(uint32_t*)&dst[0 + y*stride]= color4;
2500 }
2501 }else{
2502 for(y=0; y < b_h; y++){
2503 for(x=0; x < b_w; x++){
2504 dst[x + y*stride]= color;
2505 }
ff158dc9
MN
2506 }
2507 }
2508 }else{
2509 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
2510 int mx= block->mx*scale;
2511 int my= block->my*scale;
ec697587
MN
2512 const int dx= mx&15;
2513 const int dy= my&15;
80e44bc3 2514 const int tab_index= 3 - (b_w>>2) + (b_w>>4);
ff158dc9
MN
2515 sx += (mx>>4) - 2;
2516 sy += (my>>4) - 2;
2517 src += sx + sy*stride;
2518 if( (unsigned)sx >= w - b_w - 4
2519 || (unsigned)sy >= h - b_h - 4){
2520 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+5, b_h+5, sx, sy, w, h);
2521 src= tmp + MB_SIZE;
2522 }
2692ceab
MN
2523 assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
2524 assert(!(b_w&(b_w-1)));
2525 assert(b_w>1 && b_h>1);
1015631b 2526 assert(tab_index>=0 && tab_index<4 || b_w==32);
80e44bc3 2527 if((dx&3) || (dy&3))
ec697587 2528 mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy);
1015631b
LM
2529 else if(b_w==32){
2530 int y;
2531 for(y=0; y<b_h; y+=16){
2532 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 2 + (y+2)*stride,stride);
2533 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 18 + (y+2)*stride,stride);
2534 }
2535 }else if(b_w==b_h)
80e44bc3 2536 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 2 + 2*stride,stride);
2692ceab 2537 else if(b_w==2*b_h){
80e44bc3
MN
2538 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 2 + 2*stride,stride);
2539 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 2 + b_h + 2*stride,stride);
2692ceab
MN
2540 }else{
2541 assert(2*b_w==b_h);
80e44bc3
MN
2542 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 2 + 2*stride ,stride);
2543 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 2 + 2*stride+b_w*stride,stride);
2692ceab 2544 }
ff158dc9
MN
2545 }
2546}
2547
ff158dc9 2548//FIXME name clenup (b_w, block_w, b_width stuff)
a0d1931c
Y
2549static always_inline void add_yblock_buffered(SnowContext *s, slice_buffer * sb, DWTELEM *old_dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){
2550 DWTELEM * dst = NULL;
2551 const int b_width = s->b_width << s->block_max_depth;
2552 const int b_height= s->b_height << s->block_max_depth;
2553 const int b_stride= b_width;
2554 BlockNode *lt= &s->block[b_x + b_y*b_stride];
2555 BlockNode *rt= lt+1;
2556 BlockNode *lb= lt+b_stride;
2557 BlockNode *rb= lb+1;
115329f1 2558 uint8_t *block[4];
cc884a35
MN
2559 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
2560 uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
2561 uint8_t *ptmp;
a0d1931c
Y
2562 int x,y;
2563
2564 if(b_x<0){
2565 lt= rt;
2566 lb= rb;
2567 }else if(b_x + 1 >= b_width){
2568 rt= lt;
2569 rb= lb;
2570 }
2571 if(b_y<0){
2572 lt= lb;
2573 rt= rb;
2574 }else if(b_y + 1 >= b_height){
2575 lb= lt;
2576 rb= rt;
2577 }
115329f1 2578
a0d1931c
Y
2579 if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
2580 obmc -= src_x;
2581 b_w += src_x;
2582 src_x=0;
2583 }else if(src_x + b_w > w){
2584 b_w = w - src_x;
2585 }
2586 if(src_y<0){
2587 obmc -= src_y*obmc_stride;
2588 b_h += src_y;
2589 src_y=0;
2590 }else if(src_y + b_h> h){
2591 b_h = h - src_y;
2592 }
115329f1 2593
a0d1931c
Y
2594 if(b_w<=0 || b_h<=0) return;
2595
cc884a35 2596assert(src_stride > 2*MB_SIZE + 5);
a0d1931c
Y
2597// old_dst += src_x + src_y*dst_stride;
2598 dst8+= src_x + src_y*src_stride;
2599// src += src_x + src_y*src_stride;
2600
cc884a35
MN
2601 ptmp= tmp + 3*tmp_step;
2602 block[0]= ptmp;
2603 ptmp+=tmp_step;
115329f1 2604 pred_block(s, block[0], src, tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
a0d1931c
Y
2605
2606 if(same_block(lt, rt)){
2607 block[1]= block[0];
2608 }else{
cc884a35
MN
2609 block[1]= ptmp;
2610 ptmp+=tmp_step;
a0d1931c
Y
2611 pred_block(s, block[1], src, tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2612 }
115329f1 2613
a0d1931c
Y
2614 if(same_block(lt, lb)){
2615 block[2]= block[0];
2616 }else if(same_block(rt, lb)){
2617 block[2]= block[1];
2618 }else{
cc884a35
MN
2619 block[2]= ptmp;
2620 ptmp+=tmp_step;
a0d1931c
Y
2621 pred_block(s, block[2], src, tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2622 }
2623
2624 if(same_block(lt, rb) ){
2625 block[3]= block[0];
2626 }else if(same_block(rt, rb)){
2627 block[3]= block[1];
2628 }else if(same_block(lb, rb)){
2629 block[3]= block[2];
2630 }else{
cc884a35 2631 block[3]= ptmp;
a0d1931c
Y
2632 pred_block(s, block[3], src, tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2633 }
2634#if 0
2635 for(y=0; y<b_h; y++){
2636 for(x=0; x<b_w; x++){
2637 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2638 if(add) dst[x + y*dst_stride] += v;
2639 else dst[x + y*dst_stride] -= v;
2640 }
2641 }
2642 for(y=0; y<b_h; y++){
2643 uint8_t *obmc2= obmc + (obmc_stride>>1);
2644 for(x=0; x<b_w; x++){
2645 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2646 if(add) dst[x + y*dst_stride] += v;
2647 else dst[x + y*dst_stride] -= v;
2648 }
2649 }
2650 for(y=0; y<b_h; y++){
2651 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2652 for(x=0; x<b_w; x++){
2653 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2654 if(add) dst[x + y*dst_stride] += v;
2655 else dst[x + y*dst_stride] -= v;
2656 }
2657 }
2658 for(y=0; y<b_h; y++){
2659 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2660 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2661 for(x=0; x<b_w; x++){
2662 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2663 if(add) dst[x + y*dst_stride] += v;
2664 else dst[x + y*dst_stride] -= v;
2665 }
2666 }
2667#else
2668{
2669
2670 START_TIMER
115329f1 2671
a0d1931c
Y
2672 for(y=0; y<b_h; y++){
2673 //FIXME ugly missue of obmc_stride
2674 uint8_t *obmc1= obmc + y*obmc_stride;
2675 uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2676 uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2677 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2678 dst = slice_buffer_get_line(sb, src_y + y);
2679 for(x=0; x<b_w; x++){
2680 int v= obmc1[x] * block[3][x + y*src_stride]
2681 +obmc2[x] * block[2][x + y*src_stride]
2682 +obmc3[x] * block[1][x + y*src_stride]
2683 +obmc4[x] * block[0][x + y*src_stride];
2684
2685 v <<= 8 - LOG2_OBMC_MAX;
2686 if(FRAC_BITS != 8){
2687 v += 1<<(7 - FRAC_BITS);
2688 v >>= 8 - FRAC_BITS;
2689 }
2690 if(add){
2691// v += old_dst[x + y*dst_stride];
2692 v += dst[x + src_x];
2693 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2694 if(v&(~255)) v= ~(v>>31);
2695 dst8[x + y*src_stride] = v;
2696 }else{
2697// old_dst[x + y*dst_stride] -= v;
2698 dst[x + src_x] -= v;
2699 }
2700 }
2701 }
2702 STOP_TIMER("Inner add y block")
2703}
2704#endif
2705}
2706
2707//FIXME name clenup (b_w, block_w, b_width stuff)
1015631b 2708static always_inline void add_yblock(SnowContext *s, DWTELEM *dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
ff158dc9
MN
2709 const int b_width = s->b_width << s->block_max_depth;
2710 const int b_height= s->b_height << s->block_max_depth;
2711 const int b_stride= b_width;
2712 BlockNode *lt= &s->block[b_x + b_y*b_stride];
2713 BlockNode *rt= lt+1;
2714 BlockNode *lb= lt+b_stride;
2715 BlockNode *rb= lb+1;
115329f1 2716 uint8_t *block[4];
cc884a35
MN
2717 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
2718 uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
2719 uint8_t *ptmp;
791e7b83
MN
2720 int x,y;
2721
ff158dc9
MN
2722 if(b_x<0){
2723 lt= rt;
2724 lb= rb;
2725 }else if(b_x + 1 >= b_width){
2726 rt= lt;
2727 rb= lb;
791e7b83 2728 }
ff158dc9
MN
2729 if(b_y<0){
2730 lt= lb;
2731 rt= rb;
2732 }else if(b_y + 1 >= b_height){
2733 lb= lt;
2734 rb= rt;
2735 }
115329f1 2736
ff158dc9
MN
2737 if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
2738 obmc -= src_x;
2739 b_w += src_x;
1015631b
LM
2740 if(!offset_dst)
2741 dst -= src_x;
ff158dc9
MN
2742 src_x=0;
2743 }else if(src_x + b_w > w){
2744 b_w = w - src_x;
2745 }
2746 if(src_y<0){
2747 obmc -= src_y*obmc_stride;
2748 b_h += src_y;
1015631b
LM
2749 if(!offset_dst)
2750 dst -= src_y*dst_stride;
ff158dc9
MN
2751 src_y=0;
2752 }else if(src_y + b_h> h){
2753 b_h = h - src_y;
791e7b83 2754 }
115329f1 2755
ff158dc9 2756 if(b_w<=0 || b_h<=0) return;
155ec6ed 2757
cc884a35 2758assert(src_stride > 2*MB_SIZE + 5);
1015631b
LM
2759 if(offset_dst)
2760 dst += src_x + src_y*dst_stride;
715a97f0 2761 dst8+= src_x + src_y*src_stride;
ff158dc9
MN
2762// src += src_x + src_y*src_stride;
2763
cc884a35
MN
2764 ptmp= tmp + 3*tmp_step;
2765 block[0]= ptmp;
2766 ptmp+=tmp_step;
115329f1 2767 pred_block(s, block[0], src, tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
ff158dc9
MN
2768
2769 if(same_block(lt, rt)){
2770 block[1]= block[0];
791e7b83 2771 }else{
cc884a35
MN
2772 block[1]= ptmp;
2773 ptmp+=tmp_step;
ff158dc9
MN
2774 pred_block(s, block[1], src, tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2775 }
115329f1 2776
ff158dc9
MN
2777 if(same_block(lt, lb)){
2778 block[2]= block[0];
2779 }else if(same_block(rt, lb)){
2780 block[2]= block[1];
2781 }else{
cc884a35
MN
2782 block[2]= ptmp;
2783 ptmp+=tmp_step;
ff158dc9
MN
2784 pred_block(s, block[2], src, tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2785 }
791e7b83 2786
ff158dc9
MN
2787 if(same_block(lt, rb) ){
2788 block[3]= block[0];
2789 }else if(same_block(rt, rb)){
2790 block[3]= block[1];
2791 }else if(same_block(lb, rb)){
2792 block[3]= block[2];
2793 }else{
cc884a35 2794 block[3]= ptmp;
ff158dc9
MN
2795 pred_block(s, block[3], src, tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2796 }
2797#if 0
2798 for(y=0; y<b_h; y++){
2799 for(x=0; x<b_w; x++){
2800 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2801 if(add) dst[x + y*dst_stride] += v;
2802 else dst[x + y*dst_stride] -= v;
2803 }
2804 }
2805 for(y=0; y<b_h; y++){
2806 uint8_t *obmc2= obmc + (obmc_stride>>1);
2807 for(x=0; x<b_w; x++){
2808 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2809 if(add) dst[x + y*dst_stride] += v;
2810 else dst[x + y*dst_stride] -= v;
2811 }
2812 }
2813 for(y=0; y<b_h; y++){
2814 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2815 for(x=0; x<b_w; x++){
2816 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2817 if(add) dst[x + y*dst_stride] += v;
2818 else dst[x + y*dst_stride] -= v;
2819 }
2820 }
2821 for(y=0; y<b_h; y++){
2822 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2823 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2824 for(x=0; x<b_w; x++){
2825 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2826 if(add) dst[x + y*dst_stride] += v;
2827 else dst[x + y*dst_stride] -= v;
2828 }
2829 }
2830#else
2831 for(y=0; y<b_h; y++){
2832 //FIXME ugly missue of obmc_stride
2833 uint8_t *obmc1= obmc + y*obmc_stride;
2834 uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2835 uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2836 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2837 for(x=0; x<b_w; x++){
2838 int v= obmc1[x] * block[3][x + y*src_stride]
2839 +obmc2[x] * block[2][x + y*src_stride]
2840 +obmc3[x] * block[1][x + y*src_stride]
2841 +obmc4[x] * block[0][x + y*src_stride];
115329f1 2842
715a97f0 2843 v <<= 8 - LOG2_OBMC_MAX;
034aff03
MN
2844 if(FRAC_BITS != 8){
2845 v += 1<<(7 - FRAC_BITS);
2846 v >>= 8 - FRAC_BITS;
2847 }
715a97f0
MN
2848 if(add){
2849 v += dst[x + y*dst_stride];
2850 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2851 if(v&(~255)) v= ~(v>>31);
2852 dst8[x + y*src_stride] = v;
2853 }else{
2854 dst[x + y*dst_stride] -= v;
2855 }
791e7b83
MN
2856 }
2857 }
ff158dc9 2858#endif
791e7b83
MN
2859}
2860
a0d1931c
Y
2861static always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, DWTELEM * old_buffer, int plane_index, int add, int mb_y){
2862 Plane *p= &s->plane[plane_index];
2863 const int mb_w= s->b_width << s->block_max_depth;
2864 const int mb_h= s->b_height << s->block_max_depth;
2865 int x, y, mb_x;
2866 int block_size = MB_SIZE >> s->block_max_depth;
2867 int block_w = plane_index ? block_size/2 : block_size;
2868 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2869 int obmc_stride= plane_index ? block_size : 2*block_size;
2870 int ref_stride= s->current_picture.linesize[plane_index];
2871 uint8_t *ref = s->last_picture.data[plane_index];
2872 uint8_t *dst8= s->current_picture.data[plane_index];
2873 int w= p->width;
2874 int h= p->height;
2875 START_TIMER
115329f1 2876
a0d1931c
Y
2877 if(s->keyframe || (s->avctx->debug&512)){
2878 if(mb_y==mb_h)
2879 return;
2880
2881 if(add){
86e59cc0 2882 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
a0d1931c
Y
2883 {
2884// DWTELEM * line = slice_buffer_get_line(sb, y);
2885 DWTELEM * line = sb->line[y];
2886 for(x=0; x<w; x++)
2887 {
2888// int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2889 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2890 v >>= FRAC_BITS;
2891 if(v&(~255)) v= ~(v>>31);
2892 dst8[x + y*ref_stride]= v;
2893 }
2894 }
2895 }else{
86e59cc0 2896 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
a0d1931c
Y
2897 {
2898// DWTELEM * line = slice_buffer_get_line(sb, y);
2899 DWTELEM * line = sb->line[y];
2900 for(x=0; x<w; x++)
2901 {
2902 line[x] -= 128 << FRAC_BITS;
2903// buf[x + y*w]-= 128<<FRAC_BITS;
2904 }
2905 }
2906 }
2907
2908 return;
2909 }
115329f1 2910
a0d1931c
Y
2911 for(mb_x=0; mb_x<=mb_w; mb_x++){
2912 START_TIMER
2913
115329f1 2914 add_yblock_buffered(s, sb, old_buffer, dst8, ref, obmc,
a0d1931c
Y
2915 block_w*mb_x - block_w/2,
2916 block_w*mb_y - block_w/2,
2917 block_w, block_w,
2918 w, h,
2919 w, ref_stride, obmc_stride,
2920 mb_x - 1, mb_y - 1,
2921 add, plane_index);
115329f1 2922
a0d1931c
Y
2923 STOP_TIMER("add_yblock")
2924 }
115329f1 2925
a0d1931c
Y
2926 STOP_TIMER("predict_slice")
2927}
2928
f9e6ebf7 2929static always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_index, int add, int mb_y){
791e7b83 2930 Plane *p= &s->plane[plane_index];
155ec6ed
MN
2931 const int mb_w= s->b_width << s->block_max_depth;
2932 const int mb_h= s->b_height << s->block_max_depth;
f9e6ebf7 2933 int x, y, mb_x;
155ec6ed
MN
2934 int block_size = MB_SIZE >> s->block_max_depth;
2935 int block_w = plane_index ? block_size/2 : block_size;
ff158dc9 2936 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
51d6a3cf 2937 const int obmc_stride= plane_index ? block_size : 2*block_size;
715a97f0 2938 int ref_stride= s->current_picture.linesize[plane_index];
791e7b83 2939 uint8_t *ref = s->last_picture.data[plane_index];
715a97f0 2940 uint8_t *dst8= s->current_picture.data[plane_index];
791e7b83
MN
2941 int w= p->width;
2942 int h= p->height;
fff6d4ea 2943 START_TIMER
115329f1 2944
ff158dc9 2945 if(s->keyframe || (s->avctx->debug&512)){
f9e6ebf7
LM
2946 if(mb_y==mb_h)
2947 return;
2948
715a97f0 2949 if(add){
86e59cc0 2950 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
715a97f0
MN
2951 for(x=0; x<w; x++){
2952 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2953 v >>= FRAC_BITS;
2954 if(v&(~255)) v= ~(v>>31);
2955 dst8[x + y*ref_stride]= v;
2956 }
2957 }
2958 }else{
86e59cc0 2959 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
715a97f0
MN
2960 for(x=0; x<w; x++){
2961 buf[x + y*w]-= 128<<FRAC_BITS;
2962 }
ff158dc9 2963 }
791e7b83 2964 }
ff158dc9
MN
2965
2966 return;
791e7b83 2967 }
115329f1 2968
ff158dc9 2969 for(mb_x=0; mb_x<=mb_w; mb_x++){
fff6d4ea 2970 START_TIMER
ff158dc9 2971
115329f1 2972 add_yblock(s, buf, dst8, ref, obmc,
ff158dc9 2973 block_w*mb_x - block_w/2,
791e7b83 2974 block_w*mb_y - block_w/2,
ff158dc9 2975 block_w, block_w,
791e7b83 2976 w, h,
ff158dc9
MN
2977 w, ref_stride, obmc_stride,
2978 mb_x - 1, mb_y - 1,
1015631b 2979 add, 1, plane_index);
115329f1 2980
ff158dc9 2981 STOP_TIMER("add_yblock")
791e7b83 2982 }
115329f1 2983
f9e6ebf7
LM
2984 STOP_TIMER("predict_slice")
2985}
2986
2987static always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){
2988 const int mb_h= s->b_height << s->block_max_depth;
2989 int mb_y;
2990 for(mb_y=0; mb_y<=mb_h; mb_y++)
2991 predict_slice(s, buf, plane_index, add, mb_y);
791e7b83
MN
2992}
2993
51d6a3cf
MN
2994static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
2995 int i, x2, y2;
2996 Plane *p= &s->plane[plane_index];
2997 const int block_size = MB_SIZE >> s->block_max_depth;
2998 const int block_w = plane_index ? block_size/2 : block_size;
2999 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
3000 const int obmc_stride= plane_index ? block_size : 2*block_size;
3001 const int ref_stride= s->current_picture.linesize[plane_index];
3002 uint8_t *ref= s-> last_picture.data[plane_index];
51d6a3cf 3003 uint8_t *src= s-> input_picture.data[plane_index];
1015631b 3004 DWTELEM *dst= (DWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
51d6a3cf
MN
3005 const int b_stride = s->b_width << s->block_max_depth;
3006 const int w= p->width;
3007 const int h= p->height;
3008 int index= mb_x + mb_y*b_stride;
3009 BlockNode *b= &s->block[index];
3010 BlockNode backup= *b;
3011 int ab=0;
3012 int aa=0;
3013
3014 b->type|= BLOCK_INTRA;
3015 b->color[plane_index]= 0;
1015631b 3016 memset(dst, 0, obmc_stride*obmc_stride*sizeof(DWTELEM));
51d6a3cf
MN
3017
3018 for(i=0; i<4; i++){
3019 int mb_x2= mb_x + (i &1) - 1;
3020 int mb_y2= mb_y + (i>>1) - 1;
3021 int x= block_w*mb_x2 + block_w/2;
3022 int y= block_w*mb_y2 + block_w/2;
3023
1015631b
LM
3024 add_yblock(s, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, ref, obmc,
3025 x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
51d6a3cf
MN
3026
3027 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
3028 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
3029 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
3030 int obmc_v= obmc[index];
1015631b 3031 int d;
51d6a3cf
MN
3032 if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
3033 if(x<0) obmc_v += obmc[index + block_w];
3034 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
3035 if(x+block_w>w) obmc_v += obmc[index - block_w];
3036 //FIXME precalc this or simplify it somehow else
3037
1015631b
LM
3038 d = -dst[index] + (1<<(FRAC_BITS-1));
3039 dst[index] = d;
3040 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
51d6a3cf
MN
3041 aa += obmc_v * obmc_v; //FIXME precalclate this
3042 }
3043 }
3044 }
3045 *b= backup;
3046
3047 return clip(((ab<<6) + aa/2)/aa, 0, 255); //FIXME we shouldnt need cliping
3048}
3049
1015631b 3050static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
51d6a3cf
MN
3051 Plane *p= &s->plane[plane_index];
3052 const int block_size = MB_SIZE >> s->block_max_depth;
3053 const int block_w = plane_index ? block_size/2 : block_size;
3054 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
3055 const int obmc_stride= plane_index ? block_size : 2*block_size;
3056 const int ref_stride= s->current_picture.linesize[plane_index];
3057 uint8_t *ref= s-> last_picture.data[plane_index];
3058 uint8_t *dst= s->current_picture.data[plane_index];
1015631b
LM
3059 uint8_t *src= s-> input_picture.data[plane_index];
3060 DWTELEM *pred= (DWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
3061 uint8_t cur[ref_stride*2*MB_SIZE]; //FIXME alignment
3062 uint8_t tmp[ref_stride*(2*MB_SIZE+5)];
51d6a3cf
MN
3063 const int b_stride = s->b_width << s->block_max_depth;
3064 const int b_height = s->b_height<< s->block_max_depth;
3065 const int w= p->width;
3066 const int h= p->height;
1015631b 3067 int distortion;
51d6a3cf
MN
3068 int rate= 0;
3069 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
1015631b
LM
3070 int sx= block_w*mb_x - block_w/2;
3071 int sy= block_w*mb_y - block_w/2;
3072 const int x0= FFMAX(0,-sx);
3073 const int y0= FFMAX(0,-sy);
3074 const int x1= FFMIN(block_w*2, w-sx);
3075 const int y1= FFMIN(block_w*2, h-sy);
3076 int i,x,y;
3077
3078 pred_block(s, cur, ref, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
3079
3080 for(y=y0; y<y1; y++){
3081 const uint8_t *obmc1= obmc_edged + y*obmc_stride;
3082 const DWTELEM *pred1 = pred + y*obmc_stride;
3083 uint8_t *cur1 = cur + y*ref_stride;
3084 uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
3085 for(x=x0; x<x1; x++){
3086 int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
3087 v = (v + pred1[x]) >> FRAC_BITS;
3088 if(v&(~255)) v= ~(v>>31);
3089 dst1[x] = v;
51d6a3cf 3090 }
1015631b 3091 }
51d6a3cf 3092
1015631b
LM
3093 //FIXME sad/ssd can be broken up, but wavelet cmp should be one 32x32 block
3094 if(block_w==16){
3095 distortion = 0;
3096 for(i=0; i<4; i++){
3097 int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
3098 distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
3099 }
3100 }else{
3101 assert(block_w==8);
3102 distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
51d6a3cf
MN
3103 }
3104
3105 if(plane_index==0){
3106 for(i=0; i<4; i++){
3107/* ..RRr
3108 * .RXx.
3109 * rxx..
3110 */
3111 int x= mb_x + (i&1) - (i>>1);
3112 int y= mb_y + (i>>1);
3113 int index= x + y*b_stride;
3114 BlockNode *b = &s->block[index];
3115 BlockNode *left = x ? &s->block[index-1] : &null_block;
3116 BlockNode *top = y ? &s->block[index-b_stride] : &null_block;
3117 BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left;
3118 BlockNode *tr = y && x+1<b_stride ? &s->block[index-b_stride+1] : tl;
c8a596d2 3119 int dmx, dmy;
51d6a3cf
MN
3120// int mx_context= av_log2(2*ABS(left->mx - top->mx));
3121// int my_context= av_log2(2*ABS(left->my - top->my));
3122
3123 if(x<0 || x>=b_stride || y>=b_height)
3124 continue;
c8a596d2
MN
3125 dmx= b->mx - mid_pred(left->mx, top->mx, tr->mx);
3126 dmy= b->my - mid_pred(left->my, top->my, tr->my);
51d6a3cf
MN
3127/*
31281 0 0
312901X 1-2 1
3130001XX 3-6 2-3
31310001XXX 7-14 4-7
313200001XXXX 15-30 8-15
3133*/
3134//FIXME try accurate rate
3135//FIXME intra and inter predictors if surrounding blocks arent the same type
3136 if(b->type & BLOCK_INTRA){
3137 rate += 3+2*( av_log2(2*ABS(left->color[0] - b->color[0]))
3138 + av_log2(2*ABS(left->color[1] - b->color[1]))
3139 + av_log2(2*ABS(left->color[2] - b->color[2])));
3140 }else
3141 rate += 2*(1 + av_log2(2*ABS(dmx))
3142 + av_log2(2*ABS(dmy))); //FIXME kill the 2* can be merged in lambda
3143 }
3144 }
3145
3146 return distortion + rate*penalty_factor;
3147}
3148
1015631b 3149static always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
51d6a3cf
MN
3150 const int b_stride= s->b_width << s->block_max_depth;
3151 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
3152 BlockNode backup= *block;
3153 int rd, index, value;
3154
3155 assert(mb_x>=0 && mb_y>=0);
735f9f34 3156 assert(mb_x<b_stride);
51d6a3cf
MN
3157
3158 if(intra){
3159 block->color[0] = p[0];
3160 block->color[1] = p[1];
3161 block->color[2] = p[2];
3162 block->type |= BLOCK_INTRA;
3163 }else{
3164 index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
3165 value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6);
3166 if(s->me_cache[index] == value)
3167 return 0;
3168 s->me_cache[index]= value;
3169
3170 block->mx= p[0];
3171 block->my= p[1];
3172 block->type &= ~BLOCK_INTRA;
3173 }
3174
1015631b 3175 rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
51d6a3cf
MN
3176
3177//FIXME chroma
3178 if(rd < *best_rd){
3179 *best_rd= rd;
3180 return 1;
3181 }else{
3182 *block= backup;
3183 return 0;
3184 }
3185}
3186
52137f2f 3187/* special case for int[2] args we discard afterward, fixes compilation prob with gcc 2.95 */
1015631b 3188static always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int intra, const uint8_t *obmc_edged, int *best_rd){
52137f2f 3189 int p[2] = {p0, p1};
1015631b 3190 return check_block(s, mb_x, mb_y, p, intra, obmc_edged, best_rd);
52137f2f
FR
3191}
3192
51d6a3cf
MN
3193static void iterative_me(SnowContext *s){
3194 int pass, mb_x, mb_y;
3195 const int b_width = s->b_width << s->block_max_depth;
3196 const int b_height= s->b_height << s->block_max_depth;
3197 const int b_stride= b_width;
3198 int color[3];
3199
3200 for(pass=0; pass<50; pass++){
3201 int change= 0;
3202
3203 for(mb_y= 0; mb_y<b_height; mb_y++){
3204 for(mb_x= 0; mb_x<b_width; mb_x++){
3205 int dia_change, i, j;
3206 int best_rd= INT_MAX;
3207 BlockNode backup;
3208 const int index= mb_x + mb_y * b_stride;
3209 BlockNode *block= &s->block[index];
c8a596d2
MN
3210 BlockNode *tb = mb_y ? &s->block[index-b_stride ] : &null_block;
3211 BlockNode *lb = mb_x ? &s->block[index -1] : &null_block;
3212 BlockNode *rb = mb_x+1<b_width ? &s->block[index +1] : &null_block;
3213 BlockNode *bb = mb_y+1<b_height ? &s->block[index+b_stride ] : &null_block;
3214 BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : &null_block;
3215 BlockNode *trb= mb_x+1<b_width && mb_y ? &s->block[index-b_stride+1] : &null_block;
3216 BlockNode *blb= mb_x && mb_y+1<b_height ? &s->block[index+b_stride-1] : &null_block;
3217 BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : &null_block;
1015631b
LM
3218 const int b_w= (MB_SIZE >> s->block_max_depth);
3219 uint8_t obmc_edged[b_w*2][b_w*2];
51d6a3cf
MN
3220
3221 if(pass && (block->type & BLOCK_OPT))
3222 continue;
3223 block->type |= BLOCK_OPT;
3224
3225 backup= *block;
3226
3227 if(!s->me_cache_generation)
3228 memset(s->me_cache, 0, sizeof(s->me_cache));
3229 s->me_cache_generation += 1<<22;
3230
1015631b
LM
3231 //FIXME precalc
3232 {
3233 int x, y;
3234 memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
3235 if(mb_x==0)
3236 for(y=0; y<b_w*2; y++)
3237 memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
3238 if(mb_x==b_stride-1)
3239 for(y=0; y<b_w*2; y++)
3240 memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
3241 if(mb_y==0){
3242 for(x=0; x<b_w*2; x++)
3243 obmc_edged[0][x] += obmc_edged[b_w-1][x];
3244 for(y=1; y<b_w; y++)
3245 memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
3246 }
3247 if(mb_y==b_height-1){
3248 for(x=0; x<b_w*2; x++)
3249 obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
3250 for(y=b_w; y<b_w*2-1; y++)
3251 memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
3252 }
3253 }
3254
3255 //skip stuff outside the picture
3256 if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1)
3257 {
3258 uint8_t *src= s-> input_picture.data[0];
3259 uint8_t *dst= s->current_picture.data[0];
3260 const int stride= s->current_picture.linesize[0];
3261 const int block_w= MB_SIZE >> s->block_max_depth;
3262 const int sx= block_w*mb_x - block_w/2;
3263 const int sy= block_w*mb_y - block_w/2;
3264 const int w= s->plane[0].width;
3265 const int h= s->plane[0].height;
3266 int y;
3267
3268 for(y=sy; y<0; y++)
3269 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3270 for(y=h; y<sy+block_w*2; y++)
3271 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3272 if(sx<0){
3273 for(y=sy; y<sy+block_w*2; y++)
3274 memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
3275 }
3276 if(sx+block_w*2 > w){
3277 for(y=sy; y<sy+block_w*2; y++)
3278 memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
3279 }
3280 }
3281
3282 // intra(black) = neighbors' contribution to the current block
3283 for(i=0; i<3; i++)
3284 color[i]= get_dc(s, mb_x, mb_y, i);
3285
51d6a3cf 3286 // get previous score (cant be cached due to OBMC)
1015631b
LM
3287 check_block_inter(s, mb_x, mb_y, block->mx, block->my, 0, *obmc_edged, &best_rd);
3288 check_block_inter(s, mb_x, mb_y, 0, 0, 0, *obmc_edged, &best_rd);
3289 check_block_inter(s, mb_x, mb_y, tb->mx, tb->my, 0, *obmc_edged, &best_rd);
3290 check_block_inter(s, mb_x, mb_y, lb->mx, lb->my, 0, *obmc_edged, &best_rd);
3291 check_block_inter(s, mb_x, mb_y, rb->mx, rb->my, 0, *obmc_edged, &best_rd);
3292 check_block_inter(s, mb_x, mb_y, bb->mx, bb->my, 0, *obmc_edged, &best_rd);
51d6a3cf
MN
3293
3294 /* fullpel ME */
3295 //FIXME avoid subpel interpol / round to nearest integer
3296 do{
3297 dia_change=0;
3298 for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
3299 for(j=0; j<i; j++){
1015631b
LM
3300 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), 0, *obmc_edged, &best_rd);
3301 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), 0, *obmc_edged, &best_rd);
3302 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), 0, *obmc_edged, &best_rd);
3303 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), 0, *obmc_edged, &best_rd);
51d6a3cf
MN
3304 }
3305 }
3306 }while(dia_change);
3307 /* subpel ME */
3308 do{
3309 static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
3310 dia_change=0;
3311 for(i=0; i<8; i++)
1015631b 3312 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], 0, *obmc_edged, &best_rd);
51d6a3cf
MN
3313 }while(dia_change);
3314 //FIXME or try the standard 2 pass qpel or similar
13705b69 3315#if 1
1015631b 3316 check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
51d6a3cf 3317 //FIXME RD style color selection
13705b69 3318#endif
51d6a3cf
MN
3319 if(!same_block(block, &backup)){
3320 if(tb != &null_block) tb ->type &= ~BLOCK_OPT;
3321 if(lb != &null_block) lb ->type &= ~BLOCK_OPT;
3322 if(rb != &null_block) rb ->type &= ~BLOCK_OPT;
3323 if(bb != &null_block) bb ->type &= ~BLOCK_OPT;
3324 if(tlb!= &null_block) tlb->type &= ~BLOCK_OPT;
3325 if(trb!= &null_block) trb->type &= ~BLOCK_OPT;
3326 if(blb!= &null_block) blb->type &= ~BLOCK_OPT;
3327 if(brb!= &null_block) brb->type &= ~BLOCK_OPT;
3328 change ++;
3329 }
3330 }
3331 }
3332 av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
3333 if(!change)
3334 break;
3335 }
3336}
3337
791e7b83
MN
3338static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int bias){
3339 const int level= b->level;
3340 const int w= b->width;
3341 const int h= b->height;
c97de57c
MN
3342 const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
3343 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
da66b631 3344 int x,y, thres1, thres2;
62ab0b78 3345// START_TIMER
791e7b83 3346
93fbdb5a 3347 if(s->qlog == LOSSLESS_QLOG) return;
115329f1 3348
791e7b83 3349 bias= bias ? 0 : (3*qmul)>>3;
da66b631
MN
3350 thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
3351 thres2= 2*thres1;
115329f1 3352
791e7b83
MN
3353 if(!bias){
3354 for(y=0; y<h; y++){
3355 for(x=0; x<w; x++){
da66b631 3356 int i= src[x + y*stride];
115329f1 3357
da66b631
MN
3358 if((unsigned)(i+thres1) > thres2){
3359 if(i>=0){
3360 i<<= QEXPSHIFT;
3361 i/= qmul; //FIXME optimize
3362 src[x + y*stride]= i;
3363 }else{
3364 i= -i;
3365 i<<= QEXPSHIFT;
3366 i/= qmul; //FIXME optimize
3367 src[x + y*stride]= -i;
3368 }
3369 }else
3370 src[x + y*stride]= 0;
791e7b83
MN
3371 }
3372 }
3373 }else{
3374 for(y=0; y<h; y++){
3375 for(x=0; x<w; x++){
115329f1
DB
3376 int i= src[x + y*stride];
3377
da66b631
MN
3378 if((unsigned)(i+thres1) > thres2){
3379 if(i>=0){
3380 i<<= QEXPSHIFT;
3381 i= (i + bias) / qmul; //FIXME optimize
3382 src[x + y*stride]= i;
3383 }else{
3384 i= -i;
3385 i<<= QEXPSHIFT;
3386 i= (i + bias) / qmul; //FIXME optimize
3387 src[x + y*stride]= -i;
3388 }
3389 }else
3390 src[x + y*stride]= 0;
791e7b83
MN
3391 }
3392 }
3393 }
da66b631
MN
3394 if(level+1 == s->spatial_decomposition_count){
3395// STOP_TIMER("quantize")
3396 }
791e7b83
MN
3397}
3398
66b32bf2 3399static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int start_y, int end_y){
a0d1931c 3400 const int w= b->width;
c97de57c
MN
3401 const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
3402 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
a0d1931c
Y
3403 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3404 int x,y;
3405 START_TIMER
115329f1 3406
a0d1931c 3407 if(s->qlog == LOSSLESS_QLOG) return;
115329f1 3408
66b32bf2 3409 for(y=start_y; y<end_y; y++){
a0d1931c
Y
3410// DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3411 DWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3412 for(x=0; x<w; x++){
3413 int i= line[x];
3414 if(i<0){
3415 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3416 }else if(i>0){
3417 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT));
3418 }
3419 }
3420 }
3421 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3422 STOP_TIMER("dquant")
3423 }
3424}
3425
791e7b83 3426static void dequantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride){
791e7b83
MN
3427 const int w= b->width;
3428 const int h= b->height;
c97de57c
MN
3429 const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
3430 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
791e7b83
MN
3431 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3432 int x,y;
ea7d9cd4 3433 START_TIMER
115329f1 3434
93fbdb5a 3435 if(s->qlog == LOSSLESS_QLOG) return;
115329f1 3436
791e7b83
MN
3437 for(y=0; y<h; y++){
3438 for(x=0; x<w; x++){
3439 int i= src[x + y*stride];
3440 if(i<0){
3441 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3442 }else if(i>0){
3443 src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT));
3444 }
3445 }
3446 }
ea7d9cd4
MN
3447 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3448 STOP_TIMER("dquant")
3449 }
791e7b83
MN
3450}
3451
3452static void decorrelate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
3453 const int w= b->width;
3454 const int h= b->height;
3455 int x,y;
115329f1 3456
791e7b83
MN
3457 for(y=h-1; y>=0; y--){
3458 for(x=w-1; x>=0; x--){
3459 int i= x + y*stride;
115329f1 3460
791e7b83
MN
3461 if(x){
3462 if(use_median){
3463 if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3464 else src[i] -= src[i - 1];
3465 }else{
3466 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3467 else src[i] -= src[i - 1];
3468 }
3469 }else{
3470 if(y) src[i] -= src[i - stride];
3471 }
3472 }
3473 }
3474}
3475
66b32bf2 3476static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
a0d1931c 3477 const int w= b->width;
a0d1931c 3478 int x,y;
115329f1 3479
a0d1931c 3480// START_TIMER
115329f1 3481
a0d1931c
Y
3482 DWTELEM * line;
3483 DWTELEM * prev;
115329f1 3484
66b32bf2
Y
3485 if (start_y != 0)
3486 line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
115329f1 3487
66b32bf2 3488 for(y=start_y; y<end_y; y++){
a0d1931c
Y
3489 prev = line;
3490// line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3491 line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3492 for(x=0; x<w; x++){
3493 if(x){
3494 if(use_median){
3495 if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
3496 else line[x] += line[x - 1];
3497 }else{
3498 if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
3499 else line[x] += line[x - 1];
3500 }
3501 }else{
3502 if(y) line[x] += prev[x];
3503 }
3504 }