various security fixes and precautionary checks
[libav.git] / libavcodec / snow.c
CommitLineData
791e7b83
MN
1/*
2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18
19#include "avcodec.h"
20#include "common.h"
21#include "dsputil.h"
28869757
MN
22
23#include "rangecoder.h"
24#define MID_STATE 128
791e7b83
MN
25
26#include "mpegvideo.h"
27
28#undef NDEBUG
29#include <assert.h>
30
31#define MAX_DECOMPOSITIONS 8
32#define MAX_PLANES 4
33#define DWTELEM int
34#define QROOT 8
93fbdb5a 35#define LOSSLESS_QLOG -128
034aff03 36#define FRAC_BITS 8
791e7b83
MN
37
38static const int8_t quant3[256]={
39 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
43 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
44 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
45 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
46 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
47-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
48-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
49-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
50-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
51-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
52-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
53-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
54-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
55};
56static const int8_t quant3b[256]={
57 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
60 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
61 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
62 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
63 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
64 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
65-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
66-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
68-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
69-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
70-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
71-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
72-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
73};
74static const int8_t quant5[256]={
75 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
76 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
77 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
78 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
79 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
80 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
81 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
82 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
83-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
84-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
85-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
86-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
87-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
88-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
89-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
90-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
91};
92static const int8_t quant7[256]={
93 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
94 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
95 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
96 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
97 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
98 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
99 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
100 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
101-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
102-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
103-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
104-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
105-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
106-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
107-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
108-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
109};
110static const int8_t quant9[256]={
111 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
112 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
113 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
114 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
115 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
116 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
117 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
118 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
119-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
120-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
121-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
122-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
123-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
124-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
125-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
126-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
127};
128static const int8_t quant11[256]={
129 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
130 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
131 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
132 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
133 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
134 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
135 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
136 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
137-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
138-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
139-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
140-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
141-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
142-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
143-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
144-4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
145};
146static const int8_t quant13[256]={
147 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
148 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
149 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
150 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
151 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
152 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
153 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
154 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
155-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
156-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
157-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
158-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
159-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
160-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
161-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
162-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
163};
164
715a97f0
MN
165#define LOG2_OBMC_MAX 6
166#define OBMC_MAX (1<<(LOG2_OBMC_MAX))
791e7b83
MN
167#if 0 //64*cubic
168static const uint8_t obmc32[1024]={
169 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
170 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
171 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
172 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
173 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
174 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
175 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
176 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
177 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
178 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
179 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
180 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
181 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
182 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
183 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
184 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
185 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
186 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
187 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
188 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
189 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
190 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
191 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
192 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
193 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
194 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
195 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
196 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
197 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
198 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
199 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
200 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
201//error:0.000022
202};
203static const uint8_t obmc16[256]={
204 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
205 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
206 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
207 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
208 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
209 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
210 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
211 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
212 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
213 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
214 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
215 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
216 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
217 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
218 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
219 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
220//error:0.000033
221};
222#elif 1 // 64*linear
223static const uint8_t obmc32[1024]={
224 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
225 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
226 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
227 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
228 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
229 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
230 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
231 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
232 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
233 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
234 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
235 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
236 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
237 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
238 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
239 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
240 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
241 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
242 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
243 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
244 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
245 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
246 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
247 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
248 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
249 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
250 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
251 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
252 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
253 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
254 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
255 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
256 //error:0.000020
257};
258static const uint8_t obmc16[256]={
259 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
260 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
261 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
262 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
263 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
264 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
265 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
266 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
267 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
268 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
269 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
270 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
271 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
272 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
273 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
274 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
275//error:0.000015
276};
277#else //64*cos
278static const uint8_t obmc32[1024]={
279 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
280 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
281 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
282 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
283 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
284 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
285 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
286 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
287 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
288 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
289 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
290 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
291 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
292 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
293 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
294 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
295 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
296 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
297 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
298 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
299 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
300 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
301 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
302 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
303 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
304 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
305 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
306 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
307 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
308 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
309 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
310 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
311//error:0.000022
312};
313static const uint8_t obmc16[256]={
314 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
315 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
316 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
317 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
318 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
319 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
320 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
321 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
322 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
323 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
324 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
325 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
326 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
327 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
328 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
329 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
330//error:0.000022
331};
332#endif
333
155ec6ed
MN
334//linear *64
335static const uint8_t obmc8[64]={
336 1, 3, 5, 7, 7, 5, 3, 1,
337 3, 9,15,21,21,15, 9, 3,
338 5,15,25,35,35,25,15, 5,
339 7,21,35,49,49,35,21, 7,
340 7,21,35,49,49,35,21, 7,
341 5,15,25,35,35,25,15, 5,
342 3, 9,15,21,21,15, 9, 3,
343 1, 3, 5, 7, 7, 5, 3, 1,
344//error:0.000000
345};
346
347//linear *64
348static const uint8_t obmc4[16]={
349 4,12,12, 4,
35012,36,36,12,
35112,36,36,12,
352 4,12,12, 4,
353//error:0.000000
354};
355
356static const uint8_t *obmc_tab[4]={
357 obmc32, obmc16, obmc8, obmc4
358};
359
360typedef struct BlockNode{
361 int16_t mx;
362 int16_t my;
363 uint8_t color[3];
364 uint8_t type;
365//#define TYPE_SPLIT 1
366#define BLOCK_INTRA 1
367//#define TYPE_NOCOLOR 4
368 uint8_t level; //FIXME merge into type?
369}BlockNode;
370
371#define LOG2_MB_SIZE 4
372#define MB_SIZE (1<<LOG2_MB_SIZE)
373
791e7b83
MN
374typedef struct SubBand{
375 int level;
376 int stride;
377 int width;
378 int height;
379 int qlog; ///< log(qscale)/log[2^(1/6)]
380 DWTELEM *buf;
7b49c309 381 int16_t *x;
0cea8a03 382 DWTELEM *coeff;
791e7b83
MN
383 struct SubBand *parent;
384 uint8_t state[/*7*2*/ 7 + 512][32];
385}SubBand;
386
387typedef struct Plane{
388 int width;
389 int height;
390 SubBand band[MAX_DECOMPOSITIONS][4];
391}Plane;
392
393typedef struct SnowContext{
394// MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
395
396 AVCodecContext *avctx;
28869757 397 RangeCoder c;
791e7b83
MN
398 DSPContext dsp;
399 AVFrame input_picture;
400 AVFrame current_picture;
401 AVFrame last_picture;
402 AVFrame mconly_picture;
403// uint8_t q_context[16];
404 uint8_t header_state[32];
155ec6ed 405 uint8_t block_state[128 + 32*128];
791e7b83 406 int keyframe;
19aa028d 407 int always_reset;
791e7b83
MN
408 int version;
409 int spatial_decomposition_type;
410 int temporal_decomposition_type;
411 int spatial_decomposition_count;
412 int temporal_decomposition_count;
413 DWTELEM *spatial_dwt_buffer;
791e7b83
MN
414 int colorspace_type;
415 int chroma_h_shift;
416 int chroma_v_shift;
417 int spatial_scalability;
418 int qlog;
155ec6ed
MN
419 int lambda;
420 int lambda2;
791e7b83
MN
421 int mv_scale;
422 int qbias;
423#define QBIAS_SHIFT 3
155ec6ed
MN
424 int b_width;
425 int b_height;
426 int block_max_depth;
791e7b83 427 Plane plane[MAX_PLANES];
155ec6ed
MN
428 BlockNode *block;
429
791e7b83
MN
430 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
431}SnowContext;
432
2554db9b
MN
433#ifdef __sgi
434// Avoid a name clash on SGI IRIX
435#undef qexp
436#endif
034aff03 437#define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
791e7b83
MN
438static const uint8_t qexp[8]={
439 128, 140, 152, 166, 181, 197, 215, 235
440// 64, 70, 76, 83, 91, 99, 108, 117
441// 32, 35, 38, 41, 45, 49, 54, 59
442// 16, 17, 19, 21, 23, 25, 27, 29
443// 8, 9, 10, 10, 11, 12, 13, 15
444};
445
446static inline int mirror(int v, int m){
447 if (v<0) return -v;
448 else if(v>m) return 2*m-v;
449 else return v;
450}
451
28869757 452static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
791e7b83
MN
453 int i;
454
455 if(v){
456 const int a= ABS(v);
457 const int e= av_log2(a);
458#if 1
459 const int el= FFMIN(e, 10);
28869757 460 put_rac(c, state+0, 0);
791e7b83
MN
461
462 for(i=0; i<el; i++){
28869757 463 put_rac(c, state+1+i, 1); //1..10
791e7b83
MN
464 }
465 for(; i<e; i++){
28869757 466 put_rac(c, state+1+9, 1); //1..10
791e7b83 467 }
28869757 468 put_rac(c, state+1+FFMIN(i,9), 0);
791e7b83
MN
469
470 for(i=e-1; i>=el; i--){
28869757 471 put_rac(c, state+22+9, (a>>i)&1); //22..31
791e7b83
MN
472 }
473 for(; i>=0; i--){
28869757 474 put_rac(c, state+22+i, (a>>i)&1); //22..31
791e7b83
MN
475 }
476
477 if(is_signed)
28869757 478 put_rac(c, state+11 + el, v < 0); //11..21
791e7b83
MN
479#else
480
28869757 481 put_rac(c, state+0, 0);
791e7b83
MN
482 if(e<=9){
483 for(i=0; i<e; i++){
28869757 484 put_rac(c, state+1+i, 1); //1..10
791e7b83 485 }
28869757 486 put_rac(c, state+1+i, 0);
791e7b83
MN
487
488 for(i=e-1; i>=0; i--){
28869757 489 put_rac(c, state+22+i, (a>>i)&1); //22..31
791e7b83
MN
490 }
491
492 if(is_signed)
28869757 493 put_rac(c, state+11 + e, v < 0); //11..21
791e7b83
MN
494 }else{
495 for(i=0; i<e; i++){
28869757 496 put_rac(c, state+1+FFMIN(i,9), 1); //1..10
791e7b83 497 }
28869757 498 put_rac(c, state+1+FFMIN(i,9), 0);
791e7b83
MN
499
500 for(i=e-1; i>=0; i--){
28869757 501 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
791e7b83
MN
502 }
503
504 if(is_signed)
28869757 505 put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
791e7b83
MN
506 }
507#endif
508 }else{
28869757 509 put_rac(c, state+0, 1);
791e7b83
MN
510 }
511}
512
28869757
MN
513static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
514 if(get_rac(c, state+0))
791e7b83
MN
515 return 0;
516 else{
7c2425d2
LM
517 int i, e, a;
518 e= 0;
28869757 519 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
7c2425d2 520 e++;
791e7b83 521 }
7c2425d2 522
791e7b83 523 a= 1;
7c2425d2 524 for(i=e-1; i>=0; i--){
28869757 525 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
791e7b83
MN
526 }
527
28869757 528 if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
791e7b83
MN
529 return -a;
530 else
531 return a;
532 }
533}
534
28869757 535static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
4f4e9633 536 int i;
0635cbfc 537 int r= log2>=0 ? 1<<log2 : 1;
4f4e9633
MN
538
539 assert(v>=0);
0635cbfc
MN
540 assert(log2>=-4);
541
542 while(v >= r){
28869757 543 put_rac(c, state+4+log2, 1);
0635cbfc 544 v -= r;
4f4e9633 545 log2++;
0635cbfc 546 if(log2>0) r+=r;
4f4e9633 547 }
28869757 548 put_rac(c, state+4+log2, 0);
4f4e9633
MN
549
550 for(i=log2-1; i>=0; i--){
28869757 551 put_rac(c, state+31-i, (v>>i)&1);
4f4e9633 552 }
4f4e9633
MN
553}
554
28869757 555static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
4f4e9633 556 int i;
0635cbfc 557 int r= log2>=0 ? 1<<log2 : 1;
4f4e9633
MN
558 int v=0;
559
0635cbfc
MN
560 assert(log2>=-4);
561
28869757 562 while(get_rac(c, state+4+log2)){
0635cbfc 563 v+= r;
4f4e9633 564 log2++;
0635cbfc 565 if(log2>0) r+=r;
4f4e9633
MN
566 }
567
568 for(i=log2-1; i>=0; i--){
28869757 569 v+= get_rac(c, state+31-i)<<i;
4f4e9633
MN
570 }
571
572 return v;
573}
574
791e7b83
MN
575static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
576 const int mirror_left= !highpass;
577 const int mirror_right= (width&1) ^ highpass;
578 const int w= (width>>1) - 1 + (highpass & width);
579 int i;
580
581#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
582 if(mirror_left){
583 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
584 dst += dst_step;
585 src += src_step;
586 }
587
588 for(i=0; i<w; i++){
589 dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse);
590 }
591
592 if(mirror_right){
593 dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse);
594 }
595}
596
597static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
598 const int mirror_left= !highpass;
599 const int mirror_right= (width&1) ^ highpass;
600 const int w= (width>>1) - 1 + (highpass & width);
601 int i;
602
603 if(mirror_left){
604 int r= 3*2*ref[0];
605 r += r>>4;
606 r += r>>8;
607 dst[0] = LIFT(src[0], ((r+add)>>shift), inverse);
608 dst += dst_step;
609 src += src_step;
610 }
611
612 for(i=0; i<w; i++){
613 int r= 3*(ref[i*ref_step] + ref[(i+1)*ref_step]);
614 r += r>>4;
615 r += r>>8;
616 dst[i*dst_step] = LIFT(src[i*src_step], ((r+add)>>shift), inverse);
617 }
618
619 if(mirror_right){
620 int r= 3*2*ref[w*ref_step];
621 r += r>>4;
622 r += r>>8;
623 dst[w*dst_step] = LIFT(src[w*src_step], ((r+add)>>shift), inverse);
624 }
625}
626
627
aa25a462 628static void inplace_lift(DWTELEM *dst, int width, int *coeffs, int n, int shift, int start, int inverse){
791e7b83
MN
629 int x, i;
630
631 for(x=start; x<width; x+=2){
632 int64_t sum=0;
633
634 for(i=0; i<n; i++){
635 int x2= x + 2*i - n + 1;
636 if (x2< 0) x2= -x2;
637 else if(x2>=width) x2= 2*width-x2-2;
638 sum += coeffs[i]*(int64_t)dst[x2];
639 }
640 if(inverse) dst[x] -= (sum + (1<<shift)/2)>>shift;
641 else dst[x] += (sum + (1<<shift)/2)>>shift;
642 }
643}
644
aa25a462 645static void inplace_liftV(DWTELEM *dst, int width, int height, int stride, int *coeffs, int n, int shift, int start, int inverse){
791e7b83
MN
646 int x, y, i;
647 for(y=start; y<height; y+=2){
648 for(x=0; x<width; x++){
649 int64_t sum=0;
650
651 for(i=0; i<n; i++){
652 int y2= y + 2*i - n + 1;
653 if (y2< 0) y2= -y2;
654 else if(y2>=height) y2= 2*height-y2-2;
655 sum += coeffs[i]*(int64_t)dst[x + y2*stride];
656 }
657 if(inverse) dst[x + y*stride] -= (sum + (1<<shift)/2)>>shift;
658 else dst[x + y*stride] += (sum + (1<<shift)/2)>>shift;
659 }
660 }
661}
662
663#define SCALEX 1
664#define LX0 0
665#define LX1 1
666
de890c9b 667#if 0 // more accurate 9/7
791e7b83
MN
668#define N1 2
669#define SHIFT1 14
670#define COEFFS1 (int[]){-25987,-25987}
671#define N2 2
672#define SHIFT2 19
673#define COEFFS2 (int[]){-27777,-27777}
674#define N3 2
675#define SHIFT3 15
676#define COEFFS3 (int[]){28931,28931}
677#define N4 2
678#define SHIFT4 15
679#define COEFFS4 (int[]){14533,14533}
680#elif 1 // 13/7 CRF
681#define N1 4
682#define SHIFT1 4
683#define COEFFS1 (int[]){1,-9,-9,1}
de890c9b 684#define N2 4
791e7b83
MN
685#define SHIFT2 4
686#define COEFFS2 (int[]){-1,5,5,-1}
687#define N3 0
688#define SHIFT3 1
689#define COEFFS3 NULL
690#define N4 0
691#define SHIFT4 1
692#define COEFFS4 NULL
693#elif 1 // 3/5
694#define LX0 1
695#define LX1 0
696#define SCALEX 0.5
697#define N1 2
698#define SHIFT1 1
699#define COEFFS1 (int[]){1,1}
700#define N2 2
701#define SHIFT2 2
702#define COEFFS2 (int[]){-1,-1}
703#define N3 0
704#define SHIFT3 0
705#define COEFFS3 NULL
706#define N4 0
707#define SHIFT4 0
708#define COEFFS4 NULL
709#elif 1 // 11/5
710#define N1 0
711#define SHIFT1 1
712#define COEFFS1 NULL
713#define N2 2
714#define SHIFT2 2
715#define COEFFS2 (int[]){-1,-1}
716#define N3 2
717#define SHIFT3 0
718#define COEFFS3 (int[]){-1,-1}
719#define N4 4
720#define SHIFT4 7
721#define COEFFS4 (int[]){-5,29,29,-5}
722#define SCALEX 4
723#elif 1 // 9/7 CDF
724#define N1 2
725#define SHIFT1 7
726#define COEFFS1 (int[]){-203,-203}
727#define N2 2
728#define SHIFT2 12
729#define COEFFS2 (int[]){-217,-217}
730#define N3 2
731#define SHIFT3 7
732#define COEFFS3 (int[]){113,113}
733#define N4 2
734#define SHIFT4 9
735#define COEFFS4 (int[]){227,227}
736#define SCALEX 1
737#elif 1 // 7/5 CDF
738#define N1 0
739#define SHIFT1 1
740#define COEFFS1 NULL
741#define N2 2
742#define SHIFT2 2
743#define COEFFS2 (int[]){-1,-1}
744#define N3 2
745#define SHIFT3 0
746#define COEFFS3 (int[]){-1,-1}
747#define N4 2
748#define SHIFT4 4
749#define COEFFS4 (int[]){3,3}
750#elif 1 // 9/7 MN
751#define N1 4
752#define SHIFT1 4
753#define COEFFS1 (int[]){1,-9,-9,1}
754#define N2 2
755#define SHIFT2 2
756#define COEFFS2 (int[]){1,1}
757#define N3 0
758#define SHIFT3 1
759#define COEFFS3 NULL
760#define N4 0
761#define SHIFT4 1
762#define COEFFS4 NULL
763#else // 13/7 CRF
764#define N1 4
765#define SHIFT1 4
766#define COEFFS1 (int[]){1,-9,-9,1}
767#define N2 4
768#define SHIFT2 4
769#define COEFFS2 (int[]){-1,5,5,-1}
770#define N3 0
771#define SHIFT3 1
772#define COEFFS3 NULL
773#define N4 0
774#define SHIFT4 1
775#define COEFFS4 NULL
776#endif
aa25a462
RFI
777static void horizontal_decomposeX(DWTELEM *b, int width){
778 DWTELEM temp[width];
791e7b83
MN
779 const int width2= width>>1;
780 const int w2= (width+1)>>1;
781 int A1,A2,A3,A4, x;
782
783 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 0);
784 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 0);
785 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 0);
786 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 0);
787
788 for(x=0; x<width2; x++){
789 temp[x ]= b[2*x ];
790 temp[x+w2]= b[2*x + 1];
791 }
792 if(width&1)
793 temp[x ]= b[2*x ];
794 memcpy(b, temp, width*sizeof(int));
795}
796
aa25a462
RFI
797static void horizontal_composeX(DWTELEM *b, int width){
798 DWTELEM temp[width];
791e7b83
MN
799 const int width2= width>>1;
800 int A1,A2,A3,A4, x;
801 const int w2= (width+1)>>1;
802
803 memcpy(temp, b, width*sizeof(int));
804 for(x=0; x<width2; x++){
805 b[2*x ]= temp[x ];
806 b[2*x + 1]= temp[x+w2];
807 }
808 if(width&1)
809 b[2*x ]= temp[x ];
810
811 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 1);
812 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 1);
813 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 1);
814 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 1);
815}
816
aa25a462 817static void spatial_decomposeX(DWTELEM *buffer, int width, int height, int stride){
791e7b83
MN
818 int x, y;
819
820 for(y=0; y<height; y++){
821 for(x=0; x<width; x++){
822 buffer[y*stride + x] *= SCALEX;
823 }
824 }
825
826 for(y=0; y<height; y++){
827 horizontal_decomposeX(buffer + y*stride, width);
828 }
829
830 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 0);
831 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0);
832 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0);
833 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0);
834}
835
aa25a462 836static void spatial_composeX(DWTELEM *buffer, int width, int height, int stride){
791e7b83
MN
837 int x, y;
838
839 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1);
840 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1);
841 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1);
842 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 1);
843
844 for(y=0; y<height; y++){
845 horizontal_composeX(buffer + y*stride, width);
846 }
847
848 for(y=0; y<height; y++){
849 for(x=0; x<width; x++){
850 buffer[y*stride + x] /= SCALEX;
851 }
852 }
853}
854
aa25a462
RFI
855static void horizontal_decompose53i(DWTELEM *b, int width){
856 DWTELEM temp[width];
791e7b83
MN
857 const int width2= width>>1;
858 int A1,A2,A3,A4, x;
859 const int w2= (width+1)>>1;
860
861 for(x=0; x<width2; x++){
862 temp[x ]= b[2*x ];
863 temp[x+w2]= b[2*x + 1];
864 }
865 if(width&1)
866 temp[x ]= b[2*x ];
867#if 0
868 A2= temp[1 ];
869 A4= temp[0 ];
870 A1= temp[0+width2];
871 A1 -= (A2 + A4)>>1;
872 A4 += (A1 + 1)>>1;
873 b[0+width2] = A1;
874 b[0 ] = A4;
875 for(x=1; x+1<width2; x+=2){
876 A3= temp[x+width2];
877 A4= temp[x+1 ];
878 A3 -= (A2 + A4)>>1;
879 A2 += (A1 + A3 + 2)>>2;
880 b[x+width2] = A3;
881 b[x ] = A2;
882
883 A1= temp[x+1+width2];
884 A2= temp[x+2 ];
885 A1 -= (A2 + A4)>>1;
886 A4 += (A1 + A3 + 2)>>2;
887 b[x+1+width2] = A1;
888 b[x+1 ] = A4;
889 }
890 A3= temp[width-1];
891 A3 -= A2;
892 A2 += (A1 + A3 + 2)>>2;
893 b[width -1] = A3;
894 b[width2-1] = A2;
895#else
896 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
897 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0);
898#endif
899}
900
aa25a462 901static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83
MN
902 int i;
903
904 for(i=0; i<width; i++){
905 b1[i] -= (b0[i] + b2[i])>>1;
906 }
907}
908
aa25a462 909static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83
MN
910 int i;
911
912 for(i=0; i<width; i++){
913 b1[i] += (b0[i] + b2[i] + 2)>>2;
914 }
915}
916
aa25a462 917static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
39c61bbb 918 int y;
791e7b83
MN
919 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
920 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride;
921
922 for(y=-2; y<height; y+=2){
923 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
924 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
925
926{START_TIMER
927 if(b1 <= b3) horizontal_decompose53i(b2, width);
928 if(y+2 < height) horizontal_decompose53i(b3, width);
929STOP_TIMER("horizontal_decompose53i")}
930
931{START_TIMER
932 if(b1 <= b3) vertical_decompose53iH0(b1, b2, b3, width);
933 if(b0 <= b2) vertical_decompose53iL0(b0, b1, b2, width);
934STOP_TIMER("vertical_decompose53i*")}
935
936 b0=b2;
937 b1=b3;
938 }
939}
940
941#define lift5 lift
942#if 1
943#define W_AM 3
944#define W_AO 0
945#define W_AS 1
946
947#define W_BM 1
948#define W_BO 8
949#define W_BS 4
950
951#undef lift5
952#define W_CM 9999
953#define W_CO 2
954#define W_CS 2
955
956#define W_DM 15
957#define W_DO 16
958#define W_DS 5
959#elif 0
960#define W_AM 55
961#define W_AO 16
962#define W_AS 5
963
964#define W_BM 3
965#define W_BO 32
966#define W_BS 6
967
968#define W_CM 127
969#define W_CO 64
970#define W_CS 7
971
972#define W_DM 7
973#define W_DO 8
974#define W_DS 4
975#elif 0
976#define W_AM 97
977#define W_AO 32
978#define W_AS 6
979
980#define W_BM 63
981#define W_BO 512
982#define W_BS 10
983
984#define W_CM 13
985#define W_CO 8
986#define W_CS 4
987
988#define W_DM 15
989#define W_DO 16
990#define W_DS 5
991
992#else
993
994#define W_AM 203
995#define W_AO 64
996#define W_AS 7
997
998#define W_BM 217
999#define W_BO 2048
1000#define W_BS 12
1001
1002#define W_CM 113
1003#define W_CO 64
1004#define W_CS 7
1005
1006#define W_DM 227
1007#define W_DO 128
1008#define W_DS 9
1009#endif
aa25a462
RFI
1010static void horizontal_decompose97i(DWTELEM *b, int width){
1011 DWTELEM temp[width];
791e7b83
MN
1012 const int w2= (width+1)>>1;
1013
1014 lift (temp+w2, b +1, b , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0);
1015 lift (temp , b , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0);
1016 lift5(b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
1017 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
1018}
1019
1020
aa25a462 1021static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83
MN
1022 int i;
1023
1024 for(i=0; i<width; i++){
1025 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1026 }
1027}
1028
aa25a462 1029static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83
MN
1030 int i;
1031
1032 for(i=0; i<width; i++){
1033#ifdef lift5
1034 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1035#else
1036 int r= 3*(b0[i] + b2[i]);
1037 r+= r>>4;
1038 r+= r>>8;
1039 b1[i] += (r+W_CO)>>W_CS;
1040#endif
1041 }
1042}
1043
aa25a462 1044static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83
MN
1045 int i;
1046
1047 for(i=0; i<width; i++){
1048 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1049 }
1050}
1051
aa25a462 1052static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83
MN
1053 int i;
1054
1055 for(i=0; i<width; i++){
1056 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1057 }
1058}
1059
aa25a462 1060static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
39c61bbb 1061 int y;
791e7b83
MN
1062 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
1063 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride;
1064 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
1065 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
1066
1067 for(y=-4; y<height; y+=2){
1068 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1069 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1070
1071{START_TIMER
1072 if(b3 <= b5) horizontal_decompose97i(b4, width);
1073 if(y+4 < height) horizontal_decompose97i(b5, width);
1074if(width>400){
1075STOP_TIMER("horizontal_decompose97i")
1076}}
1077
1078{START_TIMER
1079 if(b3 <= b5) vertical_decompose97iH0(b3, b4, b5, width);
1080 if(b2 <= b4) vertical_decompose97iL0(b2, b3, b4, width);
1081 if(b1 <= b3) vertical_decompose97iH1(b1, b2, b3, width);
1082 if(b0 <= b2) vertical_decompose97iL1(b0, b1, b2, width);
1083
1084if(width>400){
1085STOP_TIMER("vertical_decompose97i")
1086}}
1087
1088 b0=b2;
1089 b1=b3;
1090 b2=b4;
1091 b3=b5;
1092 }
1093}
1094
aa25a462 1095void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
791e7b83
MN
1096 int level;
1097
46c281e8
MN
1098 for(level=0; level<decomposition_count; level++){
1099 switch(type){
791e7b83
MN
1100 case 0: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1101 case 1: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1102 case 2: spatial_decomposeX (buffer, width>>level, height>>level, stride<<level); break;
1103 }
1104 }
1105}
1106
aa25a462
RFI
1107static void horizontal_compose53i(DWTELEM *b, int width){
1108 DWTELEM temp[width];
791e7b83
MN
1109 const int width2= width>>1;
1110 const int w2= (width+1)>>1;
1111 int A1,A2,A3,A4, x;
1112
1113#if 0
1114 A2= temp[1 ];
1115 A4= temp[0 ];
1116 A1= temp[0+width2];
1117 A1 -= (A2 + A4)>>1;
1118 A4 += (A1 + 1)>>1;
1119 b[0+width2] = A1;
1120 b[0 ] = A4;
1121 for(x=1; x+1<width2; x+=2){
1122 A3= temp[x+width2];
1123 A4= temp[x+1 ];
1124 A3 -= (A2 + A4)>>1;
1125 A2 += (A1 + A3 + 2)>>2;
1126 b[x+width2] = A3;
1127 b[x ] = A2;
1128
1129 A1= temp[x+1+width2];
1130 A2= temp[x+2 ];
1131 A1 -= (A2 + A4)>>1;
1132 A4 += (A1 + A3 + 2)>>2;
1133 b[x+1+width2] = A1;
1134 b[x+1 ] = A4;
1135 }
1136 A3= temp[width-1];
1137 A3 -= A2;
1138 A2 += (A1 + A3 + 2)>>2;
1139 b[width -1] = A3;
1140 b[width2-1] = A2;
1141#else
1142 lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1);
1143 lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1144#endif
1145 for(x=0; x<width2; x++){
1146 b[2*x ]= temp[x ];
1147 b[2*x + 1]= temp[x+w2];
1148 }
1149 if(width&1)
1150 b[2*x ]= temp[x ];
1151}
1152
aa25a462 1153static void vertical_compose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83
MN
1154 int i;
1155
1156 for(i=0; i<width; i++){
1157 b1[i] += (b0[i] + b2[i])>>1;
1158 }
1159}
1160
aa25a462 1161static void vertical_compose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83
MN
1162 int i;
1163
1164 for(i=0; i<width; i++){
1165 b1[i] -= (b0[i] + b2[i] + 2)>>2;
1166 }
1167}
1168
aa25a462 1169static void spatial_compose53i(DWTELEM *buffer, int width, int height, int stride){
39c61bbb 1170 int y;
791e7b83
MN
1171 DWTELEM *b0= buffer + mirror(-1-1, height-1)*stride;
1172 DWTELEM *b1= buffer + mirror(-1 , height-1)*stride;
1173
1174 for(y=-1; y<=height; y+=2){
1175 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1176 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1177
1178{START_TIMER
1179 if(b1 <= b3) vertical_compose53iL0(b1, b2, b3, width);
1180 if(b0 <= b2) vertical_compose53iH0(b0, b1, b2, width);
1181STOP_TIMER("vertical_compose53i*")}
1182
1183{START_TIMER
1184 if(y-1 >= 0) horizontal_compose53i(b0, width);
1185 if(b0 <= b2) horizontal_compose53i(b1, width);
1186STOP_TIMER("horizontal_compose53i")}
1187
1188 b0=b2;
1189 b1=b3;
1190 }
1191}
1192
1193
aa25a462
RFI
1194static void horizontal_compose97i(DWTELEM *b, int width){
1195 DWTELEM temp[width];
791e7b83
MN
1196 const int w2= (width+1)>>1;
1197
1198 lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
1199 lift5(temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
1200 lift (b , temp , temp+w2, 2, 1, 1, width, -W_BM, W_BO, W_BS, 0, 1);
1201 lift (b+1 , temp+w2, b , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1);
1202}
1203
aa25a462 1204static void vertical_compose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83
MN
1205 int i;
1206
1207 for(i=0; i<width; i++){
1208 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1209 }
1210}
1211
aa25a462 1212static void vertical_compose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83
MN
1213 int i;
1214
1215 for(i=0; i<width; i++){
1216#ifdef lift5
1217 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1218#else
1219 int r= 3*(b0[i] + b2[i]);
1220 r+= r>>4;
1221 r+= r>>8;
1222 b1[i] -= (r+W_CO)>>W_CS;
1223#endif
1224 }
1225}
1226
aa25a462 1227static void vertical_compose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83
MN
1228 int i;
1229
1230 for(i=0; i<width; i++){
1231 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1232 }
1233}
1234
aa25a462 1235static void vertical_compose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83
MN
1236 int i;
1237
1238 for(i=0; i<width; i++){
1239 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1240 }
1241}
1242
aa25a462 1243static void spatial_compose97i(DWTELEM *buffer, int width, int height, int stride){
39c61bbb 1244 int y;
791e7b83
MN
1245 DWTELEM *b0= buffer + mirror(-3-1, height-1)*stride;
1246 DWTELEM *b1= buffer + mirror(-3 , height-1)*stride;
1247 DWTELEM *b2= buffer + mirror(-3+1, height-1)*stride;
1248 DWTELEM *b3= buffer + mirror(-3+2, height-1)*stride;
1249
1250 for(y=-3; y<=height; y+=2){
1251 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1252 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1253
1254 if(stride == width && y+4 < height && 0){
1255 int x;
1256 for(x=0; x<width/2; x++)
1257 b5[x] += 64*2;
1258 for(; x<width; x++)
1259 b5[x] += 169*2;
1260 }
1261
1262{START_TIMER
1263 if(b3 <= b5) vertical_compose97iL1(b3, b4, b5, width);
1264 if(b2 <= b4) vertical_compose97iH1(b2, b3, b4, width);
1265 if(b1 <= b3) vertical_compose97iL0(b1, b2, b3, width);
1266 if(b0 <= b2) vertical_compose97iH0(b0, b1, b2, width);
1267if(width>400){
1268STOP_TIMER("vertical_compose97i")}}
1269
1270{START_TIMER
1271 if(y-1>= 0) horizontal_compose97i(b0, width);
1272 if(b0 <= b2) horizontal_compose97i(b1, width);
1273if(width>400 && b0 <= b2){
1274STOP_TIMER("horizontal_compose97i")}}
1275
1276 b0=b2;
1277 b1=b3;
1278 b2=b4;
1279 b3=b5;
1280 }
1281}
1282
aa25a462 1283void ff_spatial_idwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
791e7b83
MN
1284 int level;
1285
46c281e8
MN
1286 for(level=decomposition_count-1; level>=0; level--){
1287 switch(type){
791e7b83
MN
1288 case 0: spatial_compose97i(buffer, width>>level, height>>level, stride<<level); break;
1289 case 1: spatial_compose53i(buffer, width>>level, height>>level, stride<<level); break;
1290 case 2: spatial_composeX (buffer, width>>level, height>>level, stride<<level); break;
1291 }
1292 }
1293}
1294
0ecca7a4 1295static int encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
4f4e9633
MN
1296 const int w= b->width;
1297 const int h= b->height;
1298 int x, y;
1299
791e7b83 1300 if(1){
791e7b83 1301 int run=0;
a8d73e56 1302 int runs[w*h];
791e7b83
MN
1303 int run_index=0;
1304
791e7b83
MN
1305 for(y=0; y<h; y++){
1306 for(x=0; x<w; x++){
78486403 1307 int v, p=0;
6b2f6646 1308 int /*ll=0, */l=0, lt=0, t=0, rt=0;
a8d73e56 1309 v= src[x + y*stride];
791e7b83
MN
1310
1311 if(y){
a8d73e56 1312 t= src[x + (y-1)*stride];
791e7b83 1313 if(x){
a8d73e56 1314 lt= src[x - 1 + (y-1)*stride];
791e7b83
MN
1315 }
1316 if(x + 1 < w){
a8d73e56 1317 rt= src[x + 1 + (y-1)*stride];
791e7b83
MN
1318 }
1319 }
1320 if(x){
a8d73e56 1321 l= src[x - 1 + y*stride];
6b2f6646
MN
1322 /*if(x > 1){
1323 if(orientation==1) ll= src[y + (x-2)*stride];
1324 else ll= src[x - 2 + y*stride];
791e7b83
MN
1325 }*/
1326 }
78486403 1327 if(parent){
a8d73e56
MN
1328 int px= x>>1;
1329 int py= y>>1;
78486403
MN
1330 if(px<b->parent->width && py<b->parent->height)
1331 p= parent[px + py*2*stride];
1332 }
1333 if(!(/*ll|*/l|lt|t|rt|p)){
791e7b83
MN
1334 if(v){
1335 runs[run_index++]= run;
1336 run=0;
1337 }else{
1338 run++;
1339 }
1340 }
1341 }
1342 }
1343 runs[run_index++]= run;
1344 run_index=0;
1345 run= runs[run_index++];
1346
4f4e9633 1347 put_symbol2(&s->c, b->state[1], run, 3);
791e7b83
MN
1348
1349 for(y=0; y<h; y++){
0ecca7a4
MN
1350 if(&s->c.bytestream_end - &s->c.bytestream < w*40){
1351 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1352 return -1;
1353 }
791e7b83 1354 for(x=0; x<w; x++){
78486403 1355 int v, p=0;
6b2f6646 1356 int /*ll=0, */l=0, lt=0, t=0, rt=0;
a8d73e56 1357 v= src[x + y*stride];
791e7b83
MN
1358
1359 if(y){
a8d73e56 1360 t= src[x + (y-1)*stride];
791e7b83 1361 if(x){
a8d73e56 1362 lt= src[x - 1 + (y-1)*stride];
791e7b83
MN
1363 }
1364 if(x + 1 < w){
a8d73e56 1365 rt= src[x + 1 + (y-1)*stride];
791e7b83
MN
1366 }
1367 }
1368 if(x){
a8d73e56 1369 l= src[x - 1 + y*stride];
6b2f6646
MN
1370 /*if(x > 1){
1371 if(orientation==1) ll= src[y + (x-2)*stride];
1372 else ll= src[x - 2 + y*stride];
791e7b83
MN
1373 }*/
1374 }
78486403 1375 if(parent){
a8d73e56
MN
1376 int px= x>>1;
1377 int py= y>>1;
78486403
MN
1378 if(px<b->parent->width && py<b->parent->height)
1379 p= parent[px + py*2*stride];
1380 }
1381 if(/*ll|*/l|lt|t|rt|p){
1382 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
6b2f6646 1383
28869757 1384 put_rac(&s->c, &b->state[0][context], !!v);
791e7b83
MN
1385 }else{
1386 if(!run){
1387 run= runs[run_index++];
4f4e9633
MN
1388
1389 put_symbol2(&s->c, b->state[1], run, 3);
791e7b83
MN
1390 assert(v);
1391 }else{
1392 run--;
1393 assert(!v);
1394 }
1395 }
1396 if(v){
78486403 1397 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
6b2f6646 1398
0635cbfc 1399 put_symbol2(&s->c, b->state[context + 2], ABS(v)-1, context-4);
28869757 1400 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3b[l&0xFF] + 3*quant3b[t&0xFF]], v<0);
791e7b83
MN
1401 }
1402 }
1403 }
791e7b83 1404 }
0ecca7a4 1405 return 0;
791e7b83
MN
1406}
1407
0ecca7a4 1408static int encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
4f4e9633
MN
1409// encode_subband_qtree(s, b, src, parent, stride, orientation);
1410// encode_subband_z0run(s, b, src, parent, stride, orientation);
0ecca7a4 1411 return encode_subband_c0run(s, b, src, parent, stride, orientation);
4f4e9633
MN
1412// encode_subband_dzr(s, b, src, parent, stride, orientation);
1413}
1414
a8d73e56 1415static inline void decode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
791e7b83
MN
1416 const int w= b->width;
1417 const int h= b->height;
1418 int x,y;
ea7d9cd4
MN
1419 const int qlog= clip(s->qlog + b->qlog, 0, 128);
1420 int qmul= qexp[qlog&7]<<(qlog>>3);
1421 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1422
fff6d4ea
MN
1423 START_TIMER
1424
ea7d9cd4
MN
1425 if(b->buf == s->spatial_dwt_buffer || s->qlog == LOSSLESS_QLOG){
1426 qadd= 0;
1427 qmul= 1<<QEXPSHIFT;
1428 }
791e7b83 1429
791e7b83 1430 if(1){
791e7b83 1431 int run;
7b49c309
MN
1432 int index=0;
1433 int prev_index=-1;
1434 int prev2_index=0;
1435 int parent_index= 0;
1436 int prev_parent_index= 0;
1437
791e7b83
MN
1438 for(y=0; y<b->height; y++)
1439 memset(&src[y*stride], 0, b->width*sizeof(DWTELEM));
1440
4f4e9633 1441 run= get_symbol2(&s->c, b->state[1], 3);
791e7b83 1442 for(y=0; y<h; y++){
0cea8a03
MN
1443 int v=0;
1444 int lt=0, t=0, rt=0;
1445
ff765159
MN
1446 if(y && b->x[prev_index] == 0){
1447 rt= b->coeff[prev_index];
0cea8a03 1448 }
791e7b83 1449 for(x=0; x<w; x++){
0cea8a03
MN
1450 int p=0;
1451 const int l= v;
1452
1453 lt= t; t= rt;
791e7b83 1454
ff765159 1455 if(y){
ea7d9cd4 1456 if(b->x[prev_index] <= x)
ff765159
MN
1457 prev_index++;
1458 if(b->x[prev_index] == x + 1)
1459 rt= b->coeff[prev_index];
1460 else
1461 rt=0;
1462 }
78486403 1463 if(parent){
7b49c309
MN
1464 if(x>>1 > b->parent->x[parent_index]){
1465 parent_index++;
1466 }
ff765159
MN
1467 if(x>>1 == b->parent->x[parent_index]){
1468 p= b->parent->coeff[parent_index];
1469 }
78486403
MN
1470 }
1471 if(/*ll|*/l|lt|t|rt|p){
1472 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
6b2f6646 1473
28869757 1474 v=get_rac(&s->c, &b->state[0][context]);
791e7b83
MN
1475 }else{
1476 if(!run){
4f4e9633 1477 run= get_symbol2(&s->c, b->state[1], 3);
791e7b83
MN
1478 v=1;
1479 }else{
1480 run--;
1481 v=0;
3c1adccd 1482
7b49c309
MN
1483 if(y && parent){
1484 int max_run;
7b49c309
MN
1485
1486 max_run= FFMIN(run, b->x[prev_index] - x - 2);
1487 max_run= FFMIN(max_run, 2*b->parent->x[parent_index] - x - 1);
1488 x+= max_run;
1489 run-= max_run;
3c1adccd 1490 }
791e7b83
MN
1491 }
1492 }
1493 if(v){
78486403 1494 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
0635cbfc 1495 v= get_symbol2(&s->c, b->state[context + 2], context-4) + 1;
28869757 1496 if(get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3b[l&0xFF] + 3*quant3b[t&0xFF]])){
ea7d9cd4 1497 src[x + y*stride]=-(( v*qmul + qadd)>>(QEXPSHIFT));
791e7b83 1498 v= -v;
ea7d9cd4
MN
1499 }else{
1500 src[x + y*stride]= (( v*qmul + qadd)>>(QEXPSHIFT));
1501 }
ff765159
MN
1502 b->x[index]=x; //FIXME interleave x/coeff
1503 b->coeff[index++]= v;
7b49c309
MN
1504 }
1505 }
1506 b->x[index++]= w+1; //end marker
1507 prev_index= prev2_index;
1508 prev2_index= index;
1509
1510 if(parent){
1511 while(b->parent->x[parent_index] != b->parent->width+1)
1512 parent_index++;
1513 parent_index++;
1514 if(y&1){
1515 prev_parent_index= parent_index;
1516 }else{
1517 parent_index= prev_parent_index;
791e7b83
MN
1518 }
1519 }
1520 }
7b49c309 1521 b->x[index++]= w+1; //end marker
3c1adccd 1522 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
791e7b83
MN
1523 STOP_TIMER("decode_subband")
1524 }
1525
1526 return;
1527 }
791e7b83
MN
1528}
1529
1530static void reset_contexts(SnowContext *s){
1531 int plane_index, level, orientation;
1532
19aa028d 1533 for(plane_index=0; plane_index<3; plane_index++){
791e7b83
MN
1534 for(level=0; level<s->spatial_decomposition_count; level++){
1535 for(orientation=level ? 1:0; orientation<4; orientation++){
28869757 1536 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
791e7b83
MN
1537 }
1538 }
1539 }
28869757
MN
1540 memset(s->header_state, MID_STATE, sizeof(s->header_state));
1541 memset(s->block_state, MID_STATE, sizeof(s->block_state));
155ec6ed
MN
1542}
1543
1544static int alloc_blocks(SnowContext *s){
1545 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1546 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1547
1548 s->b_width = w;
1549 s->b_height= h;
1550
1551 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
1552 return 0;
1553}
1554
28869757
MN
1555static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1556 uint8_t *bytestream= d->bytestream;
1557 uint8_t *bytestream_start= d->bytestream_start;
155ec6ed 1558 *d= *s;
28869757
MN
1559 d->bytestream= bytestream;
1560 d->bytestream_start= bytestream_start;
155ec6ed
MN
1561}
1562
1563//near copy & paste from dsputil, FIXME
1564static int pix_sum(uint8_t * pix, int line_size, int w)
1565{
1566 int s, i, j;
1567
1568 s = 0;
1569 for (i = 0; i < w; i++) {
1570 for (j = 0; j < w; j++) {
1571 s += pix[0];
1572 pix ++;
1573 }
1574 pix += line_size - w;
1575 }
1576 return s;
1577}
1578
1579//near copy & paste from dsputil, FIXME
1580static int pix_norm1(uint8_t * pix, int line_size, int w)
1581{
1582 int s, i, j;
1583 uint32_t *sq = squareTbl + 256;
1584
1585 s = 0;
1586 for (i = 0; i < w; i++) {
1587 for (j = 0; j < w; j ++) {
1588 s += sq[pix[0]];
1589 pix ++;
1590 }
1591 pix += line_size - w;
1592 }
1593 return s;
1594}
1595
1596static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int type){
1597 const int w= s->b_width << s->block_max_depth;
1598 const int rem_depth= s->block_max_depth - level;
1599 const int index= (x + y*w) << rem_depth;
1600 const int block_w= 1<<rem_depth;
1601 BlockNode block;
1602 int i,j;
1603
1604 block.color[0]= l;
1605 block.color[1]= cb;
1606 block.color[2]= cr;
1607 block.mx= mx;
1608 block.my= my;
1609 block.type= type;
1610 block.level= level;
1611
1612 for(j=0; j<block_w; j++){
1613 for(i=0; i<block_w; i++){
1614 s->block[index + i + j*w]= block;
1615 }
1616 }
1617}
1618
1619static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
1620 const int offset[3]= {
1621 y*c-> stride + x,
1622 ((y*c->uvstride + x)>>1),
1623 ((y*c->uvstride + x)>>1),
1624 };
1625 int i;
1626 for(i=0; i<3; i++){
1627 c->src[0][i]= src [i];
1628 c->ref[0][i]= ref [i] + offset[i];
1629 }
1630 assert(!ref_index);
1631}
1632
1633//FIXME copy&paste
1634#define P_LEFT P[1]
1635#define P_TOP P[2]
1636#define P_TOPRIGHT P[3]
1637#define P_MEDIAN P[4]
1638#define P_MV1 P[9]
1639#define FLAG_QPEL 1 //must be 1
1640
1641static int encode_q_branch(SnowContext *s, int level, int x, int y){
1642 uint8_t p_buffer[1024];
1643 uint8_t i_buffer[1024];
1644 uint8_t p_state[sizeof(s->block_state)];
1645 uint8_t i_state[sizeof(s->block_state)];
28869757
MN
1646 RangeCoder pc, ic;
1647 uint8_t *pbbak= s->c.bytestream;
1648 uint8_t *pbbak_start= s->c.bytestream_start;
155ec6ed
MN
1649 int score, score2, iscore, i_len, p_len, block_s, sum;
1650 const int w= s->b_width << s->block_max_depth;
1651 const int h= s->b_height << s->block_max_depth;
1652 const int rem_depth= s->block_max_depth - level;
1653 const int index= (x + y*w) << rem_depth;
1654 const int block_w= 1<<(LOG2_MB_SIZE - level);
1655 static BlockNode null_block= { //FIXME add border maybe
1656 .color= {128,128,128},
1657 .mx= 0,
1658 .my= 0,
1659 .type= 0,
1660 .level= 0,
1661 };
1662 int trx= (x+1)<<rem_depth;
1663 int try= (y+1)<<rem_depth;
1664 BlockNode *left = x ? &s->block[index-1] : &null_block;
1665 BlockNode *top = y ? &s->block[index-w] : &null_block;
1666 BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
1667 BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
1668 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
1669 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
1670 int pl = left->color[0];
1671 int pcb= left->color[1];
1672 int pcr= left->color[2];
1673 int pmx= mid_pred(left->mx, top->mx, tr->mx);
1674 int pmy= mid_pred(left->my, top->my, tr->my);
1675 int mx=0, my=0;
1676 int l,cr,cb, i;
1677 const int stride= s->current_picture.linesize[0];
1678 const int uvstride= s->current_picture.linesize[1];
1679 const int instride= s->input_picture.linesize[0];
1680 const int uvinstride= s->input_picture.linesize[1];
1681 uint8_t *new_l = s->input_picture.data[0] + (x + y* instride)*block_w;
1682 uint8_t *new_cb= s->input_picture.data[1] + (x + y*uvinstride)*block_w/2;
1683 uint8_t *new_cr= s->input_picture.data[2] + (x + y*uvinstride)*block_w/2;
1684 uint8_t current_mb[3][stride*block_w];
1685 uint8_t *current_data[3]= {&current_mb[0][0], &current_mb[1][0], &current_mb[2][0]};
1686 int P[10][2];
1687 int16_t last_mv[3][2];
1688 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
1689 const int shift= 1+qpel;
1690 MotionEstContext *c= &s->m.me;
1691 int mx_context= av_log2(2*ABS(left->mx - top->mx));
1692 int my_context= av_log2(2*ABS(left->my - top->my));
1693 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1694
1695 assert(sizeof(s->block_state) >= 256);
1696 if(s->keyframe){
1697 set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, BLOCK_INTRA);
1698 return 0;
1699 }
1700
1701 //FIXME optimize
1702 for(i=0; i<block_w; i++)
1703 memcpy(&current_mb[0][0] + stride*i, new_l + instride*i, block_w);
1704 for(i=0; i<block_w>>1; i++)
1705 memcpy(&current_mb[1][0] + uvstride*i, new_cb + uvinstride*i, block_w>>1);
1706 for(i=0; i<block_w>>1; i++)
1707 memcpy(&current_mb[2][0] + uvstride*i, new_cr + uvinstride*i, block_w>>1);
1708
1709// clip predictors / edge ?
1710
1711 P_LEFT[0]= left->mx;
1712 P_LEFT[1]= left->my;
1713 P_TOP [0]= top->mx;
1714 P_TOP [1]= top->my;
1715 P_TOPRIGHT[0]= tr->mx;
1716 P_TOPRIGHT[1]= tr->my;
1717
1718 last_mv[0][0]= s->block[index].mx;
1719 last_mv[0][1]= s->block[index].my;
1720 last_mv[1][0]= right->mx;
1721 last_mv[1][1]= right->my;
1722 last_mv[2][0]= bottom->mx;
1723 last_mv[2][1]= bottom->my;
1724
1725 s->m.mb_stride=2;
1726 s->m.mb_x=
1727 s->m.mb_y= 0;
1728 s->m.me.skip= 0;
1729
1730 init_ref(c, current_data, s->last_picture.data, NULL, block_w*x, block_w*y, 0);
1731
1732 assert(s->m.me. stride == stride);
1733 assert(s->m.me.uvstride == uvstride);
1734
1735 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
1736 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
1737 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
1738 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
1739
ff158dc9
MN
1740 c->xmin = - x*block_w - 16+2;
1741 c->ymin = - y*block_w - 16+2;
1742 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
1743 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
155ec6ed
MN
1744
1745 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
1746 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
1747 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
1748 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
1749 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
1750 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
1751 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
1752
1753 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
1754 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
1755
1756 if (!y) {
1757 c->pred_x= P_LEFT[0];
1758 c->pred_y= P_LEFT[1];
1759 } else {
1760 c->pred_x = P_MEDIAN[0];
1761 c->pred_y = P_MEDIAN[1];
1762 }
1763
1764 score= ff_epzs_motion_search(&s->m, &mx, &my, P, 0, /*ref_index*/ 0, last_mv,
1765 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
1766
1767 assert(mx >= c->xmin);
1768 assert(mx <= c->xmax);
1769 assert(my >= c->ymin);
1770 assert(my <= c->ymax);
1771
1772 score= s->m.me.sub_motion_search(&s->m, &mx, &my, score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
1773 score= ff_get_mb_score(&s->m, mx, my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
1774 //FIXME if mb_cmp != SSE then intra cant be compared currently and mb_penalty vs. lambda2
1775
1776 // subpel search
1777 pc= s->c;
28869757
MN
1778 pc.bytestream_start=
1779 pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
155ec6ed
MN
1780 memcpy(p_state, s->block_state, sizeof(s->block_state));
1781
1782 if(level!=s->block_max_depth)
28869757
MN
1783 put_rac(&pc, &p_state[4 + s_context], 1);
1784 put_rac(&pc, &p_state[1 + left->type + top->type], 0);
155ec6ed
MN
1785 put_symbol(&pc, &p_state[128 + 32*mx_context], mx - pmx, 1);
1786 put_symbol(&pc, &p_state[128 + 32*my_context], my - pmy, 1);
28869757
MN
1787 p_len= pc.bytestream - pc.bytestream_start;
1788 score += (s->lambda2*(p_len*8
1789 + (pc.outstanding_count - s->c.outstanding_count)*8
1790 + (-av_log2(pc.range) + av_log2(s->c.range))
1791 ))>>FF_LAMBDA_SHIFT;
155ec6ed
MN
1792
1793 block_s= block_w*block_w;
1794 sum = pix_sum(&current_mb[0][0], stride, block_w);
1795 l= (sum + block_s/2)/block_s;
1796 iscore = pix_norm1(&current_mb[0][0], stride, block_w) - 2*l*sum + l*l*block_s;
1797
1798 block_s= block_w*block_w>>2;
1799 sum = pix_sum(&current_mb[1][0], uvstride, block_w>>1);
1800 cb= (sum + block_s/2)/block_s;
1801// iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
1802 sum = pix_sum(&current_mb[2][0], uvstride, block_w>>1);
1803 cr= (sum + block_s/2)/block_s;
1804// iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
1805
1806 ic= s->c;
28869757
MN
1807 ic.bytestream_start=
1808 ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
155ec6ed
MN
1809 memcpy(i_state, s->block_state, sizeof(s->block_state));
1810 if(level!=s->block_max_depth)
28869757
MN
1811 put_rac(&ic, &i_state[4 + s_context], 1);
1812 put_rac(&ic, &i_state[1 + left->type + top->type], 1);
155ec6ed
MN
1813 put_symbol(&ic, &i_state[32], l-pl , 1);
1814 put_symbol(&ic, &i_state[64], cb-pcb, 1);
1815 put_symbol(&ic, &i_state[96], cr-pcr, 1);
28869757
MN
1816 i_len= ic.bytestream - ic.bytestream_start;
1817 iscore += (s->lambda2*(i_len*8
1818 + (ic.outstanding_count - s->c.outstanding_count)*8
1819 + (-av_log2(ic.range) + av_log2(s->c.range))
1820 ))>>FF_LAMBDA_SHIFT;
155ec6ed
MN
1821
1822// assert(score==256*256*256*64-1);
1823 assert(iscore < 255*255*256 + s->lambda2*10);
1824 assert(iscore >= 0);
1825 assert(l>=0 && l<=255);
1826 assert(pl>=0 && pl<=255);
1827
1828 if(level==0){
1829 int varc= iscore >> 8;
1830 int vard= score >> 8;
1831 if (vard <= 64 || vard < varc)
1832 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
1833 else
1834 c->scene_change_score+= s->m.qscale;
1835 }
1836
1837 if(level!=s->block_max_depth){
28869757 1838 put_rac(&s->c, &s->block_state[4 + s_context], 0);
155ec6ed
MN
1839 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
1840 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
1841 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
1842 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
1843 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
1844
1845 if(score2 < score && score2 < iscore)
1846 return score2;
1847 }
1848
1849 if(iscore < score){
28869757 1850 memcpy(pbbak, i_buffer, i_len);
155ec6ed 1851 s->c= ic;
28869757
MN
1852 s->c.bytestream_start= pbbak_start;
1853 s->c.bytestream= pbbak + i_len;
155ec6ed
MN
1854 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, BLOCK_INTRA);
1855 memcpy(s->block_state, i_state, sizeof(s->block_state));
1856 return iscore;
1857 }else{
28869757 1858 memcpy(pbbak, p_buffer, p_len);
155ec6ed 1859 s->c= pc;
28869757
MN
1860 s->c.bytestream_start= pbbak_start;
1861 s->c.bytestream= pbbak + p_len;
155ec6ed
MN
1862 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, 0);
1863 memcpy(s->block_state, p_state, sizeof(s->block_state));
1864 return score;
1865 }
1866}
1867
1868static void decode_q_branch(SnowContext *s, int level, int x, int y){
1869 const int w= s->b_width << s->block_max_depth;
155ec6ed
MN
1870 const int rem_depth= s->block_max_depth - level;
1871 const int index= (x + y*w) << rem_depth;
1872 static BlockNode null_block= { //FIXME add border maybe
1873 .color= {128,128,128},
1874 .mx= 0,
1875 .my= 0,
1876 .type= 0,
1877 .level= 0,
1878 };
1879 int trx= (x+1)<<rem_depth;
155ec6ed
MN
1880 BlockNode *left = x ? &s->block[index-1] : &null_block;
1881 BlockNode *top = y ? &s->block[index-w] : &null_block;
1882 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
1883 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
1884 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1885
1886 if(s->keyframe){
1887 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, BLOCK_INTRA);
1888 return;
1889 }
1890
28869757 1891 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
155ec6ed
MN
1892 int type;
1893 int l = left->color[0];
1894 int cb= left->color[1];
1895 int cr= left->color[2];
1896 int mx= mid_pred(left->mx, top->mx, tr->mx);
1897 int my= mid_pred(left->my, top->my, tr->my);
1898 int mx_context= av_log2(2*ABS(left->mx - top->mx)) + 0*av_log2(2*ABS(tr->mx - top->mx));
1899 int my_context= av_log2(2*ABS(left->my - top->my)) + 0*av_log2(2*ABS(tr->my - top->my));
1900
28869757 1901 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
155ec6ed
MN
1902
1903 if(type){
1904 l += get_symbol(&s->c, &s->block_state[32], 1);
1905 cb+= get_symbol(&s->c, &s->block_state[64], 1);
1906 cr+= get_symbol(&s->c, &s->block_state[96], 1);
1907 }else{
1908 mx+= get_symbol(&s->c, &s->block_state[128 + 32*mx_context], 1);
1909 my+= get_symbol(&s->c, &s->block_state[128 + 32*my_context], 1);
1910 }
1911 set_blocks(s, level, x, y, l, cb, cr, mx, my, type);
1912 }else{
1913 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
1914 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
1915 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
1916 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
1917 }
1918}
1919
1920static void encode_blocks(SnowContext *s){
1921 int x, y;
1922 int w= s->b_width;
1923 int h= s->b_height;
1924
1925 for(y=0; y<h; y++){
0ecca7a4
MN
1926 if(&s->c.bytestream_end - &s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
1927 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1928 return;
1929 }
155ec6ed
MN
1930 for(x=0; x<w; x++){
1931 encode_q_branch(s, 0, x, y);
1932 }
1933 }
1934}
1935
1936static void decode_blocks(SnowContext *s){
1937 int x, y;
1938 int w= s->b_width;
1939 int h= s->b_height;
1940
1941 for(y=0; y<h; y++){
1942 for(x=0; x<w; x++){
1943 decode_q_branch(s, 0, x, y);
1944 }
1945 }
791e7b83
MN
1946}
1947
1948static void mc_block(uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
1949 int x, y;
3924dac4 1950START_TIMER
791e7b83
MN
1951 for(y=0; y < b_h+5; y++){
1952 for(x=0; x < b_w; x++){
3924dac4
MN
1953 int a0= src[x ];
1954 int a1= src[x + 1];
1955 int a2= src[x + 2];
1956 int a3= src[x + 3];
1957 int a4= src[x + 4];
1958 int a5= src[x + 5];
791e7b83
MN
1959// int am= 9*(a1+a2) - (a0+a3);
1960 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
1961// int am= 18*(a2+a3) - 2*(a1+a4);
1962// int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
1963// int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;
1964
1965// if(b_w==16) am= 8*(a1+a2);
1966
3924dac4
MN
1967 if(dx<8) tmp[x]= (32*a2*( 8-dx) + am* dx + 128)>>8;
1968 else tmp[x]= ( am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
791e7b83
MN
1969
1970/* if (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) + aL* dx + 32)>>6;
1971 else if(dx< 8) tmp[x + y*stride]= ( aL*( 8-dx) + am*(dx- 4) + 32)>>6;
1972 else if(dx<12) tmp[x + y*stride]= ( am*(12-dx) + aR*(dx- 8) + 32)>>6;
1973 else tmp[x + y*stride]= ( aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/
1974 }
3924dac4
MN
1975 tmp += stride;
1976 src += stride;
791e7b83 1977 }
3924dac4
MN
1978 tmp -= (b_h+5)*stride;
1979
791e7b83
MN
1980 for(y=0; y < b_h; y++){
1981 for(x=0; x < b_w; x++){
3924dac4
MN
1982 int a0= tmp[x + 0*stride];
1983 int a1= tmp[x + 1*stride];
1984 int a2= tmp[x + 2*stride];
1985 int a3= tmp[x + 3*stride];
1986 int a4= tmp[x + 4*stride];
1987 int a5= tmp[x + 5*stride];
791e7b83
MN
1988 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
1989// int am= 18*(a2+a3) - 2*(a1+a4);
1990/* int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
1991 int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/
1992
1993// if(b_w==16) am= 8*(a1+a2);
1994
3924dac4
MN
1995 if(dy<8) dst[x]= (32*a2*( 8-dy) + am* dy + 128)>>8;
1996 else dst[x]= ( am*(16-dy) + 32*a3*(dy-8) + 128)>>8;
791e7b83
MN
1997
1998/* if (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) + aL* dy + 32)>>6;
1999 else if(dy< 8) tmp[x + y*stride]= ( aL*( 8-dy) + am*(dy- 4) + 32)>>6;
2000 else if(dy<12) tmp[x + y*stride]= ( am*(12-dy) + aR*(dy- 8) + 32)>>6;
2001 else tmp[x + y*stride]= ( aR*(16-dy) + 16*a2*(dy-12) + 32)>>6;*/
2002 }
3924dac4
MN
2003 dst += stride;
2004 tmp += stride;
791e7b83 2005 }
3924dac4 2006STOP_TIMER("mc_block")
791e7b83
MN
2007}
2008
791e7b83 2009#define mca(dx,dy,b_w)\
d92b5807 2010static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, uint8_t *src, int stride, int h){\
791e7b83
MN
2011 uint8_t tmp[stride*(b_w+5)];\
2012 assert(h==b_w);\
2013 mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\
2014}
2015
2016mca( 0, 0,16)
2017mca( 8, 0,16)
2018mca( 0, 8,16)
2019mca( 8, 8,16)
d92b5807
MN
2020mca( 0, 0,8)
2021mca( 8, 0,8)
2022mca( 0, 8,8)
2023mca( 8, 8,8)
791e7b83 2024
ff158dc9
MN
2025static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
2026 if(block->type){
2027 int x, y;
2028 const int color= block->color[plane_index];
2029 for(y=0; y < b_h; y++){
2030 for(x=0; x < b_w; x++){
2031 dst[x + y*stride]= color;
2032 }
2033 }
2034 }else{
2035 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
2036 int mx= block->mx*scale;
2037 int my= block->my*scale;
ec697587
MN
2038 const int dx= mx&15;
2039 const int dy= my&15;
ff158dc9
MN
2040 sx += (mx>>4) - 2;
2041 sy += (my>>4) - 2;
2042 src += sx + sy*stride;
2043 if( (unsigned)sx >= w - b_w - 4
2044 || (unsigned)sy >= h - b_h - 4){
2045 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+5, b_h+5, sx, sy, w, h);
2046 src= tmp + MB_SIZE;
2047 }
ec697587
MN
2048 if((dx&3) || (dy&3) || b_w!=b_h || (b_w!=4 && b_w!=8 && b_w!=16))
2049 mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy);
2050 else
2051 s->dsp.put_h264_qpel_pixels_tab[2-(b_w>>3)][dy+(dx>>2)](dst,src + 2 + 2*stride,stride);
ff158dc9
MN
2052 }
2053}
2054
2055static always_inline int same_block(BlockNode *a, BlockNode *b){
2056 return !((a->mx - b->mx) | (a->my - b->my) | a->type | b->type);
2057}
2058
2059//FIXME name clenup (b_w, block_w, b_width stuff)
715a97f0 2060static always_inline void add_yblock(SnowContext *s, DWTELEM *dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){
ff158dc9
MN
2061 const int b_width = s->b_width << s->block_max_depth;
2062 const int b_height= s->b_height << s->block_max_depth;
2063 const int b_stride= b_width;
2064 BlockNode *lt= &s->block[b_x + b_y*b_stride];
2065 BlockNode *rt= lt+1;
2066 BlockNode *lb= lt+b_stride;
2067 BlockNode *rb= lb+1;
2068 uint8_t *block[4];
2069 uint8_t tmp[src_stride*(b_h+5)]; //FIXME align
791e7b83
MN
2070 int x,y;
2071
ff158dc9
MN
2072 if(b_x<0){
2073 lt= rt;
2074 lb= rb;
2075 }else if(b_x + 1 >= b_width){
2076 rt= lt;
2077 rb= lb;
791e7b83 2078 }
ff158dc9
MN
2079 if(b_y<0){
2080 lt= lb;
2081 rt= rb;
2082 }else if(b_y + 1 >= b_height){
2083 lb= lt;
2084 rb= rt;
2085 }
2086
2087 if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
2088 obmc -= src_x;
2089 b_w += src_x;
2090 src_x=0;
2091 }else if(src_x + b_w > w){
2092 b_w = w - src_x;
2093 }
2094 if(src_y<0){
2095 obmc -= src_y*obmc_stride;
2096 b_h += src_y;
2097 src_y=0;
2098 }else if(src_y + b_h> h){
2099 b_h = h - src_y;
791e7b83 2100 }
620ab797 2101
ff158dc9 2102 if(b_w<=0 || b_h<=0) return;
155ec6ed 2103
ff158dc9
MN
2104assert(src_stride > 7*MB_SIZE);
2105 dst += src_x + src_y*dst_stride;
715a97f0 2106 dst8+= src_x + src_y*src_stride;
ff158dc9
MN
2107// src += src_x + src_y*src_stride;
2108
2109 block[0]= tmp+3*MB_SIZE;
2110 pred_block(s, block[0], src, tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2111
2112 if(same_block(lt, rt)){
2113 block[1]= block[0];
791e7b83 2114 }else{
ff158dc9
MN
2115 block[1]= tmp + 4*MB_SIZE;
2116 pred_block(s, block[1], src, tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2117 }
2118
2119 if(same_block(lt, lb)){
2120 block[2]= block[0];
2121 }else if(same_block(rt, lb)){
2122 block[2]= block[1];
2123 }else{
2124 block[2]= tmp+5*MB_SIZE;
2125 pred_block(s, block[2], src, tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2126 }
791e7b83 2127
ff158dc9
MN
2128 if(same_block(lt, rb) ){
2129 block[3]= block[0];
2130 }else if(same_block(rt, rb)){
2131 block[3]= block[1];
2132 }else if(same_block(lb, rb)){
2133 block[3]= block[2];
2134 }else{
2135 block[3]= tmp+6*MB_SIZE;
2136 pred_block(s, block[3], src, tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2137 }
2138#if 0
2139 for(y=0; y<b_h; y++){
2140 for(x=0; x<b_w; x++){
2141 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2142 if(add) dst[x + y*dst_stride] += v;
2143 else dst[x + y*dst_stride] -= v;
2144 }
2145 }
2146 for(y=0; y<b_h; y++){
2147 uint8_t *obmc2= obmc + (obmc_stride>>1);
2148 for(x=0; x<b_w; x++){
2149 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2150 if(add) dst[x + y*dst_stride] += v;
2151 else dst[x + y*dst_stride] -= v;
2152 }
2153 }
2154 for(y=0; y<b_h; y++){
2155 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2156 for(x=0; x<b_w; x++){
2157 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2158 if(add) dst[x + y*dst_stride] += v;
2159 else dst[x + y*dst_stride] -= v;
2160 }
2161 }
2162 for(y=0; y<b_h; y++){
2163 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2164 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2165 for(x=0; x<b_w; x++){
2166 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2167 if(add) dst[x + y*dst_stride] += v;
2168 else dst[x + y*dst_stride] -= v;
2169 }
2170 }
2171#else
2172 for(y=0; y<b_h; y++){
2173 //FIXME ugly missue of obmc_stride
2174 uint8_t *obmc1= obmc + y*obmc_stride;
2175 uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2176 uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2177 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2178 for(x=0; x<b_w; x++){
2179 int v= obmc1[x] * block[3][x + y*src_stride]
2180 +obmc2[x] * block[2][x + y*src_stride]
2181 +obmc3[x] * block[1][x + y*src_stride]
2182 +obmc4[x] * block[0][x + y*src_stride];
715a97f0
MN
2183
2184 v <<= 8 - LOG2_OBMC_MAX;
034aff03
MN
2185 if(FRAC_BITS != 8){
2186 v += 1<<(7 - FRAC_BITS);
2187 v >>= 8 - FRAC_BITS;
2188 }
715a97f0
MN
2189 if(add){
2190 v += dst[x + y*dst_stride];
2191 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2192 if(v&(~255)) v= ~(v>>31);
2193 dst8[x + y*src_stride] = v;
2194 }else{
2195 dst[x + y*dst_stride] -= v;
2196 }
791e7b83
MN
2197 }
2198 }
ff158dc9 2199#endif
791e7b83
MN
2200}
2201
ff158dc9 2202static always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){
791e7b83 2203 Plane *p= &s->plane[plane_index];
155ec6ed
MN
2204 const int mb_w= s->b_width << s->block_max_depth;
2205 const int mb_h= s->b_height << s->block_max_depth;
791e7b83 2206 int x, y, mb_x, mb_y;
155ec6ed
MN
2207 int block_size = MB_SIZE >> s->block_max_depth;
2208 int block_w = plane_index ? block_size/2 : block_size;
ff158dc9 2209 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
155ec6ed 2210 int obmc_stride= plane_index ? block_size : 2*block_size;
715a97f0 2211 int ref_stride= s->current_picture.linesize[plane_index];
791e7b83 2212 uint8_t *ref = s->last_picture.data[plane_index];
715a97f0 2213 uint8_t *dst8= s->current_picture.data[plane_index];
791e7b83
MN
2214 int w= p->width;
2215 int h= p->height;
fff6d4ea 2216 START_TIMER
791e7b83 2217
ff158dc9 2218 if(s->keyframe || (s->avctx->debug&512)){
715a97f0
MN
2219 if(add){
2220 for(y=0; y<h; y++){
2221 for(x=0; x<w; x++){
2222 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2223 v >>= FRAC_BITS;
2224 if(v&(~255)) v= ~(v>>31);
2225 dst8[x + y*ref_stride]= v;
2226 }
2227 }
2228 }else{
2229 for(y=0; y<h; y++){
2230 for(x=0; x<w; x++){
2231 buf[x + y*w]-= 128<<FRAC_BITS;
2232 }
ff158dc9 2233 }
791e7b83 2234 }
ff158dc9
MN
2235
2236 return;
791e7b83
MN
2237 }
2238
ff158dc9
MN
2239 for(mb_y=0; mb_y<=mb_h; mb_y++){
2240 for(mb_x=0; mb_x<=mb_w; mb_x++){
fff6d4ea 2241 START_TIMER
ff158dc9 2242
715a97f0 2243 add_yblock(s, buf, dst8, ref, obmc,
ff158dc9 2244 block_w*mb_x - block_w/2,
791e7b83 2245 block_w*mb_y - block_w/2,
ff158dc9 2246 block_w, block_w,
791e7b83 2247 w, h,
ff158dc9
MN
2248 w, ref_stride, obmc_stride,
2249 mb_x - 1, mb_y - 1,
2250 add, plane_index);
2251
2252 STOP_TIMER("add_yblock")
791e7b83
MN
2253 }
2254 }
fff6d4ea
MN
2255
2256 STOP_TIMER("predict_plane")
791e7b83
MN
2257}
2258
2259static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int bias){
2260 const int level= b->level;
2261 const int w= b->width;
2262 const int h= b->height;
2263 const int qlog= clip(s->qlog + b->qlog, 0, 128);
2264 const int qmul= qexp[qlog&7]<<(qlog>>3);
da66b631
MN
2265 int x,y, thres1, thres2;
2266 START_TIMER
791e7b83
MN
2267
2268 assert(QROOT==8);
2269
93fbdb5a
MN
2270 if(s->qlog == LOSSLESS_QLOG) return;
2271
791e7b83 2272 bias= bias ? 0 : (3*qmul)>>3;
da66b631
MN
2273 thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
2274 thres2= 2*thres1;
791e7b83
MN
2275
2276 if(!bias){
2277 for(y=0; y<h; y++){
2278 for(x=0; x<w; x++){
da66b631
MN
2279 int i= src[x + y*stride];
2280
2281 if((unsigned)(i+thres1) > thres2){
2282 if(i>=0){
2283 i<<= QEXPSHIFT;
2284 i/= qmul; //FIXME optimize
2285 src[x + y*stride]= i;
2286 }else{
2287 i= -i;
2288 i<<= QEXPSHIFT;
2289 i/= qmul; //FIXME optimize
2290 src[x + y*stride]= -i;
2291 }
2292 }else
2293 src[x + y*stride]= 0;
791e7b83
MN
2294 }
2295 }
2296 }else{
2297 for(y=0; y<h; y++){
2298 for(x=0; x<w; x++){
2299 int i= src[x + y*stride];
2300
da66b631
MN
2301 if((unsigned)(i+thres1) > thres2){
2302 if(i>=0){
2303 i<<= QEXPSHIFT;
2304 i= (i + bias) / qmul; //FIXME optimize
2305 src[x + y*stride]= i;
2306 }else{
2307 i= -i;
2308 i<<= QEXPSHIFT;
2309 i= (i + bias) / qmul; //FIXME optimize
2310 src[x + y*stride]= -i;
2311 }
2312 }else
2313 src[x + y*stride]= 0;
791e7b83
MN
2314 }
2315 }
2316 }
da66b631
MN
2317 if(level+1 == s->spatial_decomposition_count){
2318// STOP_TIMER("quantize")
2319 }
791e7b83
MN
2320}
2321
2322static void dequantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride){
791e7b83
MN
2323 const int w= b->width;
2324 const int h= b->height;
2325 const int qlog= clip(s->qlog + b->qlog, 0, 128);
2326 const int qmul= qexp[qlog&7]<<(qlog>>3);
2327 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
2328 int x,y;
ea7d9cd4 2329 START_TIMER
791e7b83 2330
93fbdb5a
MN
2331 if(s->qlog == LOSSLESS_QLOG) return;
2332
791e7b83
MN
2333 assert(QROOT==8);
2334
2335 for(y=0; y<h; y++){
2336 for(x=0; x<w; x++){
2337 int i= src[x + y*stride];
2338 if(i<0){
2339 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
2340 }else if(i>0){
2341 src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT));
2342 }
2343 }
2344 }
ea7d9cd4
MN
2345 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
2346 STOP_TIMER("dquant")
2347 }
791e7b83
MN
2348}
2349
2350static void decorrelate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
2351 const int w= b->width;
2352 const int h= b->height;
2353 int x,y;
2354
2355 for(y=h-1; y>=0; y--){
2356 for(x=w-1; x>=0; x--){
2357 int i= x + y*stride;
2358
2359 if(x){
2360 if(use_median){
2361 if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
2362 else src[i] -= src[i - 1];
2363 }else{
2364 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
2365 else src[i] -= src[i - 1];
2366 }
2367 }else{
2368 if(y) src[i] -= src[i - stride];
2369 }
2370 }
2371 }
2372}
2373
2374static void correlate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
2375 const int w= b->width;
2376 const int h= b->height;
2377 int x,y;
2378
2379 for(y=0; y<h; y++){
2380 for(x=0; x<w; x++){
2381 int i= x + y*stride;
2382
2383 if(x){
2384 if(use_median){
2385 if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
2386 else src[i] += src[i - 1];
2387 }else{
2388 if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
2389 else src[i] += src[i - 1];
2390 }
2391 }else{
2392 if(y) src[i] += src[i - stride];
2393 }
2394 }
2395 }
2396}
2397
2398static void encode_header(SnowContext *s){
2399 int plane_index, level, orientation;
28869757
MN
2400 uint8_t kstate[32];
2401
2402 memset(kstate, MID_STATE, sizeof(kstate));
791e7b83 2403
28869757 2404 put_rac(&s->c, kstate, s->keyframe);
19aa028d
MN
2405 if(s->keyframe || s->always_reset)
2406 reset_contexts(s);
791e7b83
MN
2407 if(s->keyframe){
2408 put_symbol(&s->c, s->header_state, s->version, 0);
28869757 2409 put_rac(&s->c, s->header_state, s->always_reset);
791e7b83
MN
2410 put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
2411 put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
2412 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
2413 put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
791e7b83
MN
2414 put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
2415 put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
28869757
MN
2416 put_rac(&s->c, s->header_state, s->spatial_scalability);
2417// put_rac(&s->c, s->header_state, s->rate_scalability);
791e7b83
MN
2418
2419 for(plane_index=0; plane_index<2; plane_index++){
2420 for(level=0; level<s->spatial_decomposition_count; level++){
2421 for(orientation=level ? 1:0; orientation<4; orientation++){
2422 if(orientation==2) continue;
2423 put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
2424 }
2425 }
2426 }
2427 }
2428 put_symbol(&s->c, s->header_state, s->spatial_decomposition_type, 0);
2429 put_symbol(&s->c, s->header_state, s->qlog, 1);
2430 put_symbol(&s->c, s->header_state, s->mv_scale, 0);
2431 put_symbol(&s->c, s->header_state, s->qbias, 1);
155ec6ed 2432 put_symbol(&s->c, s->header_state, s->block_max_depth, 0);
791e7b83
MN
2433}
2434
2435static int decode_header(SnowContext *s){
2436 int plane_index, level, orientation;
28869757
MN
2437 uint8_t kstate[32];
2438
2439 memset(kstate, MID_STATE, sizeof(kstate));
791e7b83 2440
28869757 2441 s->keyframe= get_rac(&s->c, kstate);
19aa028d
MN
2442 if(s->keyframe || s->always_reset)
2443 reset_contexts(s);
791e7b83
MN
2444 if(s->keyframe){
2445 s->version= get_symbol(&s->c, s->header_state, 0);
2446 if(s->version>0){
2447 av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
2448 return -1;
2449 }
28869757 2450 s->always_reset= get_rac(&s->c, s->header_state);
791e7b83
MN
2451 s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
2452 s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
2453 s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
2454 s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
791e7b83
MN
2455 s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
2456 s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
28869757
MN
2457 s->spatial_scalability= get_rac(&s->c, s->header_state);
2458// s->rate_scalability= get_rac(&s->c, s->header_state);
791e7b83
MN
2459
2460 for(plane_index=0; plane_index<3; plane_index++){
2461 for(level=0; level<s->spatial_decomposition_count; level++){
2462 for(orientation=level ? 1:0; orientation<4; orientation++){
2463 int q;
2464 if (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
2465 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
2466 else q= get_symbol(&s->c, s->header_state, 1);
2467 s->plane[plane_index].band[level][orientation].qlog= q;
2468 }
2469 }
2470 }
2471 }
2472
2473 s->spatial_decomposition_type= get_symbol(&s->c, s->header_state, 0);
2474 if(s->spatial_decomposition_type > 2){
2475 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
2476 return -1;
2477 }
2478
2479 s->qlog= get_symbol(&s->c, s->header_state, 1);
2480 s->mv_scale= get_symbol(&s->c, s->header_state, 0);
2481 s->qbias= get_symbol(&s->c, s->header_state, 1);
155ec6ed 2482 s->block_max_depth= get_symbol(&s->c, s->header_state, 0);
791e7b83
MN
2483
2484 return 0;
2485}
2486
2487static int common_init(AVCodecContext *avctx){
2488 SnowContext *s = avctx->priv_data;
2489 int width, height;
2490 int level, orientation, plane_index, dec;
2491
2492 s->avctx= avctx;
2493
2494 dsputil_init(&s->dsp, avctx);
2495
2496#define mcf(dx,dy)\
2497 s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\
2498 s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
d92b5807
MN
2499 s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
2500 s->dsp.put_qpel_pixels_tab [1][dy+dx/4]=\
2501 s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
2502 s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
791e7b83
MN
2503
2504 mcf( 0, 0)
2505 mcf( 4, 0)
2506 mcf( 8, 0)
2507 mcf(12, 0)
2508 mcf( 0, 4)
2509 mcf( 4, 4)
2510 mcf( 8, 4)
2511 mcf(12, 4)
2512 mcf( 0, 8)
2513 mcf( 4, 8)
2514 mcf( 8, 8)
2515 mcf(12, 8)
2516 mcf( 0,12)
2517 mcf( 4,12)
2518 mcf( 8,12)
2519 mcf(12,12)
2520
2521#define mcfh(dx,dy)\
2522 s->dsp.put_pixels_tab [0][dy/4+dx/8]=\
2523 s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
d92b5807
MN
2524 mc_block_hpel ## dx ## dy ## 16;\
2525 s->dsp.put_pixels_tab [1][dy/4+dx/8]=\
2526 s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
2527 mc_block_hpel ## dx ## dy ## 8;
791e7b83
MN
2528
2529 mcfh(0, 0)
2530 mcfh(8, 0)
2531 mcfh(0, 8)
2532 mcfh(8, 8)
2533
2534 dec= s->spatial_decomposition_count= 5;
2535 s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
2536
2537 s->chroma_h_shift= 1; //FIXME XXX
2538 s->chroma_v_shift= 1;
2539
2540// dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
2541
155ec6ed
MN
2542 width= s->avctx->width;
2543 height= s->avctx->height;
2544
2545 s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM));
791e7b83
MN
2546
2547 s->mv_scale= (s->avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
155ec6ed 2548 s->block_max_depth= (s->avctx->flags & CODEC_FLAG_4MV) ? 1 : 0;
791e7b83
MN
2549
2550 for(plane_index=0; plane_index<3; plane_index++){
2551 int w= s->avctx->width;
2552 int h= s->avctx->height;
2553
2554 if(plane_index){
2555 w>>= s->chroma_h_shift;
2556 h>>= s->chroma_v_shift;
2557 }
2558 s->plane[plane_index].width = w;
2559 s->plane[plane_index].height= h;
3bb9f096 2560//av_log(NULL, AV_LOG_DEBUG, "%d %d\n", w, h);
791e7b83
MN
2561 for(level=s->spatial_decomposition_count-1; level>=0; level--){
2562 for(orientation=level ? 1 : 0; orientation<4; orientation++){
2563 SubBand *b= &s->plane[plane_index].band[level][orientation];
2564
2565 b->buf= s->spatial_dwt_buffer;
2566 b->level= level;
2567 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
2568 b->width = (w + !(orientation&1))>>1;
2569 b->height= (h + !(orientation>1))>>1;
2570
2571 if(orientation&1) b->buf += (w+1)>>1;
2572 if(orientation>1) b->buf += b->stride>>1;
2573
2574 if(level)
2575 b->parent= &s->plane[plane_index].band[level-1][orientation];
0cea8a03
MN
2576 b->x = av_mallocz(((b->width+1) * b->height+1)*sizeof(int16_t));
2577 b->coeff= av_mallocz(((b->width+1) * b->height+1)*sizeof(DWTELEM));
791e7b83
MN
2578 }
2579 w= (w+1)>>1;
2580 h= (h+1)>>1;
2581 }
2582 }
2583
791e7b83
MN
2584 reset_contexts(s);
2585/*
2586 width= s->width= avctx->width;
2587 height= s->height= avctx->height;
2588
2589 assert(width && height);
2590*/
2591 s->avctx->get_buffer(s->avctx, &s->mconly_picture);
2592
2593 return 0;
2594}
2595
2596
2597static void calculate_vissual_weight(SnowContext *s, Plane *p){
2598 int width = p->width;
2599 int height= p->height;
39c61bbb 2600 int level, orientation, x, y;
791e7b83
MN
2601
2602 for(level=0; level<s->spatial_decomposition_count; level++){
2603 for(orientation=level ? 1 : 0; orientation<4; orientation++){
2604 SubBand *b= &p->band[level][orientation];
2605 DWTELEM *buf= b->buf;
2606 int64_t error=0;
2607
2608 memset(s->spatial_dwt_buffer, 0, sizeof(int)*width*height);
2609 buf[b->width/2 + b->height/2*b->stride]= 256*256;
46c281e8 2610 ff_spatial_idwt(s->spatial_dwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
791e7b83
MN
2611 for(y=0; y<height; y++){
2612 for(x=0; x<width; x++){
2613 int64_t d= s->spatial_dwt_buffer[x + y*width];
2614 error += d*d;
2615 }
2616 }
2617
2618 b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
46c281e8 2619// av_log(NULL, AV_LOG_DEBUG, "%d %d %d\n", level, orientation, b->qlog/*, sqrt(error)*/);
791e7b83
MN
2620 }
2621 }
2622}
2623
2624static int encode_init(AVCodecContext *avctx)
2625{
2626 SnowContext *s = avctx->priv_data;
39c61bbb 2627 int plane_index;
791e7b83 2628
2ff9ff5b
MN
2629 if(avctx->strict_std_compliance >= 0){
2630 av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it wont be decodeable with future versions!!!\n"
2631 "use vstrict=-1 to use it anyway\n");
2632 return -1;
2633 }
2634
791e7b83 2635 common_init(avctx);
155ec6ed 2636 alloc_blocks(s);
791e7b83
MN
2637
2638 s->version=0;
2639
2640 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
2641 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
2642 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
791e7b83
MN
2643 h263_encode_init(&s->m); //mv_penalty
2644
2645 for(plane_index=0; plane_index<3; plane_index++){
2646 calculate_vissual_weight(s, &s->plane[plane_index]);
2647 }
2648
2649
2650 avctx->coded_frame= &s->current_picture;
2651 switch(avctx->pix_fmt){
2652// case PIX_FMT_YUV444P:
2653// case PIX_FMT_YUV422P:
2654 case PIX_FMT_YUV420P:
2655 case PIX_FMT_GRAY8:
2656// case PIX_FMT_YUV411P:
2657// case PIX_FMT_YUV410P:
2658 s->colorspace_type= 0;
2659 break;
2660/* case PIX_FMT_RGBA32:
2661 s->colorspace= 1;
2662 break;*/
2663 default:
2664 av_log(avctx, AV_LOG_ERROR, "format not supported\n");
2665 return -1;
2666 }
2667// avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
2668 s->chroma_h_shift= 1;
2669 s->chroma_v_shift= 1;
2670 return 0;
2671}
2672
2673static int frame_start(SnowContext *s){
2674 AVFrame tmp;
64886072
MN
2675 int w= s->avctx->width; //FIXME round up to x16 ?
2676 int h= s->avctx->height;
791e7b83 2677
64886072
MN
2678 if(s->current_picture.data[0]){
2679 draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w , h , EDGE_WIDTH );
2680 draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
2681 draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
2682 }
2683
791e7b83
MN
2684 tmp= s->last_picture;
2685 s->last_picture= s->current_picture;
2686 s->current_picture= tmp;
2687
2688 s->current_picture.reference= 1;
2689 if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
2690 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
2691 return -1;
2692 }
2693
2694 return 0;
2695}
2696
2697static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
2698 SnowContext *s = avctx->priv_data;
28869757 2699 RangeCoder * const c= &s->c;
791e7b83
MN
2700 AVFrame *pict = data;
2701 const int width= s->avctx->width;
2702 const int height= s->avctx->height;
39c61bbb 2703 int level, orientation, plane_index;
791e7b83 2704
28869757
MN
2705 ff_init_range_encoder(c, buf, buf_size);
2706 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
791e7b83
MN
2707
2708 s->input_picture = *pict;
2709
791e7b83
MN
2710 s->keyframe=avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
2711 pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
2712
93fbdb5a
MN
2713 if(pict->quality){
2714 s->qlog= rint(QROOT*log(pict->quality / (float)FF_QP2LAMBDA)/log(2));
2715 //<64 >60
2716 s->qlog += 61;
2717 }else{
2718 s->qlog= LOSSLESS_QLOG;
2719 }
791e7b83 2720
791e7b83 2721 frame_start(s);
19aa028d 2722 s->current_picture.key_frame= s->keyframe;
791e7b83
MN
2723
2724 if(pict->pict_type == P_TYPE){
2725 int block_width = (width +15)>>4;
2726 int block_height= (height+15)>>4;
2727 int stride= s->current_picture.linesize[0];
791e7b83
MN
2728
2729 assert(s->current_picture.data[0]);
2730 assert(s->last_picture.data[0]);
2731
2732 s->m.avctx= s->avctx;
2733 s->m.current_picture.data[0]= s->current_picture.data[0];
2734 s->m. last_picture.data[0]= s-> last_picture.data[0];
2735 s->m. new_picture.data[0]= s-> input_picture.data[0];
2736 s->m.current_picture_ptr= &s->m.current_picture;
2737 s->m. last_picture_ptr= &s->m. last_picture;
2738 s->m.linesize=
2739 s->m. last_picture.linesize[0]=
2740 s->m. new_picture.linesize[0]=
2741 s->m.current_picture.linesize[0]= stride;
155ec6ed 2742 s->m.uvlinesize= s->current_picture.linesize[1];
791e7b83
MN
2743 s->m.width = width;
2744 s->m.height= height;
2745 s->m.mb_width = block_width;
2746 s->m.mb_height= block_height;
2747 s->m.mb_stride= s->m.mb_width+1;
2748 s->m.b8_stride= 2*s->m.mb_width+1;
2749 s->m.f_code=1;
2750 s->m.pict_type= pict->pict_type;
2751 s->m.me_method= s->avctx->me_method;
2752 s->m.me.scene_change_score=0;
2753 s->m.flags= s->avctx->flags;
2754 s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
2755 s->m.out_format= FMT_H263;
2756 s->m.unrestricted_mv= 1;
2757
155ec6ed 2758 s->lambda = s->m.lambda= pict->quality * 3/2; //FIXME bug somewhere else
791e7b83 2759 s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
155ec6ed 2760 s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
791e7b83 2761
791e7b83
MN
2762 s->m.dsp= s->dsp; //move
2763 ff_init_me(&s->m);
791e7b83 2764 }
791e7b83 2765
155ec6ed
MN
2766redo_frame:
2767
791e7b83
MN
2768 s->qbias= pict->pict_type == P_TYPE ? 2 : 0;
2769
2770 encode_header(s);
155ec6ed
MN
2771 encode_blocks(s);
2772
791e7b83
MN
2773 for(plane_index=0; plane_index<3; plane_index++){
2774 Plane *p= &s->plane[plane_index];
2775 int w= p->width;
2776 int h= p->height;
2777 int x, y;
39c61bbb 2778// int bits= put_bits_count(&s->c.pb);
791e7b83
MN
2779
2780 //FIXME optimize
791e7b83
MN
2781 if(pict->data[plane_index]) //FIXME gray hack
2782 for(y=0; y<h; y++){
2783 for(x=0; x<w; x++){
034aff03 2784 s->spatial_dwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
791e7b83
MN
2785 }
2786 }
2787 predict_plane(s, s->spatial_dwt_buffer, plane_index, 0);
155ec6ed
MN
2788
2789 if( plane_index==0
2790 && pict->pict_type == P_TYPE
2791 && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
28869757
MN
2792 ff_init_range_encoder(c, buf, buf_size);
2793 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
155ec6ed
MN
2794 pict->pict_type= FF_I_TYPE;
2795 s->keyframe=1;
2796 reset_contexts(s);
2797 goto redo_frame;
2798 }
2799
93fbdb5a
MN
2800 if(s->qlog == LOSSLESS_QLOG){
2801 for(y=0; y<h; y++){
2802 for(x=0; x<w; x++){
034aff03 2803 s->spatial_dwt_buffer[y*w + x]= (s->spatial_dwt_buffer[y*w + x] + (1<<(FRAC_BITS-1)))>>FRAC_BITS;
93fbdb5a
MN
2804 }
2805 }
2806 }
791e7b83 2807
46c281e8 2808 ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
93fbdb5a 2809
791e7b83
MN
2810 for(level=0; level<s->spatial_decomposition_count; level++){
2811 for(orientation=level ? 1 : 0; orientation<4; orientation++){
2812 SubBand *b= &p->band[level][orientation];
2813
2814 quantize(s, b, b->buf, b->stride, s->qbias);
2815 if(orientation==0)
2816 decorrelate(s, b, b->buf, b->stride, pict->pict_type == P_TYPE, 0);
2817 encode_subband(s, b, b->buf, b->parent ? b->parent->buf : NULL, b->stride, orientation);
2818 assert(b->parent==NULL || b->parent->stride == b->stride*2);
2819 if(orientation==0)
2820 correlate(s, b, b->buf, b->stride, 1, 0);
2821 }
2822 }
2823// av_log(NULL, AV_LOG_DEBUG, "plane:%d bits:%d\n", plane_index, put_bits_count(&s->c.pb) - bits);
2824
2825 for(level=0; level<s->spatial_decomposition_count; level++){
2826 for(orientation=level ? 1 : 0; orientation<4; orientation++){
2827 SubBand *b= &p->band[level][orientation];
2828
2829 dequantize(s, b, b->buf, b->stride);
2830 }
2831 }
93fbdb5a 2832
46c281e8 2833 ff_spatial_idwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
93fbdb5a
MN
2834 if(s->qlog == LOSSLESS_QLOG){
2835 for(y=0; y<h; y++){
2836 for(x=0; x<w; x++){
034aff03 2837 s->spatial_dwt_buffer[y*w + x]<<=FRAC_BITS;
93fbdb5a
MN
2838 }
2839 }
2840 }
715a97f0 2841{START_TIMER
791e7b83 2842 predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
715a97f0 2843STOP_TIMER("pred-conv")}
791e7b83
MN
2844 if(s->avctx->flags&CODEC_FLAG_PSNR){
2845 int64_t error= 0;
2846
2847 if(pict->data[plane_index]) //FIXME gray hack
2848 for(y=0; y<h; y++){
2849 for(x=0; x<w; x++){
93fbdb5a 2850 int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
791e7b83
MN
2851 error += d*d;
2852 }
2853 }
791e7b83 2854 s->avctx->error[plane_index] += error;
bd368b56 2855 s->current_picture.error[plane_index] = error;
791e7b83
MN
2856 }
2857 }
2858
2859 if(s->last_picture.data[0])
2860 avctx->release_buffer(avctx, &s->last_picture);
2861
2862 emms_c();
2863
28869757 2864 return ff_rac_terminate(c);
791e7b83
MN
2865}
2866
2867static void common_end(SnowContext *s){
7b49c309
MN
2868 int plane_index, level, orientation;
2869
791e7b83 2870 av_freep(&s->spatial_dwt_buffer);
791e7b83
MN
2871
2872 av_freep(&s->m.me.scratchpad);
2873 av_freep(&s->m.me.map);
2874 av_freep(&s->m.me.score_map);
155ec6ed
MN
2875
2876 av_freep(&s->block);
7b49c309
MN
2877
2878 for(plane_index=0; plane_index<3; plane_index++){
2879 for(level=s->spatial_decomposition_count-1; level>=0; level--){
2880 for(orientation=level ? 1 : 0; orientation<4; orientation++){
2881 SubBand *b= &s->plane[plane_index].band[level][orientation];
2882
2883 av_freep(&b->x);
0cea8a03 2884 av_freep(&b->coeff);
7b49c309
MN
2885 }
2886 }
2887 }
791e7b83
MN
2888}
2889
2890static int encode_end(AVCodecContext *avctx)
2891{
2892 SnowContext *s = avctx->priv_data;
2893
2894 common_end(s);
2895
2896 return 0;
2897}
2898
2899static int decode_init(AVCodecContext *avctx)
2900{
2901// SnowContext *s = avctx->priv_data;
2902
2903 common_init(avctx);
2904
2905 return 0;
2906}
2907
2908static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){
2909 SnowContext *s = avctx->priv_data;
28869757 2910 RangeCoder * const c= &s->c;
791e7b83
MN
2911 int bytes_read;
2912 AVFrame *picture = data;
39c61bbb 2913 int level, orientation, plane_index;
791e7b83
MN
2914
2915
2916 /* no supplementary picture */
2917 if (buf_size == 0)
2918 return 0;
2919
28869757
MN
2920 ff_init_range_decoder(c, buf, buf_size);
2921 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
791e7b83 2922
791e7b83
MN
2923 s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
2924 decode_header(s);
155ec6ed 2925 if(!s->block) alloc_blocks(s);
791e7b83
MN
2926
2927 frame_start(s);
2928 //keyframe flag dupliaction mess FIXME
2929 if(avctx->debug&FF_DEBUG_PICT_INFO)
2930 av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
2931
155ec6ed 2932 decode_blocks(s);
791e7b83
MN
2933
2934 for(plane_index=0; plane_index<3; plane_index++){
2935 Plane *p= &s->plane[plane_index];
2936 int w= p->width;
2937 int h= p->height;
2938 int x, y;
2939
2940if(s->avctx->debug&2048){
2941 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
2942 predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
2943
2944 for(y=0; y<h; y++){
2945 for(x=0; x<w; x++){
715a97f0 2946 int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
791e7b83
MN
2947 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
2948 }
2949 }
2950}
2951 for(level=0; level<s->spatial_decomposition_count; level++){
2952 for(orientation=level ? 1 : 0; orientation<4; orientation++){
2953 SubBand *b= &p->band[level][orientation];
2954
2955 decode_subband(s, b, b->buf, b->parent ? b->parent->buf : NULL, b->stride, orientation);
ea7d9cd4 2956 if(orientation==0){
791e7b83 2957 correlate(s, b, b->buf, b->stride, 1, 0);
ea7d9cd4
MN
2958 dequantize(s, b, b->buf, b->stride);
2959 assert(b->buf == s->spatial_dwt_buffer);
2960 }
791e7b83
MN
2961 }
2962 }
2963
46c281e8 2964 ff_spatial_idwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
93fbdb5a
MN
2965 if(s->qlog == LOSSLESS_QLOG){
2966 for(y=0; y<h; y++){
2967 for(x=0; x<w; x++){
034aff03 2968 s->spatial_dwt_buffer[y*w + x]<<=FRAC_BITS;
93fbdb5a
MN
2969 }
2970 }
2971 }
715a97f0 2972{START_TIMER
791e7b83 2973 predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
715a97f0 2974STOP_TIMER("predict_plane conv2")}
791e7b83
MN
2975 }
2976
2977 emms_c();
2978
2979 if(s->last_picture.data[0])
2980 avctx->release_buffer(avctx, &s->last_picture);
2981
2982if(!(s->avctx->debug&2048))
2983 *picture= s->current_picture;
2984else
2985 *picture= s->mconly_picture;
2986
2987 *data_size = sizeof(AVFrame);
2988
28869757
MN
2989 bytes_read= c->bytestream - c->bytestream_start;
2990 if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
791e7b83
MN
2991
2992 return bytes_read;
2993}
2994
2995static int decode_end(AVCodecContext *avctx)
2996{
2997 SnowContext *s = avctx->priv_data;
2998
2999 common_end(s);
3000
3001 return 0;
3002}
3003
3004AVCodec snow_decoder = {
3005 "snow",
3006 CODEC_TYPE_VIDEO,
3007 CODEC_ID_SNOW,
3008 sizeof(SnowContext),
3009 decode_init,
3010 NULL,
3011 decode_end,
3012 decode_frame,
3013 0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
3014 NULL
3015};
3016
8ff53f5b 3017#ifdef CONFIG_ENCODERS
791e7b83
MN
3018AVCodec snow_encoder = {
3019 "snow",
3020 CODEC_TYPE_VIDEO,
3021 CODEC_ID_SNOW,
3022 sizeof(SnowContext),
3023 encode_init,
3024 encode_frame,
3025 encode_end,
3026};
8ff53f5b 3027#endif
791e7b83
MN
3028
3029
3030#if 0
3031#undef malloc
3032#undef free
3033#undef printf
3034
3035int main(){
3036 int width=256;
3037 int height=256;
3038 int buffer[2][width*height];
3039 SnowContext s;
3040 int i;
3041 s.spatial_decomposition_count=6;
3042 s.spatial_decomposition_type=1;
3043
3044 printf("testing 5/3 DWT\n");
3045 for(i=0; i<width*height; i++)
3046 buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
3047
46c281e8
MN
3048 ff_spatial_dwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3049 ff_spatial_idwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
791e7b83
MN
3050
3051 for(i=0; i<width*height; i++)
3052 if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
3053
3054 printf("testing 9/7 DWT\n");
3055 s.spatial_decomposition_type=0;
3056 for(i=0; i<width*height; i++)
3057 buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
3058
46c281e8
MN
3059 ff_spatial_dwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3060 ff_spatial_idwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
791e7b83
MN
3061
3062 for(i=0; i<width*height; i++)
3063 if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
3064
3065 printf("testing AC coder\n");
3066 memset(s.header_state, 0, sizeof(s.header_state));
28869757 3067 ff_init_range_encoder(&s.c, buffer[0], 256*256);
791e7b83
MN
3068 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
3069
3070 for(i=-256; i<256; i++){
3071START_TIMER
3072 put_symbol(&s.c, s.header_state, i*i*i/3*ABS(i), 1);
3073STOP_TIMER("put_symbol")
3074 }
28869757 3075 ff_rac_terminate(&s.c);
791e7b83
MN
3076
3077 memset(s.header_state, 0, sizeof(s.header_state));
28869757 3078 ff_init_range_decoder(&s.c, buffer[0], 256*256);
791e7b83
MN
3079 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
3080
3081 for(i=-256; i<256; i++){
3082 int j;
3083START_TIMER
3084 j= get_symbol(&s.c, s.header_state, 1);
3085STOP_TIMER("get_symbol")
3086 if(j!=i*i*i/3*ABS(i)) printf("fsck: %d != %d\n", i, j);
3087 }
3088{
3089int level, orientation, x, y;
3090int64_t errors[8][4];
3091int64_t g=0;
3092
3093 memset(errors, 0, sizeof(errors));
3094 s.spatial_decomposition_count=3;
3095 s.spatial_decomposition_type=0;
3096 for(level=0; level<s.spatial_decomposition_count; level++){
3097 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3098 int w= width >> (s.spatial_decomposition_count-level);
3099 int h= height >> (s.spatial_decomposition_count-level);
3100 int stride= width << (s.spatial_decomposition_count-level);
3101 DWTELEM *buf= buffer[0];
3102 int64_t error=0;
3103
3104 if(orientation&1) buf+=w;
3105 if(orientation>1) buf+=stride>>1;
3106
3107 memset(buffer[0], 0, sizeof(int)*width*height);
3108 buf[w/2 + h/2*stride]= 256*256;
46c281e8 3109 ff_spatial_idwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
791e7b83
MN
3110 for(y=0; y<height; y++){
3111 for(x=0; x<width; x++){
3112 int64_t d= buffer[0][x + y*width];
3113 error += d*d;
3114 if(ABS(width/2-x)<9 && ABS(height/2-y)<9 && level==2) printf("%8lld ", d);
3115 }
3116 if(ABS(height/2-y)<9 && level==2) printf("\n");
3117 }
3118 error= (int)(sqrt(error)+0.5);
3119 errors[level][orientation]= error;
3120 if(g) g=ff_gcd(g, error);
3121 else g= error;
3122 }
3123 }
3124 printf("static int const visual_weight[][4]={\n");
3125 for(level=0; level<s.spatial_decomposition_count; level++){
3126 printf(" {");
3127 for(orientation=0; orientation<4; orientation++){
3128 printf("%8lld,", errors[level][orientation]/g);
3129 }
3130 printf("},\n");
3131 }
3132 printf("};\n");
3133 {
3134 int level=2;
3135 int orientation=3;
3136 int w= width >> (s.spatial_decomposition_count-level);
3137 int h= height >> (s.spatial_decomposition_count-level);
3138 int stride= width << (s.spatial_decomposition_count-level);
3139 DWTELEM *buf= buffer[0];
3140 int64_t error=0;
3141
3142 buf+=w;
3143 buf+=stride>>1;
3144
3145 memset(buffer[0], 0, sizeof(int)*width*height);
3146#if 1
3147 for(y=0; y<height; y++){
3148 for(x=0; x<width; x++){
3149 int tab[4]={0,2,3,1};
3150 buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
3151 }
3152 }
46c281e8 3153 ff_spatial_dwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
791e7b83
MN
3154#else
3155 for(y=0; y<h; y++){
3156 for(x=0; x<w; x++){
3157 buf[x + y*stride ]=169;
3158 buf[x + y*stride-w]=64;
3159 }
3160 }
46c281e8 3161 ff_spatial_idwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
791e7b83
MN
3162#endif
3163 for(y=0; y<height; y++){
3164 for(x=0; x<width; x++){
3165 int64_t d= buffer[0][x + y*width];
3166 error += d*d;
3167 if(ABS(width/2-x)<9 && ABS(height/2-y)<9) printf("%8lld ", d);
3168 }
3169 if(ABS(height/2-y)<9) printf("\n");
3170 }
3171 }
3172
3173}
3174 return 0;
3175}
3176#endif
3177