snow regression test
[libav.git] / libavcodec / snow.c
CommitLineData
791e7b83
MN
1/*
2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18
19#include "avcodec.h"
20#include "common.h"
21#include "dsputil.h"
28869757
MN
22
23#include "rangecoder.h"
24#define MID_STATE 128
791e7b83
MN
25
26#include "mpegvideo.h"
27
28#undef NDEBUG
29#include <assert.h>
30
/* Codec-wide compile-time constants. */
#define MAX_DECOMPOSITIONS 8    ///< first dimension of Plane.band[][4]
#define MAX_PLANES 4            ///< number of entries in SnowContext.plane[]
#define DWTELEM int             ///< element type of the wavelet coefficient buffers
#define QROOT 8                 ///< NOTE(review): presumably quantizer steps per octave (qexp[] has 8 entries) - confirm
#define LOSSLESS_QLOG (-128)    ///< parenthesized so the sign cannot bind to a neighbouring operator
#define FRAC_BITS 8             ///< used by QEXPSHIFT
791e7b83
MN
37
/**
 * 256-entry lookup table mapping a byte-sized value to {-1, 0, 1}.
 * The table is odd-symmetric modulo 256 (quant3[256-i] == -quant3[i]),
 * so the upper half holds the results for negative inputs.
 * Inputs -1, 0 and 1 map to 0.
 * NOTE(review): presumably indexed with (difference & 0xFF) - confirm against the callers.
 */
static const int8_t quant3[256]={
 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
};
/**
 * 256-entry sign table: 0 maps to 0, indices 1..127 to 1 and 128..255 to -1.
 * Unlike quant3, inputs +1 and -1 (index 255) are not folded into 0.
 * NOTE(review): presumably indexed with (difference & 0xFF) - confirm against the callers.
 */
static const int8_t quant3b[256]={
 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
};
/**
 * 256-entry lookup table mapping a byte-sized value to {-2..2}.
 * Odd-symmetric modulo 256 (quant5[256-i] == -quant5[i]):
 * magnitude 0 -> 0, 1..3 -> 1, 4 and above -> 2.
 * NOTE(review): presumably indexed with (difference & 0xFF) - confirm against the callers.
 */
static const int8_t quant5[256]={
 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
};
/**
 * 256-entry lookup table mapping a byte-sized value to {-3..3}.
 * Odd-symmetric modulo 256 (quant7[256-i] == -quant7[i]):
 * magnitude 0 -> 0, 1..2 -> 1, 3..39 -> 2, 40 and above -> 3.
 * NOTE(review): presumably indexed with (difference & 0xFF) - confirm against the callers.
 */
static const int8_t quant7[256]={
 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
};
/**
 * 256-entry lookup table mapping a byte-sized value to {-4..4}.
 * Odd-symmetric modulo 256 (quant9[256-i] == -quant9[i]):
 * magnitude 0 -> 0, 1..2 -> 1, 3..6 -> 2, 7..20 -> 3, 21 and above -> 4.
 * NOTE(review): presumably indexed with (difference & 0xFF) - confirm against the callers.
 */
static const int8_t quant9[256]={
 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
};
/**
 * 256-entry lookup table mapping a byte-sized value to {-5..5}.
 * Odd-symmetric modulo 256 (quant11[256-i] == -quant11[i]):
 * magnitude 0 -> 0, 1 -> 1, 2..4 -> 2, 5..11 -> 3, 12..34 -> 4, 35 and above -> 5.
 * NOTE(review): presumably indexed with (difference & 0xFF) - confirm against the callers.
 */
static const int8_t quant11[256]={
 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
-4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
};
/**
 * 256-entry lookup table mapping a byte-sized value to {-6..6}.
 * Odd-symmetric modulo 256 (quant13[256-i] == -quant13[i]):
 * magnitude 0 -> 0, 1 -> 1, 2..3 -> 2, 4..7 -> 3, 8..16 -> 4, 17..49 -> 5,
 * 50 and above -> 6.
 * NOTE(review): presumably indexed with (difference & 0xFF) - confirm against the callers.
 */
static const int8_t quant13[256]={
 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
};
164
715a97f0
MN
/*
 * Weight windows for 32x32 and 16x16 blocks, stored row-major.
 * Weights are scaled by OBMC_MAX (64). Three alternative window shapes are
 * kept side by side; the first true preprocessor branch wins, so the
 * "64*linear" set is the one compiled in. The trailing "error" remarks are
 * the original author's figures for each table.
 * NOTE(review): "obmc" presumably stands for overlapped block motion
 * compensation - confirm against the code that consumes obmc_tab[].
 */
#define LOG2_OBMC_MAX 6
#define OBMC_MAX (1<<(LOG2_OBMC_MAX))
#if 0 //64*cubic
static const uint8_t obmc32[1024]={
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
//error:0.000022
};
static const uint8_t obmc16[256]={
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
//error:0.000033
};
#elif 1 // 64*linear
static const uint8_t obmc32[1024]={
 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
 //error:0.000020
};
static const uint8_t obmc16[256]={
 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
//error:0.000015
};
#else //64*cos
static const uint8_t obmc32[1024]={
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
//error:0.000022
};
static const uint8_t obmc16[256]={
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
//error:0.000022
};
#endif
333
155ec6ed
MN
/**
 * 8x8 weight window, linear, scaled by 64 (row-major).
 * It is the outer product of (1,3,5,7,7,5,3,1) with itself, so the four
 * weights that are 4 apart horizontally and vertically always add up to 64.
 */
static const uint8_t obmc8[64]={
 1, 3, 5, 7, 7, 5, 3, 1,
 3, 9,15,21,21,15, 9, 3,
 5,15,25,35,35,25,15, 5,
 7,21,35,49,49,35,21, 7,
 7,21,35,49,49,35,21, 7,
 5,15,25,35,35,25,15, 5,
 3, 9,15,21,21,15, 9, 3,
 1, 3, 5, 7, 7, 5, 3, 1,
//error:0.000000
};
346
/**
 * 4x4 weight window, linear, scaled by 64 (row-major).
 * It is the outer product of (2,6,6,2) with itself, so the four weights
 * that are 2 apart horizontally and vertically always add up to 64.
 */
static const uint8_t obmc4[16]={
 4,12,12, 4,
12,36,36,12,
12,36,36,12,
 4,12,12, 4,
//error:0.000000
};
355
356static const uint8_t *obmc_tab[4]={
357 obmc32, obmc16, obmc8, obmc4
358};
359
/**
 * Per-block side information.
 * NOTE(review): the field meanings below are inferred from their names -
 * confirm against the block coding functions.
 */
typedef struct BlockNode{
    int16_t mx;         ///< presumably the horizontal motion vector component
    int16_t my;         ///< presumably the vertical motion vector component
    uint8_t color[3];   ///< presumably one value per plane
    uint8_t type;       ///< bit flags, see BLOCK_INTRA
//#define TYPE_SPLIT 1
#define BLOCK_INTRA 1
//#define TYPE_NOCOLOR 4
    uint8_t level; //FIXME merge into type?
}BlockNode;
370
#define LOG2_MB_SIZE 4                  ///< log2 of MB_SIZE
#define MB_SIZE (1<<LOG2_MB_SIZE)       ///< 16
373
791e7b83
MN
374typedef struct SubBand{
375 int level;
376 int stride;
377 int width;
378 int height;
379 int qlog; ///< log(qscale)/log[2^(1/6)]
380 DWTELEM *buf;
7b49c309 381 int16_t *x;
0cea8a03 382 DWTELEM *coeff;
791e7b83
MN
383 struct SubBand *parent;
384 uint8_t state[/*7*2*/ 7 + 512][32];
385}SubBand;
386
387typedef struct Plane{
388 int width;
389 int height;
390 SubBand band[MAX_DECOMPOSITIONS][4];
391}Plane;
392
393typedef struct SnowContext{
394// MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
395
396 AVCodecContext *avctx;
28869757 397 RangeCoder c;
791e7b83
MN
398 DSPContext dsp;
399 AVFrame input_picture;
400 AVFrame current_picture;
401 AVFrame last_picture;
402 AVFrame mconly_picture;
403// uint8_t q_context[16];
404 uint8_t header_state[32];
155ec6ed 405 uint8_t block_state[128 + 32*128];
791e7b83 406 int keyframe;
19aa028d 407 int always_reset;
791e7b83
MN
408 int version;
409 int spatial_decomposition_type;
410 int temporal_decomposition_type;
411 int spatial_decomposition_count;
412 int temporal_decomposition_count;
413 DWTELEM *spatial_dwt_buffer;
791e7b83
MN
414 int colorspace_type;
415 int chroma_h_shift;
416 int chroma_v_shift;
417 int spatial_scalability;
418 int qlog;
155ec6ed
MN
419 int lambda;
420 int lambda2;
791e7b83
MN
421 int mv_scale;
422 int qbias;
423#define QBIAS_SHIFT 3
155ec6ed
MN
424 int b_width;
425 int b_height;
426 int block_max_depth;
791e7b83 427 Plane plane[MAX_PLANES];
155ec6ed
MN
428 BlockNode *block;
429
791e7b83
MN
430 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
431}SnowContext;
432
f9e6ebf7
LM
433typedef struct {
434 DWTELEM *b0;
435 DWTELEM *b1;
436 DWTELEM *b2;
437 DWTELEM *b3;
438 int y;
439} dwt_compose_t;
440
2554db9b
MN
#ifdef __sgi
// Avoid a name clash on SGI IRIX
#undef qexp
#endif
#define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
/**
 * Eight scale factors covering one octave: the entries equal
 * 128 * 2^(i/8) rounded to the nearest integer for i = 0..7.
 * The commented rows are the same curve at smaller scales (64, 32, 16, 8).
 */
static const uint8_t qexp[8]={
    128, 140, 152, 166, 181, 197, 215, 235
//   64,  70,  76,  83,  91,  99, 108, 117
//   32,  35,  38,  41,  45,  49,  54,  59
//   16,  17,  19,  21,  23,  25,  27,  29
//    8,   9,  10,  10,  11,  12,  13,  15
};
453
/**
 * Reflect an index back towards the range [0, m].
 * A negative v is mirrored around 0, a v above m is mirrored around m.
 * Only one reflection is applied, so the result is inside [0, m] only
 * when v lies within [-m, 2*m].
 */
static inline int mirror(int v, int m){
    if(v < 0)
        return -v;
    if(v > m)
        return 2*m - v;
    return v;
}
459
28869757 460static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
791e7b83
MN
461 int i;
462
463 if(v){
464 const int a= ABS(v);
465 const int e= av_log2(a);
466#if 1
467 const int el= FFMIN(e, 10);
28869757 468 put_rac(c, state+0, 0);
791e7b83
MN
469
470 for(i=0; i<el; i++){
28869757 471 put_rac(c, state+1+i, 1); //1..10
791e7b83
MN
472 }
473 for(; i<e; i++){
28869757 474 put_rac(c, state+1+9, 1); //1..10
791e7b83 475 }
28869757 476 put_rac(c, state+1+FFMIN(i,9), 0);
791e7b83
MN
477
478 for(i=e-1; i>=el; i--){
28869757 479 put_rac(c, state+22+9, (a>>i)&1); //22..31
791e7b83
MN
480 }
481 for(; i>=0; i--){
28869757 482 put_rac(c, state+22+i, (a>>i)&1); //22..31
791e7b83
MN
483 }
484
485 if(is_signed)
28869757 486 put_rac(c, state+11 + el, v < 0); //11..21
791e7b83
MN
487#else
488
28869757 489 put_rac(c, state+0, 0);
791e7b83
MN
490 if(e<=9){
491 for(i=0; i<e; i++){
28869757 492 put_rac(c, state+1+i, 1); //1..10
791e7b83 493 }
28869757 494 put_rac(c, state+1+i, 0);
791e7b83
MN
495
496 for(i=e-1; i>=0; i--){
28869757 497 put_rac(c, state+22+i, (a>>i)&1); //22..31
791e7b83
MN
498 }
499
500 if(is_signed)
28869757 501 put_rac(c, state+11 + e, v < 0); //11..21
791e7b83
MN
502 }else{
503 for(i=0; i<e; i++){
28869757 504 put_rac(c, state+1+FFMIN(i,9), 1); //1..10
791e7b83 505 }
28869757 506 put_rac(c, state+1+FFMIN(i,9), 0);
791e7b83
MN
507
508 for(i=e-1; i>=0; i--){
28869757 509 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
791e7b83
MN
510 }
511
512 if(is_signed)
28869757 513 put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
791e7b83
MN
514 }
515#endif
516 }else{
28869757 517 put_rac(c, state+0, 1);
791e7b83
MN
518 }
519}
520
28869757
MN
521static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
522 if(get_rac(c, state+0))
791e7b83
MN
523 return 0;
524 else{
7c2425d2
LM
525 int i, e, a;
526 e= 0;
28869757 527 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
7c2425d2 528 e++;
791e7b83 529 }
7c2425d2 530
791e7b83 531 a= 1;
7c2425d2 532 for(i=e-1; i>=0; i--){
28869757 533 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
791e7b83
MN
534 }
535
28869757 536 if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
791e7b83
MN
537 return -a;
538 else
539 return a;
540 }
541}
542
28869757 543static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
4f4e9633 544 int i;
0635cbfc 545 int r= log2>=0 ? 1<<log2 : 1;
4f4e9633
MN
546
547 assert(v>=0);
0635cbfc
MN
548 assert(log2>=-4);
549
550 while(v >= r){
28869757 551 put_rac(c, state+4+log2, 1);
0635cbfc 552 v -= r;
4f4e9633 553 log2++;
0635cbfc 554 if(log2>0) r+=r;
4f4e9633 555 }
28869757 556 put_rac(c, state+4+log2, 0);
4f4e9633
MN
557
558 for(i=log2-1; i>=0; i--){
28869757 559 put_rac(c, state+31-i, (v>>i)&1);
4f4e9633 560 }
4f4e9633
MN
561}
562
28869757 563static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
4f4e9633 564 int i;
0635cbfc 565 int r= log2>=0 ? 1<<log2 : 1;
4f4e9633
MN
566 int v=0;
567
0635cbfc
MN
568 assert(log2>=-4);
569
28869757 570 while(get_rac(c, state+4+log2)){
0635cbfc 571 v+= r;
4f4e9633 572 log2++;
0635cbfc 573 if(log2>0) r+=r;
4f4e9633
MN
574 }
575
576 for(i=log2-1; i>=0; i--){
28869757 577 v+= get_rac(c, state+31-i)<<i;
4f4e9633
MN
578 }
579
580 return v;
581}
582
791e7b83
MN
583static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
584 const int mirror_left= !highpass;
585 const int mirror_right= (width&1) ^ highpass;
586 const int w= (width>>1) - 1 + (highpass & width);
587 int i;
588
589#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
590 if(mirror_left){
591 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
592 dst += dst_step;
593 src += src_step;
594 }
595
596 for(i=0; i<w; i++){
597 dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse);
598 }
599
600 if(mirror_right){
601 dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse);
602 }
603}
604
605static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
606 const int mirror_left= !highpass;
607 const int mirror_right= (width&1) ^ highpass;
608 const int w= (width>>1) - 1 + (highpass & width);
609 int i;
610
611 if(mirror_left){
612 int r= 3*2*ref[0];
613 r += r>>4;
614 r += r>>8;
615 dst[0] = LIFT(src[0], ((r+add)>>shift), inverse);
616 dst += dst_step;
617 src += src_step;
618 }
619
620 for(i=0; i<w; i++){
621 int r= 3*(ref[i*ref_step] + ref[(i+1)*ref_step]);
622 r += r>>4;
623 r += r>>8;
624 dst[i*dst_step] = LIFT(src[i*src_step], ((r+add)>>shift), inverse);
625 }
626
627 if(mirror_right){
628 int r= 3*2*ref[w*ref_step];
629 r += r>>4;
630 r += r>>8;
631 dst[w*dst_step] = LIFT(src[w*src_step], ((r+add)>>shift), inverse);
632 }
633}
634
635
aa25a462 636static void inplace_lift(DWTELEM *dst, int width, int *coeffs, int n, int shift, int start, int inverse){
791e7b83
MN
637 int x, i;
638
639 for(x=start; x<width; x+=2){
640 int64_t sum=0;
641
642 for(i=0; i<n; i++){
643 int x2= x + 2*i - n + 1;
644 if (x2< 0) x2= -x2;
645 else if(x2>=width) x2= 2*width-x2-2;
646 sum += coeffs[i]*(int64_t)dst[x2];
647 }
648 if(inverse) dst[x] -= (sum + (1<<shift)/2)>>shift;
649 else dst[x] += (sum + (1<<shift)/2)>>shift;
650 }
651}
652
aa25a462 653static void inplace_liftV(DWTELEM *dst, int width, int height, int stride, int *coeffs, int n, int shift, int start, int inverse){
791e7b83
MN
654 int x, y, i;
655 for(y=start; y<height; y+=2){
656 for(x=0; x<width; x++){
657 int64_t sum=0;
658
659 for(i=0; i<n; i++){
660 int y2= y + 2*i - n + 1;
661 if (y2< 0) y2= -y2;
662 else if(y2>=height) y2= 2*height-y2-2;
663 sum += coeffs[i]*(int64_t)dst[x + y2*stride];
664 }
665 if(inverse) dst[x + y*stride] -= (sum + (1<<shift)/2)>>shift;
666 else dst[x + y*stride] += (sum + (1<<shift)/2)>>shift;
667 }
668 }
669}
670
/*
 * Filter description for the generic "X" wavelet code.
 *
 * A filter is given as up to four lifting steps; step k has Nk taps
 * COEFFSk with fixed-point scale SHIFTk (Nk == 0 disables the step).
 * LX0/LX1 are the sample phases the even/odd numbered steps update and
 * SCALEX is applied to every sample before decomposition.
 * The first true branch below is the one compiled in ("13/7 CRF").
 * Branches that change LX0, LX1 or SCALEX #undef the default first, so
 * that enabling them does not redefine a macro with a different value.
 */
#define SCALEX 1
#define LX0 0
#define LX1 1

#if 0 // more accurate 9/7
#define N1 2
#define SHIFT1 14
#define COEFFS1 (int[]){-25987,-25987}
#define N2 2
#define SHIFT2 19
#define COEFFS2 (int[]){-27777,-27777}
#define N3 2
#define SHIFT3 15
#define COEFFS3 (int[]){28931,28931}
#define N4 2
#define SHIFT4 15
#define COEFFS4 (int[]){14533,14533}
#elif 1 // 13/7 CRF
#define N1 4
#define SHIFT1 4
#define COEFFS1 (int[]){1,-9,-9,1}
#define N2 4
#define SHIFT2 4
#define COEFFS2 (int[]){-1,5,5,-1}
#define N3 0
#define SHIFT3 1
#define COEFFS3 NULL
#define N4 0
#define SHIFT4 1
#define COEFFS4 NULL
#elif 1 // 3/5
#undef LX0
#undef LX1
#undef SCALEX
#define LX0 1
#define LX1 0
#define SCALEX 0.5
#define N1 2
#define SHIFT1 1
#define COEFFS1 (int[]){1,1}
#define N2 2
#define SHIFT2 2
#define COEFFS2 (int[]){-1,-1}
#define N3 0
#define SHIFT3 0
#define COEFFS3 NULL
#define N4 0
#define SHIFT4 0
#define COEFFS4 NULL
#elif 1 // 11/5
#define N1 0
#define SHIFT1 1
#define COEFFS1 NULL
#define N2 2
#define SHIFT2 2
#define COEFFS2 (int[]){-1,-1}
#define N3 2
#define SHIFT3 0
#define COEFFS3 (int[]){-1,-1}
#define N4 4
#define SHIFT4 7
#define COEFFS4 (int[]){-5,29,29,-5}
#undef SCALEX
#define SCALEX 4
#elif 1 // 9/7 CDF
#define N1 2
#define SHIFT1 7
#define COEFFS1 (int[]){-203,-203}
#define N2 2
#define SHIFT2 12
#define COEFFS2 (int[]){-217,-217}
#define N3 2
#define SHIFT3 7
#define COEFFS3 (int[]){113,113}
#define N4 2
#define SHIFT4 9
#define COEFFS4 (int[]){227,227}
#define SCALEX 1
#elif 1 // 7/5 CDF
#define N1 0
#define SHIFT1 1
#define COEFFS1 NULL
#define N2 2
#define SHIFT2 2
#define COEFFS2 (int[]){-1,-1}
#define N3 2
#define SHIFT3 0
#define COEFFS3 (int[]){-1,-1}
#define N4 2
#define SHIFT4 4
#define COEFFS4 (int[]){3,3}
#elif 1 // 9/7 MN
#define N1 4
#define SHIFT1 4
#define COEFFS1 (int[]){1,-9,-9,1}
#define N2 2
#define SHIFT2 2
#define COEFFS2 (int[]){1,1}
#define N3 0
#define SHIFT3 1
#define COEFFS3 NULL
#define N4 0
#define SHIFT4 1
#define COEFFS4 NULL
#else // 13/7 CRF
#define N1 4
#define SHIFT1 4
#define COEFFS1 (int[]){1,-9,-9,1}
#define N2 4
#define SHIFT2 4
#define COEFFS2 (int[]){-1,5,5,-1}
#define N3 0
#define SHIFT3 1
#define COEFFS3 NULL
#define N4 0
#define SHIFT4 1
#define COEFFS4 NULL
#endif
aa25a462
RFI
785static void horizontal_decomposeX(DWTELEM *b, int width){
786 DWTELEM temp[width];
791e7b83
MN
787 const int width2= width>>1;
788 const int w2= (width+1)>>1;
789 int A1,A2,A3,A4, x;
790
791 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 0);
792 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 0);
793 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 0);
794 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 0);
795
796 for(x=0; x<width2; x++){
797 temp[x ]= b[2*x ];
798 temp[x+w2]= b[2*x + 1];
799 }
800 if(width&1)
801 temp[x ]= b[2*x ];
802 memcpy(b, temp, width*sizeof(int));
803}
804
aa25a462
RFI
805static void horizontal_composeX(DWTELEM *b, int width){
806 DWTELEM temp[width];
791e7b83
MN
807 const int width2= width>>1;
808 int A1,A2,A3,A4, x;
809 const int w2= (width+1)>>1;
810
811 memcpy(temp, b, width*sizeof(int));
812 for(x=0; x<width2; x++){
813 b[2*x ]= temp[x ];
814 b[2*x + 1]= temp[x+w2];
815 }
816 if(width&1)
817 b[2*x ]= temp[x ];
818
819 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 1);
820 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 1);
821 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 1);
822 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 1);
823}
824
aa25a462 825static void spatial_decomposeX(DWTELEM *buffer, int width, int height, int stride){
791e7b83
MN
826 int x, y;
827
828 for(y=0; y<height; y++){
829 for(x=0; x<width; x++){
830 buffer[y*stride + x] *= SCALEX;
831 }
832 }
833
834 for(y=0; y<height; y++){
835 horizontal_decomposeX(buffer + y*stride, width);
836 }
837
838 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 0);
839 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0);
840 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0);
841 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0);
842}
843
aa25a462 844static void spatial_composeX(DWTELEM *buffer, int width, int height, int stride){
791e7b83
MN
845 int x, y;
846
847 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1);
848 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1);
849 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1);
850 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 1);
851
852 for(y=0; y<height; y++){
853 horizontal_composeX(buffer + y*stride, width);
854 }
855
856 for(y=0; y<height; y++){
857 for(x=0; x<width; x++){
858 buffer[y*stride + x] /= SCALEX;
859 }
860 }
861}
862
aa25a462
RFI
863static void horizontal_decompose53i(DWTELEM *b, int width){
864 DWTELEM temp[width];
791e7b83
MN
865 const int width2= width>>1;
866 int A1,A2,A3,A4, x;
867 const int w2= (width+1)>>1;
868
869 for(x=0; x<width2; x++){
870 temp[x ]= b[2*x ];
871 temp[x+w2]= b[2*x + 1];
872 }
873 if(width&1)
874 temp[x ]= b[2*x ];
875#if 0
876 A2= temp[1 ];
877 A4= temp[0 ];
878 A1= temp[0+width2];
879 A1 -= (A2 + A4)>>1;
880 A4 += (A1 + 1)>>1;
881 b[0+width2] = A1;
882 b[0 ] = A4;
883 for(x=1; x+1<width2; x+=2){
884 A3= temp[x+width2];
885 A4= temp[x+1 ];
886 A3 -= (A2 + A4)>>1;
887 A2 += (A1 + A3 + 2)>>2;
888 b[x+width2] = A3;
889 b[x ] = A2;
890
891 A1= temp[x+1+width2];
892 A2= temp[x+2 ];
893 A1 -= (A2 + A4)>>1;
894 A4 += (A1 + A3 + 2)>>2;
895 b[x+1+width2] = A1;
896 b[x+1 ] = A4;
897 }
898 A3= temp[width-1];
899 A3 -= A2;
900 A2 += (A1 + A3 + 2)>>2;
901 b[width -1] = A3;
902 b[width2-1] = A2;
903#else
904 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
905 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0);
906#endif
907}
908
aa25a462 909static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83
MN
910 int i;
911
912 for(i=0; i<width; i++){
913 b1[i] -= (b0[i] + b2[i])>>1;
914 }
915}
916
aa25a462 917static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83
MN
918 int i;
919
920 for(i=0; i<width; i++){
921 b1[i] += (b0[i] + b2[i] + 2)>>2;
922 }
923}
924
aa25a462 925static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
39c61bbb 926 int y;
791e7b83
MN
927 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
928 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride;
929
930 for(y=-2; y<height; y+=2){
931 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
932 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
933
934{START_TIMER
935 if(b1 <= b3) horizontal_decompose53i(b2, width);
936 if(y+2 < height) horizontal_decompose53i(b3, width);
937STOP_TIMER("horizontal_decompose53i")}
938
939{START_TIMER
940 if(b1 <= b3) vertical_decompose53iH0(b1, b2, b3, width);
941 if(b0 <= b2) vertical_decompose53iL0(b0, b1, b2, width);
942STOP_TIMER("vertical_decompose53i*")}
943
944 b0=b2;
945 b1=b3;
946 }
947}
948
/*
 * Lifting constants for the 9/7 integer transform.
 *
 * Each step X in {A,B,C,D} is used by the vertical helpers as
 *     (W_XM*(b0[i] + b2[i]) + W_XO) >> W_XS
 * i.e. W_XM is the multiplier, W_XO the offset added before the shift and
 * W_XS the shift amount.
 *
 * lift5 starts out as an alias of lift. The first (active) set removes that
 * alias again, so the "#ifdef lift5" tests in vertical_decompose97iH1() and
 * vertical_compose97iH1() select their shift-and-add path, which does not use
 * W_CM.
 * NOTE(review): with the alias removed, the lift5() calls in the horizontal
 * 9/7 functions must resolve to a separate lift5 defined elsewhere - confirm.
 */
#define lift5 lift
#if 1
#define W_AM 3
#define W_AO 0
#define W_AS 1

#define W_BM 1
#define W_BO 8
#define W_BS 4

#undef lift5
#define W_CM 9999
#define W_CO 2
#define W_CS 2

#define W_DM 15
#define W_DO 16
#define W_DS 5
#elif 0
/* alternative constant set, disabled */
#define W_AM 55
#define W_AO 16
#define W_AS 5

#define W_BM 3
#define W_BO 32
#define W_BS 6

#define W_CM 127
#define W_CO 64
#define W_CS 7

#define W_DM 7
#define W_DO 8
#define W_DS 4
#elif 0
/* alternative constant set, disabled */
#define W_AM 97
#define W_AO 32
#define W_AS 6

#define W_BM 63
#define W_BO 512
#define W_BS 10

#define W_CM 13
#define W_CO 8
#define W_CS 4

#define W_DM 15
#define W_DO 16
#define W_DS 5

#else

/* alternative constant set, unreachable while the first branch is "#if 1" */
#define W_AM 203
#define W_AO 64
#define W_AS 7

#define W_BM 217
#define W_BO 2048
#define W_BS 12

#define W_CM 113
#define W_CO 64
#define W_CS 7

#define W_DM 227
#define W_DO 128
#define W_DS 9
#endif
aa25a462
RFI
1018static void horizontal_decompose97i(DWTELEM *b, int width){
1019 DWTELEM temp[width];
791e7b83
MN
1020 const int w2= (width+1)>>1;
1021
1022 lift (temp+w2, b +1, b , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0);
1023 lift (temp , b , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0);
1024 lift5(b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
1025 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
1026}
1027
1028
aa25a462 1029static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83
MN
1030 int i;
1031
1032 for(i=0; i<width; i++){
1033 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1034 }
1035}
1036
aa25a462 1037static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83
MN
1038 int i;
1039
1040 for(i=0; i<width; i++){
1041#ifdef lift5
1042 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1043#else
1044 int r= 3*(b0[i] + b2[i]);
1045 r+= r>>4;
1046 r+= r>>8;
1047 b1[i] += (r+W_CO)>>W_CS;
1048#endif
1049 }
1050}
1051
aa25a462 1052static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83
MN
1053 int i;
1054
1055 for(i=0; i<width; i++){
1056 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1057 }
1058}
1059
aa25a462 1060static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83
MN
1061 int i;
1062
1063 for(i=0; i<width; i++){
1064 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1065 }
1066}
1067
aa25a462 1068static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
39c61bbb 1069 int y;
791e7b83
MN
1070 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
1071 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride;
1072 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
1073 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
1074
1075 for(y=-4; y<height; y+=2){
1076 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1077 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1078
1079{START_TIMER
1080 if(b3 <= b5) horizontal_decompose97i(b4, width);
1081 if(y+4 < height) horizontal_decompose97i(b5, width);
1082if(width>400){
1083STOP_TIMER("horizontal_decompose97i")
1084}}
1085
1086{START_TIMER
1087 if(b3 <= b5) vertical_decompose97iH0(b3, b4, b5, width);
1088 if(b2 <= b4) vertical_decompose97iL0(b2, b3, b4, width);
1089 if(b1 <= b3) vertical_decompose97iH1(b1, b2, b3, width);
1090 if(b0 <= b2) vertical_decompose97iL1(b0, b1, b2, width);
1091
1092if(width>400){
1093STOP_TIMER("vertical_decompose97i")
1094}}
1095
1096 b0=b2;
1097 b1=b3;
1098 b2=b4;
1099 b3=b5;
1100 }
1101}
1102
aa25a462 1103void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
791e7b83
MN
1104 int level;
1105
46c281e8
MN
1106 for(level=0; level<decomposition_count; level++){
1107 switch(type){
791e7b83
MN
1108 case 0: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1109 case 1: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1110 case 2: spatial_decomposeX (buffer, width>>level, height>>level, stride<<level); break;
1111 }
1112 }
1113}
1114
aa25a462
RFI
1115static void horizontal_compose53i(DWTELEM *b, int width){
1116 DWTELEM temp[width];
791e7b83
MN
1117 const int width2= width>>1;
1118 const int w2= (width+1)>>1;
1119 int A1,A2,A3,A4, x;
1120
1121#if 0
1122 A2= temp[1 ];
1123 A4= temp[0 ];
1124 A1= temp[0+width2];
1125 A1 -= (A2 + A4)>>1;
1126 A4 += (A1 + 1)>>1;
1127 b[0+width2] = A1;
1128 b[0 ] = A4;
1129 for(x=1; x+1<width2; x+=2){
1130 A3= temp[x+width2];
1131 A4= temp[x+1 ];
1132 A3 -= (A2 + A4)>>1;
1133 A2 += (A1 + A3 + 2)>>2;
1134 b[x+width2] = A3;
1135 b[x ] = A2;
1136
1137 A1= temp[x+1+width2];
1138 A2= temp[x+2 ];
1139 A1 -= (A2 + A4)>>1;
1140 A4 += (A1 + A3 + 2)>>2;
1141 b[x+1+width2] = A1;
1142 b[x+1 ] = A4;
1143 }
1144 A3= temp[width-1];
1145 A3 -= A2;
1146 A2 += (A1 + A3 + 2)>>2;
1147 b[width -1] = A3;
1148 b[width2-1] = A2;
1149#else
1150 lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1);
1151 lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1152#endif
1153 for(x=0; x<width2; x++){
1154 b[2*x ]= temp[x ];
1155 b[2*x + 1]= temp[x+w2];
1156 }
1157 if(width&1)
1158 b[2*x ]= temp[x ];
1159}
1160
aa25a462 1161static void vertical_compose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83
MN
1162 int i;
1163
1164 for(i=0; i<width; i++){
1165 b1[i] += (b0[i] + b2[i])>>1;
1166 }
1167}
1168
aa25a462 1169static void vertical_compose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83
MN
1170 int i;
1171
1172 for(i=0; i<width; i++){
1173 b1[i] -= (b0[i] + b2[i] + 2)>>2;
1174 }
1175}
1176
f9e6ebf7
LM
1177static void spatial_compose53i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1178 cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1179 cs->b1 = buffer + mirror(-1 , height-1)*stride;
1180 cs->y = -1;
1181}
1182
1183static void spatial_compose53i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1184 int y= cs->y;
1185 DWTELEM *b0= cs->b0;
1186 DWTELEM *b1= cs->b1;
1187 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1188 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
791e7b83
MN
1189
1190{START_TIMER
1191 if(b1 <= b3) vertical_compose53iL0(b1, b2, b3, width);
1192 if(b0 <= b2) vertical_compose53iH0(b0, b1, b2, width);
1193STOP_TIMER("vertical_compose53i*")}
1194
1195{START_TIMER
1196 if(y-1 >= 0) horizontal_compose53i(b0, width);
1197 if(b0 <= b2) horizontal_compose53i(b1, width);
1198STOP_TIMER("horizontal_compose53i")}
1199
f9e6ebf7
LM
1200 cs->b0 = b2;
1201 cs->b1 = b3;
1202 cs->y += 2;
1203}
1204
1205static void spatial_compose53i(DWTELEM *buffer, int width, int height, int stride){
1206 dwt_compose_t cs;
1207 spatial_compose53i_init(&cs, buffer, height, stride);
1208 while(cs.y <= height)
1209 spatial_compose53i_dy(&cs, buffer, width, height, stride);
791e7b83
MN
1210}
1211
1212
aa25a462
RFI
1213static void horizontal_compose97i(DWTELEM *b, int width){
1214 DWTELEM temp[width];
791e7b83
MN
1215 const int w2= (width+1)>>1;
1216
1217 lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
1218 lift5(temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
1219 lift (b , temp , temp+w2, 2, 1, 1, width, -W_BM, W_BO, W_BS, 0, 1);
1220 lift (b+1 , temp+w2, b , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1);
1221}
1222
aa25a462 1223static void vertical_compose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83
MN
1224 int i;
1225
1226 for(i=0; i<width; i++){
1227 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1228 }
1229}
1230
aa25a462 1231static void vertical_compose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83
MN
1232 int i;
1233
1234 for(i=0; i<width; i++){
1235#ifdef lift5
1236 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1237#else
1238 int r= 3*(b0[i] + b2[i]);
1239 r+= r>>4;
1240 r+= r>>8;
1241 b1[i] -= (r+W_CO)>>W_CS;
1242#endif
1243 }
1244}
1245
aa25a462 1246static void vertical_compose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83
MN
1247 int i;
1248
1249 for(i=0; i<width; i++){
1250 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1251 }
1252}
1253
aa25a462 1254static void vertical_compose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
791e7b83
MN
1255 int i;
1256
1257 for(i=0; i<width; i++){
1258 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1259 }
1260}
1261
f9e6ebf7
LM
1262static void spatial_compose97i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1263 cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1264 cs->b1 = buffer + mirror(-3 , height-1)*stride;
1265 cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1266 cs->b3 = buffer + mirror(-3+2, height-1)*stride;
1267 cs->y = -3;
1268}
791e7b83 1269
f9e6ebf7
LM
1270static void spatial_compose97i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1271 int y = cs->y;
1272 DWTELEM *b0= cs->b0;
1273 DWTELEM *b1= cs->b1;
1274 DWTELEM *b2= cs->b2;
1275 DWTELEM *b3= cs->b3;
1276 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1277 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
791e7b83
MN
1278
1279 if(stride == width && y+4 < height && 0){
1280 int x;
1281 for(x=0; x<width/2; x++)
1282 b5[x] += 64*2;
1283 for(; x<width; x++)
1284 b5[x] += 169*2;
1285 }
1286
1287{START_TIMER
1288 if(b3 <= b5) vertical_compose97iL1(b3, b4, b5, width);
1289 if(b2 <= b4) vertical_compose97iH1(b2, b3, b4, width);
1290 if(b1 <= b3) vertical_compose97iL0(b1, b2, b3, width);
1291 if(b0 <= b2) vertical_compose97iH0(b0, b1, b2, width);
1292if(width>400){
1293STOP_TIMER("vertical_compose97i")}}
1294
1295{START_TIMER
1296 if(y-1>= 0) horizontal_compose97i(b0, width);
1297 if(b0 <= b2) horizontal_compose97i(b1, width);
1298if(width>400 && b0 <= b2){
1299STOP_TIMER("horizontal_compose97i")}}
f9e6ebf7
LM
1300
1301 cs->b0=b2;
1302 cs->b1=b3;
1303 cs->b2=b4;
1304 cs->b3=b5;
1305 cs->y += 2;
1306}
1307
1308static void spatial_compose97i(DWTELEM *buffer, int width, int height, int stride){
1309 dwt_compose_t cs;
1310 spatial_compose97i_init(&cs, buffer, height, stride);
1311 while(cs.y <= height)
1312 spatial_compose97i_dy(&cs, buffer, width, height, stride);
1313}
1314
1315void ff_spatial_idwt_init(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1316 int level;
1317 for(level=decomposition_count-1; level>=0; level--){
1318 switch(type){
1319 case 0: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1320 case 1: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
1321 /* not slicified yet */
1322 case 2: spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;
1323 }
791e7b83
MN
1324 }
1325}
1326
f9e6ebf7
LM
1327void ff_spatial_idwt_slice(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
1328 const int support = type==1 ? 3 : 5;
791e7b83 1329 int level;
f9e6ebf7 1330 if(type==2) return;
791e7b83 1331
46c281e8 1332 for(level=decomposition_count-1; level>=0; level--){
f9e6ebf7
LM
1333 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1334 switch(type){
1335 case 0: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1336 break;
1337 case 1: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1338 break;
1339 case 2: break;
1340 }
791e7b83
MN
1341 }
1342 }
1343}
1344
f9e6ebf7
LM
1345void ff_spatial_idwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1346 if(type==2){
1347 int level;
1348 for(level=decomposition_count-1; level>=0; level--)
1349 spatial_composeX (buffer, width>>level, height>>level, stride<<level);
1350 }else{
1351 dwt_compose_t cs[MAX_DECOMPOSITIONS];
1352 int y;
1353 ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1354 for(y=0; y<height; y+=4)
1355 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
1356 }
1357}
1358
0ecca7a4 1359static int encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
4f4e9633
MN
1360 const int w= b->width;
1361 const int h= b->height;
1362 int x, y;
1363
791e7b83 1364 if(1){
791e7b83 1365 int run=0;
a8d73e56 1366 int runs[w*h];
791e7b83
MN
1367 int run_index=0;
1368
791e7b83
MN
1369 for(y=0; y<h; y++){
1370 for(x=0; x<w; x++){
78486403 1371 int v, p=0;
6b2f6646 1372 int /*ll=0, */l=0, lt=0, t=0, rt=0;
a8d73e56 1373 v= src[x + y*stride];
791e7b83
MN
1374
1375 if(y){
a8d73e56 1376 t= src[x + (y-1)*stride];
791e7b83 1377 if(x){
a8d73e56 1378 lt= src[x - 1 + (y-1)*stride];
791e7b83
MN
1379 }
1380 if(x + 1 < w){
a8d73e56 1381 rt= src[x + 1 + (y-1)*stride];
791e7b83
MN
1382 }
1383 }
1384 if(x){
a8d73e56 1385 l= src[x - 1 + y*stride];
6b2f6646
MN
1386 /*if(x > 1){
1387 if(orientation==1) ll= src[y + (x-2)*stride];
1388 else ll= src[x - 2 + y*stride];
791e7b83
MN
1389 }*/
1390 }
78486403 1391 if(parent){
a8d73e56
MN
1392 int px= x>>1;
1393 int py= y>>1;
78486403
MN
1394 if(px<b->parent->width && py<b->parent->height)
1395 p= parent[px + py*2*stride];
1396 }
1397 if(!(/*ll|*/l|lt|t|rt|p)){
791e7b83
MN
1398 if(v){
1399 runs[run_index++]= run;
1400 run=0;
1401 }else{
1402 run++;
1403 }
1404 }
1405 }
1406 }
1407 runs[run_index++]= run;
1408 run_index=0;
1409 run= runs[run_index++];
1410
4f4e9633 1411 put_symbol2(&s->c, b->state[1], run, 3);
791e7b83
MN
1412
1413 for(y=0; y<h; y++){
d06c75a8 1414 if(s->c.bytestream_end - s->c.bytestream < w*40){
0ecca7a4
MN
1415 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1416 return -1;
1417 }
791e7b83 1418 for(x=0; x<w; x++){
78486403 1419 int v, p=0;
6b2f6646 1420 int /*ll=0, */l=0, lt=0, t=0, rt=0;
a8d73e56 1421 v= src[x + y*stride];
791e7b83
MN
1422
1423 if(y){
a8d73e56 1424 t= src[x + (y-1)*stride];
791e7b83 1425 if(x){
a8d73e56 1426 lt= src[x - 1 + (y-1)*stride];
791e7b83
MN
1427 }
1428 if(x + 1 < w){
a8d73e56 1429 rt= src[x + 1 + (y-1)*stride];
791e7b83
MN
1430 }
1431 }
1432 if(x){
a8d73e56 1433 l= src[x - 1 + y*stride];
6b2f6646
MN
1434 /*if(x > 1){
1435 if(orientation==1) ll= src[y + (x-2)*stride];
1436 else ll= src[x - 2 + y*stride];
791e7b83
MN
1437 }*/
1438 }
78486403 1439 if(parent){
a8d73e56
MN
1440 int px= x>>1;
1441 int py= y>>1;
78486403
MN
1442 if(px<b->parent->width && py<b->parent->height)
1443 p= parent[px + py*2*stride];
1444 }
1445 if(/*ll|*/l|lt|t|rt|p){
1446 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
6b2f6646 1447
28869757 1448 put_rac(&s->c, &b->state[0][context], !!v);
791e7b83
MN
1449 }else{
1450 if(!run){
1451 run= runs[run_index++];
4f4e9633
MN
1452
1453 put_symbol2(&s->c, b->state[1], run, 3);
791e7b83
MN
1454 assert(v);
1455 }else{
1456 run--;
1457 assert(!v);
1458 }
1459 }
1460 if(v){
78486403 1461 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
6b2f6646 1462
0635cbfc 1463 put_symbol2(&s->c, b->state[context + 2], ABS(v)-1, context-4);
28869757 1464 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3b[l&0xFF] + 3*quant3b[t&0xFF]], v<0);
791e7b83
MN
1465 }
1466 }
1467 }
791e7b83 1468 }
0ecca7a4 1469 return 0;
791e7b83
MN
1470}
1471
0ecca7a4 1472static int encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
4f4e9633
MN
1473// encode_subband_qtree(s, b, src, parent, stride, orientation);
1474// encode_subband_z0run(s, b, src, parent, stride, orientation);
0ecca7a4 1475 return encode_subband_c0run(s, b, src, parent, stride, orientation);
4f4e9633
MN
1476// encode_subband_dzr(s, b, src, parent, stride, orientation);
1477}
1478
a8d73e56 1479static inline void decode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
791e7b83
MN
1480 const int w= b->width;
1481 const int h= b->height;
1482 int x,y;
ea7d9cd4
MN
1483 const int qlog= clip(s->qlog + b->qlog, 0, 128);
1484 int qmul= qexp[qlog&7]<<(qlog>>3);
1485 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1486
fff6d4ea
MN
1487 START_TIMER
1488
ea7d9cd4
MN
1489 if(b->buf == s->spatial_dwt_buffer || s->qlog == LOSSLESS_QLOG){
1490 qadd= 0;
1491 qmul= 1<<QEXPSHIFT;
1492 }
791e7b83 1493
791e7b83 1494 if(1){
791e7b83 1495 int run;
7b49c309
MN
1496 int index=0;
1497 int prev_index=-1;
1498 int prev2_index=0;
1499 int parent_index= 0;
1500 int prev_parent_index= 0;
1501
791e7b83
MN
1502 for(y=0; y<b->height; y++)
1503 memset(&src[y*stride], 0, b->width*sizeof(DWTELEM));
1504
4f4e9633 1505 run= get_symbol2(&s->c, b->state[1], 3);
791e7b83 1506 for(y=0; y<h; y++){
0cea8a03
MN
1507 int v=0;
1508 int lt=0, t=0, rt=0;
1509
ff765159
MN
1510 if(y && b->x[prev_index] == 0){
1511 rt= b->coeff[prev_index];
0cea8a03 1512 }
791e7b83 1513 for(x=0; x<w; x++){
0cea8a03
MN
1514 int p=0;
1515 const int l= v;
1516
1517 lt= t; t= rt;
791e7b83 1518
ff765159 1519 if(y){
ea7d9cd4 1520 if(b->x[prev_index] <= x)
ff765159
MN
1521 prev_index++;
1522 if(b->x[prev_index] == x + 1)
1523 rt= b->coeff[prev_index];
1524 else
1525 rt=0;
1526 }
78486403 1527 if(parent){
7b49c309
MN
1528 if(x>>1 > b->parent->x[parent_index]){
1529 parent_index++;
1530 }
ff765159
MN
1531 if(x>>1 == b->parent->x[parent_index]){
1532 p= b->parent->coeff[parent_index];
1533 }
78486403
MN
1534 }
1535 if(/*ll|*/l|lt|t|rt|p){
1536 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
6b2f6646 1537
28869757 1538 v=get_rac(&s->c, &b->state[0][context]);
791e7b83
MN
1539 }else{
1540 if(!run){
4f4e9633 1541 run= get_symbol2(&s->c, b->state[1], 3);
791e7b83
MN
1542 v=1;
1543 }else{
1544 run--;
1545 v=0;
3c1adccd 1546
7b49c309
MN
1547 if(y && parent){
1548 int max_run;
7b49c309
MN
1549
1550 max_run= FFMIN(run, b->x[prev_index] - x - 2);
1551 max_run= FFMIN(max_run, 2*b->parent->x[parent_index] - x - 1);
1552 x+= max_run;
1553 run-= max_run;
3c1adccd 1554 }
791e7b83
MN
1555 }
1556 }
1557 if(v){
78486403 1558 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
0635cbfc 1559 v= get_symbol2(&s->c, b->state[context + 2], context-4) + 1;
28869757 1560 if(get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3b[l&0xFF] + 3*quant3b[t&0xFF]])){
ea7d9cd4 1561 src[x + y*stride]=-(( v*qmul + qadd)>>(QEXPSHIFT));
791e7b83 1562 v= -v;
ea7d9cd4
MN
1563 }else{
1564 src[x + y*stride]= (( v*qmul + qadd)>>(QEXPSHIFT));
1565 }
ff765159
MN
1566 b->x[index]=x; //FIXME interleave x/coeff
1567 b->coeff[index++]= v;
7b49c309
MN
1568 }
1569 }
1570 b->x[index++]= w+1; //end marker
1571 prev_index= prev2_index;
1572 prev2_index= index;
1573
1574 if(parent){
1575 while(b->parent->x[parent_index] != b->parent->width+1)
1576 parent_index++;
1577 parent_index++;
1578 if(y&1){
1579 prev_parent_index= parent_index;
1580 }else{
1581 parent_index= prev_parent_index;
791e7b83
MN
1582 }
1583 }
1584 }
7b49c309 1585 b->x[index++]= w+1; //end marker
3c1adccd 1586 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
791e7b83
MN
1587 STOP_TIMER("decode_subband")
1588 }
1589
1590 return;
1591 }
791e7b83
MN
1592}
1593
1594static void reset_contexts(SnowContext *s){
1595 int plane_index, level, orientation;
1596
19aa028d 1597 for(plane_index=0; plane_index<3; plane_index++){
791e7b83
MN
1598 for(level=0; level<s->spatial_decomposition_count; level++){
1599 for(orientation=level ? 1:0; orientation<4; orientation++){
28869757 1600 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
791e7b83
MN
1601 }
1602 }
1603 }
28869757
MN
1604 memset(s->header_state, MID_STATE, sizeof(s->header_state));
1605 memset(s->block_state, MID_STATE, sizeof(s->block_state));
155ec6ed
MN
1606}
1607
1608static int alloc_blocks(SnowContext *s){
1609 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1610 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1611
1612 s->b_width = w;
1613 s->b_height= h;
1614
1615 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
1616 return 0;
1617}
1618
28869757
MN
1619static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1620 uint8_t *bytestream= d->bytestream;
1621 uint8_t *bytestream_start= d->bytestream_start;
155ec6ed 1622 *d= *s;
28869757
MN
1623 d->bytestream= bytestream;
1624 d->bytestream_start= bytestream_start;
155ec6ed
MN
1625}
1626
//near copy & paste from dsputil, FIXME
/**
 * Sums all pixels of a w x w square.
 *
 * @param pix       top-left pixel of the square (only read)
 * @param line_size distance in bytes between vertically adjacent pixels
 * @param w         edge length of the square; 0 or less yields 0
 * @return sum of the w*w pixel values
 */
static int pix_sum(const uint8_t * pix, int line_size, int w)
{
    int total = 0;
    int row, col;

    for (row = 0; row < w; row++) {
        const uint8_t *line = pix + row*line_size;

        for (col = 0; col < w; col++)
            total += line[col];
    }
    return total;
}
1642
1643//near copy & paste from dsputil, FIXME
1644static int pix_norm1(uint8_t * pix, int line_size, int w)
1645{
1646 int s, i, j;
1647 uint32_t *sq = squareTbl + 256;
1648
1649 s = 0;
1650 for (i = 0; i < w; i++) {
1651 for (j = 0; j < w; j ++) {
1652 s += sq[pix[0]];
1653 pix ++;
1654 }
1655 pix += line_size - w;
1656 }
1657 return s;
1658}
1659
1660static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int type){
1661 const int w= s->b_width << s->block_max_depth;
1662 const int rem_depth= s->block_max_depth - level;
1663 const int index= (x + y*w) << rem_depth;
1664 const int block_w= 1<<rem_depth;
1665 BlockNode block;
1666 int i,j;
1667
1668 block.color[0]= l;
1669 block.color[1]= cb;
1670 block.color[2]= cr;
1671 block.mx= mx;
1672 block.my= my;
1673 block.type= type;
1674 block.level= level;
1675
1676 for(j=0; j<block_w; j++){
1677 for(i=0; i<block_w; i++){
1678 s->block[index + i + j*w]= block;
1679 }
1680 }
1681}
1682
1683static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
1684 const int offset[3]= {
1685 y*c-> stride + x,
1686 ((y*c->uvstride + x)>>1),
1687 ((y*c->uvstride + x)>>1),
1688 };
1689 int i;
1690 for(i=0; i<3; i++){
1691 c->src[0][i]= src [i];
1692 c->ref[0][i]= ref [i] + offset[i];
1693 }
1694 assert(!ref_index);
1695}
1696
//FIXME copy&paste
/* Named rows of the motion vector predictor table P[10][2] used by
 * encode_q_branch(); each row holds an (x, y) pair. */
#define P_LEFT P[1]
#define P_TOP P[2]
#define P_TOPRIGHT P[3]
#define P_MEDIAN P[4]
#define P_MV1 P[9]
#define FLAG_QPEL 1 //must be 1
1704
1705static int encode_q_branch(SnowContext *s, int level, int x, int y){
1706 uint8_t p_buffer[1024];
1707 uint8_t i_buffer[1024];
1708 uint8_t p_state[sizeof(s->block_state)];
1709 uint8_t i_state[sizeof(s->block_state)];
28869757
MN
1710 RangeCoder pc, ic;
1711 uint8_t *pbbak= s->c.bytestream;
1712 uint8_t *pbbak_start= s->c.bytestream_start;
155ec6ed
MN
1713 int score, score2, iscore, i_len, p_len, block_s, sum;
1714 const int w= s->b_width << s->block_max_depth;
1715 const int h= s->b_height << s->block_max_depth;
1716 const int rem_depth= s->block_max_depth - level;
1717 const int index= (x + y*w) << rem_depth;
1718 const int block_w= 1<<(LOG2_MB_SIZE - level);
1719 static BlockNode null_block= { //FIXME add border maybe
1720 .color= {128,128,128},
1721 .mx= 0,
1722 .my= 0,
1723 .type= 0,
1724 .level= 0,
1725 };
1726 int trx= (x+1)<<rem_depth;
1727 int try= (y+1)<<rem_depth;
1728 BlockNode *left = x ? &s->block[index-1] : &null_block;
1729 BlockNode *top = y ? &s->block[index-w] : &null_block;
1730 BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
1731 BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
1732 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
1733 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
1734 int pl = left->color[0];
1735 int pcb= left->color[1];
1736 int pcr= left->color[2];
1737 int pmx= mid_pred(left->mx, top->mx, tr->mx);
1738 int pmy= mid_pred(left->my, top->my, tr->my);
1739 int mx=0, my=0;
1740 int l,cr,cb, i;
1741 const int stride= s->current_picture.linesize[0];
1742 const int uvstride= s->current_picture.linesize[1];
1743 const int instride= s->input_picture.linesize[0];
1744 const int uvinstride= s->input_picture.linesize[1];
1745 uint8_t *new_l = s->input_picture.data[0] + (x + y* instride)*block_w;
1746 uint8_t *new_cb= s->input_picture.data[1] + (x + y*uvinstride)*block_w/2;
1747 uint8_t *new_cr= s->input_picture.data[2] + (x + y*uvinstride)*block_w/2;
1748 uint8_t current_mb[3][stride*block_w];
1749 uint8_t *current_data[3]= {&current_mb[0][0], &current_mb[1][0], &current_mb[2][0]};
1750 int P[10][2];
1751 int16_t last_mv[3][2];
1752 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
1753 const int shift= 1+qpel;
1754 MotionEstContext *c= &s->m.me;
1755 int mx_context= av_log2(2*ABS(left->mx - top->mx));
1756 int my_context= av_log2(2*ABS(left->my - top->my));
1757 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1758
1759 assert(sizeof(s->block_state) >= 256);
1760 if(s->keyframe){
1761 set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, BLOCK_INTRA);
1762 return 0;
1763 }
1764
1765 //FIXME optimize
1766 for(i=0; i<block_w; i++)
1767 memcpy(&current_mb[0][0] + stride*i, new_l + instride*i, block_w);
1768 for(i=0; i<block_w>>1; i++)
1769 memcpy(&current_mb[1][0] + uvstride*i, new_cb + uvinstride*i, block_w>>1);
1770 for(i=0; i<block_w>>1; i++)
1771 memcpy(&current_mb[2][0] + uvstride*i, new_cr + uvinstride*i, block_w>>1);
1772
1773// clip predictors / edge ?
1774
1775 P_LEFT[0]= left->mx;
1776 P_LEFT[1]= left->my;
1777 P_TOP [0]= top->mx;
1778 P_TOP [1]= top->my;
1779 P_TOPRIGHT[0]= tr->mx;
1780 P_TOPRIGHT[1]= tr->my;
1781
1782 last_mv[0][0]= s->block[index].mx;
1783 last_mv[0][1]= s->block[index].my;
1784 last_mv[1][0]= right->mx;
1785 last_mv[1][1]= right->my;
1786 last_mv[2][0]= bottom->mx;
1787 last_mv[2][1]= bottom->my;
1788
1789 s->m.mb_stride=2;
1790 s->m.mb_x=
1791 s->m.mb_y= 0;
1792 s->m.me.skip= 0;
1793
1794 init_ref(c, current_data, s->last_picture.data, NULL, block_w*x, block_w*y, 0);
1795
1796 assert(s->m.me. stride == stride);
1797 assert(s->m.me.uvstride == uvstride);
1798
1799 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
1800 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
1801 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
1802 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
1803
ff158dc9
MN
1804 c->xmin = - x*block_w - 16+2;
1805 c->ymin = - y*block_w - 16+2;
1806 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
1807 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
155ec6ed
MN
1808
1809 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
1810 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
1811 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
1812 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
1813 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
1814 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
1815 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
1816
1817 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
1818 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
1819
1820 if (!y) {
1821 c->pred_x= P_LEFT[0];
1822 c->pred_y= P_LEFT[1];
1823 } else {
1824 c->pred_x = P_MEDIAN[0];
1825 c->pred_y = P_MEDIAN[1];
1826 }
1827
1828 score= ff_epzs_motion_search(&s->m, &mx, &my, P, 0, /*ref_index*/ 0, last_mv,
1829 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
1830
1831 assert(mx >= c->xmin);
1832 assert(mx <= c->xmax);
1833 assert(my >= c->ymin);
1834 assert(my <= c->ymax);
1835
1836 score= s->m.me.sub_motion_search(&s->m, &mx, &my, score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
1837 score= ff_get_mb_score(&s->m, mx, my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
1838 //FIXME if mb_cmp != SSE then intra cant be compared currently and mb_penalty vs. lambda2
1839
1840 // subpel search
1841 pc= s->c;
28869757
MN
1842 pc.bytestream_start=
1843 pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
155ec6ed
MN
1844 memcpy(p_state, s->block_state, sizeof(s->block_state));
1845
1846 if(level!=s->block_max_depth)
28869757
MN
1847 put_rac(&pc, &p_state[4 + s_context], 1);
1848 put_rac(&pc, &p_state[1 + left->type + top->type], 0);
155ec6ed
MN
1849 put_symbol(&pc, &p_state[128 + 32*mx_context], mx - pmx, 1);
1850 put_symbol(&pc, &p_state[128 + 32*my_context], my - pmy, 1);
28869757
MN
1851 p_len= pc.bytestream - pc.bytestream_start;
1852 score += (s->lambda2*(p_len*8
1853 + (pc.outstanding_count - s->c.outstanding_count)*8
1854 + (-av_log2(pc.range) + av_log2(s->c.range))
1855 ))>>FF_LAMBDA_SHIFT;
155ec6ed
MN
1856
1857 block_s= block_w*block_w;
1858 sum = pix_sum(&current_mb[0][0], stride, block_w);
1859 l= (sum + block_s/2)/block_s;
1860 iscore = pix_norm1(&current_mb[0][0], stride, block_w) - 2*l*sum + l*l*block_s;
1861
1862 block_s= block_w*block_w>>2;
1863 sum = pix_sum(&current_mb[1][0], uvstride, block_w>>1);
1864 cb= (sum + block_s/2)/block_s;
1865// iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
1866 sum = pix_sum(&current_mb[2][0], uvstride, block_w>>1);
1867 cr= (sum + block_s/2)/block_s;
1868// iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
1869
1870 ic= s->c;
28869757
MN
1871 ic.bytestream_start=
1872 ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
155ec6ed
MN
1873 memcpy(i_state, s->block_state, sizeof(s->block_state));
1874 if(level!=s->block_max_depth)
28869757
MN
1875 put_rac(&ic, &i_state[4 + s_context], 1);
1876 put_rac(&ic, &i_state[1 + left->type + top->type], 1);
155ec6ed
MN
1877 put_symbol(&ic, &i_state[32], l-pl , 1);
1878 put_symbol(&ic, &i_state[64], cb-pcb, 1);
1879 put_symbol(&ic, &i_state[96], cr-pcr, 1);
28869757
MN
1880 i_len= ic.bytestream - ic.bytestream_start;
1881 iscore += (s->lambda2*(i_len*8
1882 + (ic.outstanding_count - s->c.outstanding_count)*8
1883 + (-av_log2(ic.range) + av_log2(s->c.range))
1884 ))>>FF_LAMBDA_SHIFT;
155ec6ed
MN
1885
1886// assert(score==256*256*256*64-1);
1887 assert(iscore < 255*255*256 + s->lambda2*10);
1888 assert(iscore >= 0);
1889 assert(l>=0 && l<=255);
1890 assert(pl>=0 && pl<=255);
1891
1892 if(level==0){
1893 int varc= iscore >> 8;
1894 int vard= score >> 8;
1895 if (vard <= 64 || vard < varc)
1896 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
1897 else
1898 c->scene_change_score+= s->m.qscale;
1899 }
1900
1901 if(level!=s->block_max_depth){
28869757 1902 put_rac(&s->c, &s->block_state[4 + s_context], 0);
155ec6ed
MN
1903 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
1904 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
1905 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
1906 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
1907 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
1908
1909 if(score2 < score && score2 < iscore)
1910 return score2;
1911 }
1912
1913 if(iscore < score){
28869757 1914 memcpy(pbbak, i_buffer, i_len);
155ec6ed 1915 s->c= ic;
28869757
MN
1916 s->c.bytestream_start= pbbak_start;
1917 s->c.bytestream= pbbak + i_len;
155ec6ed
MN
1918 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, BLOCK_INTRA);
1919 memcpy(s->block_state, i_state, sizeof(s->block_state));
1920 return iscore;
1921 }else{
28869757 1922 memcpy(pbbak, p_buffer, p_len);
155ec6ed 1923 s->c= pc;
28869757
MN
1924 s->c.bytestream_start= pbbak_start;
1925 s->c.bytestream= pbbak + p_len;
155ec6ed
MN
1926 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, 0);
1927 memcpy(s->block_state, p_state, sizeof(s->block_state));
1928 return score;
1929 }
1930}
1931
1932static void decode_q_branch(SnowContext *s, int level, int x, int y){
1933 const int w= s->b_width << s->block_max_depth;
155ec6ed
MN
1934 const int rem_depth= s->block_max_depth - level;
1935 const int index= (x + y*w) << rem_depth;
1936 static BlockNode null_block= { //FIXME add border maybe
1937 .color= {128,128,128},
1938 .mx= 0,
1939 .my= 0,
1940 .type= 0,
1941 .level= 0,
1942 };
1943 int trx= (x+1)<<rem_depth;
155ec6ed
MN
1944 BlockNode *left = x ? &s->block[index-1] : &null_block;
1945 BlockNode *top = y ? &s->block[index-w] : &null_block;
1946 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
1947 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
1948 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1949
1950 if(s->keyframe){
1951 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, BLOCK_INTRA);
1952 return;
1953 }
1954
28869757 1955 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
155ec6ed
MN
1956 int type;
1957 int l = left->color[0];
1958 int cb= left->color[1];
1959 int cr= left->color[2];
1960 int mx= mid_pred(left->mx, top->mx, tr->mx);
1961 int my= mid_pred(left->my, top->my, tr->my);
1962 int mx_context= av_log2(2*ABS(left->mx - top->mx)) + 0*av_log2(2*ABS(tr->mx - top->mx));
1963 int my_context= av_log2(2*ABS(left->my - top->my)) + 0*av_log2(2*ABS(tr->my - top->my));
1964
28869757 1965 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
155ec6ed
MN
1966
1967 if(type){
1968 l += get_symbol(&s->c, &s->block_state[32], 1);
1969 cb+= get_symbol(&s->c, &s->block_state[64], 1);
1970 cr+= get_symbol(&s->c, &s->block_state[96], 1);
1971 }else{
1972 mx+= get_symbol(&s->c, &s->block_state[128 + 32*mx_context], 1);
1973 my+= get_symbol(&s->c, &s->block_state[128 + 32*my_context], 1);
1974 }
1975 set_blocks(s, level, x, y, l, cb, cr, mx, my, type);
1976 }else{
1977 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
1978 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
1979 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
1980 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
1981 }
1982}
1983
1984static void encode_blocks(SnowContext *s){
1985 int x, y;
1986 int w= s->b_width;
1987 int h= s->b_height;
1988
1989 for(y=0; y<h; y++){
d06c75a8 1990 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
0ecca7a4
MN
1991 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1992 return;
1993 }
155ec6ed
MN
1994 for(x=0; x<w; x++){
1995 encode_q_branch(s, 0, x, y);
1996 }
1997 }
1998}
1999
2000static void decode_blocks(SnowContext *s){
2001 int x, y;
2002 int w= s->b_width;
2003 int h= s->b_height;
2004
2005 for(y=0; y<h; y++){
2006 for(x=0; x<w; x++){
2007 decode_q_branch(s, 0, x, y);
2008 }
2009 }
791e7b83
MN
2010}
2011
2012static void mc_block(uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
2013 int x, y;
3924dac4 2014START_TIMER
791e7b83
MN
2015 for(y=0; y < b_h+5; y++){
2016 for(x=0; x < b_w; x++){
3924dac4
MN
2017 int a0= src[x ];
2018 int a1= src[x + 1];
2019 int a2= src[x + 2];
2020 int a3= src[x + 3];
2021 int a4= src[x + 4];
2022 int a5= src[x + 5];
791e7b83
MN
2023// int am= 9*(a1+a2) - (a0+a3);
2024 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2025// int am= 18*(a2+a3) - 2*(a1+a4);
2026// int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2027// int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;
2028
2029// if(b_w==16) am= 8*(a1+a2);
2030
3924dac4
MN
2031 if(dx<8) tmp[x]= (32*a2*( 8-dx) + am* dx + 128)>>8;
2032 else tmp[x]= ( am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
791e7b83
MN
2033
2034/* if (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) + aL* dx + 32)>>6;
2035 else if(dx< 8) tmp[x + y*stride]= ( aL*( 8-dx) + am*(dx- 4) + 32)>>6;
2036 else if(dx<12) tmp[x + y*stride]= ( am*(12-dx) + aR*(dx- 8) + 32)>>6;
2037 else tmp[x + y*stride]= ( aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/
2038 }
3924dac4
MN
2039 tmp += stride;
2040 src += stride;
791e7b83 2041 }
3924dac4
MN
2042 tmp -= (b_h+5)*stride;
2043
791e7b83
MN
2044 for(y=0; y < b_h; y++){
2045 for(x=0; x < b_w; x++){
3924dac4
MN
2046 int a0= tmp[x + 0*stride];
2047 int a1= tmp[x + 1*stride];
2048 int a2= tmp[x + 2*stride];
2049 int a3= tmp[x + 3*stride];
2050 int a4= tmp[x + 4*stride];
2051 int a5= tmp[x + 5*stride];
791e7b83
MN
2052 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2053// int am= 18*(a2+a3) - 2*(a1+a4);
2054/* int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2055 int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/
2056
2057// if(b_w==16) am= 8*(a1+a2);
2058
3924dac4
MN
2059 if(dy<8) dst[x]= (32*a2*( 8-dy) + am* dy + 128)>>8;
2060 else dst[x]= ( am*(16-dy) + 32*a3*(dy-8) + 128)>>8;
791e7b83
MN
2061
2062/* if (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) + aL* dy + 32)>>6;
2063 else if(dy< 8) tmp[x + y*stride]= ( aL*( 8-dy) + am*(dy- 4) + 32)>>6;
2064 else if(dy<12) tmp[x + y*stride]= ( am*(12-dy) + aR*(dy- 8) + 32)>>6;
2065 else tmp[x + y*stride]= ( aR*(16-dy) + 16*a2*(dy-12) + 32)>>6;*/
2066 }
3924dac4
MN
2067 dst += stride;
2068 tmp += stride;
791e7b83 2069 }
3924dac4 2070STOP_TIMER("mc_block")
791e7b83
MN
2071}
2072
/*
 * mca(hx, hy, size) defines mc_block_hpel<hx><hy><size>(), a wrapper around
 * mc_block() for a size x size block with fixed fractional offsets hx/hy.
 * The source pointer is moved 2 samples left and 2 rows up before the call,
 * and the scratch buffer lives on the stack (stride*(size+5) bytes).
 */
#define mca(hx,hy,size)\
static void mc_block_hpel ## hx ## hy ## size(uint8_t *dst, uint8_t *src, int stride, int h){\
    uint8_t scratch[stride*(size+5)];\
    assert(h==size);\
    mc_block(dst, src-2-2*stride, scratch, stride, size, size, hx, hy);\
}

/* 16x16 variants */
mca( 0, 0,16)
mca( 8, 0,16)
mca( 0, 8,16)
mca( 8, 8,16)
/* 8x8 variants */
mca( 0, 0,8)
mca( 8, 0,8)
mca( 0, 8,8)
mca( 8, 8,8)
791e7b83 2088
ff158dc9
MN
2089static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
2090 if(block->type){
2091 int x, y;
2092 const int color= block->color[plane_index];
2093 for(y=0; y < b_h; y++){
2094 for(x=0; x < b_w; x++){
2095 dst[x + y*stride]= color;
2096 }
2097 }
2098 }else{
2099 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
2100 int mx= block->mx*scale;
2101 int my= block->my*scale;
ec697587
MN
2102 const int dx= mx&15;
2103 const int dy= my&15;
ff158dc9
MN
2104 sx += (mx>>4) - 2;
2105 sy += (my>>4) - 2;
2106 src += sx + sy*stride;
2107 if( (unsigned)sx >= w - b_w - 4
2108 || (unsigned)sy >= h - b_h - 4){
2109 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+5, b_h+5, sx, sy, w, h);
2110 src= tmp + MB_SIZE;
2111 }
ec697587
MN
2112 if((dx&3) || (dy&3) || b_w!=b_h || (b_w!=4 && b_w!=8 && b_w!=16))
2113 mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy);
2114 else
2115 s->dsp.put_h264_qpel_pixels_tab[2-(b_w>>3)][dy+(dx>>2)](dst,src + 2 + 2*stride,stride);
ff158dc9
MN
2116 }
2117}
2118
2119static always_inline int same_block(BlockNode *a, BlockNode *b){
2120 return !((a->mx - b->mx) | (a->my - b->my) | a->type | b->type);
2121}
2122
2123//FIXME name cleanup (b_w, block_w, b_width stuff)
715a97f0 2124static always_inline void add_yblock(SnowContext *s, DWTELEM *dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){
ff158dc9
MN
2125 const int b_width = s->b_width << s->block_max_depth;
2126 const int b_height= s->b_height << s->block_max_depth;
2127 const int b_stride= b_width;
2128 BlockNode *lt= &s->block[b_x + b_y*b_stride];
2129 BlockNode *rt= lt+1;
2130 BlockNode *lb= lt+b_stride;
2131 BlockNode *rb= lb+1;
2132 uint8_t *block[4];
2133 uint8_t tmp[src_stride*(b_h+5)]; //FIXME align
791e7b83
MN
2134 int x,y;
2135
ff158dc9
MN
2136 if(b_x<0){
2137 lt= rt;
2138 lb= rb;
2139 }else if(b_x + 1 >= b_width){
2140 rt= lt;
2141 rb= lb;
791e7b83 2142 }
ff158dc9
MN
2143 if(b_y<0){
2144 lt= lb;
2145 rt= rb;
2146 }else if(b_y + 1 >= b_height){
2147 lb= lt;
2148 rb= rt;
2149 }
2150
2151 if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
2152 obmc -= src_x;
2153 b_w += src_x;
2154 src_x=0;
2155 }else if(src_x + b_w > w){
2156 b_w = w - src_x;
2157 }
2158 if(src_y<0){
2159 obmc -= src_y*obmc_stride;
2160 b_h += src_y;
2161 src_y=0;
2162 }else if(src_y + b_h> h){
2163 b_h = h - src_y;
791e7b83 2164 }
620ab797 2165
ff158dc9 2166 if(b_w<=0 || b_h<=0) return;
155ec6ed 2167
ff158dc9
MN
2168assert(src_stride > 7*MB_SIZE);
2169 dst += src_x + src_y*dst_stride;
715a97f0 2170 dst8+= src_x + src_y*src_stride;
ff158dc9
MN
2171// src += src_x + src_y*src_stride;
2172
2173 block[0]= tmp+3*MB_SIZE;
2174 pred_block(s, block[0], src, tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2175
2176 if(same_block(lt, rt)){
2177 block[1]= block[0];
791e7b83 2178 }else{
ff158dc9
MN
2179 block[1]= tmp + 4*MB_SIZE;
2180 pred_block(s, block[1], src, tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2181 }
2182
2183 if(same_block(lt, lb)){
2184 block[2]= block[0];
2185 }else if(same_block(rt, lb)){
2186 block[2]= block[1];
2187 }else{
2188 block[2]= tmp+5*MB_SIZE;
2189 pred_block(s, block[2], src, tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2190 }
791e7b83 2191
ff158dc9
MN
2192 if(same_block(lt, rb) ){
2193 block[3]= block[0];
2194 }else if(same_block(rt, rb)){
2195 block[3]= block[1];
2196 }else if(same_block(lb, rb)){
2197 block[3]= block[2];
2198 }else{
2199 block[3]= tmp+6*MB_SIZE;
2200 pred_block(s, block[3], src, tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2201 }
2202#if 0
2203 for(y=0; y<b_h; y++){
2204 for(x=0; x<b_w; x++){
2205 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2206 if(add) dst[x + y*dst_stride] += v;
2207 else dst[x + y*dst_stride] -= v;
2208 }
2209 }
2210 for(y=0; y<b_h; y++){
2211 uint8_t *obmc2= obmc + (obmc_stride>>1);
2212 for(x=0; x<b_w; x++){
2213 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2214 if(add) dst[x + y*dst_stride] += v;
2215 else dst[x + y*dst_stride] -= v;
2216 }
2217 }
2218 for(y=0; y<b_h; y++){
2219 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2220 for(x=0; x<b_w; x++){
2221 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2222 if(add) dst[x + y*dst_stride] += v;
2223 else dst[x + y*dst_stride] -= v;
2224 }
2225 }
2226 for(y=0; y<b_h; y++){
2227 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2228 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2229 for(x=0; x<b_w; x++){
2230 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2231 if(add) dst[x + y*dst_stride] += v;
2232 else dst[x + y*dst_stride] -= v;
2233 }
2234 }
2235#else
2236 for(y=0; y<b_h; y++){
2237 //FIXME ugly missue of obmc_stride
2238 uint8_t *obmc1= obmc + y*obmc_stride;
2239 uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2240 uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2241 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2242 for(x=0; x<b_w; x++){
2243 int v= obmc1[x] * block[3][x + y*src_stride]
2244 +obmc2[x] * block[2][x + y*src_stride]
2245 +obmc3[x] * block[1][x + y*src_stride]
2246 +obmc4[x] * block[0][x + y*src_stride];
715a97f0
MN
2247
2248 v <<= 8 - LOG2_OBMC_MAX;
034aff03
MN
2249 if(FRAC_BITS != 8){
2250 v += 1<<(7 - FRAC_BITS);
2251 v >>= 8 - FRAC_BITS;
2252 }
715a97f0
MN
2253 if(add){
2254 v += dst[x + y*dst_stride];
2255 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2256 if(v&(~255)) v= ~(v>>31);
2257 dst8[x + y*src_stride] = v;
2258 }else{
2259 dst[x + y*dst_stride] -= v;
2260 }
791e7b83
MN
2261 }
2262 }
ff158dc9 2263#endif
791e7b83
MN
2264}
2265
f9e6ebf7 2266static always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_index, int add, int mb_y){
791e7b83 2267 Plane *p= &s->plane[plane_index];
155ec6ed
MN
2268 const int mb_w= s->b_width << s->block_max_depth;
2269 const int mb_h= s->b_height << s->block_max_depth;
f9e6ebf7 2270 int x, y, mb_x;
155ec6ed
MN
2271 int block_size = MB_SIZE >> s->block_max_depth;
2272 int block_w = plane_index ? block_size/2 : block_size;
ff158dc9 2273 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
155ec6ed 2274 int obmc_stride= plane_index ? block_size : 2*block_size;
715a97f0 2275 int ref_stride= s->current_picture.linesize[plane_index];
791e7b83 2276 uint8_t *ref = s->last_picture.data[plane_index];
715a97f0 2277 uint8_t *dst8= s->current_picture.data[plane_index];
791e7b83
MN
2278 int w= p->width;
2279 int h= p->height;
fff6d4ea 2280 START_TIMER
791e7b83 2281
ff158dc9 2282 if(s->keyframe || (s->avctx->debug&512)){
f9e6ebf7
LM
2283 if(mb_y==mb_h)
2284 return;
2285
715a97f0 2286 if(add){
f9e6ebf7 2287 for(y=block_w*mb_y; y<block_w*(mb_y+1); y++){
715a97f0
MN
2288 for(x=0; x<w; x++){
2289 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2290 v >>= FRAC_BITS;
2291 if(v&(~255)) v= ~(v>>31);
2292 dst8[x + y*ref_stride]= v;
2293 }
2294 }
2295 }else{
f9e6ebf7 2296 for(y=block_w*mb_y; y<block_w*(mb_y+1); y++){
715a97f0
MN
2297 for(x=0; x<w; x++){
2298 buf[x + y*w]-= 128<<FRAC_BITS;
2299 }
ff158dc9 2300 }
791e7b83 2301 }
ff158dc9
MN
2302
2303 return;
791e7b83
MN
2304 }
2305
ff158dc9 2306 for(mb_x=0; mb_x<=mb_w; mb_x++){
fff6d4ea 2307 START_TIMER
ff158dc9 2308
715a97f0 2309 add_yblock(s, buf, dst8, ref, obmc,
ff158dc9 2310 block_w*mb_x - block_w/2,
791e7b83 2311 block_w*mb_y - block_w/2,
ff158dc9 2312 block_w, block_w,
791e7b83 2313 w, h,
ff158dc9
MN
2314 w, ref_stride, obmc_stride,
2315 mb_x - 1, mb_y - 1,
2316 add, plane_index);
2317
2318 STOP_TIMER("add_yblock")
791e7b83 2319 }
fff6d4ea 2320
f9e6ebf7
LM
2321 STOP_TIMER("predict_slice")
2322}
2323
2324static always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){
2325 const int mb_h= s->b_height << s->block_max_depth;
2326 int mb_y;
2327 for(mb_y=0; mb_y<=mb_h; mb_y++)
2328 predict_slice(s, buf, plane_index, add, mb_y);
791e7b83
MN
2329}
2330
2331static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int bias){
2332 const int level= b->level;
2333 const int w= b->width;
2334 const int h= b->height;
2335 const int qlog= clip(s->qlog + b->qlog, 0, 128);
2336 const int qmul= qexp[qlog&7]<<(qlog>>3);
da66b631
MN
2337 int x,y, thres1, thres2;
2338 START_TIMER
791e7b83
MN
2339
2340 assert(QROOT==8);
2341
93fbdb5a
MN
2342 if(s->qlog == LOSSLESS_QLOG) return;
2343
791e7b83 2344 bias= bias ? 0 : (3*qmul)>>3;
da66b631
MN
2345 thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
2346 thres2= 2*thres1;
791e7b83
MN
2347
2348 if(!bias){
2349 for(y=0; y<h; y++){
2350 for(x=0; x<w; x++){
da66b631
MN
2351 int i= src[x + y*stride];
2352
2353 if((unsigned)(i+thres1) > thres2){
2354 if(i>=0){
2355 i<<= QEXPSHIFT;
2356 i/= qmul; //FIXME optimize
2357 src[x + y*stride]= i;
2358 }else{
2359 i= -i;
2360 i<<= QEXPSHIFT;
2361 i/= qmul; //FIXME optimize
2362 src[x + y*stride]= -i;
2363 }
2364 }else
2365 src[x + y*stride]= 0;
791e7b83
MN
2366 }
2367 }
2368 }else{
2369 for(y=0; y<h; y++){
2370 for(x=0; x<w; x++){
2371 int i= src[x + y*stride];
2372
da66b631
MN
2373 if((unsigned)(i+thres1) > thres2){
2374 if(i>=0){
2375 i<<= QEXPSHIFT;
2376 i= (i + bias) / qmul; //FIXME optimize
2377 src[x + y*stride]= i;
2378 }else{
2379 i= -i;
2380 i<<= QEXPSHIFT;
2381 i= (i + bias) / qmul; //FIXME optimize
2382 src[x + y*stride]= -i;
2383 }
2384 }else
2385 src[x + y*stride]= 0;
791e7b83
MN
2386 }
2387 }
2388 }
da66b631
MN
2389 if(level+1 == s->spatial_decomposition_count){
2390// STOP_TIMER("quantize")
2391 }
791e7b83
MN
2392}
2393
2394static void dequantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride){
791e7b83
MN
2395 const int w= b->width;
2396 const int h= b->height;
2397 const int qlog= clip(s->qlog + b->qlog, 0, 128);
2398 const int qmul= qexp[qlog&7]<<(qlog>>3);
2399 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
2400 int x,y;
ea7d9cd4 2401 START_TIMER
791e7b83 2402
93fbdb5a
MN
2403 if(s->qlog == LOSSLESS_QLOG) return;
2404
791e7b83
MN
2405 assert(QROOT==8);
2406
2407 for(y=0; y<h; y++){
2408 for(x=0; x<w; x++){
2409 int i= src[x + y*stride];
2410 if(i<0){
2411 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
2412 }else if(i>0){
2413 src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT));
2414 }
2415 }
2416 }
ea7d9cd4
MN
2417 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
2418 STOP_TIMER("dquant")
2419 }
791e7b83
MN
2420}
2421
2422static void decorrelate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
2423 const int w= b->width;
2424 const int h= b->height;
2425 int x,y;
2426
2427 for(y=h-1; y>=0; y--){
2428 for(x=w-1; x>=0; x--){
2429 int i= x + y*stride;
2430
2431 if(x){
2432 if(use_median){
2433 if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
2434 else src[i] -= src[i - 1];
2435 }else{
2436 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
2437 else src[i] -= src[i - 1];
2438 }
2439 }else{
2440 if(y) src[i] -= src[i - stride];
2441 }
2442 }
2443 }
2444}
2445
2446static void correlate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
2447 const int w= b->width;
2448 const int h= b->height;
2449 int x,y;
2450
2451 for(y=0; y<h; y++){
2452 for(x=0; x<w; x++){
2453 int i= x + y*stride;
2454
2455 if(x){
2456 if(use_median){
2457 if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
2458 else src[i] += src[i - 1];
2459 }else{
2460 if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
2461 else src[i] += src[i - 1];
2462 }
2463 }else{
2464 if(y) src[i] += src[i - stride];
2465 }
2466 }
2467 }
2468}
2469
2470static void encode_header(SnowContext *s){
2471 int plane_index, level, orientation;
28869757
MN
2472 uint8_t kstate[32];
2473
2474 memset(kstate, MID_STATE, sizeof(kstate));
791e7b83 2475
28869757 2476 put_rac(&s->c, kstate, s->keyframe);
19aa028d
MN
2477 if(s->keyframe || s->always_reset)
2478 reset_contexts(s);
791e7b83
MN
2479 if(s->keyframe){
2480 put_symbol(&s->c, s->header_state, s->version, 0);
28869757 2481 put_rac(&s->c, s->header_state, s->always_reset);
791e7b83
MN
2482 put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
2483 put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
2484 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
2485 put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
791e7b83
MN
2486 put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
2487 put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
28869757
MN
2488 put_rac(&s->c, s->header_state, s->spatial_scalability);
2489// put_rac(&s->c, s->header_state, s->rate_scalability);
791e7b83
MN
2490
2491 for(plane_index=0; plane_index<2; plane_index++){
2492 for(level=0; level<s->spatial_decomposition_count; level++){
2493 for(orientation=level ? 1:0; orientation<4; orientation++){
2494 if(orientation==2) continue;
2495 put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
2496 }
2497 }
2498 }
2499 }
2500 put_symbol(&s->c, s->header_state, s->spatial_decomposition_type, 0);
2501 put_symbol(&s->c, s->header_state, s->qlog, 1);
2502 put_symbol(&s->c, s->header_state, s->mv_scale, 0);
2503 put_symbol(&s->c, s->header_state, s->qbias, 1);
155ec6ed 2504 put_symbol(&s->c, s->header_state, s->block_max_depth, 0);
791e7b83
MN
2505}
2506
2507static int decode_header(SnowContext *s){
2508 int plane_index, level, orientation;
28869757
MN
2509 uint8_t kstate[32];
2510
2511 memset(kstate, MID_STATE, sizeof(kstate));
791e7b83 2512
28869757 2513 s->keyframe= get_rac(&s->c, kstate);
19aa028d
MN
2514 if(s->keyframe || s->always_reset)
2515 reset_contexts(s);
791e7b83
MN
2516 if(s->keyframe){
2517 s->version= get_symbol(&s->c, s->header_state, 0);
2518 if(s->version>0){
2519 av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
2520 return -1;
2521 }
28869757 2522 s->always_reset= get_rac(&s->c, s->header_state);
791e7b83
MN
2523 s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
2524 s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
2525 s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
2526 s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
791e7b83
MN
2527 s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
2528 s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
28869757
MN
2529 s->spatial_scalability= get_rac(&s->c, s->header_state);
2530// s->rate_scalability= get_rac(&s->c, s->header_state);
791e7b83
MN
2531
2532 for(plane_index=0; plane_index<3; plane_index++){
2533 for(level=0; level<s->spatial_decomposition_count; level++){
2534 for(orientation=level ? 1:0; orientation<4; orientation++){
2535 int q;
2536 if (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
2537 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
2538 else q= get_symbol(&s->c, s->header_state, 1);
2539 s->plane[plane_index].band[level][orientation].qlog= q;
2540 }
2541 }
2542 }
2543 }
2544
2545 s->spatial_decomposition_type= get_symbol(&s->c, s->header_state, 0);
2546 if(s->spatial_decomposition_type > 2){
2547 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
2548 return -1;
2549 }
2550
2551 s->qlog= get_symbol(&s->c, s->header_state, 1);
2552 s->mv_scale= get_symbol(&s->c, s->header_state, 0);
2553 s->qbias= get_symbol(&s->c, s->header_state, 1);
155ec6ed 2554 s->block_max_depth= get_symbol(&s->c, s->header_state, 0);
791e7b83
MN
2555
2556 return 0;
2557}
2558
2559static int common_init(AVCodecContext *avctx){
2560 SnowContext *s = avctx->priv_data;
2561 int width, height;
2562 int level, orientation, plane_index, dec;
2563
2564 s->avctx= avctx;
2565
2566 dsputil_init(&s->dsp, avctx);
2567
2568#define mcf(dx,dy)\
2569 s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\
2570 s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
d92b5807
MN
2571 s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
2572 s->dsp.put_qpel_pixels_tab [1][dy+dx/4]=\
2573 s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
2574 s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
791e7b83
MN
2575
2576 mcf( 0, 0)
2577 mcf( 4, 0)
2578 mcf( 8, 0)
2579 mcf(12, 0)
2580 mcf( 0, 4)
2581 mcf( 4, 4)
2582 mcf( 8, 4)
2583 mcf(12, 4)
2584 mcf( 0, 8)
2585 mcf( 4, 8)
2586 mcf( 8, 8)
2587 mcf(12, 8)
2588 mcf( 0,12)
2589 mcf( 4,12)
2590 mcf( 8,12)
2591 mcf(12,12)
2592
2593#define mcfh(dx,dy)\
2594 s->dsp.put_pixels_tab [0][dy/4+dx/8]=\
2595 s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
d92b5807
MN
2596 mc_block_hpel ## dx ## dy ## 16;\
2597 s->dsp.put_pixels_tab [1][dy/4+dx/8]=\
2598 s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
2599 mc_block_hpel ## dx ## dy ## 8;
791e7b83
MN
2600
2601 mcfh(0, 0)
2602 mcfh(8, 0)
2603 mcfh(0, 8)
2604 mcfh(8, 8)
2605
2606 dec= s->spatial_decomposition_count= 5;
2607 s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
2608
2609 s->chroma_h_shift= 1; //FIXME XXX
2610 s->chroma_v_shift= 1;
2611
2612// dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
2613
155ec6ed
MN
2614 width= s->avctx->width;
2615 height= s->avctx->height;
2616
2617 s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM));
791e7b83
MN
2618
2619 s->mv_scale= (s->avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
155ec6ed 2620 s->block_max_depth= (s->avctx->flags & CODEC_FLAG_4MV) ? 1 : 0;
791e7b83
MN
2621
2622 for(plane_index=0; plane_index<3; plane_index++){
2623 int w= s->avctx->width;
2624 int h= s->avctx->height;
2625
2626 if(plane_index){
2627 w>>= s->chroma_h_shift;
2628 h>>= s->chroma_v_shift;
2629 }
2630 s->plane[plane_index].width = w;
2631 s->plane[plane_index].height= h;
3bb9f096 2632//av_log(NULL, AV_LOG_DEBUG, "%d %d\n", w, h);
791e7b83
MN
2633 for(level=s->spatial_decomposition_count-1; level>=0; level--){
2634 for(orientation=level ? 1 : 0; orientation<4; orientation++){
2635 SubBand *b= &s->plane[plane_index].band[level][orientation];
2636
2637 b->buf= s->spatial_dwt_buffer;
2638 b->level= level;
2639 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
2640 b->width = (w + !(orientation&1))>>1;
2641 b->height= (h + !(orientation>1))>>1;
2642
2643 if(orientation&1) b->buf += (w+1)>>1;
2644 if(orientation>1) b->buf += b->stride>>1;
2645
2646 if(level)
2647 b->parent= &s->plane[plane_index].band[level-1][orientation];
0cea8a03
MN
2648 b->x = av_mallocz(((b->width+1) * b->height+1)*sizeof(int16_t));
2649 b->coeff= av_mallocz(((b->width+1) * b->height+1)*sizeof(DWTELEM));
791e7b83
MN
2650 }
2651 w= (w+1)>>1;
2652 h= (h+1)>>1;
2653 }
2654 }
2655
791e7b83
MN
2656 reset_contexts(s);
2657/*
2658 width= s->width= avctx->width;
2659 height= s->height= avctx->height;
2660
2661 assert(width && height);
2662*/
2663 s->avctx->get_buffer(s->avctx, &s->mconly_picture);
2664
2665 return 0;
2666}
2667
2668
2669static void calculate_vissual_weight(SnowContext *s, Plane *p){
2670 int width = p->width;
2671 int height= p->height;
39c61bbb 2672 int level, orientation, x, y;
791e7b83
MN
2673
2674 for(level=0; level<s->spatial_decomposition_count; level++){
2675 for(orientation=level ? 1 : 0; orientation<4; orientation++){
2676 SubBand *b= &p->band[level][orientation];
2677 DWTELEM *buf= b->buf;
2678 int64_t error=0;
2679
2680 memset(s->spatial_dwt_buffer, 0, sizeof(int)*width*height);
2681 buf[b->width/2 + b->height/2*b->stride]= 256*256;
46c281e8 2682 ff_spatial_idwt(s->spatial_dwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
791e7b83
MN
2683 for(y=0; y<height; y++){
2684 for(x=0; x<width; x++){
2685 int64_t d= s->spatial_dwt_buffer[x + y*width];
2686 error += d*d;
2687 }
2688 }
2689
2690 b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
46c281e8 2691// av_log(NULL, AV_LOG_DEBUG, "%d %d %d\n", level, orientation, b->qlog/*, sqrt(error)*/);
791e7b83
MN
2692 }
2693 }
2694}
2695
2696static int encode_init(AVCodecContext *avctx)
2697{
2698 SnowContext *s = avctx->priv_data;
39c61bbb 2699 int plane_index;
791e7b83 2700
2ff9ff5b
MN
2701 if(avctx->strict_std_compliance >= 0){
2702 av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it wont be decodeable with future versions!!!\n"
53513831 2703 "use vstrict=-1 / -strict -1 to use it anyway\n");
2ff9ff5b
MN
2704 return -1;
2705 }
2706
791e7b83 2707 common_init(avctx);
155ec6ed 2708 alloc_blocks(s);
791e7b83
MN
2709
2710 s->version=0;
2711
2712 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
2713 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
2714 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
791e7b83
MN
2715 h263_encode_init(&s->m); //mv_penalty
2716
2717 for(plane_index=0; plane_index<3; plane_index++){
2718 calculate_vissual_weight(s, &s->plane[plane_index]);
2719 }
2720
2721
2722 avctx->coded_frame= &s->current_picture;
2723 switch(avctx->pix_fmt){
2724// case PIX_FMT_YUV444P:
2725// case PIX_FMT_YUV422P:
2726 case PIX_FMT_YUV420P:
2727 case PIX_FMT_GRAY8:
2728// case PIX_FMT_YUV411P:
2729// case PIX_FMT_YUV410P:
2730 s->colorspace_type= 0;
2731 break;
2732/* case PIX_FMT_RGBA32:
2733 s->colorspace= 1;
2734 break;*/
2735 default:
2736 av_log(avctx, AV_LOG_ERROR, "format not supported\n");
2737 return -1;
2738 }
2739// avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
2740 s->chroma_h_shift= 1;
2741 s->chroma_v_shift= 1;
2742 return 0;
2743}
2744
2745static int frame_start(SnowContext *s){
2746 AVFrame tmp;
64886072
MN
2747 int w= s->avctx->width; //FIXME round up to x16 ?
2748 int h= s->avctx->height;
791e7b83 2749
64886072
MN
2750 if(s->current_picture.data[0]){
2751 draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w , h , EDGE_WIDTH );
2752 draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
2753 draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
2754 }
2755
791e7b83
MN
2756 tmp= s->last_picture;
2757 s->last_picture= s->current_picture;
2758 s->current_picture= tmp;
2759
2760 s->current_picture.reference= 1;
2761 if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
2762 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
2763 return -1;
2764 }
2765
2766 return 0;
2767}
2768
2769static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
2770 SnowContext *s = avctx->priv_data;
28869757 2771 RangeCoder * const c= &s->c;
791e7b83
MN
2772 AVFrame *pict = data;
2773 const int width= s->avctx->width;
2774 const int height= s->avctx->height;
39c61bbb 2775 int level, orientation, plane_index;
791e7b83 2776
28869757
MN
2777 ff_init_range_encoder(c, buf, buf_size);
2778 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
791e7b83
MN
2779
2780 s->input_picture = *pict;
2781
791e7b83
MN
2782 s->keyframe=avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
2783 pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
2784
93fbdb5a
MN
2785 if(pict->quality){
2786 s->qlog= rint(QROOT*log(pict->quality / (float)FF_QP2LAMBDA)/log(2));
2787 //<64 >60
2788 s->qlog += 61;
2789 }else{
2790 s->qlog= LOSSLESS_QLOG;
2791 }
791e7b83 2792
791e7b83 2793 frame_start(s);
19aa028d 2794 s->current_picture.key_frame= s->keyframe;
791e7b83
MN
2795
2796 if(pict->pict_type == P_TYPE){
2797 int block_width = (width +15)>>4;
2798 int block_height= (height+15)>>4;
2799 int stride= s->current_picture.linesize[0];
791e7b83
MN
2800
2801 assert(s->current_picture.data[0]);
2802 assert(s->last_picture.data[0]);
2803
2804 s->m.avctx= s->avctx;
2805 s->m.current_picture.data[0]= s->current_picture.data[0];
2806 s->m. last_picture.data[0]= s-> last_picture.data[0];
2807 s->m. new_picture.data[0]= s-> input_picture.data[0];
2808 s->m.current_picture_ptr= &s->m.current_picture;
2809 s->m. last_picture_ptr= &s->m. last_picture;
2810 s->m.linesize=
2811 s->m. last_picture.linesize[0]=
2812 s->m. new_picture.linesize[0]=
2813 s->m.current_picture.linesize[0]= stride;
155ec6ed 2814 s->m.uvlinesize= s->current_picture.linesize[1];
791e7b83
MN
2815 s->m.width = width;
2816 s->m.height= height;
2817 s->m.mb_width = block_width;
2818 s->m.mb_height= block_height;
2819 s->m.mb_stride= s->m.mb_width+1;
2820 s->m.b8_stride= 2*s->m.mb_width+1;
2821 s->m.f_code=1;
2822 s->m.pict_type= pict->pict_type;
2823 s->m.me_method= s->avctx->me_method;
2824 s->m.me.scene_change_score=0;
2825 s->m.flags= s->avctx->flags;
2826 s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
2827 s->m.out_format= FMT_H263;
2828 s->m.unrestricted_mv= 1;
2829
155ec6ed 2830 s->lambda = s->m.lambda= pict->quality * 3/2; //FIXME bug somewhere else
791e7b83 2831 s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
155ec6ed 2832 s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
791e7b83 2833
791e7b83
MN
2834 s->m.dsp= s->dsp; //move
2835 ff_init_me(&s->m);
791e7b83 2836 }
791e7b83 2837
155ec6ed
MN
2838redo_frame:
2839
791e7b83
MN
2840 s->qbias= pict->pict_type == P_TYPE ? 2 : 0;
2841
2842 encode_header(s);
155ec6ed
MN
2843 encode_blocks(s);
2844
791e7b83
MN
2845 for(plane_index=0; plane_index<3; plane_index++){
2846 Plane *p= &s->plane[plane_index];
2847 int w= p->width;
2848 int h= p->height;
2849 int x, y;
39c61bbb 2850// int bits= put_bits_count(&s->c.pb);
791e7b83
MN
2851
2852 //FIXME optimize
791e7b83
MN
2853 if(pict->data[plane_index]) //FIXME gray hack
2854 for(y=0; y<h; y++){
2855 for(x=0; x<w; x++){
034aff03 2856 s->spatial_dwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
791e7b83
MN
2857 }
2858 }
2859 predict_plane(s, s->spatial_dwt_buffer, plane_index, 0);
155ec6ed
MN
2860
2861 if( plane_index==0
2862 && pict->pict_type == P_TYPE
2863 && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
28869757
MN
2864 ff_init_range_encoder(c, buf, buf_size);
2865 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
155ec6ed
MN
2866 pict->pict_type= FF_I_TYPE;
2867 s->keyframe=1;
2868 reset_contexts(s);
2869 goto redo_frame;
2870 }
2871
93fbdb5a
MN
2872 if(s->qlog == LOSSLESS_QLOG){
2873 for(y=0; y<h; y++){
2874 for(x=0; x<w; x++){
034aff03 2875 s->spatial_dwt_buffer[y*w + x]= (s->spatial_dwt_buffer[y*w + x] + (1<<(FRAC_BITS-1)))>>FRAC_BITS;
93fbdb5a
MN
2876 }
2877 }
2878 }
791e7b83 2879
46c281e8 2880 ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
93fbdb5a 2881
791e7b83
MN
2882 for(level=0; level<s->spatial_decomposition_count; level++){
2883 for(orientation=level ? 1 : 0; orientation<4; orientation++){
2884 SubBand *b= &p->band[level][orientation];
2885
2886 quantize(s, b, b->buf, b->stride, s->qbias);
2887 if(orientation==0)
2888 decorrelate(s, b, b->buf, b->stride, pict->pict_type == P_TYPE, 0);
2889 encode_subband(s, b, b->buf, b->parent ? b->parent->buf : NULL, b->stride, orientation);
2890 assert(b->parent==NULL || b->parent->stride == b->stride*2);
2891 if(orientation==0)
2892 correlate(s, b, b->buf, b->stride, 1, 0);
2893 }
2894 }
2895// av_log(NULL, AV_LOG_DEBUG, "plane:%d bits:%d\n", plane_index, put_bits_count(&s->c.pb) - bits);
2896
2897 for(level=0; level<s->spatial_decomposition_count; level++){
2898 for(orientation=level ? 1 : 0; orientation<4; orientation++){
2899 SubBand *b= &p->band[level][orientation];
2900
2901 dequantize(s, b, b->buf, b->stride);
2902 }
2903 }
93fbdb5a 2904
46c281e8 2905 ff_spatial_idwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
93fbdb5a
MN
2906 if(s->qlog == LOSSLESS_QLOG){
2907 for(y=0; y<h; y++){
2908 for(x=0; x<w; x++){
034aff03 2909 s->spatial_dwt_buffer[y*w + x]<<=FRAC_BITS;
93fbdb5a
MN
2910 }
2911 }
2912 }
715a97f0 2913{START_TIMER
791e7b83 2914 predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
715a97f0 2915STOP_TIMER("pred-conv")}
791e7b83
MN
2916 if(s->avctx->flags&CODEC_FLAG_PSNR){
2917 int64_t error= 0;
2918
2919 if(pict->data[plane_index]) //FIXME gray hack
2920 for(y=0; y<h; y++){
2921 for(x=0; x<w; x++){
93fbdb5a 2922 int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
791e7b83
MN
2923 error += d*d;
2924 }
2925 }
791e7b83 2926 s->avctx->error[plane_index] += error;
bd368b56 2927 s->current_picture.error[plane_index] = error;
791e7b83
MN
2928 }
2929 }
2930
2931 if(s->last_picture.data[0])
2932 avctx->release_buffer(avctx, &s->last_picture);
2933
2934 emms_c();
2935
28869757 2936 return ff_rac_terminate(c);
791e7b83
MN
2937}
2938
2939static void common_end(SnowContext *s){
7b49c309
MN
2940 int plane_index, level, orientation;
2941
791e7b83 2942 av_freep(&s->spatial_dwt_buffer);
791e7b83
MN
2943
2944 av_freep(&s->m.me.scratchpad);
2945 av_freep(&s->m.me.map);
2946 av_freep(&s->m.me.score_map);
155ec6ed
MN
2947
2948 av_freep(&s->block);
7b49c309
MN
2949
2950 for(plane_index=0; plane_index<3; plane_index++){
2951 for(level=s->spatial_decomposition_count-1; level>=0; level--){
2952 for(orientation=level ? 1 : 0; orientation<4; orientation++){
2953 SubBand *b= &s->plane[plane_index].band[level][orientation];
2954
2955 av_freep(&b->x);
0cea8a03 2956 av_freep(&b->coeff);
7b49c309
MN
2957 }
2958 }
2959 }
791e7b83
MN
2960}
2961
2962static int encode_end(AVCodecContext *avctx)
2963{
2964 SnowContext *s = avctx->priv_data;
2965
2966 common_end(s);
2967
2968 return 0;
2969}
2970
2971static int decode_init(AVCodecContext *avctx)
2972{
2973// SnowContext *s = avctx->priv_data;
2974
2975 common_init(avctx);
2976
2977 return 0;
2978}
2979
2980static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){
2981 SnowContext *s = avctx->priv_data;
28869757 2982 RangeCoder * const c= &s->c;
791e7b83
MN
2983 int bytes_read;
2984 AVFrame *picture = data;
39c61bbb 2985 int level, orientation, plane_index;
791e7b83 2986
28869757
MN
2987 ff_init_range_decoder(c, buf, buf_size);
2988 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
791e7b83 2989
791e7b83
MN
2990 s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
2991 decode_header(s);
155ec6ed 2992 if(!s->block) alloc_blocks(s);
791e7b83
MN
2993
2994 frame_start(s);
2995 //keyframe flag dupliaction mess FIXME
2996 if(avctx->debug&FF_DEBUG_PICT_INFO)
2997 av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
2998
155ec6ed 2999 decode_blocks(s);
791e7b83
MN
3000
3001 for(plane_index=0; plane_index<3; plane_index++){
3002 Plane *p= &s->plane[plane_index];
3003 int w= p->width;
3004 int h= p->height;
3005 int x, y;
3006
3007if(s->avctx->debug&2048){
3008 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
3009 predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
3010
3011 for(y=0; y<h; y++){
3012 for(x=0; x<w; x++){
715a97f0 3013 int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
791e7b83
MN
3014 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
3015 }
3016 }
3017}
3018 for(level=0; level<s->spatial_decomposition_count; level++){
3019 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3020 SubBand *b= &p->band[level][orientation];
3021
3022 decode_subband(s, b, b->buf, b->parent ? b->parent->buf : NULL, b->stride, orientation);
ea7d9cd4 3023 if(orientation==0){
791e7b83 3024 correlate(s, b, b->buf, b->stride, 1, 0);
ea7d9cd4
MN
3025 dequantize(s, b, b->buf, b->stride);
3026 assert(b->buf == s->spatial_dwt_buffer);
3027 }
791e7b83
MN
3028 }
3029 }
3030
f9e6ebf7
LM
3031{START_TIMER
3032 const int mb_h= s->b_height << s->block_max_depth;
3033 const int block_size = MB_SIZE >> s->block_max_depth;
3034 const int block_w = plane_index ? block_size/2 : block_size;
3035 int mb_y;
3036 dwt_compose_t cs[MAX_DECOMPOSITIONS];
3037 int yd=0, yq=0;
3038
3039 ff_spatial_idwt_init(cs, s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
3040 for(mb_y=0; mb_y<=mb_h; mb_y++){
3041 const int slice_h = block_w*(mb_y+1);
3042 for(; yd<slice_h; yd+=4)
3043 ff_spatial_idwt_slice(cs, s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
3044
93fbdb5a 3045 if(s->qlog == LOSSLESS_QLOG){
f9e6ebf7 3046 for(; yq<slice_h && yq<h; yq++){
93fbdb5a 3047 for(x=0; x<w; x++){
f9e6ebf7 3048 s->spatial_dwt_buffer[yq*w + x]<<=FRAC_BITS;
93fbdb5a
MN
3049 }
3050 }
3051 }
f9e6ebf7
LM
3052
3053 predict_slice(s, s->spatial_dwt_buffer, plane_index, 1, mb_y);
3054 }
3055STOP_TIMER("idwt + predict_slices")}
791e7b83
MN
3056 }
3057
3058 emms_c();
3059
3060 if(s->last_picture.data[0])
3061 avctx->release_buffer(avctx, &s->last_picture);
3062
3063if(!(s->avctx->debug&2048))
3064 *picture= s->current_picture;
3065else
3066 *picture= s->mconly_picture;
3067
3068 *data_size = sizeof(AVFrame);
3069
28869757
MN
3070 bytes_read= c->bytestream - c->bytestream_start;
3071 if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
791e7b83
MN
3072
3073 return bytes_read;
3074}
3075
3076static int decode_end(AVCodecContext *avctx)
3077{
3078 SnowContext *s = avctx->priv_data;
3079
3080 common_end(s);
3081
3082 return 0;
3083}
3084
3085AVCodec snow_decoder = {
3086 "snow",
3087 CODEC_TYPE_VIDEO,
3088 CODEC_ID_SNOW,
3089 sizeof(SnowContext),
3090 decode_init,
3091 NULL,
3092 decode_end,
3093 decode_frame,
3094 0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
3095 NULL
3096};
3097
#ifdef CONFIG_ENCODERS
/* Codec table entry of the snow encoder, only built with CONFIG_ENCODERS.
 * Positional initializer; the entries after encode_end are left out. */
AVCodec snow_encoder = {
    "snow", CODEC_TYPE_VIDEO, CODEC_ID_SNOW,
    sizeof(SnowContext),
    encode_init,
    encode_frame,
    encode_end,
};
#endif
791e7b83
MN
3109
3110
#if 0
#undef malloc
#undef free
#undef printf

/**
 * Disabled self test.
 *
 * Checks that both wavelets survive a forward + inverse transform of random
 * data, that put_symbol() / get_symbol() round trip through the range coder,
 * and prints a visual_weight table derived from the energy of the inverse
 * transform of a single coefficient per subband.
 */
int main(){
    int width=256;
    int height=256;
    int buffer[2][width*height];
    SnowContext s; /* a plain object, so members are accessed with '.' */
    int i;
    s.spatial_decomposition_count=6;
    s.spatial_decomposition_type=1;

    printf("testing 5/3 DWT\n");
    for(i=0; i<width*height; i++)
        buffer[0][i]= buffer[1][i]= random()%54321 - 12345;

    ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
    ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);

    for(i=0; i<width*height; i++)
        if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);

    printf("testing 9/7 DWT\n");
    s.spatial_decomposition_type=0;
    for(i=0; i<width*height; i++)
        buffer[0][i]= buffer[1][i]= random()%54321 - 12345;

    ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
    ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);

    for(i=0; i<width*height; i++)
        if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);

    printf("testing AC coder\n");
    memset(s.header_state, 0, sizeof(s.header_state));
    ff_init_range_encoder(&s.c, (uint8_t*)buffer[0], 256*256);
    /* same range coder state setup as encode_frame() / decode_frame() */
    ff_build_rac_states(&s.c, 0.05*(1LL<<32), 256-8);

    for(i=-256; i<256; i++){
START_TIMER
        put_symbol(&s.c, s.header_state, i*i*i/3*ABS(i), 1);
STOP_TIMER("put_symbol")
    }
    ff_rac_terminate(&s.c);

    memset(s.header_state, 0, sizeof(s.header_state));
    ff_init_range_decoder(&s.c, (uint8_t*)buffer[0], 256*256);
    ff_build_rac_states(&s.c, 0.05*(1LL<<32), 256-8);

    for(i=-256; i<256; i++){
        int j;
START_TIMER
        j= get_symbol(&s.c, s.header_state, 1);
STOP_TIMER("get_symbol")
        if(j!=i*i*i/3*ABS(i)) printf("fsck: %d != %d\n", i, j);
    }
{
int level, orientation, x, y;
int64_t errors[8][4];
int64_t g=0;

    memset(errors, 0, sizeof(errors));
    s.spatial_decomposition_count=3;
    s.spatial_decomposition_type=0;
    for(level=0; level<s.spatial_decomposition_count; level++){
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
            int w= width  >> (s.spatial_decomposition_count-level);
            int h= height >> (s.spatial_decomposition_count-level);
            int stride= width  << (s.spatial_decomposition_count-level);
            DWTELEM *buf= buffer[0];
            int64_t error=0;

            if(orientation&1) buf+=w;
            if(orientation>1) buf+=stride>>1;

            /* single coefficient in the middle of the band, everything else zero */
            memset(buffer[0], 0, sizeof(int)*width*height);
            buf[w/2 + h/2*stride]= 256*256;
            ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
            for(y=0; y<height; y++){
                for(x=0; x<width; x++){
                    int64_t d= buffer[0][x + y*width];
                    error += d*d;
                    if(ABS(width/2-x)<9 && ABS(height/2-y)<9 && level==2) printf("%8lld ", (long long)d);
                }
                if(ABS(height/2-y)<9 && level==2) printf("\n");
            }
            error= (int)(sqrt(error)+0.5);
            errors[level][orientation]= error;
            /* g accumulates the gcd of all band errors; the table below is divided by it */
            if(g) g=ff_gcd(g, error);
            else g= error;
        }
    }
    printf("static int const visual_weight[][4]={\n");
    for(level=0; level<s.spatial_decomposition_count; level++){
        printf(" {");
        for(orientation=0; orientation<4; orientation++){
            printf("%8lld,", (long long)(errors[level][orientation]/g));
        }
        printf("},\n");
    }
    printf("};\n");
    {
            int level=2;
            int orientation=3;
            int w= width  >> (s.spatial_decomposition_count-level);
            int h= height >> (s.spatial_decomposition_count-level);
            int stride= width  << (s.spatial_decomposition_count-level);
            DWTELEM *buf= buffer[0];
            int64_t error=0;

            buf+=w;
            buf+=stride>>1;

            memset(buffer[0], 0, sizeof(int)*width*height);
#if 1
            /* 2x2 periodic test pattern, forward transformed */
            for(y=0; y<height; y++){
                for(x=0; x<width; x++){
                    int tab[4]={0,2,3,1};
                    buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
                }
            }
            ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
#else
            for(y=0; y<h; y++){
                for(x=0; x<w; x++){
                    buf[x + y*stride  ]=169;
                    buf[x + y*stride-w]=64;
                }
            }
            ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
#endif
            for(y=0; y<height; y++){
                for(x=0; x<width; x++){
                    int64_t d= buffer[0][x + y*width];
                    error += d*d;
                    if(ABS(width/2-x)<9 && ABS(height/2-y)<9) printf("%8lld ", (long long)d);
                }
                if(ABS(height/2-y)<9) printf("\n");
            }
    }

}
    return 0;
}
#endif
3258