range coder
[libav.git] / libavcodec / snow.c
CommitLineData
791e7b83
MN
1/*
2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18
19#include "avcodec.h"
20#include "common.h"
21#include "dsputil.h"
22#include "cabac.h"
23
24#include "mpegvideo.h"
25
26#undef NDEBUG
27#include <assert.h>
28
29#define MAX_DECOMPOSITIONS 8
30#define MAX_PLANES 4
31#define DWTELEM int
32#define QROOT 8
93fbdb5a 33#define LOSSLESS_QLOG -128
034aff03 34#define FRAC_BITS 8
791e7b83
MN
35
36static const int8_t quant3[256]={
37 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
43 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
44 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
45-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
46-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
47-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
48-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
49-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
50-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
51-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
52-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
53};
54static const int8_t quant3b[256]={
55 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
57 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
60 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
61 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
62 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
63-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
64-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
65-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
66-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
68-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
69-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
70-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
71};
72static const int8_t quant5[256]={
73 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
74 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
75 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
76 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
77 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
78 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
79 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
80 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
81-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
82-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
83-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
84-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
85-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
86-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
87-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
88-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
89};
90static const int8_t quant7[256]={
91 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
92 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
93 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
94 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
95 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
96 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
97 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
98 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
99-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
100-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
101-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
102-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
103-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
104-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
105-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
106-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
107};
108static const int8_t quant9[256]={
109 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
110 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
111 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
112 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
113 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
114 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
115 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
116 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
117-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
118-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
119-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
120-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
121-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
122-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
123-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
124-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
125};
126static const int8_t quant11[256]={
127 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
128 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
129 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
130 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
131 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
132 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
133 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
134 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
135-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
136-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
137-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
138-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
139-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
140-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
141-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
142-4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
143};
144static const int8_t quant13[256]={
145 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
146 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
147 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
148 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
149 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
150 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
151 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
152 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
153-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
154-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
155-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
156-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
157-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
158-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
159-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
160-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
161};
162
715a97f0
MN
163#define LOG2_OBMC_MAX 6
164#define OBMC_MAX (1<<(LOG2_OBMC_MAX))
791e7b83
MN
165#if 0 //64*cubic
166static const uint8_t obmc32[1024]={
167 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
168 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
169 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
170 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
171 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
172 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
173 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
174 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
175 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
176 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
177 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
178 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
179 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
180 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
181 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
182 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
183 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
184 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
185 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
186 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
187 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
188 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
189 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
190 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
191 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
192 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
193 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
194 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
195 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
196 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
197 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
198 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
199//error:0.000022
200};
201static const uint8_t obmc16[256]={
202 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
203 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
204 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
205 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
206 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
207 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
208 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
209 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
210 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
211 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
212 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
213 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
214 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
215 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
216 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
217 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
218//error:0.000033
219};
220#elif 1 // 64*linear
221static const uint8_t obmc32[1024]={
222 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
223 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
224 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
225 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
226 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
227 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
228 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
229 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
230 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
231 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
232 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
233 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
234 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
235 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
236 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
237 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
238 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
239 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
240 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
241 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
242 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
243 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
244 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
245 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
246 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
247 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
248 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
249 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
250 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
251 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
252 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
253 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
254 //error:0.000020
255};
256static const uint8_t obmc16[256]={
257 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
258 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
259 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
260 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
261 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
262 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
263 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
264 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
265 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
266 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
267 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
268 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
269 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
270 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
271 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
272 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
273//error:0.000015
274};
275#else //64*cos
276static const uint8_t obmc32[1024]={
277 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
278 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
279 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
280 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
281 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
282 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
283 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
284 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
285 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
286 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
287 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
288 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
289 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
290 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
291 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
292 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
293 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
294 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
295 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
296 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
297 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
298 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
299 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
300 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
301 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
302 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
303 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
304 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
305 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
306 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
307 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
308 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
309//error:0.000022
310};
311static const uint8_t obmc16[256]={
312 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
313 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
314 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
315 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
316 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
317 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
318 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
319 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
320 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
321 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
322 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
323 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
324 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
325 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
326 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
327 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
328//error:0.000022
329};
330#endif
331
155ec6ed
MN
332//linear *64
333static const uint8_t obmc8[64]={
334 1, 3, 5, 7, 7, 5, 3, 1,
335 3, 9,15,21,21,15, 9, 3,
336 5,15,25,35,35,25,15, 5,
337 7,21,35,49,49,35,21, 7,
338 7,21,35,49,49,35,21, 7,
339 5,15,25,35,35,25,15, 5,
340 3, 9,15,21,21,15, 9, 3,
341 1, 3, 5, 7, 7, 5, 3, 1,
342//error:0.000000
343};
344
345//linear *64
346static const uint8_t obmc4[16]={
347 4,12,12, 4,
34812,36,36,12,
34912,36,36,12,
350 4,12,12, 4,
351//error:0.000000
352};
353
354static const uint8_t *obmc_tab[4]={
355 obmc32, obmc16, obmc8, obmc4
356};
357
/**
 * One node of the block array referenced by SnowContext.block.
 * The field meanings below are inferred from the names only -- TODO confirm
 * against the code that fills them in.
 */
typedef struct BlockNode{
    int16_t mx;         ///< presumably the horizontal motion vector component
    int16_t my;         ///< presumably the vertical motion vector component
    uint8_t color[3];   ///< three 8-bit values, presumably one per colour plane
    uint8_t type;       ///< block type; BLOCK_INTRA is the only value defined here
//#define TYPE_SPLIT   1
#define BLOCK_INTRA 1
//#define TYPE_NOCOLOR 4
    uint8_t level;      //FIXME merge into type?
}BlockNode;
368
369#define LOG2_MB_SIZE 4
370#define MB_SIZE (1<<LOG2_MB_SIZE)
371
791e7b83
MN
372typedef struct SubBand{
373 int level;
374 int stride;
375 int width;
376 int height;
377 int qlog; ///< log(qscale)/log[2^(1/6)]
378 DWTELEM *buf;
7b49c309 379 int16_t *x;
0cea8a03 380 DWTELEM *coeff;
791e7b83
MN
381 struct SubBand *parent;
382 uint8_t state[/*7*2*/ 7 + 512][32];
383}SubBand;
384
385typedef struct Plane{
386 int width;
387 int height;
388 SubBand band[MAX_DECOMPOSITIONS][4];
389}Plane;
390
391typedef struct SnowContext{
392// MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
393
394 AVCodecContext *avctx;
395 CABACContext c;
396 DSPContext dsp;
397 AVFrame input_picture;
398 AVFrame current_picture;
399 AVFrame last_picture;
400 AVFrame mconly_picture;
401// uint8_t q_context[16];
402 uint8_t header_state[32];
155ec6ed 403 uint8_t block_state[128 + 32*128];
791e7b83 404 int keyframe;
19aa028d 405 int always_reset;
791e7b83
MN
406 int version;
407 int spatial_decomposition_type;
408 int temporal_decomposition_type;
409 int spatial_decomposition_count;
410 int temporal_decomposition_count;
411 DWTELEM *spatial_dwt_buffer;
791e7b83
MN
412 int colorspace_type;
413 int chroma_h_shift;
414 int chroma_v_shift;
415 int spatial_scalability;
416 int qlog;
155ec6ed
MN
417 int lambda;
418 int lambda2;
791e7b83
MN
419 int mv_scale;
420 int qbias;
421#define QBIAS_SHIFT 3
155ec6ed
MN
422 int b_width;
423 int b_height;
424 int block_max_depth;
791e7b83 425 Plane plane[MAX_PLANES];
155ec6ed
MN
426 BlockNode *block;
427
791e7b83
MN
428 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
429}SnowContext;
430
034aff03 431#define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
791e7b83
MN
432static const uint8_t qexp[8]={
433 128, 140, 152, 166, 181, 197, 215, 235
434// 64, 70, 76, 83, 91, 99, 108, 117
435// 32, 35, 38, 41, 45, 49, 54, 59
436// 16, 17, 19, 21, 23, 25, 27, 29
437// 8, 9, 10, 10, 11, 12, 13, 15
438};
439
/**
 * Reflects v once around the ends of the interval [0, m].
 *
 * Negative values are negated, values above m are folded back as 2*m - v,
 * anything else is returned unchanged.  Only a single reflection is applied,
 * so the result is inside [0, m] only when v lies within [-m, 2*m].
 */
static inline int mirror(int v, int m){
    if(v < 0)
        return -v;
    return v > m ? 2*m - v : v;
}
445
446static inline void put_symbol(CABACContext *c, uint8_t *state, int v, int is_signed){
447 int i;
448
449 if(v){
450 const int a= ABS(v);
451 const int e= av_log2(a);
452#if 1
453 const int el= FFMIN(e, 10);
454 put_cabac(c, state+0, 0);
455
456 for(i=0; i<el; i++){
457 put_cabac(c, state+1+i, 1); //1..10
458 }
459 for(; i<e; i++){
460 put_cabac(c, state+1+9, 1); //1..10
461 }
462 put_cabac(c, state+1+FFMIN(i,9), 0);
463
464 for(i=e-1; i>=el; i--){
465 put_cabac(c, state+22+9, (a>>i)&1); //22..31
466 }
467 for(; i>=0; i--){
468 put_cabac(c, state+22+i, (a>>i)&1); //22..31
469 }
470
471 if(is_signed)
472 put_cabac(c, state+11 + el, v < 0); //11..21
473#else
474
475 put_cabac(c, state+0, 0);
476 if(e<=9){
477 for(i=0; i<e; i++){
478 put_cabac(c, state+1+i, 1); //1..10
479 }
480 put_cabac(c, state+1+i, 0);
481
482 for(i=e-1; i>=0; i--){
483 put_cabac(c, state+22+i, (a>>i)&1); //22..31
484 }
485
486 if(is_signed)
487 put_cabac(c, state+11 + e, v < 0); //11..21
488 }else{
489 for(i=0; i<e; i++){
490 put_cabac(c, state+1+FFMIN(i,9), 1); //1..10
491 }
492 put_cabac(c, state+1+FFMIN(i,9), 0);
493
494 for(i=e-1; i>=0; i--){
495 put_cabac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
496 }
497
498 if(is_signed)
499 put_cabac(c, state+11 + FFMIN(e,10), v < 0); //11..21
500 }
501#endif
502 }else{
503 put_cabac(c, state+0, 1);
504 }
505}
506
507static inline int get_symbol(CABACContext *c, uint8_t *state, int is_signed){
508 if(get_cabac(c, state+0))
509 return 0;
510 else{
7c2425d2
LM
511 int i, e, a;
512 e= 0;
513 while(get_cabac(c, state+1 + FFMIN(e,9))){ //1..10
514 e++;
791e7b83 515 }
7c2425d2 516
791e7b83 517 a= 1;
7c2425d2
LM
518 for(i=e-1; i>=0; i--){
519 a += a + get_cabac(c, state+22 + FFMIN(i,9)); //22..31
791e7b83
MN
520 }
521
7c2425d2 522 if(is_signed && get_cabac(c, state+11 + FFMIN(e,10))) //11..21
791e7b83
MN
523 return -a;
524 else
525 return a;
526 }
527}
528
4f4e9633
MN
529static inline void put_symbol2(CABACContext *c, uint8_t *state, int v, int log2){
530 int i;
0635cbfc 531 int r= log2>=0 ? 1<<log2 : 1;
4f4e9633
MN
532
533 assert(v>=0);
0635cbfc
MN
534 assert(log2>=-4);
535
536 while(v >= r){
537 put_cabac(c, state+4+log2, 1);
538 v -= r;
4f4e9633 539 log2++;
0635cbfc 540 if(log2>0) r+=r;
4f4e9633 541 }
0635cbfc 542 put_cabac(c, state+4+log2, 0);
4f4e9633
MN
543
544 for(i=log2-1; i>=0; i--){
545 put_cabac(c, state+31-i, (v>>i)&1);
546 }
4f4e9633
MN
547}
548
549static inline int get_symbol2(CABACContext *c, uint8_t *state, int log2){
550 int i;
0635cbfc 551 int r= log2>=0 ? 1<<log2 : 1;
4f4e9633
MN
552 int v=0;
553
0635cbfc
MN
554 assert(log2>=-4);
555
556 while(get_cabac(c, state+4+log2)){
557 v+= r;
4f4e9633 558 log2++;
0635cbfc 559 if(log2>0) r+=r;
4f4e9633
MN
560 }
561
562 for(i=log2-1; i>=0; i--){
563 v+= get_cabac(c, state+31-i)<<i;
564 }
565
566 return v;
567}
568
791e7b83
MN
569static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
570 const int mirror_left= !highpass;
571 const int mirror_right= (width&1) ^ highpass;
572 const int w= (width>>1) - 1 + (highpass & width);
573 int i;
574
575#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
576 if(mirror_left){
577 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
578 dst += dst_step;
579 src += src_step;
580 }
581
582 for(i=0; i<w; i++){
583 dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse);
584 }
585
586 if(mirror_right){
587 dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse);
588 }
589}
590
591static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
592 const int mirror_left= !highpass;
593 const int mirror_right= (width&1) ^ highpass;
594 const int w= (width>>1) - 1 + (highpass & width);
595 int i;
596
597 if(mirror_left){
598 int r= 3*2*ref[0];
599 r += r>>4;
600 r += r>>8;
601 dst[0] = LIFT(src[0], ((r+add)>>shift), inverse);
602 dst += dst_step;
603 src += src_step;
604 }
605
606 for(i=0; i<w; i++){
607 int r= 3*(ref[i*ref_step] + ref[(i+1)*ref_step]);
608 r += r>>4;
609 r += r>>8;
610 dst[i*dst_step] = LIFT(src[i*src_step], ((r+add)>>shift), inverse);
611 }
612
613 if(mirror_right){
614 int r= 3*2*ref[w*ref_step];
615 r += r>>4;
616 r += r>>8;
617 dst[w*dst_step] = LIFT(src[w*src_step], ((r+add)>>shift), inverse);
618 }
619}
620
621
aa25a462 622static void inplace_lift(DWTELEM *dst, int width, int *coeffs, int n, int shift, int start, int inverse){
791e7b83
MN
623 int x, i;
624
625 for(x=start; x<width; x+=2){
626 int64_t sum=0;
627
628 for(i=0; i<n; i++){
629 int x2= x + 2*i - n + 1;
630 if (x2< 0) x2= -x2;
631 else if(x2>=width) x2= 2*width-x2-2;
632 sum += coeffs[i]*(int64_t)dst[x2];
633 }
634 if(inverse) dst[x] -= (sum + (1<<shift)/2)>>shift;
635 else dst[x] += (sum + (1<<shift)/2)>>shift;
636 }
637}
638
aa25a462 639static void inplace_liftV(DWTELEM *dst, int width, int height, int stride, int *coeffs, int n, int shift, int start, int inverse){
791e7b83
MN
640 int x, y, i;
641 for(y=start; y<height; y+=2){
642 for(x=0; x<width; x++){
643 int64_t sum=0;
644
645 for(i=0; i<n; i++){
646 int y2= y + 2*i - n + 1;
647 if (y2< 0) y2= -y2;
648 else if(y2>=height) y2= 2*height-y2-2;
649 sum += coeffs[i]*(int64_t)dst[x + y2*stride];
650 }
651 if(inverse) dst[x + y*stride] -= (sum + (1<<shift)/2)>>shift;
652 else dst[x + y*stride] += (sum + (1<<shift)/2)>>shift;
653 }
654 }
655}
656
657#define SCALEX 1
658#define LX0 0
659#define LX1 1
660
de890c9b 661#if 0 // more accurate 9/7
791e7b83
MN
662#define N1 2
663#define SHIFT1 14
664#define COEFFS1 (int[]){-25987,-25987}
665#define N2 2
666#define SHIFT2 19
667#define COEFFS2 (int[]){-27777,-27777}
668#define N3 2
669#define SHIFT3 15
670#define COEFFS3 (int[]){28931,28931}
671#define N4 2
672#define SHIFT4 15
673#define COEFFS4 (int[]){14533,14533}
674#elif 1 // 13/7 CRF
675#define N1 4
676#define SHIFT1 4
677#define COEFFS1 (int[]){1,-9,-9,1}
de890c9b 678#define N2 4
791e7b83
MN
679#define SHIFT2 4
680#define COEFFS2 (int[]){-1,5,5,-1}
681#define N3 0
682#define SHIFT3 1
683#define COEFFS3 NULL
684#define N4 0
685#define SHIFT4 1
686#define COEFFS4 NULL
687#elif 1 // 3/5
688#define LX0 1
689#define LX1 0
690#define SCALEX 0.5
691#define N1 2
692#define SHIFT1 1
693#define COEFFS1 (int[]){1,1}
694#define N2 2
695#define SHIFT2 2
696#define COEFFS2 (int[]){-1,-1}
697#define N3 0
698#define SHIFT3 0
699#define COEFFS3 NULL
700#define N4 0
701#define SHIFT4 0
702#define COEFFS4 NULL
703#elif 1 // 11/5
704#define N1 0
705#define SHIFT1 1
706#define COEFFS1 NULL
707#define N2 2
708#define SHIFT2 2
709#define COEFFS2 (int[]){-1,-1}
710#define N3 2
711#define SHIFT3 0
712#define COEFFS3 (int[]){-1,-1}
713#define N4 4
714#define SHIFT4 7
715#define COEFFS4 (int[]){-5,29,29,-5}
716#define SCALEX 4
717#elif 1 // 9/7 CDF
718#define N1 2
719#define SHIFT1 7
720#define COEFFS1 (int[]){-203,-203}
721#define N2 2
722#define SHIFT2 12
723#define COEFFS2 (int[]){-217,-217}
724#define N3 2
725#define SHIFT3 7
726#define COEFFS3 (int[]){113,113}
727#define N4 2
728#define SHIFT4 9
729#define COEFFS4 (int[]){227,227}
730#define SCALEX 1
731#elif 1 // 7/5 CDF
732#define N1 0
733#define SHIFT1 1
734#define COEFFS1 NULL
735#define N2 2
736#define SHIFT2 2
737#define COEFFS2 (int[]){-1,-1}
738#define N3 2
739#define SHIFT3 0
740#define COEFFS3 (int[]){-1,-1}
741#define N4 2
742#define SHIFT4 4
743#define COEFFS4 (int[]){3,3}
744#elif 1 // 9/7 MN
745#define N1 4
746#define SHIFT1 4
747#define COEFFS1 (int[]){1,-9,-9,1}
748#define N2 2
749#define SHIFT2 2
750#define COEFFS2 (int[]){1,1}
751#define N3 0
752#define SHIFT3 1
753#define COEFFS3 NULL
754#define N4 0
755#define SHIFT4 1
756#define COEFFS4 NULL
757#else // 13/7 CRF
758#define N1 4
759#define SHIFT1 4
760#define COEFFS1 (int[]){1,-9,-9,1}
761#define N2 4
762#define SHIFT2 4
763#define COEFFS2 (int[]){-1,5,5,-1}
764#define N3 0
765#define SHIFT3 1
766#define COEFFS3 NULL
767#define N4 0
768#define SHIFT4 1
769#define COEFFS4 NULL
770#endif
aa25a462
RFI
771static void horizontal_decomposeX(DWTELEM *b, int width){
772 DWTELEM temp[width];
791e7b83
MN
773 const int width2= width>>1;
774 const int w2= (width+1)>>1;
775 int A1,A2,A3,A4, x;
776
777 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 0);
778 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 0);
779 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 0);
780 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 0);
781
782 for(x=0; x<width2; x++){
783 temp[x ]= b[2*x ];
784 temp[x+w2]= b[2*x + 1];
785 }
786 if(width&1)
787 temp[x ]= b[2*x ];
788 memcpy(b, temp, width*sizeof(int));
789}
790
aa25a462
RFI
791static void horizontal_composeX(DWTELEM *b, int width){
792 DWTELEM temp[width];
791e7b83
MN
793 const int width2= width>>1;
794 int A1,A2,A3,A4, x;
795 const int w2= (width+1)>>1;
796
797 memcpy(temp, b, width*sizeof(int));
798 for(x=0; x<width2; x++){
799 b[2*x ]= temp[x ];
800 b[2*x + 1]= temp[x+w2];
801 }
802 if(width&1)
803 b[2*x ]= temp[x ];
804
805 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 1);
806 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 1);
807 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 1);
808 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 1);
809}
810
aa25a462 811static void spatial_decomposeX(DWTELEM *buffer, int width, int height, int stride){
791e7b83
MN
812 int x, y;
813
814 for(y=0; y<height; y++){
815 for(x=0; x<width; x++){
816 buffer[y*stride + x] *= SCALEX;
817 }
818 }
819
820 for(y=0; y<height; y++){
821 horizontal_decomposeX(buffer + y*stride, width);
822 }
823
824 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 0);
825 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0);
826 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0);
827 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0);
828}
829
aa25a462 830static void spatial_composeX(DWTELEM *buffer, int width, int height, int stride){
791e7b83
MN
831 int x, y;
832
833 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1);
834 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1);
835 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1);
836 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 1);
837
838 for(y=0; y<height; y++){
839 horizontal_composeX(buffer + y*stride, width);
840 }
841
842 for(y=0; y<height; y++){
843 for(x=0; x<width; x++){
844 buffer[y*stride + x] /= SCALEX;
845 }
846 }
847}
848
aa25a462
RFI
849static void horizontal_decompose53i(DWTELEM *b, int width){
850 DWTELEM temp[width];
791e7b83
MN
851 const int width2= width>>1;
852 int A1,A2,A3,A4, x;
853 const int w2= (width+1)>>1;
854
855 for(x=0; x<width2; x++){
856 temp[x ]= b[2*x ];
857 temp[x+w2]= b[2*x + 1];
858 }
859 if(width&1)
860 temp[x ]= b[2*x ];
861#if 0
862 A2= temp[1 ];
863 A4= temp[0 ];
864 A1= temp[0+width2];
865 A1 -= (A2 + A4)>>1;
866 A4 += (A1 + 1)>>1;
867 b[0+width2] = A1;
868 b[0 ] = A4;
869 for(x=1; x+1<width2; x+=2){
870 A3= temp[x+width2];
871 A4= temp[x+1 ];
872 A3 -= (A2 + A4)>>1;
873 A2 += (A1 + A3 + 2)>>2;
874 b[x+width2] = A3;
875 b[x ] = A2;
876
877 A1= temp[x+1+width2];
878 A2= temp[x+2 ];
879 A1 -= (A2 + A4)>>1;
880 A4 += (A1 + A3 + 2)>>2;
881 b[x+1+width2] = A1;
882 b[x+1 ] = A4;
883 }
884 A3= temp[width-1];
885 A3 -= A2;
886 A2 += (A1 + A3 + 2)>>2;
887 b[width -1] = A3;
888 b[width2-1] = A2;
889#else
890 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
891 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0);
892#endif
893}
894
/**
 * Forward 5/3 step on a row: b1[i] -= (b0[i] + b2[i]) >> 1.
 *
 * @param b0    row on one side of b1 (read only)
 * @param b1    row updated in place
 * @param b2    row on the other side of b1 (read only)
 * @param width number of samples per row
 */
static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
    int col= 0;

    while(col < width){
        const DWTELEM neighbours= b0[col] + b2[col];
        b1[col] -= neighbours>>1;
        col++;
    }
}
902
/**
 * Forward 5/3 step on a row: b1[i] += (b0[i] + b2[i] + 2) >> 2.
 *
 * @param b0    row on one side of b1 (read only)
 * @param b1    row updated in place
 * @param b2    row on the other side of b1 (read only)
 * @param width number of samples per row
 */
static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
    int col= 0;

    while(col < width){
        const DWTELEM neighbours= b0[col] + b2[col];
        b1[col] += (neighbours + 2)>>2;
        col++;
    }
}
910
aa25a462 911static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
39c61bbb 912 int y;
791e7b83
MN
913 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
914 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride;
915
916 for(y=-2; y<height; y+=2){
917 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
918 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
919
920{START_TIMER
921 if(b1 <= b3) horizontal_decompose53i(b2, width);
922 if(y+2 < height) horizontal_decompose53i(b3, width);
923STOP_TIMER("horizontal_decompose53i")}
924
925{START_TIMER
926 if(b1 <= b3) vertical_decompose53iH0(b1, b2, b3, width);
927 if(b0 <= b2) vertical_decompose53iL0(b0, b1, b2, width);
928STOP_TIMER("vertical_decompose53i*")}
929
930 b0=b2;
931 b1=b3;
932 }
933}
934
/* Integer constants for the four lifting steps (A, B, C, D) of the "97i"
 * transform.  The vertical_*97i* helpers apply step X as
 *   (W_XM * (left + right) + W_XO) >> W_XS
 * i.e. W_XM is the multiplier, W_XO the offset added before the shift and
 * W_XS the shift amount.  Exactly one of the four sets below is compiled. */
935#define lift5 lift
936#if 1
/* Active set. */
937#define W_AM 3
938#define W_AO 0
939#define W_AS 1
940
941#define W_BM 1
942#define W_BO 8
943#define W_BS 4
944
/* lift5 is undefined in this set, so the "#ifdef lift5" sections of
 * vertical_decompose97iH1 / vertical_compose97iH1 take their #else path
 * (3*(a+b) refined with r>>4 and r>>8) and do not read W_CM there. */
945#undef lift5
946#define W_CM 9999
947#define W_CO 2
948#define W_CS 2
949
950#define W_DM 15
951#define W_DO 16
952#define W_DS 5
953#elif 0
/* Disabled alternative set. */
954#define W_AM 55
955#define W_AO 16
956#define W_AS 5
957
958#define W_BM 3
959#define W_BO 32
960#define W_BS 6
961
962#define W_CM 127
963#define W_CO 64
964#define W_CS 7
965
966#define W_DM 7
967#define W_DO 8
968#define W_DS 4
969#elif 0
/* Disabled alternative set. */
970#define W_AM 97
971#define W_AO 32
972#define W_AS 6
973
974#define W_BM 63
975#define W_BO 512
976#define W_BS 10
977
978#define W_CM 13
979#define W_CO 8
980#define W_CS 4
981
982#define W_DM 15
983#define W_DO 16
984#define W_DS 5
985
986#else
987
/* Fallback set (not compiled while the first branch is "#if 1"); the
 * multipliers and shifts have the same magnitudes as the COEFFSn / SHIFTn
 * values of the "9/7 CDF" branch of the generic wavelet table. */
988#define W_AM 203
989#define W_AO 64
990#define W_AS 7
991
992#define W_BM 217
993#define W_BO 2048
994#define W_BS 12
995
996#define W_CM 113
997#define W_CO 64
998#define W_CS 7
999
1000#define W_DM 227
1001#define W_DO 128
1002#define W_DS 9
1003#endif
aa25a462
RFI
1004static void horizontal_decompose97i(DWTELEM *b, int width){
1005 DWTELEM temp[width];
791e7b83
MN
1006 const int w2= (width+1)>>1;
1007
1008 lift (temp+w2, b +1, b , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0);
1009 lift (temp , b , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0);
1010 lift5(b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
1011 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
1012}
1013
1014
/**
 * Forward "97i" step A on a row:
 * b1[i] -= (W_AM * (b0[i] + b2[i]) + W_AO) >> W_AS.
 *
 * @param b0    row on one side of b1 (read only)
 * @param b1    row updated in place
 * @param b2    row on the other side of b1 (read only)
 * @param width number of samples per row
 */
static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
    int col= 0;

    while(col < width){
        const DWTELEM neighbours= b0[col] + b2[col];
        b1[col] -= (W_AM*neighbours + W_AO)>>W_AS;
        col++;
    }
}
1022
/**
 * Forward "97i" step C on a row.
 *
 * With lift5 defined: b1[i] += (W_CM * (b0[i] + b2[i]) + W_CO) >> W_CS.
 * Otherwise the multiplier is replaced by r = 3*(b0[i] + b2[i]),
 * r += r>>4, r += r>>8, and b1[i] += (r + W_CO) >> W_CS.
 *
 * @param b0    row on one side of b1 (read only)
 * @param b1    row updated in place
 * @param b2    row on the other side of b1 (read only)
 * @param width number of samples per row
 */
static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
    int col;

    for(col=0; col<width; col++){
        const int pair= b0[col] + b2[col];
#ifdef lift5
        b1[col] += (W_CM*pair + W_CO)>>W_CS;
#else
        int scaled= 3*pair;
        scaled+= scaled>>4;
        scaled+= scaled>>8;
        b1[col] += (scaled + W_CO)>>W_CS;
#endif
    }
}
1037
/**
 * Forward "97i" step B on a row:
 * b1[i] -= (W_BM * (b0[i] + b2[i]) + W_BO) >> W_BS.
 *
 * @param b0    row on one side of b1 (read only)
 * @param b1    row updated in place
 * @param b2    row on the other side of b1 (read only)
 * @param width number of samples per row
 */
static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
    int col= 0;

    while(col < width){
        const DWTELEM neighbours= b0[col] + b2[col];
        b1[col] -= (W_BM*neighbours + W_BO)>>W_BS;
        col++;
    }
}
1045
/**
 * Forward "97i" step D on a row:
 * b1[i] += (W_DM * (b0[i] + b2[i]) + W_DO) >> W_DS.
 *
 * @param b0    row on one side of b1 (read only)
 * @param b1    row updated in place
 * @param b2    row on the other side of b1 (read only)
 * @param width number of samples per row
 */
static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
    int col= 0;

    while(col < width){
        const DWTELEM neighbours= b0[col] + b2[col];
        b1[col] += (W_DM*neighbours + W_DO)>>W_DS;
        col++;
    }
}
1053
aa25a462 1054static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
39c61bbb 1055 int y;
791e7b83
MN
1056 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
1057 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride;
1058 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
1059 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
1060
1061 for(y=-4; y<height; y+=2){
1062 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1063 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1064
1065{START_TIMER
1066 if(b3 <= b5) horizontal_decompose97i(b4, width);
1067 if(y+4 < height) horizontal_decompose97i(b5, width);
1068if(width>400){
1069STOP_TIMER("horizontal_decompose97i")
1070}}
1071
1072{START_TIMER
1073 if(b3 <= b5) vertical_decompose97iH0(b3, b4, b5, width);
1074 if(b2 <= b4) vertical_decompose97iL0(b2, b3, b4, width);
1075 if(b1 <= b3) vertical_decompose97iH1(b1, b2, b3, width);
1076 if(b0 <= b2) vertical_decompose97iL1(b0, b1, b2, width);
1077
1078if(width>400){
1079STOP_TIMER("vertical_decompose97i")
1080}}
1081
1082 b0=b2;
1083 b1=b3;
1084 b2=b4;
1085 b3=b5;
1086 }
1087}
1088
aa25a462 1089void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
791e7b83
MN
1090 int level;
1091
46c281e8
MN
1092 for(level=0; level<decomposition_count; level++){
1093 switch(type){
791e7b83
MN
1094 case 0: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1095 case 1: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1096 case 2: spatial_decomposeX (buffer, width>>level, height>>level, stride<<level); break;
1097 }
1098 }
1099}
1100
aa25a462
RFI
1101static void horizontal_compose53i(DWTELEM *b, int width){
1102 DWTELEM temp[width];
791e7b83
MN
1103 const int width2= width>>1;
1104 const int w2= (width+1)>>1;
1105 int A1,A2,A3,A4, x;
1106
1107#if 0
1108 A2= temp[1 ];
1109 A4= temp[0 ];
1110 A1= temp[0+width2];
1111 A1 -= (A2 + A4)>>1;
1112 A4 += (A1 + 1)>>1;
1113 b[0+width2] = A1;
1114 b[0 ] = A4;
1115 for(x=1; x+1<width2; x+=2){
1116 A3= temp[x+width2];
1117 A4= temp[x+1 ];
1118 A3 -= (A2 + A4)>>1;
1119 A2 += (A1 + A3 + 2)>>2;
1120 b[x+width2] = A3;
1121 b[x ] = A2;
1122
1123 A1= temp[x+1+width2];
1124 A2= temp[x+2 ];
1125 A1 -= (A2 + A4)>>1;
1126 A4 += (A1 + A3 + 2)>>2;
1127 b[x+1+width2] = A1;
1128 b[x+1 ] = A4;
1129 }
1130 A3= temp[width-1];
1131 A3 -= A2;
1132 A2 += (A1 + A3 + 2)>>2;
1133 b[width -1] = A3;
1134 b[width2-1] = A2;
1135#else
1136 lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1);
1137 lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1138#endif
1139 for(x=0; x<width2; x++){
1140 b[2*x ]= temp[x ];
1141 b[2*x + 1]= temp[x+w2];
1142 }
1143 if(width&1)
1144 b[2*x ]= temp[x ];
1145}
1146
/**
 * Inverse 5/3 step on a row (undoes vertical_decompose53iH0):
 * b1[i] += (b0[i] + b2[i]) >> 1.
 *
 * @param b0    row on one side of b1 (read only)
 * @param b1    row updated in place
 * @param b2    row on the other side of b1 (read only)
 * @param width number of samples per row
 */
static void vertical_compose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
    int col= 0;

    while(col < width){
        const DWTELEM neighbours= b0[col] + b2[col];
        b1[col] += neighbours>>1;
        col++;
    }
}
1154
/**
 * Inverse 5/3 step on a row (undoes vertical_decompose53iL0):
 * b1[i] -= (b0[i] + b2[i] + 2) >> 2.
 *
 * @param b0    row on one side of b1 (read only)
 * @param b1    row updated in place
 * @param b2    row on the other side of b1 (read only)
 * @param width number of samples per row
 */
static void vertical_compose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
    int col= 0;

    while(col < width){
        const DWTELEM neighbours= b0[col] + b2[col];
        b1[col] -= (neighbours + 2)>>2;
        col++;
    }
}
1162
aa25a462 1163static void spatial_compose53i(DWTELEM *buffer, int width, int height, int stride){
39c61bbb 1164 int y;
791e7b83
MN
1165 DWTELEM *b0= buffer + mirror(-1-1, height-1)*stride;
1166 DWTELEM *b1= buffer + mirror(-1 , height-1)*stride;
1167
1168 for(y=-1; y<=height; y+=2){
1169 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1170 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1171
1172{START_TIMER
1173 if(b1 <= b3) vertical_compose53iL0(b1, b2, b3, width);
1174 if(b0 <= b2) vertical_compose53iH0(b0, b1, b2, width);
1175STOP_TIMER("vertical_compose53i*")}
1176
1177{START_TIMER
1178 if(y-1 >= 0) horizontal_compose53i(b0, width);
1179 if(b0 <= b2) horizontal_compose53i(b1, width);
1180STOP_TIMER("horizontal_compose53i")}
1181
1182 b0=b2;
1183 b1=b3;
1184 }
1185}
1186
1187
aa25a462
RFI
1188static void horizontal_compose97i(DWTELEM *b, int width){
1189 DWTELEM temp[width];
791e7b83
MN
1190 const int w2= (width+1)>>1;
1191
1192 lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
1193 lift5(temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
1194 lift (b , temp , temp+w2, 2, 1, 1, width, -W_BM, W_BO, W_BS, 0, 1);
1195 lift (b+1 , temp+w2, b , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1);
1196}
1197
/**
 * Inverse "97i" step A on a row (undoes vertical_decompose97iH0):
 * b1[i] += (W_AM * (b0[i] + b2[i]) + W_AO) >> W_AS.
 *
 * @param b0    row on one side of b1 (read only)
 * @param b1    row updated in place
 * @param b2    row on the other side of b1 (read only)
 * @param width number of samples per row
 */
static void vertical_compose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
    int col= 0;

    while(col < width){
        const DWTELEM neighbours= b0[col] + b2[col];
        b1[col] += (W_AM*neighbours + W_AO)>>W_AS;
        col++;
    }
}
1205
/**
 * Inverse "97i" step C on a row (undoes vertical_decompose97iH1).
 *
 * With lift5 defined: b1[i] -= (W_CM * (b0[i] + b2[i]) + W_CO) >> W_CS.
 * Otherwise the multiplier is replaced by r = 3*(b0[i] + b2[i]),
 * r += r>>4, r += r>>8, and b1[i] -= (r + W_CO) >> W_CS.
 *
 * @param b0    row on one side of b1 (read only)
 * @param b1    row updated in place
 * @param b2    row on the other side of b1 (read only)
 * @param width number of samples per row
 */
static void vertical_compose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
    int col;

    for(col=0; col<width; col++){
        const int pair= b0[col] + b2[col];
#ifdef lift5
        b1[col] -= (W_CM*pair + W_CO)>>W_CS;
#else
        int scaled= 3*pair;
        scaled+= scaled>>4;
        scaled+= scaled>>8;
        b1[col] -= (scaled + W_CO)>>W_CS;
#endif
    }
}
1220
/**
 * Inverse "97i" step B on a row (undoes vertical_decompose97iL0):
 * b1[i] += (W_BM * (b0[i] + b2[i]) + W_BO) >> W_BS.
 *
 * @param b0    row on one side of b1 (read only)
 * @param b1    row updated in place
 * @param b2    row on the other side of b1 (read only)
 * @param width number of samples per row
 */
static void vertical_compose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
    int col= 0;

    while(col < width){
        const DWTELEM neighbours= b0[col] + b2[col];
        b1[col] += (W_BM*neighbours + W_BO)>>W_BS;
        col++;
    }
}
1228
/**
 * Inverse "97i" step D on a row (undoes vertical_decompose97iL1):
 * b1[i] -= (W_DM * (b0[i] + b2[i]) + W_DO) >> W_DS.
 *
 * @param b0    row on one side of b1 (read only)
 * @param b1    row updated in place
 * @param b2    row on the other side of b1 (read only)
 * @param width number of samples per row
 */
static void vertical_compose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
    int col= 0;

    while(col < width){
        const DWTELEM neighbours= b0[col] + b2[col];
        b1[col] -= (W_DM*neighbours + W_DO)>>W_DS;
        col++;
    }
}
1236
aa25a462 1237static void spatial_compose97i(DWTELEM *buffer, int width, int height, int stride){
39c61bbb 1238 int y;
791e7b83
MN
1239 DWTELEM *b0= buffer + mirror(-3-1, height-1)*stride;
1240 DWTELEM *b1= buffer + mirror(-3 , height-1)*stride;
1241 DWTELEM *b2= buffer + mirror(-3+1, height-1)*stride;
1242 DWTELEM *b3= buffer + mirror(-3+2, height-1)*stride;
1243
1244 for(y=-3; y<=height; y+=2){
1245 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1246 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1247
1248 if(stride == width && y+4 < height && 0){
1249 int x;
1250 for(x=0; x<width/2; x++)
1251 b5[x] += 64*2;
1252 for(; x<width; x++)
1253 b5[x] += 169*2;
1254 }
1255
1256{START_TIMER
1257 if(b3 <= b5) vertical_compose97iL1(b3, b4, b5, width);
1258 if(b2 <= b4) vertical_compose97iH1(b2, b3, b4, width);
1259 if(b1 <= b3) vertical_compose97iL0(b1, b2, b3, width);
1260 if(b0 <= b2) vertical_compose97iH0(b0, b1, b2, width);
1261if(width>400){
1262STOP_TIMER("vertical_compose97i")}}
1263
1264{START_TIMER
1265 if(y-1>= 0) horizontal_compose97i(b0, width);
1266 if(b0 <= b2) horizontal_compose97i(b1, width);
1267if(width>400 && b0 <= b2){
1268STOP_TIMER("horizontal_compose97i")}}
1269
1270 b0=b2;
1271 b1=b3;
1272 b2=b4;
1273 b3=b5;
1274 }
1275}
1276
aa25a462 1277void ff_spatial_idwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
791e7b83
MN
1278 int level;
1279
46c281e8
MN
1280 for(level=decomposition_count-1; level>=0; level--){
1281 switch(type){
791e7b83
MN
1282 case 0: spatial_compose97i(buffer, width>>level, height>>level, stride<<level); break;
1283 case 1: spatial_compose53i(buffer, width>>level, height>>level, stride<<level); break;
1284 case 2: spatial_composeX (buffer, width>>level, height>>level, stride<<level); break;
1285 }
1286 }
1287}
1288
/**
 * Encode the coefficients of one subband with context modelling plus
 * zero-run coding.
 *
 * Every coefficient v is classified by its already-coded neighbourhood:
 * l (left), lt (top-left), t (top), rt (top-right) and, when parent is
 * non-NULL, p (the coefficient at (x>>1, y>>1) read from parent).
 *  - Pass 1 scans the band and, over the positions whose whole neighbourhood
 *    is zero, records the lengths of the zero runs between non-zero
 *    coefficients in runs[].
 *  - Pass 2 scans again and emits symbols: a significance bit for positions
 *    with a non-zero neighbourhood, run lengths for the others, and magnitude
 *    plus sign for every non-zero coefficient.
 *
 * @param src         first coefficient of the band; rows are stride apart
 * @param parent      base pointer for the parent-band lookup, or NULL
 * @param orientation only referenced from commented-out code in this body
 *
 * NOTE(review): runs[] is a variable-length array of w*h ints on the stack.
 */
4f4e9633 1289static void encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
4f4e9633
MN
1290 const int w= b->width;
1291 const int h= b->height;
1292 int x, y;
1293
791e7b83 1294 if(1){
791e7b83 1295 int run=0;
a8d73e56 1296 int runs[w*h];
791e7b83
MN
1297 int run_index=0;
1298
/* ---- pass 1: collect the zero-run lengths ---- */
791e7b83
MN
1299 for(y=0; y<h; y++){
1300 for(x=0; x<w; x++){
78486403 1301 int v, p=0;
6b2f6646 1302 int /*ll=0, */l=0, lt=0, t=0, rt=0;
a8d73e56 1303 v= src[x + y*stride];
791e7b83
MN
1304
/* neighbours outside the band stay 0 */
1305 if(y){
a8d73e56 1306 t= src[x + (y-1)*stride];
791e7b83 1307 if(x){
a8d73e56 1308 lt= src[x - 1 + (y-1)*stride];
791e7b83
MN
1309 }
1310 if(x + 1 < w){
a8d73e56 1311 rt= src[x + 1 + (y-1)*stride];
791e7b83
MN
1312 }
1313 }
1314 if(x){
a8d73e56 1315 l= src[x - 1 + y*stride];
6b2f6646
MN
1316 /*if(x > 1){
1317 if(orientation==1) ll= src[y + (x-2)*stride];
1318 else ll= src[x - 2 + y*stride];
791e7b83
MN
1319 }*/
1320 }
/* parent coefficient at half resolution; parent rows are 2*stride apart */
78486403 1321 if(parent){
a8d73e56
MN
1322 int px= x>>1;
1323 int py= y>>1;
78486403
MN
1324 if(px<b->parent->width && py<b->parent->height)
1325 p= parent[px + py*2*stride];
1326 }
/* only positions with an all-zero neighbourhood take part in run coding */
1327 if(!(/*ll|*/l|lt|t|rt|p)){
791e7b83
MN
1328 if(v){
1329 runs[run_index++]= run;
1330 run=0;
1331 }else{
1332 run++;
1333 }
1334 }
1335 }
1336 }
/* store the trailing run, then rewind the list and emit the first run */
1337 runs[run_index++]= run;
1338 run_index=0;
1339 run= runs[run_index++];
1340
4f4e9633 1341 put_symbol2(&s->c, b->state[1], run, 3);
791e7b83
MN
1342
/* ---- pass 2: emit the symbols (same neighbourhood computation as pass 1) ---- */
1343 for(y=0; y<h; y++){
1344 for(x=0; x<w; x++){
78486403 1345 int v, p=0;
6b2f6646 1346 int /*ll=0, */l=0, lt=0, t=0, rt=0;
a8d73e56 1347 v= src[x + y*stride];
791e7b83
MN
1348
1349 if(y){
a8d73e56 1350 t= src[x + (y-1)*stride];
791e7b83 1351 if(x){
a8d73e56 1352 lt= src[x - 1 + (y-1)*stride];
791e7b83
MN
1353 }
1354 if(x + 1 < w){
a8d73e56 1355 rt= src[x + 1 + (y-1)*stride];
791e7b83
MN
1356 }
1357 }
1358 if(x){
a8d73e56 1359 l= src[x - 1 + y*stride];
6b2f6646
MN
1360 /*if(x > 1){
1361 if(orientation==1) ll= src[y + (x-2)*stride];
1362 else ll= src[x - 2 + y*stride];
791e7b83
MN
1363 }*/
1364 }
78486403 1365 if(parent){
a8d73e56
MN
1366 int px= x>>1;
1367 int py= y>>1;
78486403
MN
1368 if(px<b->parent->width && py<b->parent->height)
1369 p= parent[px + py*2*stride];
1370 }
/* non-zero neighbourhood: code "is v non-zero" with a context derived from
 * the weighted neighbour magnitudes */
1371 if(/*ll|*/l|lt|t|rt|p){
1372 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
6b2f6646
MN
1373
1374 put_cabac(&s->c, &b->state[0][context], !!v);
791e7b83
MN
1375 }else{
/* all-zero neighbourhood: a run that reached 0 means this position holds a
 * non-zero coefficient, so the next run length is emitted; otherwise the
 * position is one of the zeros counted by the current run */
1376 if(!run){
1377 run= runs[run_index++];
4f4e9633
MN
1378
1379 put_symbol2(&s->c, b->state[1], run, 3);
791e7b83
MN
1380 assert(v);
1381 }else{
1382 run--;
1383 assert(!v);
1384 }
1385 }
/* non-zero coefficient: magnitude-1, then the sign with a context taken from
 * quant3b of the left and top neighbours */
1386 if(v){
78486403 1387 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
6b2f6646 1388
0635cbfc 1389 put_symbol2(&s->c, b->state[context + 2], ABS(v)-1, context-4);
791e7b83
MN
1390 put_cabac(&s->c, &b->state[0][16 + 1 + 3 + quant3b[l&0xFF] + 3*quant3b[t&0xFF]], v<0);
1391 }
1392 }
1393 }
791e7b83 1394 }
791e7b83
MN
1395}
1396
4f4e9633
MN
1397static void encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1398// encode_subband_qtree(s, b, src, parent, stride, orientation);
1399// encode_subband_z0run(s, b, src, parent, stride, orientation);
1400 encode_subband_c0run(s, b, src, parent, stride, orientation);
1401// encode_subband_dzr(s, b, src, parent, stride, orientation);
1402}
1403
/**
 * Decode the coefficients of one subband; the symbol order mirrors
 * encode_subband_c0run().
 *
 * Besides writing dequantised values into src, the function records every
 * non-zero coefficient in two parallel arrays of the band: b->x[] (column)
 * and b->coeff[] (signed quantised value).  Each row's entries are followed
 * by an entry with x == w+1 as end marker.  These lists are used in place of
 * src to obtain the top / top-right neighbours (previous row of this band)
 * and the parent coefficient (b->parent's lists).
 *
 * @param src         first coefficient of the band; rows are stride apart
 * @param parent      non-NULL enables the parent context; the value itself is
 *                    only tested, the data comes from b->parent
 * @param orientation not referenced in this body
 */
a8d73e56 1404static inline void decode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
791e7b83
MN
1405 const int w= b->width;
1406 const int h= b->height;
1407 int x,y;
/* dequantisation: multiplier from the qexp[] table, shifted per 8 qlog steps,
 * plus a bias term derived from s->qbias */
ea7d9cd4
MN
1408 const int qlog= clip(s->qlog + b->qlog, 0, 128);
1409 int qmul= qexp[qlog&7]<<(qlog>>3);
1410 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1411
fff6d4ea
MN
1412 START_TIMER
1413
/* identity dequantisation for the band that starts at the DWT buffer itself
 * and for lossless mode */
ea7d9cd4
MN
1414 if(b->buf == s->spatial_dwt_buffer || s->qlog == LOSSLESS_QLOG){
1415 qadd= 0;
1416 qmul= 1<<QEXPSHIFT;
1417 }
791e7b83 1418
791e7b83 1419 if(1){
791e7b83 1420 int run;
/* index      : next free slot in b->x[] / b->coeff[]
 * prev_index : cursor into the previous row's entries
 * prev2_index: start of the row currently being written (becomes prev_index)
 * parent_index / prev_parent_index: cursor into b->parent's entries */
7b49c309
MN
1421 int index=0;
1422 int prev_index=-1;
1423 int prev2_index=0;
1424 int parent_index= 0;
1425 int prev_parent_index= 0;
1426
/* only non-zero coefficients are stored below, so clear the band first */
791e7b83
MN
1427 for(y=0; y<b->height; y++)
1428 memset(&src[y*stride], 0, b->width*sizeof(DWTELEM));
1429
4f4e9633 1430 run= get_symbol2(&s->c, b->state[1], 3);
791e7b83 1431 for(y=0; y<h; y++){
0cea8a03
MN
1432 int v=0;
1433 int lt=0, t=0, rt=0;
1434
/* seed rt with the previous row's coefficient at column 0, if there is one */
ff765159
MN
1435 if(y && b->x[prev_index] == 0){
1436 rt= b->coeff[prev_index];
0cea8a03 1437 }
791e7b83 1438 for(x=0; x<w; x++){
0cea8a03
MN
1439 int p=0;
/* left neighbour is the value decoded in the previous iteration */
1440 const int l= v;
1441
/* shift the top-row window one column to the right */
1442 lt= t; t= rt;
791e7b83 1443
ff765159 1444 if(y){
ea7d9cd4 1445 if(b->x[prev_index] <= x)
ff765159
MN
1446 prev_index++;
1447 if(b->x[prev_index] == x + 1)
1448 rt= b->coeff[prev_index];
1449 else
1450 rt=0;
1451 }
/* parent coefficient for column x>>1, taken from the parent's entry list */
78486403 1452 if(parent){
7b49c309
MN
1453 if(x>>1 > b->parent->x[parent_index]){
1454 parent_index++;
1455 }
ff765159
MN
1456 if(x>>1 == b->parent->x[parent_index]){
1457 p= b->parent->coeff[parent_index];
1458 }
78486403
MN
1459 }
1460 if(/*ll|*/l|lt|t|rt|p){
1461 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
6b2f6646
MN
1462
1463 v=get_cabac(&s->c, &b->state[0][context]);
791e7b83
MN
1464 }else{
1465 if(!run){
4f4e9633 1466 run= get_symbol2(&s->c, b->state[1], 3);
791e7b83
MN
1467 v=1;
1468 }else{
1469 run--;
1470 v=0;
3c1adccd 1471
/* fast path: advance x by up to run positions at once, bounded by the next
 * entry of the previous row and of the parent row.
 * NOTE(review): presumably every skipped position has an all-zero
 * neighbourhood; the path is only taken when y && parent -- confirm. */
7b49c309
MN
1472 if(y && parent){
1473 int max_run;
7b49c309
MN
1474
1475 max_run= FFMIN(run, b->x[prev_index] - x - 2);
1476 max_run= FFMIN(max_run, 2*b->parent->x[parent_index] - x - 1);
1477 x+= max_run;
1478 run-= max_run;
3c1adccd 1479 }
791e7b83
MN
1480 }
1481 }
/* non-zero: read magnitude-1 and the sign, store the dequantised value in src
 * and the signed quantised value in the entry list */
1482 if(v){
78486403 1483 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
0635cbfc 1484 v= get_symbol2(&s->c, b->state[context + 2], context-4) + 1;
ea7d9cd4
MN
1485 if(get_cabac(&s->c, &b->state[0][16 + 1 + 3 + quant3b[l&0xFF] + 3*quant3b[t&0xFF]])){
1486 src[x + y*stride]=-(( v*qmul + qadd)>>(QEXPSHIFT));
791e7b83 1487 v= -v;
ea7d9cd4
MN
1488 }else{
1489 src[x + y*stride]= (( v*qmul + qadd)>>(QEXPSHIFT));
1490 }
ff765159
MN
1491 b->x[index]=x; //FIXME interleave x/coeff
1492 b->coeff[index++]= v;
7b49c309
MN
1493 }
1494 }
/* terminate this row's entries; the row just written becomes the "previous
 * row" for the next iteration */
1495 b->x[index++]= w+1; //end marker
1496 prev_index= prev2_index;
1497 prev2_index= index;
1498
/* move past the parent row's end marker; after an even y the cursor is
 * rewound so that the following odd row reads the same parent row again */
1499 if(parent){
1500 while(b->parent->x[parent_index] != b->parent->width+1)
1501 parent_index++;
1502 parent_index++;
1503 if(y&1){
1504 prev_parent_index= parent_index;
1505 }else{
1506 parent_index= prev_parent_index;
791e7b83
MN
1507 }
1508 }
1509 }
7b49c309 1510 b->x[index++]= w+1; //end marker
/* timing is only reported for bands wider than 200 */
3c1adccd 1511 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
791e7b83
MN
1512 STOP_TIMER("decode_subband")
1513 }
1514
1515 return;
1516 }
791e7b83
MN
1517}
1518
1519static void reset_contexts(SnowContext *s){
1520 int plane_index, level, orientation;
1521
19aa028d 1522 for(plane_index=0; plane_index<3; plane_index++){
791e7b83
MN
1523 for(level=0; level<s->spatial_decomposition_count; level++){
1524 for(orientation=level ? 1:0; orientation<4; orientation++){
1525 memset(s->plane[plane_index].band[level][orientation].state, 0, sizeof(s->plane[plane_index].band[level][orientation].state));
1526 }
1527 }
1528 }
791e7b83 1529 memset(s->header_state, 0, sizeof(s->header_state));
155ec6ed
MN
1530 memset(s->block_state, 0, sizeof(s->block_state));
1531}
1532
1533static int alloc_blocks(SnowContext *s){
1534 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1535 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1536
1537 s->b_width = w;
1538 s->b_height= h;
1539
1540 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
1541 return 0;
1542}
1543
1544static inline void copy_cabac_state(CABACContext *d, CABACContext *s){
1545 PutBitContext bak= d->pb;
1546 *d= *s;
1547 d->pb= bak;
1548}
1549
//near copy & paste from dsputil, FIXME
/**
 * Sum of all samples of a w x w square.
 *
 * @param pix       top-left sample of the square
 * @param line_size distance in bytes between the starts of consecutive rows
 * @param w         side length of the square
 * @return the sum of the w*w samples (0 when w <= 0)
 */
static int pix_sum(uint8_t * pix, int line_size, int w)
{
    int total = 0;
    int row, col;

    for (row = 0; row < w; row++) {
        const uint8_t *line = pix + row * line_size;

        for (col = 0; col < w; col++)
            total += line[col];
    }
    return total;
}
1565
1566//near copy & paste from dsputil, FIXME
1567static int pix_norm1(uint8_t * pix, int line_size, int w)
1568{
1569 int s, i, j;
1570 uint32_t *sq = squareTbl + 256;
1571
1572 s = 0;
1573 for (i = 0; i < w; i++) {
1574 for (j = 0; j < w; j ++) {
1575 s += sq[pix[0]];
1576 pix ++;
1577 }
1578 pix += line_size - w;
1579 }
1580 return s;
1581}
1582
1583static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int type){
1584 const int w= s->b_width << s->block_max_depth;
1585 const int rem_depth= s->block_max_depth - level;
1586 const int index= (x + y*w) << rem_depth;
1587 const int block_w= 1<<rem_depth;
1588 BlockNode block;
1589 int i,j;
1590
1591 block.color[0]= l;
1592 block.color[1]= cb;
1593 block.color[2]= cr;
1594 block.mx= mx;
1595 block.my= my;
1596 block.type= type;
1597 block.level= level;
1598
1599 for(j=0; j<block_w; j++){
1600 for(i=0; i<block_w; i++){
1601 s->block[index + i + j*w]= block;
1602 }
1603 }
1604}
1605
1606static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
1607 const int offset[3]= {
1608 y*c-> stride + x,
1609 ((y*c->uvstride + x)>>1),
1610 ((y*c->uvstride + x)>>1),
1611 };
1612 int i;
1613 for(i=0; i<3; i++){
1614 c->src[0][i]= src [i];
1615 c->ref[0][i]= ref [i] + offset[i];
1616 }
1617 assert(!ref_index);
1618}
1619
1620//FIXME copy&paste
/* Aliases for rows of a local motion-vector predictor table named P (an
 * int P[10][2] in encode_q_branch, [0] = x component, [1] = y component):
 * left, top and top-right neighbours and their median.  P_MV1 is defined but
 * not referenced in the code visible here. */
1621#define P_LEFT P[1]
1622#define P_TOP P[2]
1623#define P_TOPRIGHT P[3]
1624#define P_MEDIAN P[4]
1625#define P_MV1 P[9]
1626#define FLAG_QPEL 1 //must be 1
1627
/**
 * Choose and encode the coding mode of one block of the block quadtree.
 *
 * For the block at (x, y) of the given level the function evaluates
 *  - inter coding: motion search against s->last_picture, cost = search score
 *    plus the lambda2-weighted bit count of a trial encoding,
 *  - intra coding: the block is represented by its mean colour, cost = luma
 *    squared error against the mean plus the weighted bit count,
 *  - splitting (only when level != s->block_max_depth): the four children are
 *    encoded recursively into the real bitstream.
 * If the split is cheaper than both alternatives its output is kept;
 * otherwise the coder is restored from the state saved before the split and
 * the cheaper of intra / inter is written.
 *
 * On keyframes nothing is searched or written: the block is stored as
 * BLOCK_INTRA with the predicted values and 0 is returned.
 *
 * @return the cost of the chosen alternative
 */
1628static int encode_q_branch(SnowContext *s, int level, int x, int y){
/* scratch bitstreams and state copies for the inter (p_) and intra (i_) trials */
1629 uint8_t p_buffer[1024];
1630 uint8_t i_buffer[1024];
1631 uint8_t p_state[sizeof(s->block_state)];
1632 uint8_t i_state[sizeof(s->block_state)];
1633 CABACContext pc, ic;
/* bit writer as it was on entry; the winning trial is appended to it */
1634 PutBitContext pbbak= s->c.pb;
1635 int score, score2, iscore, i_len, p_len, block_s, sum;
1636 const int w= s->b_width << s->block_max_depth;
1637 const int h= s->b_height << s->block_max_depth;
1638 const int rem_depth= s->block_max_depth - level;
1639 const int index= (x + y*w) << rem_depth;
1640 const int block_w= 1<<(LOG2_MB_SIZE - level);
1641 static BlockNode null_block= { //FIXME add border maybe
1642 .color= {128,128,128},
1643 .mx= 0,
1644 .my= 0,
1645 .type= 0,
1646 .level= 0,
1647 };
1648 int trx= (x+1)<<rem_depth;
1649 int try= (y+1)<<rem_depth;
/* neighbouring leaf blocks; null_block stands in beyond the picture edge */
1650 BlockNode *left = x ? &s->block[index-1] : &null_block;
1651 BlockNode *top = y ? &s->block[index-w] : &null_block;
1652 BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
1653 BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
1654 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
1655 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
/* predictions: colour from the left block, motion vector as the median of
 * left / top / top-right */
1656 int pl = left->color[0];
1657 int pcb= left->color[1];
1658 int pcr= left->color[2];
1659 int pmx= mid_pred(left->mx, top->mx, tr->mx);
1660 int pmy= mid_pred(left->my, top->my, tr->my);
1661 int mx=0, my=0;
1662 int l,cr,cb, i;
1663 const int stride= s->current_picture.linesize[0];
1664 const int uvstride= s->current_picture.linesize[1];
1665 const int instride= s->input_picture.linesize[0];
1666 const int uvinstride= s->input_picture.linesize[1];
1667 uint8_t *new_l = s->input_picture.data[0] + (x + y* instride)*block_w;
1668 uint8_t *new_cb= s->input_picture.data[1] + (x + y*uvinstride)*block_w/2;
1669 uint8_t *new_cr= s->input_picture.data[2] + (x + y*uvinstride)*block_w/2;
/* NOTE(review): variable-length array of 3*stride*block_w bytes on the stack */
1670 uint8_t current_mb[3][stride*block_w];
1671 uint8_t *current_data[3]= {&current_mb[0][0], &current_mb[1][0], &current_mb[2][0]};
1672 int P[10][2];
1673 int16_t last_mv[3][2];
/* NOTE(review): the trailing "unused" remark looks stale -- qpel feeds shift
 * on the next line */
1674 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
1675 const int shift= 1+qpel;
1676 MotionEstContext *c= &s->m.me;
/* coder contexts derived from the neighbours */
1677 int mx_context= av_log2(2*ABS(left->mx - top->mx));
1678 int my_context= av_log2(2*ABS(left->my - top->my));
1679 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1680
1681 assert(sizeof(s->block_state) >= 256);
1682 if(s->keyframe){
1683 set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, BLOCK_INTRA);
1684 return 0;
1685 }
1686
/* copy the input block into current_mb using the current picture's strides */
1687 //FIXME optimize
1688 for(i=0; i<block_w; i++)
1689 memcpy(&current_mb[0][0] + stride*i, new_l + instride*i, block_w);
1690 for(i=0; i<block_w>>1; i++)
1691 memcpy(&current_mb[1][0] + uvstride*i, new_cb + uvinstride*i, block_w>>1);
1692 for(i=0; i<block_w>>1; i++)
1693 memcpy(&current_mb[2][0] + uvstride*i, new_cr + uvinstride*i, block_w>>1);
1694
1695// clip predictors / edge ?
1696
/* candidate predictors handed to the motion search */
1697 P_LEFT[0]= left->mx;
1698 P_LEFT[1]= left->my;
1699 P_TOP [0]= top->mx;
1700 P_TOP [1]= top->my;
1701 P_TOPRIGHT[0]= tr->mx;
1702 P_TOPRIGHT[1]= tr->my;
1703
/* vectors currently stored for this position and for the right / bottom
 * neighbours */
1704 last_mv[0][0]= s->block[index].mx;
1705 last_mv[0][1]= s->block[index].my;
1706 last_mv[1][0]= right->mx;
1707 last_mv[1][1]= right->my;
1708 last_mv[2][0]= bottom->mx;
1709 last_mv[2][1]= bottom->my;
1710
1711 s->m.mb_stride=2;
1712 s->m.mb_x=
1713 s->m.mb_y= 0;
1714 s->m.me.skip= 0;
1715
1716 init_ref(c, current_data, s->last_picture.data, NULL, block_w*x, block_w*y, 0);
1717
1718 assert(s->m.me. stride == stride);
1719 assert(s->m.me.uvstride == uvstride);
1720
1721 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
1722 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
1723 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
1724 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
1725
/* search range relative to the block position: the block grid area extended
 * by 16-2 samples on every side */
ff158dc9
MN
1726 c->xmin = - x*block_w - 16+2;
1727 c->ymin = - y*block_w - 16+2;
1728 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
1729 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
155ec6ed
MN
1730
/* clip the predictors to the range; only the bounds listed here are checked */
1731 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
1732 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
1733 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
1734 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
1735 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
1736 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
1737 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
1738
1739 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
1740 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
1741
1742 if (!y) {
1743 c->pred_x= P_LEFT[0];
1744 c->pred_y= P_LEFT[1];
1745 } else {
1746 c->pred_x = P_MEDIAN[0];
1747 c->pred_y = P_MEDIAN[1];
1748 }
1749
/* ---- inter candidate: search, refinement, final block score ---- */
1750 score= ff_epzs_motion_search(&s->m, &mx, &my, P, 0, /*ref_index*/ 0, last_mv,
1751 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
1752
1753 assert(mx >= c->xmin);
1754 assert(mx <= c->xmax);
1755 assert(my >= c->ymin);
1756 assert(my <= c->ymax);
1757
1758 score= s->m.me.sub_motion_search(&s->m, &mx, &my, score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
1759 score= ff_get_mb_score(&s->m, mx, my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
1760 //FIXME if mb_cmp != SSE then intra cant be compared currently and mb_penalty vs. lambda2
1761
/* trial-encode the inter block into p_buffer with copies of the coder and of
 * the block state, then add the weighted bit count to the score */
1762 // subpel search
1763 pc= s->c;
1764 init_put_bits(&pc.pb, p_buffer, sizeof(p_buffer));
1765 memcpy(p_state, s->block_state, sizeof(s->block_state));
1766
1767 if(level!=s->block_max_depth)
1768 put_cabac(&pc, &p_state[4 + s_context], 1);
1769 put_cabac(&pc, &p_state[1 + left->type + top->type], 0);
1770 put_symbol(&pc, &p_state[128 + 32*mx_context], mx - pmx, 1);
1771 put_symbol(&pc, &p_state[128 + 32*my_context], my - pmy, 1);
1772 p_len= put_bits_count(&pc.pb);
1773 score += (s->lambda2*(p_len + pc.outstanding_count - s->c.outstanding_count))>>FF_LAMBDA_SHIFT;
1774
/* ---- intra candidate: rounded mean of each plane; iscore is the luma
 * squared error against that mean (sum of squares - 2*l*sum + l*l*N) ---- */
1775 block_s= block_w*block_w;
1776 sum = pix_sum(&current_mb[0][0], stride, block_w);
1777 l= (sum + block_s/2)/block_s;
1778 iscore = pix_norm1(&current_mb[0][0], stride, block_w) - 2*l*sum + l*l*block_s;
1779
/* chroma means are computed, but their error terms are commented out */
1780 block_s= block_w*block_w>>2;
1781 sum = pix_sum(&current_mb[1][0], uvstride, block_w>>1);
1782 cb= (sum + block_s/2)/block_s;
1783// iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
1784 sum = pix_sum(&current_mb[2][0], uvstride, block_w>>1);
1785 cr= (sum + block_s/2)/block_s;
1786// iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
1787
/* trial-encode the intra block into i_buffer the same way */
1788 ic= s->c;
1789 init_put_bits(&ic.pb, i_buffer, sizeof(i_buffer));
1790 memcpy(i_state, s->block_state, sizeof(s->block_state));
1791 if(level!=s->block_max_depth)
1792 put_cabac(&ic, &i_state[4 + s_context], 1);
1793 put_cabac(&ic, &i_state[1 + left->type + top->type], 1);
1794 put_symbol(&ic, &i_state[32], l-pl , 1);
1795 put_symbol(&ic, &i_state[64], cb-pcb, 1);
1796 put_symbol(&ic, &i_state[96], cr-pcr, 1);
1797 i_len= put_bits_count(&ic.pb);
1798 iscore += (s->lambda2*(i_len + ic.outstanding_count - s->c.outstanding_count))>>FF_LAMBDA_SHIFT;
1799
1800// assert(score==256*256*256*64-1);
1801 assert(iscore < 255*255*256 + s->lambda2*10);
1802 assert(iscore >= 0);
1803 assert(l>=0 && l<=255);
1804 assert(pl>=0 && pl<=255);
1805
/* top-level blocks also feed the scene change score */
1806 if(level==0){
1807 int varc= iscore >> 8;
1808 int vard= score >> 8;
1809 if (vard <= 64 || vard < varc)
1810 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
1811 else
1812 c->scene_change_score+= s->m.qscale;
1813 }
1814
/* ---- split candidate: written straight into s->c.  If it does not win, the
 * branches below overwrite s->c, its bit writer and s->block_state with the
 * copies taken before this point, which discards the split output. ---- */
1815 if(level!=s->block_max_depth){
1816 put_cabac(&s->c, &s->block_state[4 + s_context], 0);
1817 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
1818 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
1819 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
1820 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
1821 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
1822
1823 if(score2 < score && score2 < iscore)
1824 return score2;
1825 }
1826
/* commit the cheaper of intra / inter: append its trial bits to the saved bit
 * writer, adopt its coder and state copies, and store the block data */
1827 if(iscore < score){
1828 flush_put_bits(&ic.pb);
1829 ff_copy_bits(&pbbak, i_buffer, i_len);
1830 s->c= ic;
1831 s->c.pb= pbbak;
1832 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, BLOCK_INTRA);
1833 memcpy(s->block_state, i_state, sizeof(s->block_state));
1834 return iscore;
1835 }else{
1836 flush_put_bits(&pc.pb);
1837 ff_copy_bits(&pbbak, p_buffer, p_len);
1838 s->c= pc;
1839 s->c.pb= pbbak;
1840 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, 0);
1841 memcpy(s->block_state, p_state, sizeof(s->block_state));
1842 return score;
1843 }
1844}
1845
1846static void decode_q_branch(SnowContext *s, int level, int x, int y){
1847 const int w= s->b_width << s->block_max_depth;
155ec6ed
MN
1848 const int rem_depth= s->block_max_depth - level;
1849 const int index= (x + y*w) << rem_depth;
1850 static BlockNode null_block= { //FIXME add border maybe
1851 .color= {128,128,128},
1852 .mx= 0,
1853 .my= 0,
1854 .type= 0,
1855 .level= 0,
1856 };
1857 int trx= (x+1)<<rem_depth;
155ec6ed
MN
1858 BlockNode *left = x ? &s->block[index-1] : &null_block;
1859 BlockNode *top = y ? &s->block[index-w] : &null_block;
1860 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
1861 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
1862 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1863
1864 if(s->keyframe){
1865 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, BLOCK_INTRA);
1866 return;
1867 }
1868
1869 if(level==s->block_max_depth || get_cabac(&s->c, &s->block_state[4 + s_context])){
1870 int type;
1871 int l = left->color[0];
1872 int cb= left->color[1];
1873 int cr= left->color[2];
1874 int mx= mid_pred(left->mx, top->mx, tr->mx);
1875 int my= mid_pred(left->my, top->my, tr->my);
1876 int mx_context= av_log2(2*ABS(left->mx - top->mx)) + 0*av_log2(2*ABS(tr->mx - top->mx));
1877 int my_context= av_log2(2*ABS(left->my - top->my)) + 0*av_log2(2*ABS(tr->my - top->my));
1878
1879 type= get_cabac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
1880
1881 if(type){
1882 l += get_symbol(&s->c, &s->block_state[32], 1);
1883 cb+= get_symbol(&s->c, &s->block_state[64], 1);
1884 cr+= get_symbol(&s->c, &s->block_state[96], 1);
1885 }else{
1886 mx+= get_symbol(&s->c, &s->block_state[128 + 32*mx_context], 1);
1887 my+= get_symbol(&s->c, &s->block_state[128 + 32*my_context], 1);
1888 }
1889 set_blocks(s, level, x, y, l, cb, cr, mx, my, type);
1890 }else{
1891 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
1892 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
1893 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
1894 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
1895 }
1896}
1897
1898static void encode_blocks(SnowContext *s){
1899 int x, y;
1900 int w= s->b_width;
1901 int h= s->b_height;
1902
1903 for(y=0; y<h; y++){
1904 for(x=0; x<w; x++){
1905 encode_q_branch(s, 0, x, y);
1906 }
1907 }
1908}
1909
1910static void decode_blocks(SnowContext *s){
1911 int x, y;
1912 int w= s->b_width;
1913 int h= s->b_height;
1914
1915 for(y=0; y<h; y++){
1916 for(x=0; x<w; x++){
1917 decode_q_branch(s, 0, x, y);
1918 }
1919 }
791e7b83
MN
1920}
1921
1922static void mc_block(uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
1923 int x, y;
3924dac4 1924START_TIMER
791e7b83
MN
1925 for(y=0; y < b_h+5; y++){
1926 for(x=0; x < b_w; x++){
3924dac4
MN
1927 int a0= src[x ];
1928 int a1= src[x + 1];
1929 int a2= src[x + 2];
1930 int a3= src[x + 3];
1931 int a4= src[x + 4];
1932 int a5= src[x + 5];
791e7b83
MN
1933// int am= 9*(a1+a2) - (a0+a3);
1934 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
1935// int am= 18*(a2+a3) - 2*(a1+a4);
1936// int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
1937// int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;
1938
1939// if(b_w==16) am= 8*(a1+a2);
1940
3924dac4
MN
1941 if(dx<8) tmp[x]= (32*a2*( 8-dx) + am* dx + 128)>>8;
1942 else tmp[x]= ( am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
791e7b83
MN
1943
1944/* if (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) + aL* dx + 32)>>6;
1945 else if(dx< 8) tmp[x + y*stride]= ( aL*( 8-dx) + am*(dx- 4) + 32)>>6;
1946 else if(dx<12) tmp[x + y*stride]= ( am*(12-dx) + aR*(dx- 8) + 32)>>6;
1947 else tmp[x + y*stride]= ( aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/
1948 }
3924dac4
MN
1949 tmp += stride;
1950 src += stride;
791e7b83 1951 }
3924dac4
MN
1952 tmp -= (b_h+5)*stride;
1953
791e7b83
MN
1954 for(y=0; y < b_h; y++){
1955 for(x=0; x < b_w; x++){
3924dac4
MN
1956 int a0= tmp[x + 0*stride];
1957 int a1= tmp[x + 1*stride];
1958 int a2= tmp[x + 2*stride];
1959 int a3= tmp[x + 3*stride];
1960 int a4= tmp[x + 4*stride];
1961 int a5= tmp[x + 5*stride];
791e7b83
MN
1962 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
1963// int am= 18*(a2+a3) - 2*(a1+a4);
1964/* int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
1965 int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/
1966
1967// if(b_w==16) am= 8*(a1+a2);
1968
3924dac4
MN
1969 if(dy<8) dst[x]= (32*a2*( 8-dy) + am* dy + 128)>>8;
1970 else dst[x]= ( am*(16-dy) + 32*a3*(dy-8) + 128)>>8;
791e7b83
MN
1971
1972/* if (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) + aL* dy + 32)>>6;
1973 else if(dy< 8) tmp[x + y*stride]= ( aL*( 8-dy) + am*(dy- 4) + 32)>>6;
1974 else if(dy<12) tmp[x + y*stride]= ( am*(12-dy) + aR*(dy- 8) + 32)>>6;
1975 else tmp[x + y*stride]= ( aR*(16-dy) + 16*a2*(dy-12) + 32)>>6;*/
1976 }
3924dac4
MN
1977 dst += stride;
1978 tmp += stride;
791e7b83 1979 }
3924dac4 1980STOP_TIMER("mc_block")
791e7b83
MN
1981}
1982
/*
 * mca(DX, DY, SIZE) defines mc_block_hpel<DX><DY><SIZE>(), a wrapper with a
 * (dst, src, stride, h) signature around mc_block() for a SIZE x SIZE block
 * at the fixed fractional offset (DX, DY). h must equal SIZE. The source
 * pointer is moved 2 samples left and 2 rows up before the call.
 */
#define mca(DX,DY,SIZE)\
static void mc_block_hpel ## DX ## DY ## SIZE(uint8_t *dst, uint8_t *src, int stride, int h){\
    uint8_t tmp[stride*(SIZE+5)];\
    assert(h==SIZE);\
    mc_block(dst, src-2-2*stride, tmp, stride, SIZE, SIZE, DX, DY);\
}

/* 16x16 variants */
mca( 0, 0,16)
mca( 8, 0,16)
mca( 0, 8,16)
mca( 8, 8,16)
/* 8x8 variants */
mca( 0, 0,8)
mca( 8, 0,8)
mca( 0, 8,8)
mca( 8, 8,8)
791e7b83 1998
ff158dc9
MN
1999static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
2000 if(block->type){
2001 int x, y;
2002 const int color= block->color[plane_index];
2003 for(y=0; y < b_h; y++){
2004 for(x=0; x < b_w; x++){
2005 dst[x + y*stride]= color;
2006 }
2007 }
2008 }else{
2009 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
2010 int mx= block->mx*scale;
2011 int my= block->my*scale;
ec697587
MN
2012 const int dx= mx&15;
2013 const int dy= my&15;
ff158dc9
MN
2014 sx += (mx>>4) - 2;
2015 sy += (my>>4) - 2;
2016 src += sx + sy*stride;
2017 if( (unsigned)sx >= w - b_w - 4
2018 || (unsigned)sy >= h - b_h - 4){
2019 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+5, b_h+5, sx, sy, w, h);
2020 src= tmp + MB_SIZE;
2021 }
ec697587
MN
2022 if((dx&3) || (dy&3) || b_w!=b_h || (b_w!=4 && b_w!=8 && b_w!=16))
2023 mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy);
2024 else
2025 s->dsp.put_h264_qpel_pixels_tab[2-(b_w>>3)][dy+(dx>>2)](dst,src + 2 + 2*stride,stride);
ff158dc9
MN
2026 }
2027}
2028
2029static always_inline int same_block(BlockNode *a, BlockNode *b){
2030 return !((a->mx - b->mx) | (a->my - b->my) | a->type | b->type);
2031}
2032
//FIXME name cleanup (b_w, block_w, b_width stuff)
715a97f0 2034static always_inline void add_yblock(SnowContext *s, DWTELEM *dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){
ff158dc9
MN
2035 const int b_width = s->b_width << s->block_max_depth;
2036 const int b_height= s->b_height << s->block_max_depth;
2037 const int b_stride= b_width;
2038 BlockNode *lt= &s->block[b_x + b_y*b_stride];
2039 BlockNode *rt= lt+1;
2040 BlockNode *lb= lt+b_stride;
2041 BlockNode *rb= lb+1;
2042 uint8_t *block[4];
2043 uint8_t tmp[src_stride*(b_h+5)]; //FIXME align
791e7b83
MN
2044 int x,y;
2045
ff158dc9
MN
2046 if(b_x<0){
2047 lt= rt;
2048 lb= rb;
2049 }else if(b_x + 1 >= b_width){
2050 rt= lt;
2051 rb= lb;
791e7b83 2052 }
ff158dc9
MN
2053 if(b_y<0){
2054 lt= lb;
2055 rt= rb;
2056 }else if(b_y + 1 >= b_height){
2057 lb= lt;
2058 rb= rt;
2059 }
2060
2061 if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
2062 obmc -= src_x;
2063 b_w += src_x;
2064 src_x=0;
2065 }else if(src_x + b_w > w){
2066 b_w = w - src_x;
2067 }
2068 if(src_y<0){
2069 obmc -= src_y*obmc_stride;
2070 b_h += src_y;
2071 src_y=0;
2072 }else if(src_y + b_h> h){
2073 b_h = h - src_y;
791e7b83 2074 }
620ab797 2075
ff158dc9 2076 if(b_w<=0 || b_h<=0) return;
155ec6ed 2077
ff158dc9
MN
2078assert(src_stride > 7*MB_SIZE);
2079 dst += src_x + src_y*dst_stride;
715a97f0 2080 dst8+= src_x + src_y*src_stride;
ff158dc9
MN
2081// src += src_x + src_y*src_stride;
2082
2083 block[0]= tmp+3*MB_SIZE;
2084 pred_block(s, block[0], src, tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2085
2086 if(same_block(lt, rt)){
2087 block[1]= block[0];
791e7b83 2088 }else{
ff158dc9
MN
2089 block[1]= tmp + 4*MB_SIZE;
2090 pred_block(s, block[1], src, tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2091 }
2092
2093 if(same_block(lt, lb)){
2094 block[2]= block[0];
2095 }else if(same_block(rt, lb)){
2096 block[2]= block[1];
2097 }else{
2098 block[2]= tmp+5*MB_SIZE;
2099 pred_block(s, block[2], src, tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2100 }
791e7b83 2101
ff158dc9
MN
2102 if(same_block(lt, rb) ){
2103 block[3]= block[0];
2104 }else if(same_block(rt, rb)){
2105 block[3]= block[1];
2106 }else if(same_block(lb, rb)){
2107 block[3]= block[2];
2108 }else{
2109 block[3]= tmp+6*MB_SIZE;
2110 pred_block(s, block[3], src, tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2111 }
2112#if 0
2113 for(y=0; y<b_h; y++){
2114 for(x=0; x<b_w; x++){
2115 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2116 if(add) dst[x + y*dst_stride] += v;
2117 else dst[x + y*dst_stride] -= v;
2118 }
2119 }
2120 for(y=0; y<b_h; y++){
2121 uint8_t *obmc2= obmc + (obmc_stride>>1);
2122 for(x=0; x<b_w; x++){
2123 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2124 if(add) dst[x + y*dst_stride] += v;
2125 else dst[x + y*dst_stride] -= v;
2126 }
2127 }
2128 for(y=0; y<b_h; y++){
2129 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2130 for(x=0; x<b_w; x++){
2131 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2132 if(add) dst[x + y*dst_stride] += v;
2133 else dst[x + y*dst_stride] -= v;
2134 }
2135 }
2136 for(y=0; y<b_h; y++){
2137 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2138 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2139 for(x=0; x<b_w; x++){
2140 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2141 if(add) dst[x + y*dst_stride] += v;
2142 else dst[x + y*dst_stride] -= v;
2143 }
2144 }
2145#else
2146 for(y=0; y<b_h; y++){
2147 //FIXME ugly missue of obmc_stride
2148 uint8_t *obmc1= obmc + y*obmc_stride;
2149 uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2150 uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2151 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2152 for(x=0; x<b_w; x++){
2153 int v= obmc1[x] * block[3][x + y*src_stride]
2154 +obmc2[x] * block[2][x + y*src_stride]
2155 +obmc3[x] * block[1][x + y*src_stride]
2156 +obmc4[x] * block[0][x + y*src_stride];
715a97f0
MN
2157
2158 v <<= 8 - LOG2_OBMC_MAX;
034aff03
MN
2159 if(FRAC_BITS != 8){
2160 v += 1<<(7 - FRAC_BITS);
2161 v >>= 8 - FRAC_BITS;
2162 }
715a97f0
MN
2163 if(add){
2164 v += dst[x + y*dst_stride];
2165 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2166 if(v&(~255)) v= ~(v>>31);
2167 dst8[x + y*src_stride] = v;
2168 }else{
2169 dst[x + y*dst_stride] -= v;
2170 }
791e7b83
MN
2171 }
2172 }
ff158dc9 2173#endif
791e7b83
MN
2174}
2175
ff158dc9 2176static always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){
791e7b83 2177 Plane *p= &s->plane[plane_index];
155ec6ed
MN
2178 const int mb_w= s->b_width << s->block_max_depth;
2179 const int mb_h= s->b_height << s->block_max_depth;
791e7b83 2180 int x, y, mb_x, mb_y;
155ec6ed
MN
2181 int block_size = MB_SIZE >> s->block_max_depth;
2182 int block_w = plane_index ? block_size/2 : block_size;
ff158dc9 2183 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
155ec6ed 2184 int obmc_stride= plane_index ? block_size : 2*block_size;
715a97f0 2185 int ref_stride= s->current_picture.linesize[plane_index];
791e7b83 2186 uint8_t *ref = s->last_picture.data[plane_index];
715a97f0 2187 uint8_t *dst8= s->current_picture.data[plane_index];
791e7b83
MN
2188 int w= p->width;
2189 int h= p->height;
fff6d4ea 2190 START_TIMER
791e7b83 2191
ff158dc9 2192 if(s->keyframe || (s->avctx->debug&512)){
715a97f0
MN
2193 if(add){
2194 for(y=0; y<h; y++){
2195 for(x=0; x<w; x++){
2196 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2197 v >>= FRAC_BITS;
2198 if(v&(~255)) v= ~(v>>31);
2199 dst8[x + y*ref_stride]= v;
2200 }
2201 }
2202 }else{
2203 for(y=0; y<h; y++){
2204 for(x=0; x<w; x++){
2205 buf[x + y*w]-= 128<<FRAC_BITS;
2206 }
ff158dc9 2207 }
791e7b83 2208 }
ff158dc9
MN
2209
2210 return;
791e7b83
MN
2211 }
2212
ff158dc9
MN
2213 for(mb_y=0; mb_y<=mb_h; mb_y++){
2214 for(mb_x=0; mb_x<=mb_w; mb_x++){
fff6d4ea 2215 START_TIMER
ff158dc9 2216
715a97f0 2217 add_yblock(s, buf, dst8, ref, obmc,
ff158dc9 2218 block_w*mb_x - block_w/2,
791e7b83 2219 block_w*mb_y - block_w/2,
ff158dc9 2220 block_w, block_w,
791e7b83 2221 w, h,
ff158dc9
MN
2222 w, ref_stride, obmc_stride,
2223 mb_x - 1, mb_y - 1,
2224 add, plane_index);
2225
2226 STOP_TIMER("add_yblock")
791e7b83
MN
2227 }
2228 }
fff6d4ea
MN
2229
2230 STOP_TIMER("predict_plane")
791e7b83
MN
2231}
2232
2233static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int bias){
2234 const int level= b->level;
2235 const int w= b->width;
2236 const int h= b->height;
2237 const int qlog= clip(s->qlog + b->qlog, 0, 128);
2238 const int qmul= qexp[qlog&7]<<(qlog>>3);
da66b631
MN
2239 int x,y, thres1, thres2;
2240 START_TIMER
791e7b83
MN
2241
2242 assert(QROOT==8);
2243
93fbdb5a
MN
2244 if(s->qlog == LOSSLESS_QLOG) return;
2245
791e7b83 2246 bias= bias ? 0 : (3*qmul)>>3;
da66b631
MN
2247 thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
2248 thres2= 2*thres1;
791e7b83
MN
2249
2250 if(!bias){
2251 for(y=0; y<h; y++){
2252 for(x=0; x<w; x++){
da66b631
MN
2253 int i= src[x + y*stride];
2254
2255 if((unsigned)(i+thres1) > thres2){
2256 if(i>=0){
2257 i<<= QEXPSHIFT;
2258 i/= qmul; //FIXME optimize
2259 src[x + y*stride]= i;
2260 }else{
2261 i= -i;
2262 i<<= QEXPSHIFT;
2263 i/= qmul; //FIXME optimize
2264 src[x + y*stride]= -i;
2265 }
2266 }else
2267 src[x + y*stride]= 0;
791e7b83
MN
2268 }
2269 }
2270 }else{
2271 for(y=0; y<h; y++){
2272 for(x=0; x<w; x++){
2273 int i= src[x + y*stride];
2274
da66b631
MN
2275 if((unsigned)(i+thres1) > thres2){
2276 if(i>=0){
2277 i<<= QEXPSHIFT;
2278 i= (i + bias) / qmul; //FIXME optimize
2279 src[x + y*stride]= i;
2280 }else{
2281 i= -i;
2282 i<<= QEXPSHIFT;
2283 i= (i + bias) / qmul; //FIXME optimize
2284 src[x + y*stride]= -i;
2285 }
2286 }else
2287 src[x + y*stride]= 0;
791e7b83
MN
2288 }
2289 }
2290 }
da66b631
MN
2291 if(level+1 == s->spatial_decomposition_count){
2292// STOP_TIMER("quantize")
2293 }
791e7b83
MN
2294}
2295
2296static void dequantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride){
791e7b83
MN
2297 const int w= b->width;
2298 const int h= b->height;
2299 const int qlog= clip(s->qlog + b->qlog, 0, 128);
2300 const int qmul= qexp[qlog&7]<<(qlog>>3);
2301 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
2302 int x,y;
ea7d9cd4 2303 START_TIMER
791e7b83 2304
93fbdb5a
MN
2305 if(s->qlog == LOSSLESS_QLOG) return;
2306
791e7b83
MN
2307 assert(QROOT==8);
2308
2309 for(y=0; y<h; y++){
2310 for(x=0; x<w; x++){
2311 int i= src[x + y*stride];
2312 if(i<0){
2313 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
2314 }else if(i>0){
2315 src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT));
2316 }
2317 }
2318 }
ea7d9cd4
MN
2319 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
2320 STOP_TIMER("dquant")
2321 }
791e7b83
MN
2322}
2323
2324static void decorrelate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
2325 const int w= b->width;
2326 const int h= b->height;
2327 int x,y;
2328
2329 for(y=h-1; y>=0; y--){
2330 for(x=w-1; x>=0; x--){
2331 int i= x + y*stride;
2332
2333 if(x){
2334 if(use_median){
2335 if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
2336 else src[i] -= src[i - 1];
2337 }else{
2338 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
2339 else src[i] -= src[i - 1];
2340 }
2341 }else{
2342 if(y) src[i] -= src[i - stride];
2343 }
2344 }
2345 }
2346}
2347
2348static void correlate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
2349 const int w= b->width;
2350 const int h= b->height;
2351 int x,y;
2352
2353 for(y=0; y<h; y++){
2354 for(x=0; x<w; x++){
2355 int i= x + y*stride;
2356
2357 if(x){
2358 if(use_median){
2359 if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
2360 else src[i] += src[i - 1];
2361 }else{
2362 if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
2363 else src[i] += src[i - 1];
2364 }
2365 }else{
2366 if(y) src[i] += src[i - stride];
2367 }
2368 }
2369 }
2370}
2371
2372static void encode_header(SnowContext *s){
2373 int plane_index, level, orientation;
19aa028d 2374 uint8_t kstate[32]={0};
791e7b83 2375
19aa028d
MN
2376 put_cabac(&s->c, kstate, s->keyframe);
2377 if(s->keyframe || s->always_reset)
2378 reset_contexts(s);
791e7b83
MN
2379 if(s->keyframe){
2380 put_symbol(&s->c, s->header_state, s->version, 0);
19aa028d 2381 put_cabac(&s->c, s->header_state, s->always_reset);
791e7b83
MN
2382 put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
2383 put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
2384 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
2385 put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
791e7b83
MN
2386 put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
2387 put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
2388 put_cabac(&s->c, s->header_state, s->spatial_scalability);
2389// put_cabac(&s->c, s->header_state, s->rate_scalability);
2390
2391 for(plane_index=0; plane_index<2; plane_index++){
2392 for(level=0; level<s->spatial_decomposition_count; level++){
2393 for(orientation=level ? 1:0; orientation<4; orientation++){
2394 if(orientation==2) continue;
2395 put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
2396 }
2397 }
2398 }
2399 }
2400 put_symbol(&s->c, s->header_state, s->spatial_decomposition_type, 0);
2401 put_symbol(&s->c, s->header_state, s->qlog, 1);
2402 put_symbol(&s->c, s->header_state, s->mv_scale, 0);
2403 put_symbol(&s->c, s->header_state, s->qbias, 1);
155ec6ed 2404 put_symbol(&s->c, s->header_state, s->block_max_depth, 0);
791e7b83
MN
2405}
2406
2407static int decode_header(SnowContext *s){
2408 int plane_index, level, orientation;
19aa028d 2409 uint8_t kstate[32]={0};
791e7b83 2410
19aa028d
MN
2411 s->keyframe= get_cabac(&s->c, kstate);
2412 if(s->keyframe || s->always_reset)
2413 reset_contexts(s);
791e7b83
MN
2414 if(s->keyframe){
2415 s->version= get_symbol(&s->c, s->header_state, 0);
2416 if(s->version>0){
2417 av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
2418 return -1;
2419 }
19aa028d 2420 s->always_reset= get_cabac(&s->c, s->header_state);
791e7b83
MN
2421 s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
2422 s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
2423 s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
2424 s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
791e7b83
MN
2425 s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
2426 s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
2427 s->spatial_scalability= get_cabac(&s->c, s->header_state);
2428// s->rate_scalability= get_cabac(&s->c, s->header_state);
2429
2430 for(plane_index=0; plane_index<3; plane_index++){
2431 for(level=0; level<s->spatial_decomposition_count; level++){
2432 for(orientation=level ? 1:0; orientation<4; orientation++){
2433 int q;
2434 if (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
2435 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
2436 else q= get_symbol(&s->c, s->header_state, 1);
2437 s->plane[plane_index].band[level][orientation].qlog= q;
2438 }
2439 }
2440 }
2441 }
2442
2443 s->spatial_decomposition_type= get_symbol(&s->c, s->header_state, 0);
2444 if(s->spatial_decomposition_type > 2){
2445 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
2446 return -1;
2447 }
2448
2449 s->qlog= get_symbol(&s->c, s->header_state, 1);
2450 s->mv_scale= get_symbol(&s->c, s->header_state, 0);
2451 s->qbias= get_symbol(&s->c, s->header_state, 1);
155ec6ed 2452 s->block_max_depth= get_symbol(&s->c, s->header_state, 0);
791e7b83
MN
2453
2454 return 0;
2455}
2456
2457static int common_init(AVCodecContext *avctx){
2458 SnowContext *s = avctx->priv_data;
2459 int width, height;
2460 int level, orientation, plane_index, dec;
2461
2462 s->avctx= avctx;
2463
2464 dsputil_init(&s->dsp, avctx);
2465
2466#define mcf(dx,dy)\
2467 s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\
2468 s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
d92b5807
MN
2469 s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
2470 s->dsp.put_qpel_pixels_tab [1][dy+dx/4]=\
2471 s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
2472 s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
791e7b83
MN
2473
2474 mcf( 0, 0)
2475 mcf( 4, 0)
2476 mcf( 8, 0)
2477 mcf(12, 0)
2478 mcf( 0, 4)
2479 mcf( 4, 4)
2480 mcf( 8, 4)
2481 mcf(12, 4)
2482 mcf( 0, 8)
2483 mcf( 4, 8)
2484 mcf( 8, 8)
2485 mcf(12, 8)
2486 mcf( 0,12)
2487 mcf( 4,12)
2488 mcf( 8,12)
2489 mcf(12,12)
2490
2491#define mcfh(dx,dy)\
2492 s->dsp.put_pixels_tab [0][dy/4+dx/8]=\
2493 s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
d92b5807
MN
2494 mc_block_hpel ## dx ## dy ## 16;\
2495 s->dsp.put_pixels_tab [1][dy/4+dx/8]=\
2496 s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
2497 mc_block_hpel ## dx ## dy ## 8;
791e7b83
MN
2498
2499 mcfh(0, 0)
2500 mcfh(8, 0)
2501 mcfh(0, 8)
2502 mcfh(8, 8)
2503
2504 dec= s->spatial_decomposition_count= 5;
2505 s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
2506
2507 s->chroma_h_shift= 1; //FIXME XXX
2508 s->chroma_v_shift= 1;
2509
2510// dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
2511
155ec6ed
MN
2512 width= s->avctx->width;
2513 height= s->avctx->height;
2514
2515 s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM));
791e7b83
MN
2516
2517 s->mv_scale= (s->avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
155ec6ed 2518 s->block_max_depth= (s->avctx->flags & CODEC_FLAG_4MV) ? 1 : 0;
791e7b83
MN
2519
2520 for(plane_index=0; plane_index<3; plane_index++){
2521 int w= s->avctx->width;
2522 int h= s->avctx->height;
2523
2524 if(plane_index){
2525 w>>= s->chroma_h_shift;
2526 h>>= s->chroma_v_shift;
2527 }
2528 s->plane[plane_index].width = w;
2529 s->plane[plane_index].height= h;
3bb9f096 2530//av_log(NULL, AV_LOG_DEBUG, "%d %d\n", w, h);
791e7b83
MN
2531 for(level=s->spatial_decomposition_count-1; level>=0; level--){
2532 for(orientation=level ? 1 : 0; orientation<4; orientation++){
2533 SubBand *b= &s->plane[plane_index].band[level][orientation];
2534
2535 b->buf= s->spatial_dwt_buffer;
2536 b->level= level;
2537 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
2538 b->width = (w + !(orientation&1))>>1;
2539 b->height= (h + !(orientation>1))>>1;
2540
2541 if(orientation&1) b->buf += (w+1)>>1;
2542 if(orientation>1) b->buf += b->stride>>1;
2543
2544 if(level)
2545 b->parent= &s->plane[plane_index].band[level-1][orientation];
0cea8a03
MN
2546 b->x = av_mallocz(((b->width+1) * b->height+1)*sizeof(int16_t));
2547 b->coeff= av_mallocz(((b->width+1) * b->height+1)*sizeof(DWTELEM));
791e7b83
MN
2548 }
2549 w= (w+1)>>1;
2550 h= (h+1)>>1;
2551 }
2552 }
2553
791e7b83
MN
2554 reset_contexts(s);
2555/*
2556 width= s->width= avctx->width;
2557 height= s->height= avctx->height;
2558
2559 assert(width && height);
2560*/
2561 s->avctx->get_buffer(s->avctx, &s->mconly_picture);
2562
2563 return 0;
2564}
2565
2566
2567static void calculate_vissual_weight(SnowContext *s, Plane *p){
2568 int width = p->width;
2569 int height= p->height;
39c61bbb 2570 int level, orientation, x, y;
791e7b83
MN
2571
2572 for(level=0; level<s->spatial_decomposition_count; level++){
2573 for(orientation=level ? 1 : 0; orientation<4; orientation++){
2574 SubBand *b= &p->band[level][orientation];
2575 DWTELEM *buf= b->buf;
2576 int64_t error=0;
2577
2578 memset(s->spatial_dwt_buffer, 0, sizeof(int)*width*height);
2579 buf[b->width/2 + b->height/2*b->stride]= 256*256;
46c281e8 2580 ff_spatial_idwt(s->spatial_dwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
791e7b83
MN
2581 for(y=0; y<height; y++){
2582 for(x=0; x<width; x++){
2583 int64_t d= s->spatial_dwt_buffer[x + y*width];
2584 error += d*d;
2585 }
2586 }
2587
2588 b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
46c281e8 2589// av_log(NULL, AV_LOG_DEBUG, "%d %d %d\n", level, orientation, b->qlog/*, sqrt(error)*/);
791e7b83
MN
2590 }
2591 }
2592}
2593
2594static int encode_init(AVCodecContext *avctx)
2595{
2596 SnowContext *s = avctx->priv_data;
39c61bbb 2597 int plane_index;
791e7b83 2598
2ff9ff5b
MN
2599 if(avctx->strict_std_compliance >= 0){
2600 av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it wont be decodeable with future versions!!!\n"
2601 "use vstrict=-1 to use it anyway\n");
2602 return -1;
2603 }
2604
791e7b83 2605 common_init(avctx);
155ec6ed 2606 alloc_blocks(s);
791e7b83
MN
2607
2608 s->version=0;
2609
2610 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
2611 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
2612 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
791e7b83
MN
2613 h263_encode_init(&s->m); //mv_penalty
2614
2615 for(plane_index=0; plane_index<3; plane_index++){
2616 calculate_vissual_weight(s, &s->plane[plane_index]);
2617 }
2618
2619
2620 avctx->coded_frame= &s->current_picture;
2621 switch(avctx->pix_fmt){
2622// case PIX_FMT_YUV444P:
2623// case PIX_FMT_YUV422P:
2624 case PIX_FMT_YUV420P:
2625 case PIX_FMT_GRAY8:
2626// case PIX_FMT_YUV411P:
2627// case PIX_FMT_YUV410P:
2628 s->colorspace_type= 0;
2629 break;
2630/* case PIX_FMT_RGBA32:
2631 s->colorspace= 1;
2632 break;*/
2633 default:
2634 av_log(avctx, AV_LOG_ERROR, "format not supported\n");
2635 return -1;
2636 }
2637// avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
2638 s->chroma_h_shift= 1;
2639 s->chroma_v_shift= 1;
2640 return 0;
2641}
2642
2643static int frame_start(SnowContext *s){
2644 AVFrame tmp;
64886072
MN
2645 int w= s->avctx->width; //FIXME round up to x16 ?
2646 int h= s->avctx->height;
791e7b83 2647
64886072
MN
2648 if(s->current_picture.data[0]){
2649 draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w , h , EDGE_WIDTH );
2650 draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
2651 draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
2652 }
2653
791e7b83
MN
2654 tmp= s->last_picture;
2655 s->last_picture= s->current_picture;
2656 s->current_picture= tmp;
2657
2658 s->current_picture.reference= 1;
2659 if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
2660 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
2661 return -1;
2662 }
2663
2664 return 0;
2665}
2666
2667static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
2668 SnowContext *s = avctx->priv_data;
2669 CABACContext * const c= &s->c;
2670 AVFrame *pict = data;
2671 const int width= s->avctx->width;
2672 const int height= s->avctx->height;
39c61bbb 2673 int level, orientation, plane_index;
791e7b83 2674
791e7b83
MN
2675 ff_init_cabac_encoder(c, buf, buf_size);
2676 ff_init_cabac_states(c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
2677
2678 s->input_picture = *pict;
2679
791e7b83
MN
2680 s->keyframe=avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
2681 pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
2682
93fbdb5a
MN
2683 if(pict->quality){
2684 s->qlog= rint(QROOT*log(pict->quality / (float)FF_QP2LAMBDA)/log(2));
2685 //<64 >60
2686 s->qlog += 61;
2687 }else{
2688 s->qlog= LOSSLESS_QLOG;
2689 }
791e7b83 2690
791e7b83 2691 frame_start(s);
19aa028d 2692 s->current_picture.key_frame= s->keyframe;
791e7b83
MN
2693
2694 if(pict->pict_type == P_TYPE){
2695 int block_width = (width +15)>>4;
2696 int block_height= (height+15)>>4;
2697 int stride= s->current_picture.linesize[0];
791e7b83
MN
2698
2699 assert(s->current_picture.data[0]);
2700 assert(s->last_picture.data[0]);
2701
2702 s->m.avctx= s->avctx;
2703 s->m.current_picture.data[0]= s->current_picture.data[0];
2704 s->m. last_picture.data[0]= s-> last_picture.data[0];
2705 s->m. new_picture.data[0]= s-> input_picture.data[0];
2706 s->m.current_picture_ptr= &s->m.current_picture;
2707 s->m. last_picture_ptr= &s->m. last_picture;
2708 s->m.linesize=
2709 s->m. last_picture.linesize[0]=
2710 s->m. new_picture.linesize[0]=
2711 s->m.current_picture.linesize[0]= stride;
155ec6ed 2712 s->m.uvlinesize= s->current_picture.linesize[1];
791e7b83
MN
2713 s->m.width = width;
2714 s->m.height= height;
2715 s->m.mb_width = block_width;
2716 s->m.mb_height= block_height;
2717 s->m.mb_stride= s->m.mb_width+1;
2718 s->m.b8_stride= 2*s->m.mb_width+1;
2719 s->m.f_code=1;
2720 s->m.pict_type= pict->pict_type;
2721 s->m.me_method= s->avctx->me_method;
2722 s->m.me.scene_change_score=0;
2723 s->m.flags= s->avctx->flags;
2724 s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
2725 s->m.out_format= FMT_H263;
2726 s->m.unrestricted_mv= 1;
2727
155ec6ed 2728 s->lambda = s->m.lambda= pict->quality * 3/2; //FIXME bug somewhere else
791e7b83 2729 s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
155ec6ed 2730 s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
791e7b83 2731
791e7b83
MN
2732 s->m.dsp= s->dsp; //move
2733 ff_init_me(&s->m);
791e7b83 2734 }
791e7b83 2735
155ec6ed
MN
2736redo_frame:
2737
791e7b83
MN
2738 s->qbias= pict->pict_type == P_TYPE ? 2 : 0;
2739
2740 encode_header(s);
155ec6ed
MN
2741 encode_blocks(s);
2742
791e7b83
MN
2743 for(plane_index=0; plane_index<3; plane_index++){
2744 Plane *p= &s->plane[plane_index];
2745 int w= p->width;
2746 int h= p->height;
2747 int x, y;
39c61bbb 2748// int bits= put_bits_count(&s->c.pb);
791e7b83
MN
2749
2750 //FIXME optimize
791e7b83
MN
2751 if(pict->data[plane_index]) //FIXME gray hack
2752 for(y=0; y<h; y++){
2753 for(x=0; x<w; x++){
034aff03 2754 s->spatial_dwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
791e7b83
MN
2755 }
2756 }
2757 predict_plane(s, s->spatial_dwt_buffer, plane_index, 0);
155ec6ed
MN
2758
2759 if( plane_index==0
2760 && pict->pict_type == P_TYPE
2761 && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
2762 ff_init_cabac_encoder(c, buf, buf_size);
2763 ff_init_cabac_states(c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
2764 pict->pict_type= FF_I_TYPE;
2765 s->keyframe=1;
2766 reset_contexts(s);
2767 goto redo_frame;
2768 }
2769
93fbdb5a
MN
2770 if(s->qlog == LOSSLESS_QLOG){
2771 for(y=0; y<h; y++){
2772 for(x=0; x<w; x++){
034aff03 2773 s->spatial_dwt_buffer[y*w + x]= (s->spatial_dwt_buffer[y*w + x] + (1<<(FRAC_BITS-1)))>>FRAC_BITS;
93fbdb5a
MN
2774 }
2775 }
2776 }
791e7b83 2777
46c281e8 2778 ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
93fbdb5a 2779
791e7b83
MN
2780 for(level=0; level<s->spatial_decomposition_count; level++){
2781 for(orientation=level ? 1 : 0; orientation<4; orientation++){
2782 SubBand *b= &p->band[level][orientation];
2783
2784 quantize(s, b, b->buf, b->stride, s->qbias);
2785 if(orientation==0)
2786 decorrelate(s, b, b->buf, b->stride, pict->pict_type == P_TYPE, 0);
2787 encode_subband(s, b, b->buf, b->parent ? b->parent->buf : NULL, b->stride, orientation);
2788 assert(b->parent==NULL || b->parent->stride == b->stride*2);
2789 if(orientation==0)
2790 correlate(s, b, b->buf, b->stride, 1, 0);
2791 }
2792 }
2793// av_log(NULL, AV_LOG_DEBUG, "plane:%d bits:%d\n", plane_index, put_bits_count(&s->c.pb) - bits);
2794
2795 for(level=0; level<s->spatial_decomposition_count; level++){
2796 for(orientation=level ? 1 : 0; orientation<4; orientation++){
2797 SubBand *b= &p->band[level][orientation];
2798
2799 dequantize(s, b, b->buf, b->stride);
2800 }
2801 }
93fbdb5a 2802
46c281e8 2803 ff_spatial_idwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
93fbdb5a
MN
2804 if(s->qlog == LOSSLESS_QLOG){
2805 for(y=0; y<h; y++){
2806 for(x=0; x<w; x++){
034aff03 2807 s->spatial_dwt_buffer[y*w + x]<<=FRAC_BITS;
93fbdb5a
MN
2808 }
2809 }
2810 }
715a97f0 2811{START_TIMER
791e7b83 2812 predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
715a97f0 2813STOP_TIMER("pred-conv")}
791e7b83
MN
2814 if(s->avctx->flags&CODEC_FLAG_PSNR){
2815 int64_t error= 0;
2816
2817 if(pict->data[plane_index]) //FIXME gray hack
2818 for(y=0; y<h; y++){
2819 for(x=0; x<w; x++){
93fbdb5a 2820 int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
791e7b83
MN
2821 error += d*d;
2822 }
2823 }
791e7b83 2824 s->avctx->error[plane_index] += error;
bd368b56 2825 s->current_picture.error[plane_index] = error;
791e7b83
MN
2826 }
2827 }
2828
2829 if(s->last_picture.data[0])
2830 avctx->release_buffer(avctx, &s->last_picture);
2831
2832 emms_c();
2833
2834 return put_cabac_terminate(c, 1);
2835}
2836
2837static void common_end(SnowContext *s){
7b49c309
MN
2838 int plane_index, level, orientation;
2839
791e7b83 2840 av_freep(&s->spatial_dwt_buffer);
791e7b83
MN
2841
2842 av_freep(&s->m.me.scratchpad);
2843 av_freep(&s->m.me.map);
2844 av_freep(&s->m.me.score_map);
155ec6ed
MN
2845
2846 av_freep(&s->block);
7b49c309
MN
2847
2848 for(plane_index=0; plane_index<3; plane_index++){
2849 for(level=s->spatial_decomposition_count-1; level>=0; level--){
2850 for(orientation=level ? 1 : 0; orientation<4; orientation++){
2851 SubBand *b= &s->plane[plane_index].band[level][orientation];
2852
2853 av_freep(&b->x);
0cea8a03 2854 av_freep(&b->coeff);
7b49c309
MN
2855 }
2856 }
2857 }
791e7b83
MN
2858}
2859
2860static int encode_end(AVCodecContext *avctx)
2861{
2862 SnowContext *s = avctx->priv_data;
2863
2864 common_end(s);
2865
2866 return 0;
2867}
2868
2869static int decode_init(AVCodecContext *avctx)
2870{
2871// SnowContext *s = avctx->priv_data;
2872
2873 common_init(avctx);
2874
2875 return 0;
2876}
2877
2878static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){
2879 SnowContext *s = avctx->priv_data;
2880 CABACContext * const c= &s->c;
791e7b83
MN
2881 int bytes_read;
2882 AVFrame *picture = data;
39c61bbb 2883 int level, orientation, plane_index;
791e7b83
MN
2884
2885
2886 /* no supplementary picture */
2887 if (buf_size == 0)
2888 return 0;
2889
2890 ff_init_cabac_decoder(c, buf, buf_size);
2891 ff_init_cabac_states(c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
2892
791e7b83
MN
2893 s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
2894 decode_header(s);
155ec6ed 2895 if(!s->block) alloc_blocks(s);
791e7b83
MN
2896
2897 frame_start(s);
2898 //keyframe flag dupliaction mess FIXME
2899 if(avctx->debug&FF_DEBUG_PICT_INFO)
2900 av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
2901
155ec6ed 2902 decode_blocks(s);
791e7b83
MN
2903
2904 for(plane_index=0; plane_index<3; plane_index++){
2905 Plane *p= &s->plane[plane_index];
2906 int w= p->width;
2907 int h= p->height;
2908 int x, y;
2909
2910if(s->avctx->debug&2048){
2911 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
2912 predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
2913
2914 for(y=0; y<h; y++){
2915 for(x=0; x<w; x++){
715a97f0 2916 int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
791e7b83
MN
2917 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
2918 }
2919 }
2920}
2921 for(level=0; level<s->spatial_decomposition_count; level++){
2922 for(orientation=level ? 1 : 0; orientation<4; orientation++){
2923 SubBand *b= &p->band[level][orientation];
2924
2925 decode_subband(s, b, b->buf, b->parent ? b->parent->buf : NULL, b->stride, orientation);
ea7d9cd4 2926 if(orientation==0){
791e7b83 2927 correlate(s, b, b->buf, b->stride, 1, 0);
ea7d9cd4
MN
2928 dequantize(s, b, b->buf, b->stride);
2929 assert(b->buf == s->spatial_dwt_buffer);
2930 }
791e7b83
MN
2931 }
2932 }
2933
46c281e8 2934 ff_spatial_idwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
93fbdb5a
MN
2935 if(s->qlog == LOSSLESS_QLOG){
2936 for(y=0; y<h; y++){
2937 for(x=0; x<w; x++){
034aff03 2938 s->spatial_dwt_buffer[y*w + x]<<=FRAC_BITS;
93fbdb5a
MN
2939 }
2940 }
2941 }
715a97f0 2942{START_TIMER
791e7b83 2943 predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
715a97f0 2944STOP_TIMER("predict_plane conv2")}
791e7b83
MN
2945 }
2946
2947 emms_c();
2948
2949 if(s->last_picture.data[0])
2950 avctx->release_buffer(avctx, &s->last_picture);
2951
2952if(!(s->avctx->debug&2048))
2953 *picture= s->current_picture;
2954else
2955 *picture= s->mconly_picture;
2956
2957 *data_size = sizeof(AVFrame);
2958
2959 bytes_read= get_cabac_terminate(c);
2960 if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n");
2961
2962 return bytes_read;
2963}
2964
2965static int decode_end(AVCodecContext *avctx)
2966{
2967 SnowContext *s = avctx->priv_data;
2968
2969 common_end(s);
2970
2971 return 0;
2972}
2973
2974AVCodec snow_decoder = {
2975 "snow",
2976 CODEC_TYPE_VIDEO,
2977 CODEC_ID_SNOW,
2978 sizeof(SnowContext),
2979 decode_init,
2980 NULL,
2981 decode_end,
2982 decode_frame,
2983 0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
2984 NULL
2985};
2986
2987AVCodec snow_encoder = {
2988 "snow",
2989 CODEC_TYPE_VIDEO,
2990 CODEC_ID_SNOW,
2991 sizeof(SnowContext),
2992 encode_init,
2993 encode_frame,
2994 encode_end,
2995};
2996
2997
#if 0
#undef malloc
#undef free
#undef printf

/*
 * Disabled stand-alone self test. It
 *  - runs a forward + inverse DWT round trip for decomposition type 1
 *    (printed as "5/3") and type 0 (printed as "9/7") on random data and
 *    reports every sample that differs,
 *  - writes a series of values with put_symbol() and reads them back with
 *    get_symbol(),
 *  - feeds a single impulse into each subband, runs the inverse DWT and
 *    prints the resulting table as "visual_weight",
 *  - prints the samples around the centre after transforming a fixed
 *    2x2 test pattern.
 */
int main(){
    int width=256;
    int height=256;
    int buffer[2][width*height];
    SnowContext s;
    int i;
    s.spatial_decomposition_count=6;
    s.spatial_decomposition_type=1;

    printf("testing 5/3 DWT\n");
    for(i=0; i<width*height; i++)
        buffer[0][i]= buffer[1][i]= random()%54321 - 12345;

    /* s is an object, not a pointer, so its members are reached with '.' */
    ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
    ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);

    for(i=0; i<width*height; i++)
        if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);

    printf("testing 9/7 DWT\n");
    s.spatial_decomposition_type=0;
    for(i=0; i<width*height; i++)
        buffer[0][i]= buffer[1][i]= random()%54321 - 12345;

    ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
    ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);

    for(i=0; i<width*height; i++)
        if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);

    printf("testing AC coder\n");
    memset(s.header_state, 0, sizeof(s.header_state));
    ff_init_cabac_encoder(&s.c, buffer[0], 256*256);
    ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);

    for(i=-256; i<256; i++){
START_TIMER
        put_symbol(&s.c, s.header_state, i*i*i/3*ABS(i), 1);
STOP_TIMER("put_symbol")
    }
    put_cabac_terminate(&s.c, 1);

    /* read the same sequence back from the buffer that was just written */
    memset(s.header_state, 0, sizeof(s.header_state));
    ff_init_cabac_decoder(&s.c, buffer[0], 256*256);
    ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);

    for(i=-256; i<256; i++){
        int j;
START_TIMER
        j= get_symbol(&s.c, s.header_state, 1);
STOP_TIMER("get_symbol")
        if(j!=i*i*i/3*ABS(i)) printf("fsck: %d != %d\n", i, j);
    }
{
int level, orientation, x, y;
int64_t errors[8][4];
int64_t g=0;

    memset(errors, 0, sizeof(errors));
    s.spatial_decomposition_count=3;
    s.spatial_decomposition_type=0;
    for(level=0; level<s.spatial_decomposition_count; level++){
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
            int w= width  >> (s.spatial_decomposition_count-level);
            int h= height >> (s.spatial_decomposition_count-level);
            int stride= width  << (s.spatial_decomposition_count-level);
            DWTELEM *buf= buffer[0];
            int64_t error=0;

            if(orientation&1) buf+=w;
            if(orientation>1) buf+=stride>>1;

            /* single impulse in the selected subband, everything else zero */
            memset(buffer[0], 0, sizeof(int)*width*height);
            buf[w/2 + h/2*stride]= 256*256;
            ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
            for(y=0; y<height; y++){
                for(x=0; x<width; x++){
                    int64_t d= buffer[0][x + y*width];
                    error += d*d;
                    /* int64_t is cast to long long so that it matches %lld */
                    if(ABS(width/2-x)<9 && ABS(height/2-y)<9 && level==2) printf("%8lld ", (long long)d);
                }
                if(ABS(height/2-y)<9 && level==2) printf("\n");
            }
            error= (int)(sqrt(error)+0.5);
            errors[level][orientation]= error;
            /* g accumulates the gcd of all values so the table can be reduced */
            if(g) g=ff_gcd(g, error);
            else g= error;
        }
    }
    printf("static int const visual_weight[][4]={\n");
    for(level=0; level<s.spatial_decomposition_count; level++){
        printf("  {");
        for(orientation=0; orientation<4; orientation++){
            printf("%8lld,", (long long)(errors[level][orientation]/g));
        }
        printf("},\n");
    }
    printf("};\n");
    {
            int level=2;
            int orientation=3;
            int w= width  >> (s.spatial_decomposition_count-level);
            int h= height >> (s.spatial_decomposition_count-level);
            int stride= width  << (s.spatial_decomposition_count-level);
            DWTELEM *buf= buffer[0];
            int64_t error=0;

            buf+=w;
            buf+=stride>>1;

            memset(buffer[0], 0, sizeof(int)*width*height);
#if 1
            /* fill the image with a repeating 2x2 pattern and transform it */
            for(y=0; y<height; y++){
                for(x=0; x<width; x++){
                    int tab[4]={0,2,3,1};
                    buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
                }
            }
            ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
#else
            for(y=0; y<h; y++){
                for(x=0; x<w; x++){
                    buf[x + y*stride  ]=169;
                    buf[x + y*stride-w]=64;
                }
            }
            ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
#endif
            for(y=0; y<height; y++){
                for(x=0; x<width; x++){
                    int64_t d= buffer[0][x + y*width];
                    error += d*d;
                    if(ABS(width/2-x)<9 && ABS(height/2-y)<9) printf("%8lld ", (long long)d);
                }
                if(ABS(height/2-y)<9) printf("\n");
            }
    }

}
    return 0;
}
#endif
3145