20% faster predict_plane()
[libav.git] / libavcodec / snow.c
CommitLineData
791e7b83
MN
1/*
2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18
19#include "avcodec.h"
20#include "common.h"
21#include "dsputil.h"
22#include "cabac.h"
23
24#include "mpegvideo.h"
25
26#undef NDEBUG
27#include <assert.h>
28
29#define MAX_DECOMPOSITIONS 8
30#define MAX_PLANES 4
31#define DWTELEM int
32#define QROOT 8
93fbdb5a 33#define LOSSLESS_QLOG -128
791e7b83
MN
34
35static const int8_t quant3[256]={
36 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
37 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
43 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
44-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
45-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
46-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
47-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
48-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
49-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
50-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
51-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
52};
53static const int8_t quant3b[256]={
54 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
55 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
57 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
60 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
61 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
62-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
63-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
64-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
65-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
66-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
68-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
69-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
70};
71static const int8_t quant5[256]={
72 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
73 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
74 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
75 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
76 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
77 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
78 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
79 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
80-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
81-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
82-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
83-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
84-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
85-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
86-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
87-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
88};
89static const int8_t quant7[256]={
90 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
91 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
92 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
93 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
94 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
95 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
96 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
97 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
98-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
99-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
100-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
101-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
102-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
103-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
104-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
105-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
106};
107static const int8_t quant9[256]={
108 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
109 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
110 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
111 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
112 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
113 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
114 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
115 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
116-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
117-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
118-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
119-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
120-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
121-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
122-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
123-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
124};
125static const int8_t quant11[256]={
126 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
127 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
128 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
129 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
130 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
131 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
132 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
133 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
134-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
135-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
136-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
137-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
138-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
139-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
140-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
141-4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
142};
143static const int8_t quant13[256]={
144 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
145 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
146 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
147 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
148 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
149 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
150 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
151 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
152-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
153-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
154-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
155-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
156-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
157-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
158-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
159-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
160};
161
162#define OBMC_MAX 64
163#if 0 //64*cubic
164static const uint8_t obmc32[1024]={
165 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
166 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
167 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
168 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
169 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
170 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
171 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
172 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
173 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
174 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
175 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
176 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
177 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
178 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
179 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
180 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
181 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
182 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
183 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
184 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
185 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
186 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
187 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
188 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
189 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
190 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
191 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
192 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
193 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
194 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
195 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
196 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
197//error:0.000022
198};
199static const uint8_t obmc16[256]={
200 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
201 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
202 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
203 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
204 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
205 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
206 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
207 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
208 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
209 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
210 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
211 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
212 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
213 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
214 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
215 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
216//error:0.000033
217};
218#elif 1 // 64*linear
219static const uint8_t obmc32[1024]={
220 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
221 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
222 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
223 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
224 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
225 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
226 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
227 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
228 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
229 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
230 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
231 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
232 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
233 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
234 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
235 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
236 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
237 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
238 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
239 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
240 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
241 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
242 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
243 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
244 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
245 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
246 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
247 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
248 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
249 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
250 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
251 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
252 //error:0.000020
253};
254static const uint8_t obmc16[256]={
255 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
256 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
257 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
258 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
259 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
260 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
261 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
262 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
263 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
264 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
265 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
266 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
267 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
268 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
269 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
270 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
271//error:0.000015
272};
273#else //64*cos
274static const uint8_t obmc32[1024]={
275 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
276 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
277 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
278 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
279 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
280 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
281 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
282 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
283 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
284 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
285 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
286 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
287 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
288 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
289 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
290 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
291 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
292 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
293 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
294 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
295 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
296 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
297 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
298 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
299 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
300 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
301 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
302 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
303 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
304 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
305 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
306 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
307//error:0.000022
308};
309static const uint8_t obmc16[256]={
310 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
311 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
312 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
313 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
314 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
315 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
316 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
317 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
318 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
319 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
320 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
321 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
322 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
323 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
324 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
325 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
326//error:0.000022
327};
328#endif
329
155ec6ed
MN
330//linear *64
331static const uint8_t obmc8[64]={
332 1, 3, 5, 7, 7, 5, 3, 1,
333 3, 9,15,21,21,15, 9, 3,
334 5,15,25,35,35,25,15, 5,
335 7,21,35,49,49,35,21, 7,
336 7,21,35,49,49,35,21, 7,
337 5,15,25,35,35,25,15, 5,
338 3, 9,15,21,21,15, 9, 3,
339 1, 3, 5, 7, 7, 5, 3, 1,
340//error:0.000000
341};
342
343//linear *64
344static const uint8_t obmc4[16]={
345 4,12,12, 4,
34612,36,36,12,
34712,36,36,12,
348 4,12,12, 4,
349//error:0.000000
350};
351
352static const uint8_t *obmc_tab[4]={
353 obmc32, obmc16, obmc8, obmc4
354};
355
356typedef struct BlockNode{
357 int16_t mx;
358 int16_t my;
359 uint8_t color[3];
360 uint8_t type;
361//#define TYPE_SPLIT 1
362#define BLOCK_INTRA 1
363//#define TYPE_NOCOLOR 4
364 uint8_t level; //FIXME merge into type?
365}BlockNode;
366
367#define LOG2_MB_SIZE 4
368#define MB_SIZE (1<<LOG2_MB_SIZE)
369
791e7b83
MN
370typedef struct SubBand{
371 int level;
372 int stride;
373 int width;
374 int height;
375 int qlog; ///< log(qscale)/log[2^(1/6)]
376 DWTELEM *buf;
7b49c309 377 int16_t *x;
0cea8a03 378 DWTELEM *coeff;
791e7b83
MN
379 struct SubBand *parent;
380 uint8_t state[/*7*2*/ 7 + 512][32];
381}SubBand;
382
383typedef struct Plane{
384 int width;
385 int height;
386 SubBand band[MAX_DECOMPOSITIONS][4];
387}Plane;
388
389typedef struct SnowContext{
390// MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
391
392 AVCodecContext *avctx;
393 CABACContext c;
394 DSPContext dsp;
395 AVFrame input_picture;
396 AVFrame current_picture;
397 AVFrame last_picture;
398 AVFrame mconly_picture;
399// uint8_t q_context[16];
400 uint8_t header_state[32];
155ec6ed 401 uint8_t block_state[128 + 32*128];
791e7b83 402 int keyframe;
19aa028d 403 int always_reset;
791e7b83
MN
404 int version;
405 int spatial_decomposition_type;
406 int temporal_decomposition_type;
407 int spatial_decomposition_count;
408 int temporal_decomposition_count;
409 DWTELEM *spatial_dwt_buffer;
410 DWTELEM *pred_buffer;
411 int colorspace_type;
412 int chroma_h_shift;
413 int chroma_v_shift;
414 int spatial_scalability;
415 int qlog;
155ec6ed
MN
416 int lambda;
417 int lambda2;
791e7b83
MN
418 int mv_scale;
419 int qbias;
420#define QBIAS_SHIFT 3
155ec6ed
MN
421 int b_width;
422 int b_height;
423 int block_max_depth;
791e7b83 424 Plane plane[MAX_PLANES];
155ec6ed
MN
425 BlockNode *block;
426
791e7b83
MN
427 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
428}SnowContext;
429
430#define QEXPSHIFT 7 //FIXME try to change this to 0
431static const uint8_t qexp[8]={
432 128, 140, 152, 166, 181, 197, 215, 235
433// 64, 70, 76, 83, 91, 99, 108, 117
434// 32, 35, 38, 41, 45, 49, 54, 59
435// 16, 17, 19, 21, 23, 25, 27, 29
436// 8, 9, 10, 10, 11, 12, 13, 15
437};
438
439static inline int mirror(int v, int m){
440 if (v<0) return -v;
441 else if(v>m) return 2*m-v;
442 else return v;
443}
444
445static inline void put_symbol(CABACContext *c, uint8_t *state, int v, int is_signed){
446 int i;
447
448 if(v){
449 const int a= ABS(v);
450 const int e= av_log2(a);
451#if 1
452 const int el= FFMIN(e, 10);
453 put_cabac(c, state+0, 0);
454
455 for(i=0; i<el; i++){
456 put_cabac(c, state+1+i, 1); //1..10
457 }
458 for(; i<e; i++){
459 put_cabac(c, state+1+9, 1); //1..10
460 }
461 put_cabac(c, state+1+FFMIN(i,9), 0);
462
463 for(i=e-1; i>=el; i--){
464 put_cabac(c, state+22+9, (a>>i)&1); //22..31
465 }
466 for(; i>=0; i--){
467 put_cabac(c, state+22+i, (a>>i)&1); //22..31
468 }
469
470 if(is_signed)
471 put_cabac(c, state+11 + el, v < 0); //11..21
472#else
473
474 put_cabac(c, state+0, 0);
475 if(e<=9){
476 for(i=0; i<e; i++){
477 put_cabac(c, state+1+i, 1); //1..10
478 }
479 put_cabac(c, state+1+i, 0);
480
481 for(i=e-1; i>=0; i--){
482 put_cabac(c, state+22+i, (a>>i)&1); //22..31
483 }
484
485 if(is_signed)
486 put_cabac(c, state+11 + e, v < 0); //11..21
487 }else{
488 for(i=0; i<e; i++){
489 put_cabac(c, state+1+FFMIN(i,9), 1); //1..10
490 }
491 put_cabac(c, state+1+FFMIN(i,9), 0);
492
493 for(i=e-1; i>=0; i--){
494 put_cabac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
495 }
496
497 if(is_signed)
498 put_cabac(c, state+11 + FFMIN(e,10), v < 0); //11..21
499 }
500#endif
501 }else{
502 put_cabac(c, state+0, 1);
503 }
504}
505
506static inline int get_symbol(CABACContext *c, uint8_t *state, int is_signed){
507 if(get_cabac(c, state+0))
508 return 0;
509 else{
510 int i, e, a, el;
511 //FIXME try to merge loops with FFMIN() maybe they are equally fast and they are surly cuter
512 for(e=0; e<10; e++){
513 if(get_cabac(c, state + 1 + e)==0) // 1..10
514 break;
515 }
516 el= e;
517
518 if(e==10){
519 while(get_cabac(c, state + 1 + 9)) //10
520 e++;
521 }
522 a= 1;
523 for(i=e-1; i>=el; i--){
524 a += a + get_cabac(c, state+22+9); //31
525 }
526 for(; i>=0; i--){
527 a += a + get_cabac(c, state+22+i); //22..31
528 }
529
530 if(is_signed && get_cabac(c, state+11 + el)) //11..21
531 return -a;
532 else
533 return a;
534 }
535}
536
4f4e9633
MN
537static inline void put_symbol2(CABACContext *c, uint8_t *state, int v, int log2){
538 int i;
0635cbfc 539 int r= log2>=0 ? 1<<log2 : 1;
4f4e9633
MN
540
541 assert(v>=0);
0635cbfc
MN
542 assert(log2>=-4);
543
544 while(v >= r){
545 put_cabac(c, state+4+log2, 1);
546 v -= r;
4f4e9633 547 log2++;
0635cbfc 548 if(log2>0) r+=r;
4f4e9633 549 }
0635cbfc 550 put_cabac(c, state+4+log2, 0);
4f4e9633
MN
551
552 for(i=log2-1; i>=0; i--){
553 put_cabac(c, state+31-i, (v>>i)&1);
554 }
4f4e9633
MN
555}
556
557static inline int get_symbol2(CABACContext *c, uint8_t *state, int log2){
558 int i;
0635cbfc 559 int r= log2>=0 ? 1<<log2 : 1;
4f4e9633
MN
560 int v=0;
561
0635cbfc
MN
562 assert(log2>=-4);
563
564 while(get_cabac(c, state+4+log2)){
565 v+= r;
4f4e9633 566 log2++;
0635cbfc 567 if(log2>0) r+=r;
4f4e9633
MN
568 }
569
570 for(i=log2-1; i>=0; i--){
571 v+= get_cabac(c, state+31-i)<<i;
572 }
573
574 return v;
575}
576
791e7b83
MN
577static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
578 const int mirror_left= !highpass;
579 const int mirror_right= (width&1) ^ highpass;
580 const int w= (width>>1) - 1 + (highpass & width);
581 int i;
582
583#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
584 if(mirror_left){
585 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
586 dst += dst_step;
587 src += src_step;
588 }
589
590 for(i=0; i<w; i++){
591 dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse);
592 }
593
594 if(mirror_right){
595 dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse);
596 }
597}
598
599static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
600 const int mirror_left= !highpass;
601 const int mirror_right= (width&1) ^ highpass;
602 const int w= (width>>1) - 1 + (highpass & width);
603 int i;
604
605 if(mirror_left){
606 int r= 3*2*ref[0];
607 r += r>>4;
608 r += r>>8;
609 dst[0] = LIFT(src[0], ((r+add)>>shift), inverse);
610 dst += dst_step;
611 src += src_step;
612 }
613
614 for(i=0; i<w; i++){
615 int r= 3*(ref[i*ref_step] + ref[(i+1)*ref_step]);
616 r += r>>4;
617 r += r>>8;
618 dst[i*dst_step] = LIFT(src[i*src_step], ((r+add)>>shift), inverse);
619 }
620
621 if(mirror_right){
622 int r= 3*2*ref[w*ref_step];
623 r += r>>4;
624 r += r>>8;
625 dst[w*dst_step] = LIFT(src[w*src_step], ((r+add)>>shift), inverse);
626 }
627}
628
629
630static void inplace_lift(int *dst, int width, int *coeffs, int n, int shift, int start, int inverse){
631 int x, i;
632
633 for(x=start; x<width; x+=2){
634 int64_t sum=0;
635
636 for(i=0; i<n; i++){
637 int x2= x + 2*i - n + 1;
638 if (x2< 0) x2= -x2;
639 else if(x2>=width) x2= 2*width-x2-2;
640 sum += coeffs[i]*(int64_t)dst[x2];
641 }
642 if(inverse) dst[x] -= (sum + (1<<shift)/2)>>shift;
643 else dst[x] += (sum + (1<<shift)/2)>>shift;
644 }
645}
646
647static void inplace_liftV(int *dst, int width, int height, int stride, int *coeffs, int n, int shift, int start, int inverse){
648 int x, y, i;
649 for(y=start; y<height; y+=2){
650 for(x=0; x<width; x++){
651 int64_t sum=0;
652
653 for(i=0; i<n; i++){
654 int y2= y + 2*i - n + 1;
655 if (y2< 0) y2= -y2;
656 else if(y2>=height) y2= 2*height-y2-2;
657 sum += coeffs[i]*(int64_t)dst[x + y2*stride];
658 }
659 if(inverse) dst[x + y*stride] -= (sum + (1<<shift)/2)>>shift;
660 else dst[x + y*stride] += (sum + (1<<shift)/2)>>shift;
661 }
662 }
663}
664
665#define SCALEX 1
666#define LX0 0
667#define LX1 1
668
de890c9b 669#if 0 // more accurate 9/7
791e7b83
MN
670#define N1 2
671#define SHIFT1 14
672#define COEFFS1 (int[]){-25987,-25987}
673#define N2 2
674#define SHIFT2 19
675#define COEFFS2 (int[]){-27777,-27777}
676#define N3 2
677#define SHIFT3 15
678#define COEFFS3 (int[]){28931,28931}
679#define N4 2
680#define SHIFT4 15
681#define COEFFS4 (int[]){14533,14533}
682#elif 1 // 13/7 CRF
683#define N1 4
684#define SHIFT1 4
685#define COEFFS1 (int[]){1,-9,-9,1}
de890c9b 686#define N2 4
791e7b83
MN
687#define SHIFT2 4
688#define COEFFS2 (int[]){-1,5,5,-1}
689#define N3 0
690#define SHIFT3 1
691#define COEFFS3 NULL
692#define N4 0
693#define SHIFT4 1
694#define COEFFS4 NULL
695#elif 1 // 3/5
696#define LX0 1
697#define LX1 0
698#define SCALEX 0.5
699#define N1 2
700#define SHIFT1 1
701#define COEFFS1 (int[]){1,1}
702#define N2 2
703#define SHIFT2 2
704#define COEFFS2 (int[]){-1,-1}
705#define N3 0
706#define SHIFT3 0
707#define COEFFS3 NULL
708#define N4 0
709#define SHIFT4 0
710#define COEFFS4 NULL
711#elif 1 // 11/5
712#define N1 0
713#define SHIFT1 1
714#define COEFFS1 NULL
715#define N2 2
716#define SHIFT2 2
717#define COEFFS2 (int[]){-1,-1}
718#define N3 2
719#define SHIFT3 0
720#define COEFFS3 (int[]){-1,-1}
721#define N4 4
722#define SHIFT4 7
723#define COEFFS4 (int[]){-5,29,29,-5}
724#define SCALEX 4
725#elif 1 // 9/7 CDF
726#define N1 2
727#define SHIFT1 7
728#define COEFFS1 (int[]){-203,-203}
729#define N2 2
730#define SHIFT2 12
731#define COEFFS2 (int[]){-217,-217}
732#define N3 2
733#define SHIFT3 7
734#define COEFFS3 (int[]){113,113}
735#define N4 2
736#define SHIFT4 9
737#define COEFFS4 (int[]){227,227}
738#define SCALEX 1
739#elif 1 // 7/5 CDF
740#define N1 0
741#define SHIFT1 1
742#define COEFFS1 NULL
743#define N2 2
744#define SHIFT2 2
745#define COEFFS2 (int[]){-1,-1}
746#define N3 2
747#define SHIFT3 0
748#define COEFFS3 (int[]){-1,-1}
749#define N4 2
750#define SHIFT4 4
751#define COEFFS4 (int[]){3,3}
752#elif 1 // 9/7 MN
753#define N1 4
754#define SHIFT1 4
755#define COEFFS1 (int[]){1,-9,-9,1}
756#define N2 2
757#define SHIFT2 2
758#define COEFFS2 (int[]){1,1}
759#define N3 0
760#define SHIFT3 1
761#define COEFFS3 NULL
762#define N4 0
763#define SHIFT4 1
764#define COEFFS4 NULL
765#else // 13/7 CRF
766#define N1 4
767#define SHIFT1 4
768#define COEFFS1 (int[]){1,-9,-9,1}
769#define N2 4
770#define SHIFT2 4
771#define COEFFS2 (int[]){-1,5,5,-1}
772#define N3 0
773#define SHIFT3 1
774#define COEFFS3 NULL
775#define N4 0
776#define SHIFT4 1
777#define COEFFS4 NULL
778#endif
779static void horizontal_decomposeX(int *b, int width){
780 int temp[width];
781 const int width2= width>>1;
782 const int w2= (width+1)>>1;
783 int A1,A2,A3,A4, x;
784
785 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 0);
786 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 0);
787 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 0);
788 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 0);
789
790 for(x=0; x<width2; x++){
791 temp[x ]= b[2*x ];
792 temp[x+w2]= b[2*x + 1];
793 }
794 if(width&1)
795 temp[x ]= b[2*x ];
796 memcpy(b, temp, width*sizeof(int));
797}
798
799static void horizontal_composeX(int *b, int width){
800 int temp[width];
801 const int width2= width>>1;
802 int A1,A2,A3,A4, x;
803 const int w2= (width+1)>>1;
804
805 memcpy(temp, b, width*sizeof(int));
806 for(x=0; x<width2; x++){
807 b[2*x ]= temp[x ];
808 b[2*x + 1]= temp[x+w2];
809 }
810 if(width&1)
811 b[2*x ]= temp[x ];
812
813 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 1);
814 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 1);
815 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 1);
816 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 1);
817}
818
819static void spatial_decomposeX(int *buffer, int width, int height, int stride){
820 int x, y;
821
822 for(y=0; y<height; y++){
823 for(x=0; x<width; x++){
824 buffer[y*stride + x] *= SCALEX;
825 }
826 }
827
828 for(y=0; y<height; y++){
829 horizontal_decomposeX(buffer + y*stride, width);
830 }
831
832 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 0);
833 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0);
834 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0);
835 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0);
836}
837
838static void spatial_composeX(int *buffer, int width, int height, int stride){
839 int x, y;
840
841 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1);
842 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1);
843 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1);
844 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 1);
845
846 for(y=0; y<height; y++){
847 horizontal_composeX(buffer + y*stride, width);
848 }
849
850 for(y=0; y<height; y++){
851 for(x=0; x<width; x++){
852 buffer[y*stride + x] /= SCALEX;
853 }
854 }
855}
856
857static void horizontal_decompose53i(int *b, int width){
858 int temp[width];
859 const int width2= width>>1;
860 int A1,A2,A3,A4, x;
861 const int w2= (width+1)>>1;
862
863 for(x=0; x<width2; x++){
864 temp[x ]= b[2*x ];
865 temp[x+w2]= b[2*x + 1];
866 }
867 if(width&1)
868 temp[x ]= b[2*x ];
869#if 0
870 A2= temp[1 ];
871 A4= temp[0 ];
872 A1= temp[0+width2];
873 A1 -= (A2 + A4)>>1;
874 A4 += (A1 + 1)>>1;
875 b[0+width2] = A1;
876 b[0 ] = A4;
877 for(x=1; x+1<width2; x+=2){
878 A3= temp[x+width2];
879 A4= temp[x+1 ];
880 A3 -= (A2 + A4)>>1;
881 A2 += (A1 + A3 + 2)>>2;
882 b[x+width2] = A3;
883 b[x ] = A2;
884
885 A1= temp[x+1+width2];
886 A2= temp[x+2 ];
887 A1 -= (A2 + A4)>>1;
888 A4 += (A1 + A3 + 2)>>2;
889 b[x+1+width2] = A1;
890 b[x+1 ] = A4;
891 }
892 A3= temp[width-1];
893 A3 -= A2;
894 A2 += (A1 + A3 + 2)>>2;
895 b[width -1] = A3;
896 b[width2-1] = A2;
897#else
898 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
899 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0);
900#endif
901}
902
903static void vertical_decompose53iH0(int *b0, int *b1, int *b2, int width){
904 int i;
905
906 for(i=0; i<width; i++){
907 b1[i] -= (b0[i] + b2[i])>>1;
908 }
909}
910
911static void vertical_decompose53iL0(int *b0, int *b1, int *b2, int width){
912 int i;
913
914 for(i=0; i<width; i++){
915 b1[i] += (b0[i] + b2[i] + 2)>>2;
916 }
917}
918
919static void spatial_decompose53i(int *buffer, int width, int height, int stride){
39c61bbb 920 int y;
791e7b83
MN
921 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
922 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride;
923
924 for(y=-2; y<height; y+=2){
925 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
926 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
927
928{START_TIMER
929 if(b1 <= b3) horizontal_decompose53i(b2, width);
930 if(y+2 < height) horizontal_decompose53i(b3, width);
931STOP_TIMER("horizontal_decompose53i")}
932
933{START_TIMER
934 if(b1 <= b3) vertical_decompose53iH0(b1, b2, b3, width);
935 if(b0 <= b2) vertical_decompose53iL0(b0, b1, b2, width);
936STOP_TIMER("vertical_decompose53i*")}
937
938 b0=b2;
939 b1=b3;
940 }
941}
942
943#define lift5 lift
944#if 1
945#define W_AM 3
946#define W_AO 0
947#define W_AS 1
948
949#define W_BM 1
950#define W_BO 8
951#define W_BS 4
952
953#undef lift5
954#define W_CM 9999
955#define W_CO 2
956#define W_CS 2
957
958#define W_DM 15
959#define W_DO 16
960#define W_DS 5
961#elif 0
962#define W_AM 55
963#define W_AO 16
964#define W_AS 5
965
966#define W_BM 3
967#define W_BO 32
968#define W_BS 6
969
970#define W_CM 127
971#define W_CO 64
972#define W_CS 7
973
974#define W_DM 7
975#define W_DO 8
976#define W_DS 4
977#elif 0
978#define W_AM 97
979#define W_AO 32
980#define W_AS 6
981
982#define W_BM 63
983#define W_BO 512
984#define W_BS 10
985
986#define W_CM 13
987#define W_CO 8
988#define W_CS 4
989
990#define W_DM 15
991#define W_DO 16
992#define W_DS 5
993
994#else
995
996#define W_AM 203
997#define W_AO 64
998#define W_AS 7
999
1000#define W_BM 217
1001#define W_BO 2048
1002#define W_BS 12
1003
1004#define W_CM 113
1005#define W_CO 64
1006#define W_CS 7
1007
1008#define W_DM 227
1009#define W_DO 128
1010#define W_DS 9
1011#endif
1012static void horizontal_decompose97i(int *b, int width){
1013 int temp[width];
1014 const int w2= (width+1)>>1;
1015
1016 lift (temp+w2, b +1, b , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0);
1017 lift (temp , b , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0);
1018 lift5(b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
1019 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
1020}
1021
1022
1023static void vertical_decompose97iH0(int *b0, int *b1, int *b2, int width){
1024 int i;
1025
1026 for(i=0; i<width; i++){
1027 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1028 }
1029}
1030
1031static void vertical_decompose97iH1(int *b0, int *b1, int *b2, int width){
1032 int i;
1033
1034 for(i=0; i<width; i++){
1035#ifdef lift5
1036 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1037#else
1038 int r= 3*(b0[i] + b2[i]);
1039 r+= r>>4;
1040 r+= r>>8;
1041 b1[i] += (r+W_CO)>>W_CS;
1042#endif
1043 }
1044}
1045
1046static void vertical_decompose97iL0(int *b0, int *b1, int *b2, int width){
1047 int i;
1048
1049 for(i=0; i<width; i++){
1050 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1051 }
1052}
1053
1054static void vertical_decompose97iL1(int *b0, int *b1, int *b2, int width){
1055 int i;
1056
1057 for(i=0; i<width; i++){
1058 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1059 }
1060}
1061
1062static void spatial_decompose97i(int *buffer, int width, int height, int stride){
39c61bbb 1063 int y;
791e7b83
MN
1064 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
1065 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride;
1066 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
1067 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
1068
1069 for(y=-4; y<height; y+=2){
1070 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1071 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1072
1073{START_TIMER
1074 if(b3 <= b5) horizontal_decompose97i(b4, width);
1075 if(y+4 < height) horizontal_decompose97i(b5, width);
1076if(width>400){
1077STOP_TIMER("horizontal_decompose97i")
1078}}
1079
1080{START_TIMER
1081 if(b3 <= b5) vertical_decompose97iH0(b3, b4, b5, width);
1082 if(b2 <= b4) vertical_decompose97iL0(b2, b3, b4, width);
1083 if(b1 <= b3) vertical_decompose97iH1(b1, b2, b3, width);
1084 if(b0 <= b2) vertical_decompose97iL1(b0, b1, b2, width);
1085
1086if(width>400){
1087STOP_TIMER("vertical_decompose97i")
1088}}
1089
1090 b0=b2;
1091 b1=b3;
1092 b2=b4;
1093 b3=b5;
1094 }
1095}
1096
46c281e8 1097void ff_spatial_dwt(int *buffer, int width, int height, int stride, int type, int decomposition_count){
791e7b83
MN
1098 int level;
1099
46c281e8
MN
1100 for(level=0; level<decomposition_count; level++){
1101 switch(type){
791e7b83
MN
1102 case 0: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1103 case 1: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1104 case 2: spatial_decomposeX (buffer, width>>level, height>>level, stride<<level); break;
1105 }
1106 }
1107}
1108
1109static void horizontal_compose53i(int *b, int width){
1110 int temp[width];
1111 const int width2= width>>1;
1112 const int w2= (width+1)>>1;
1113 int A1,A2,A3,A4, x;
1114
1115#if 0
1116 A2= temp[1 ];
1117 A4= temp[0 ];
1118 A1= temp[0+width2];
1119 A1 -= (A2 + A4)>>1;
1120 A4 += (A1 + 1)>>1;
1121 b[0+width2] = A1;
1122 b[0 ] = A4;
1123 for(x=1; x+1<width2; x+=2){
1124 A3= temp[x+width2];
1125 A4= temp[x+1 ];
1126 A3 -= (A2 + A4)>>1;
1127 A2 += (A1 + A3 + 2)>>2;
1128 b[x+width2] = A3;
1129 b[x ] = A2;
1130
1131 A1= temp[x+1+width2];
1132 A2= temp[x+2 ];
1133 A1 -= (A2 + A4)>>1;
1134 A4 += (A1 + A3 + 2)>>2;
1135 b[x+1+width2] = A1;
1136 b[x+1 ] = A4;
1137 }
1138 A3= temp[width-1];
1139 A3 -= A2;
1140 A2 += (A1 + A3 + 2)>>2;
1141 b[width -1] = A3;
1142 b[width2-1] = A2;
1143#else
1144 lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1);
1145 lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1146#endif
1147 for(x=0; x<width2; x++){
1148 b[2*x ]= temp[x ];
1149 b[2*x + 1]= temp[x+w2];
1150 }
1151 if(width&1)
1152 b[2*x ]= temp[x ];
1153}
1154
1155static void vertical_compose53iH0(int *b0, int *b1, int *b2, int width){
1156 int i;
1157
1158 for(i=0; i<width; i++){
1159 b1[i] += (b0[i] + b2[i])>>1;
1160 }
1161}
1162
1163static void vertical_compose53iL0(int *b0, int *b1, int *b2, int width){
1164 int i;
1165
1166 for(i=0; i<width; i++){
1167 b1[i] -= (b0[i] + b2[i] + 2)>>2;
1168 }
1169}
1170
1171static void spatial_compose53i(int *buffer, int width, int height, int stride){
39c61bbb 1172 int y;
791e7b83
MN
1173 DWTELEM *b0= buffer + mirror(-1-1, height-1)*stride;
1174 DWTELEM *b1= buffer + mirror(-1 , height-1)*stride;
1175
1176 for(y=-1; y<=height; y+=2){
1177 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1178 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1179
1180{START_TIMER
1181 if(b1 <= b3) vertical_compose53iL0(b1, b2, b3, width);
1182 if(b0 <= b2) vertical_compose53iH0(b0, b1, b2, width);
1183STOP_TIMER("vertical_compose53i*")}
1184
1185{START_TIMER
1186 if(y-1 >= 0) horizontal_compose53i(b0, width);
1187 if(b0 <= b2) horizontal_compose53i(b1, width);
1188STOP_TIMER("horizontal_compose53i")}
1189
1190 b0=b2;
1191 b1=b3;
1192 }
1193}
1194
1195
1196static void horizontal_compose97i(int *b, int width){
1197 int temp[width];
1198 const int w2= (width+1)>>1;
1199
1200 lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
1201 lift5(temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
1202 lift (b , temp , temp+w2, 2, 1, 1, width, -W_BM, W_BO, W_BS, 0, 1);
1203 lift (b+1 , temp+w2, b , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1);
1204}
1205
1206static void vertical_compose97iH0(int *b0, int *b1, int *b2, int width){
1207 int i;
1208
1209 for(i=0; i<width; i++){
1210 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1211 }
1212}
1213
1214static void vertical_compose97iH1(int *b0, int *b1, int *b2, int width){
1215 int i;
1216
1217 for(i=0; i<width; i++){
1218#ifdef lift5
1219 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1220#else
1221 int r= 3*(b0[i] + b2[i]);
1222 r+= r>>4;
1223 r+= r>>8;
1224 b1[i] -= (r+W_CO)>>W_CS;
1225#endif
1226 }
1227}
1228
1229static void vertical_compose97iL0(int *b0, int *b1, int *b2, int width){
1230 int i;
1231
1232 for(i=0; i<width; i++){
1233 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1234 }
1235}
1236
1237static void vertical_compose97iL1(int *b0, int *b1, int *b2, int width){
1238 int i;
1239
1240 for(i=0; i<width; i++){
1241 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1242 }
1243}
1244
1245static void spatial_compose97i(int *buffer, int width, int height, int stride){
39c61bbb 1246 int y;
791e7b83
MN
1247 DWTELEM *b0= buffer + mirror(-3-1, height-1)*stride;
1248 DWTELEM *b1= buffer + mirror(-3 , height-1)*stride;
1249 DWTELEM *b2= buffer + mirror(-3+1, height-1)*stride;
1250 DWTELEM *b3= buffer + mirror(-3+2, height-1)*stride;
1251
1252 for(y=-3; y<=height; y+=2){
1253 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1254 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1255
1256 if(stride == width && y+4 < height && 0){
1257 int x;
1258 for(x=0; x<width/2; x++)
1259 b5[x] += 64*2;
1260 for(; x<width; x++)
1261 b5[x] += 169*2;
1262 }
1263
1264{START_TIMER
1265 if(b3 <= b5) vertical_compose97iL1(b3, b4, b5, width);
1266 if(b2 <= b4) vertical_compose97iH1(b2, b3, b4, width);
1267 if(b1 <= b3) vertical_compose97iL0(b1, b2, b3, width);
1268 if(b0 <= b2) vertical_compose97iH0(b0, b1, b2, width);
1269if(width>400){
1270STOP_TIMER("vertical_compose97i")}}
1271
1272{START_TIMER
1273 if(y-1>= 0) horizontal_compose97i(b0, width);
1274 if(b0 <= b2) horizontal_compose97i(b1, width);
1275if(width>400 && b0 <= b2){
1276STOP_TIMER("horizontal_compose97i")}}
1277
1278 b0=b2;
1279 b1=b3;
1280 b2=b4;
1281 b3=b5;
1282 }
1283}
1284
46c281e8 1285void ff_spatial_idwt(int *buffer, int width, int height, int stride, int type, int decomposition_count){
791e7b83
MN
1286 int level;
1287
46c281e8
MN
1288 for(level=decomposition_count-1; level>=0; level--){
1289 switch(type){
791e7b83
MN
1290 case 0: spatial_compose97i(buffer, width>>level, height>>level, stride<<level); break;
1291 case 1: spatial_compose53i(buffer, width>>level, height>>level, stride<<level); break;
1292 case 2: spatial_composeX (buffer, width>>level, height>>level, stride<<level); break;
1293 }
1294 }
1295}
1296
4f4e9633 1297static void encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
4f4e9633
MN
1298 const int w= b->width;
1299 const int h= b->height;
1300 int x, y;
1301
791e7b83 1302 if(1){
791e7b83 1303 int run=0;
a8d73e56 1304 int runs[w*h];
791e7b83
MN
1305 int run_index=0;
1306
791e7b83
MN
1307 for(y=0; y<h; y++){
1308 for(x=0; x<w; x++){
78486403 1309 int v, p=0;
6b2f6646 1310 int /*ll=0, */l=0, lt=0, t=0, rt=0;
a8d73e56 1311 v= src[x + y*stride];
791e7b83
MN
1312
1313 if(y){
a8d73e56 1314 t= src[x + (y-1)*stride];
791e7b83 1315 if(x){
a8d73e56 1316 lt= src[x - 1 + (y-1)*stride];
791e7b83
MN
1317 }
1318 if(x + 1 < w){
a8d73e56 1319 rt= src[x + 1 + (y-1)*stride];
791e7b83
MN
1320 }
1321 }
1322 if(x){
a8d73e56 1323 l= src[x - 1 + y*stride];
6b2f6646
MN
1324 /*if(x > 1){
1325 if(orientation==1) ll= src[y + (x-2)*stride];
1326 else ll= src[x - 2 + y*stride];
791e7b83
MN
1327 }*/
1328 }
78486403 1329 if(parent){
a8d73e56
MN
1330 int px= x>>1;
1331 int py= y>>1;
78486403
MN
1332 if(px<b->parent->width && py<b->parent->height)
1333 p= parent[px + py*2*stride];
1334 }
1335 if(!(/*ll|*/l|lt|t|rt|p)){
791e7b83
MN
1336 if(v){
1337 runs[run_index++]= run;
1338 run=0;
1339 }else{
1340 run++;
1341 }
1342 }
1343 }
1344 }
1345 runs[run_index++]= run;
1346 run_index=0;
1347 run= runs[run_index++];
1348
4f4e9633 1349 put_symbol2(&s->c, b->state[1], run, 3);
791e7b83
MN
1350
1351 for(y=0; y<h; y++){
1352 for(x=0; x<w; x++){
78486403 1353 int v, p=0;
6b2f6646 1354 int /*ll=0, */l=0, lt=0, t=0, rt=0;
a8d73e56 1355 v= src[x + y*stride];
791e7b83
MN
1356
1357 if(y){
a8d73e56 1358 t= src[x + (y-1)*stride];
791e7b83 1359 if(x){
a8d73e56 1360 lt= src[x - 1 + (y-1)*stride];
791e7b83
MN
1361 }
1362 if(x + 1 < w){
a8d73e56 1363 rt= src[x + 1 + (y-1)*stride];
791e7b83
MN
1364 }
1365 }
1366 if(x){
a8d73e56 1367 l= src[x - 1 + y*stride];
6b2f6646
MN
1368 /*if(x > 1){
1369 if(orientation==1) ll= src[y + (x-2)*stride];
1370 else ll= src[x - 2 + y*stride];
791e7b83
MN
1371 }*/
1372 }
78486403 1373 if(parent){
a8d73e56
MN
1374 int px= x>>1;
1375 int py= y>>1;
78486403
MN
1376 if(px<b->parent->width && py<b->parent->height)
1377 p= parent[px + py*2*stride];
1378 }
1379 if(/*ll|*/l|lt|t|rt|p){
1380 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
6b2f6646
MN
1381
1382 put_cabac(&s->c, &b->state[0][context], !!v);
791e7b83
MN
1383 }else{
1384 if(!run){
1385 run= runs[run_index++];
4f4e9633
MN
1386
1387 put_symbol2(&s->c, b->state[1], run, 3);
791e7b83
MN
1388 assert(v);
1389 }else{
1390 run--;
1391 assert(!v);
1392 }
1393 }
1394 if(v){
78486403 1395 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
6b2f6646 1396
0635cbfc 1397 put_symbol2(&s->c, b->state[context + 2], ABS(v)-1, context-4);
791e7b83
MN
1398 put_cabac(&s->c, &b->state[0][16 + 1 + 3 + quant3b[l&0xFF] + 3*quant3b[t&0xFF]], v<0);
1399 }
1400 }
1401 }
791e7b83 1402 }
791e7b83
MN
1403}
1404
4f4e9633
MN
1405static void encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1406// encode_subband_qtree(s, b, src, parent, stride, orientation);
1407// encode_subband_z0run(s, b, src, parent, stride, orientation);
1408 encode_subband_c0run(s, b, src, parent, stride, orientation);
1409// encode_subband_dzr(s, b, src, parent, stride, orientation);
1410}
1411
a8d73e56 1412static inline void decode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
791e7b83
MN
1413 const int w= b->width;
1414 const int h= b->height;
1415 int x,y;
ea7d9cd4
MN
1416 const int qlog= clip(s->qlog + b->qlog, 0, 128);
1417 int qmul= qexp[qlog&7]<<(qlog>>3);
1418 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1419
fff6d4ea
MN
1420 START_TIMER
1421
ea7d9cd4
MN
1422 if(b->buf == s->spatial_dwt_buffer || s->qlog == LOSSLESS_QLOG){
1423 qadd= 0;
1424 qmul= 1<<QEXPSHIFT;
1425 }
791e7b83 1426
791e7b83 1427 if(1){
791e7b83 1428 int run;
7b49c309
MN
1429 int index=0;
1430 int prev_index=-1;
1431 int prev2_index=0;
1432 int parent_index= 0;
1433 int prev_parent_index= 0;
1434
791e7b83
MN
1435 for(y=0; y<b->height; y++)
1436 memset(&src[y*stride], 0, b->width*sizeof(DWTELEM));
1437
4f4e9633 1438 run= get_symbol2(&s->c, b->state[1], 3);
791e7b83 1439 for(y=0; y<h; y++){
0cea8a03
MN
1440 int v=0;
1441 int lt=0, t=0, rt=0;
1442
ff765159
MN
1443 if(y && b->x[prev_index] == 0){
1444 rt= b->coeff[prev_index];
0cea8a03 1445 }
791e7b83 1446 for(x=0; x<w; x++){
0cea8a03
MN
1447 int p=0;
1448 const int l= v;
1449
1450 lt= t; t= rt;
791e7b83 1451
ff765159 1452 if(y){
ea7d9cd4 1453 if(b->x[prev_index] <= x)
ff765159
MN
1454 prev_index++;
1455 if(b->x[prev_index] == x + 1)
1456 rt= b->coeff[prev_index];
1457 else
1458 rt=0;
1459 }
78486403 1460 if(parent){
7b49c309
MN
1461 if(x>>1 > b->parent->x[parent_index]){
1462 parent_index++;
1463 }
ff765159
MN
1464 if(x>>1 == b->parent->x[parent_index]){
1465 p= b->parent->coeff[parent_index];
1466 }
78486403
MN
1467 }
1468 if(/*ll|*/l|lt|t|rt|p){
1469 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
6b2f6646
MN
1470
1471 v=get_cabac(&s->c, &b->state[0][context]);
791e7b83
MN
1472 }else{
1473 if(!run){
4f4e9633 1474 run= get_symbol2(&s->c, b->state[1], 3);
791e7b83
MN
1475 v=1;
1476 }else{
1477 run--;
1478 v=0;
3c1adccd 1479
7b49c309
MN
1480 if(y && parent){
1481 int max_run;
7b49c309
MN
1482
1483 max_run= FFMIN(run, b->x[prev_index] - x - 2);
1484 max_run= FFMIN(max_run, 2*b->parent->x[parent_index] - x - 1);
1485 x+= max_run;
1486 run-= max_run;
3c1adccd 1487 }
791e7b83
MN
1488 }
1489 }
1490 if(v){
78486403 1491 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
0635cbfc 1492 v= get_symbol2(&s->c, b->state[context + 2], context-4) + 1;
ea7d9cd4
MN
1493 if(get_cabac(&s->c, &b->state[0][16 + 1 + 3 + quant3b[l&0xFF] + 3*quant3b[t&0xFF]])){
1494 src[x + y*stride]=-(( v*qmul + qadd)>>(QEXPSHIFT));
791e7b83 1495 v= -v;
ea7d9cd4
MN
1496 }else{
1497 src[x + y*stride]= (( v*qmul + qadd)>>(QEXPSHIFT));
1498 }
ff765159
MN
1499 b->x[index]=x; //FIXME interleave x/coeff
1500 b->coeff[index++]= v;
7b49c309
MN
1501 }
1502 }
1503 b->x[index++]= w+1; //end marker
1504 prev_index= prev2_index;
1505 prev2_index= index;
1506
1507 if(parent){
1508 while(b->parent->x[parent_index] != b->parent->width+1)
1509 parent_index++;
1510 parent_index++;
1511 if(y&1){
1512 prev_parent_index= parent_index;
1513 }else{
1514 parent_index= prev_parent_index;
791e7b83
MN
1515 }
1516 }
1517 }
7b49c309 1518 b->x[index++]= w+1; //end marker
3c1adccd 1519 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
791e7b83
MN
1520 STOP_TIMER("decode_subband")
1521 }
1522
1523 return;
1524 }
791e7b83
MN
1525}
1526
1527static void reset_contexts(SnowContext *s){
1528 int plane_index, level, orientation;
1529
19aa028d 1530 for(plane_index=0; plane_index<3; plane_index++){
791e7b83
MN
1531 for(level=0; level<s->spatial_decomposition_count; level++){
1532 for(orientation=level ? 1:0; orientation<4; orientation++){
1533 memset(s->plane[plane_index].band[level][orientation].state, 0, sizeof(s->plane[plane_index].band[level][orientation].state));
1534 }
1535 }
1536 }
791e7b83 1537 memset(s->header_state, 0, sizeof(s->header_state));
155ec6ed
MN
1538 memset(s->block_state, 0, sizeof(s->block_state));
1539}
1540
1541static int alloc_blocks(SnowContext *s){
1542 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1543 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1544
1545 s->b_width = w;
1546 s->b_height= h;
1547
1548 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
1549 return 0;
1550}
1551
1552static inline void copy_cabac_state(CABACContext *d, CABACContext *s){
1553 PutBitContext bak= d->pb;
1554 *d= *s;
1555 d->pb= bak;
1556}
1557
1558//near copy & paste from dsputil, FIXME
1559static int pix_sum(uint8_t * pix, int line_size, int w)
1560{
1561 int s, i, j;
1562
1563 s = 0;
1564 for (i = 0; i < w; i++) {
1565 for (j = 0; j < w; j++) {
1566 s += pix[0];
1567 pix ++;
1568 }
1569 pix += line_size - w;
1570 }
1571 return s;
1572}
1573
1574//near copy & paste from dsputil, FIXME
1575static int pix_norm1(uint8_t * pix, int line_size, int w)
1576{
1577 int s, i, j;
1578 uint32_t *sq = squareTbl + 256;
1579
1580 s = 0;
1581 for (i = 0; i < w; i++) {
1582 for (j = 0; j < w; j ++) {
1583 s += sq[pix[0]];
1584 pix ++;
1585 }
1586 pix += line_size - w;
1587 }
1588 return s;
1589}
1590
1591static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int type){
1592 const int w= s->b_width << s->block_max_depth;
1593 const int rem_depth= s->block_max_depth - level;
1594 const int index= (x + y*w) << rem_depth;
1595 const int block_w= 1<<rem_depth;
1596 BlockNode block;
1597 int i,j;
1598
1599 block.color[0]= l;
1600 block.color[1]= cb;
1601 block.color[2]= cr;
1602 block.mx= mx;
1603 block.my= my;
1604 block.type= type;
1605 block.level= level;
1606
1607 for(j=0; j<block_w; j++){
1608 for(i=0; i<block_w; i++){
1609 s->block[index + i + j*w]= block;
1610 }
1611 }
1612}
1613
1614static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
1615 const int offset[3]= {
1616 y*c-> stride + x,
1617 ((y*c->uvstride + x)>>1),
1618 ((y*c->uvstride + x)>>1),
1619 };
1620 int i;
1621 for(i=0; i<3; i++){
1622 c->src[0][i]= src [i];
1623 c->ref[0][i]= ref [i] + offset[i];
1624 }
1625 assert(!ref_index);
1626}
1627
1628//FIXME copy&paste
1629#define P_LEFT P[1]
1630#define P_TOP P[2]
1631#define P_TOPRIGHT P[3]
1632#define P_MEDIAN P[4]
1633#define P_MV1 P[9]
1634#define FLAG_QPEL 1 //must be 1
1635
1636static int encode_q_branch(SnowContext *s, int level, int x, int y){
1637 uint8_t p_buffer[1024];
1638 uint8_t i_buffer[1024];
1639 uint8_t p_state[sizeof(s->block_state)];
1640 uint8_t i_state[sizeof(s->block_state)];
1641 CABACContext pc, ic;
1642 PutBitContext pbbak= s->c.pb;
1643 int score, score2, iscore, i_len, p_len, block_s, sum;
1644 const int w= s->b_width << s->block_max_depth;
1645 const int h= s->b_height << s->block_max_depth;
1646 const int rem_depth= s->block_max_depth - level;
1647 const int index= (x + y*w) << rem_depth;
1648 const int block_w= 1<<(LOG2_MB_SIZE - level);
1649 static BlockNode null_block= { //FIXME add border maybe
1650 .color= {128,128,128},
1651 .mx= 0,
1652 .my= 0,
1653 .type= 0,
1654 .level= 0,
1655 };
1656 int trx= (x+1)<<rem_depth;
1657 int try= (y+1)<<rem_depth;
1658 BlockNode *left = x ? &s->block[index-1] : &null_block;
1659 BlockNode *top = y ? &s->block[index-w] : &null_block;
1660 BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
1661 BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
1662 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
1663 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
1664 int pl = left->color[0];
1665 int pcb= left->color[1];
1666 int pcr= left->color[2];
1667 int pmx= mid_pred(left->mx, top->mx, tr->mx);
1668 int pmy= mid_pred(left->my, top->my, tr->my);
1669 int mx=0, my=0;
1670 int l,cr,cb, i;
1671 const int stride= s->current_picture.linesize[0];
1672 const int uvstride= s->current_picture.linesize[1];
1673 const int instride= s->input_picture.linesize[0];
1674 const int uvinstride= s->input_picture.linesize[1];
1675 uint8_t *new_l = s->input_picture.data[0] + (x + y* instride)*block_w;
1676 uint8_t *new_cb= s->input_picture.data[1] + (x + y*uvinstride)*block_w/2;
1677 uint8_t *new_cr= s->input_picture.data[2] + (x + y*uvinstride)*block_w/2;
1678 uint8_t current_mb[3][stride*block_w];
1679 uint8_t *current_data[3]= {&current_mb[0][0], &current_mb[1][0], &current_mb[2][0]};
1680 int P[10][2];
1681 int16_t last_mv[3][2];
1682 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
1683 const int shift= 1+qpel;
1684 MotionEstContext *c= &s->m.me;
1685 int mx_context= av_log2(2*ABS(left->mx - top->mx));
1686 int my_context= av_log2(2*ABS(left->my - top->my));
1687 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1688
1689 assert(sizeof(s->block_state) >= 256);
1690 if(s->keyframe){
1691 set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, BLOCK_INTRA);
1692 return 0;
1693 }
1694
1695 //FIXME optimize
1696 for(i=0; i<block_w; i++)
1697 memcpy(&current_mb[0][0] + stride*i, new_l + instride*i, block_w);
1698 for(i=0; i<block_w>>1; i++)
1699 memcpy(&current_mb[1][0] + uvstride*i, new_cb + uvinstride*i, block_w>>1);
1700 for(i=0; i<block_w>>1; i++)
1701 memcpy(&current_mb[2][0] + uvstride*i, new_cr + uvinstride*i, block_w>>1);
1702
1703// clip predictors / edge ?
1704
1705 P_LEFT[0]= left->mx;
1706 P_LEFT[1]= left->my;
1707 P_TOP [0]= top->mx;
1708 P_TOP [1]= top->my;
1709 P_TOPRIGHT[0]= tr->mx;
1710 P_TOPRIGHT[1]= tr->my;
1711
1712 last_mv[0][0]= s->block[index].mx;
1713 last_mv[0][1]= s->block[index].my;
1714 last_mv[1][0]= right->mx;
1715 last_mv[1][1]= right->my;
1716 last_mv[2][0]= bottom->mx;
1717 last_mv[2][1]= bottom->my;
1718
1719 s->m.mb_stride=2;
1720 s->m.mb_x=
1721 s->m.mb_y= 0;
1722 s->m.me.skip= 0;
1723
1724 init_ref(c, current_data, s->last_picture.data, NULL, block_w*x, block_w*y, 0);
1725
1726 assert(s->m.me. stride == stride);
1727 assert(s->m.me.uvstride == uvstride);
1728
1729 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
1730 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
1731 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
1732 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
1733
ff158dc9
MN
1734 c->xmin = - x*block_w - 16+2;
1735 c->ymin = - y*block_w - 16+2;
1736 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
1737 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
155ec6ed
MN
1738
1739 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
1740 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
1741 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
1742 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
1743 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
1744 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
1745 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
1746
1747 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
1748 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
1749
1750 if (!y) {
1751 c->pred_x= P_LEFT[0];
1752 c->pred_y= P_LEFT[1];
1753 } else {
1754 c->pred_x = P_MEDIAN[0];
1755 c->pred_y = P_MEDIAN[1];
1756 }
1757
1758 score= ff_epzs_motion_search(&s->m, &mx, &my, P, 0, /*ref_index*/ 0, last_mv,
1759 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
1760
1761 assert(mx >= c->xmin);
1762 assert(mx <= c->xmax);
1763 assert(my >= c->ymin);
1764 assert(my <= c->ymax);
1765
1766 score= s->m.me.sub_motion_search(&s->m, &mx, &my, score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
1767 score= ff_get_mb_score(&s->m, mx, my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
1768 //FIXME if mb_cmp != SSE then intra cant be compared currently and mb_penalty vs. lambda2
1769
1770 // subpel search
1771 pc= s->c;
1772 init_put_bits(&pc.pb, p_buffer, sizeof(p_buffer));
1773 memcpy(p_state, s->block_state, sizeof(s->block_state));
1774
1775 if(level!=s->block_max_depth)
1776 put_cabac(&pc, &p_state[4 + s_context], 1);
1777 put_cabac(&pc, &p_state[1 + left->type + top->type], 0);
1778 put_symbol(&pc, &p_state[128 + 32*mx_context], mx - pmx, 1);
1779 put_symbol(&pc, &p_state[128 + 32*my_context], my - pmy, 1);
1780 p_len= put_bits_count(&pc.pb);
1781 score += (s->lambda2*(p_len + pc.outstanding_count - s->c.outstanding_count))>>FF_LAMBDA_SHIFT;
1782
1783 block_s= block_w*block_w;
1784 sum = pix_sum(&current_mb[0][0], stride, block_w);
1785 l= (sum + block_s/2)/block_s;
1786 iscore = pix_norm1(&current_mb[0][0], stride, block_w) - 2*l*sum + l*l*block_s;
1787
1788 block_s= block_w*block_w>>2;
1789 sum = pix_sum(&current_mb[1][0], uvstride, block_w>>1);
1790 cb= (sum + block_s/2)/block_s;
1791// iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
1792 sum = pix_sum(&current_mb[2][0], uvstride, block_w>>1);
1793 cr= (sum + block_s/2)/block_s;
1794// iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
1795
1796 ic= s->c;
1797 init_put_bits(&ic.pb, i_buffer, sizeof(i_buffer));
1798 memcpy(i_state, s->block_state, sizeof(s->block_state));
1799 if(level!=s->block_max_depth)
1800 put_cabac(&ic, &i_state[4 + s_context], 1);
1801 put_cabac(&ic, &i_state[1 + left->type + top->type], 1);
1802 put_symbol(&ic, &i_state[32], l-pl , 1);
1803 put_symbol(&ic, &i_state[64], cb-pcb, 1);
1804 put_symbol(&ic, &i_state[96], cr-pcr, 1);
1805 i_len= put_bits_count(&ic.pb);
1806 iscore += (s->lambda2*(i_len + ic.outstanding_count - s->c.outstanding_count))>>FF_LAMBDA_SHIFT;
1807
1808// assert(score==256*256*256*64-1);
1809 assert(iscore < 255*255*256 + s->lambda2*10);
1810 assert(iscore >= 0);
1811 assert(l>=0 && l<=255);
1812 assert(pl>=0 && pl<=255);
1813
1814 if(level==0){
1815 int varc= iscore >> 8;
1816 int vard= score >> 8;
1817 if (vard <= 64 || vard < varc)
1818 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
1819 else
1820 c->scene_change_score+= s->m.qscale;
1821 }
1822
1823 if(level!=s->block_max_depth){
1824 put_cabac(&s->c, &s->block_state[4 + s_context], 0);
1825 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
1826 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
1827 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
1828 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
1829 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
1830
1831 if(score2 < score && score2 < iscore)
1832 return score2;
1833 }
1834
1835 if(iscore < score){
1836 flush_put_bits(&ic.pb);
1837 ff_copy_bits(&pbbak, i_buffer, i_len);
1838 s->c= ic;
1839 s->c.pb= pbbak;
1840 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, BLOCK_INTRA);
1841 memcpy(s->block_state, i_state, sizeof(s->block_state));
1842 return iscore;
1843 }else{
1844 flush_put_bits(&pc.pb);
1845 ff_copy_bits(&pbbak, p_buffer, p_len);
1846 s->c= pc;
1847 s->c.pb= pbbak;
1848 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, 0);
1849 memcpy(s->block_state, p_state, sizeof(s->block_state));
1850 return score;
1851 }
1852}
1853
1854static void decode_q_branch(SnowContext *s, int level, int x, int y){
1855 const int w= s->b_width << s->block_max_depth;
155ec6ed
MN
1856 const int rem_depth= s->block_max_depth - level;
1857 const int index= (x + y*w) << rem_depth;
1858 static BlockNode null_block= { //FIXME add border maybe
1859 .color= {128,128,128},
1860 .mx= 0,
1861 .my= 0,
1862 .type= 0,
1863 .level= 0,
1864 };
1865 int trx= (x+1)<<rem_depth;
155ec6ed
MN
1866 BlockNode *left = x ? &s->block[index-1] : &null_block;
1867 BlockNode *top = y ? &s->block[index-w] : &null_block;
1868 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
1869 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
1870 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1871
1872 if(s->keyframe){
1873 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, BLOCK_INTRA);
1874 return;
1875 }
1876
1877 if(level==s->block_max_depth || get_cabac(&s->c, &s->block_state[4 + s_context])){
1878 int type;
1879 int l = left->color[0];
1880 int cb= left->color[1];
1881 int cr= left->color[2];
1882 int mx= mid_pred(left->mx, top->mx, tr->mx);
1883 int my= mid_pred(left->my, top->my, tr->my);
1884 int mx_context= av_log2(2*ABS(left->mx - top->mx)) + 0*av_log2(2*ABS(tr->mx - top->mx));
1885 int my_context= av_log2(2*ABS(left->my - top->my)) + 0*av_log2(2*ABS(tr->my - top->my));
1886
1887 type= get_cabac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
1888
1889 if(type){
1890 l += get_symbol(&s->c, &s->block_state[32], 1);
1891 cb+= get_symbol(&s->c, &s->block_state[64], 1);
1892 cr+= get_symbol(&s->c, &s->block_state[96], 1);
1893 }else{
1894 mx+= get_symbol(&s->c, &s->block_state[128 + 32*mx_context], 1);
1895 my+= get_symbol(&s->c, &s->block_state[128 + 32*my_context], 1);
1896 }
1897 set_blocks(s, level, x, y, l, cb, cr, mx, my, type);
1898 }else{
1899 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
1900 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
1901 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
1902 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
1903 }
1904}
1905
1906static void encode_blocks(SnowContext *s){
1907 int x, y;
1908 int w= s->b_width;
1909 int h= s->b_height;
1910
1911 for(y=0; y<h; y++){
1912 for(x=0; x<w; x++){
1913 encode_q_branch(s, 0, x, y);
1914 }
1915 }
1916}
1917
1918static void decode_blocks(SnowContext *s){
1919 int x, y;
1920 int w= s->b_width;
1921 int h= s->b_height;
1922
1923 for(y=0; y<h; y++){
1924 for(x=0; x<w; x++){
1925 decode_q_branch(s, 0, x, y);
1926 }
1927 }
791e7b83
MN
1928}
1929
1930static void mc_block(uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
1931 int x, y;
791e7b83
MN
1932 for(y=0; y < b_h+5; y++){
1933 for(x=0; x < b_w; x++){
1934 int a0= src[x + y*stride];
1935 int a1= src[x + 1 + y*stride];
1936 int a2= src[x + 2 + y*stride];
1937 int a3= src[x + 3 + y*stride];
1938 int a4= src[x + 4 + y*stride];
1939 int a5= src[x + 5 + y*stride];
1940// int am= 9*(a1+a2) - (a0+a3);
1941 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
1942// int am= 18*(a2+a3) - 2*(a1+a4);
1943// int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
1944// int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;
1945
1946// if(b_w==16) am= 8*(a1+a2);
1947
1948 if(dx<8) tmp[x + y*stride]= (32*a2*( 8-dx) + am* dx + 128)>>8;
1949 else tmp[x + y*stride]= ( am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
1950
1951/* if (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) + aL* dx + 32)>>6;
1952 else if(dx< 8) tmp[x + y*stride]= ( aL*( 8-dx) + am*(dx- 4) + 32)>>6;
1953 else if(dx<12) tmp[x + y*stride]= ( am*(12-dx) + aR*(dx- 8) + 32)>>6;
1954 else tmp[x + y*stride]= ( aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/
1955 }
1956 }
1957 for(y=0; y < b_h; y++){
1958 for(x=0; x < b_w; x++){
1959 int a0= tmp[x + y *stride];
1960 int a1= tmp[x + (y + 1)*stride];
1961 int a2= tmp[x + (y + 2)*stride];
1962 int a3= tmp[x + (y + 3)*stride];
1963 int a4= tmp[x + (y + 4)*stride];
1964 int a5= tmp[x + (y + 5)*stride];
1965 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
1966// int am= 18*(a2+a3) - 2*(a1+a4);
1967/* int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
1968 int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/
1969
1970// if(b_w==16) am= 8*(a1+a2);
1971
1972 if(dy<8) dst[x + y*stride]= (32*a2*( 8-dy) + am* dy + 128)>>8;
1973 else dst[x + y*stride]= ( am*(16-dy) + 32*a3*(dy-8) + 128)>>8;
1974
1975/* if (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) + aL* dy + 32)>>6;
1976 else if(dy< 8) tmp[x + y*stride]= ( aL*( 8-dy) + am*(dy- 4) + 32)>>6;
1977 else if(dy<12) tmp[x + y*stride]= ( am*(12-dy) + aR*(dy- 8) + 32)>>6;
1978 else tmp[x + y*stride]= ( aR*(16-dy) + 16*a2*(dy-12) + 32)>>6;*/
1979 }
1980 }
1981}
1982
1983#define mcb(dx,dy,b_w)\
1984static void mc_block ## dx ## dy(uint8_t *dst, uint8_t *src, int stride){\
1985 uint8_t tmp[stride*(b_w+5)];\
1986 mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\
1987}
1988
1989mcb( 0, 0,16)
1990mcb( 4, 0,16)
1991mcb( 8, 0,16)
1992mcb(12, 0,16)
1993mcb( 0, 4,16)
1994mcb( 4, 4,16)
1995mcb( 8, 4,16)
1996mcb(12, 4,16)
1997mcb( 0, 8,16)
1998mcb( 4, 8,16)
1999mcb( 8, 8,16)
2000mcb(12, 8,16)
2001mcb( 0,12,16)
2002mcb( 4,12,16)
2003mcb( 8,12,16)
2004mcb(12,12,16)
2005
2006#define mca(dx,dy,b_w)\
2007static void mc_block_hpel ## dx ## dy(uint8_t *dst, uint8_t *src, int stride, int h){\
2008 uint8_t tmp[stride*(b_w+5)];\
2009 assert(h==b_w);\
2010 mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\
2011}
2012
2013mca( 0, 0,16)
2014mca( 8, 0,16)
2015mca( 0, 8,16)
2016mca( 8, 8,16)
2017
ff158dc9
MN
2018static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
2019 if(block->type){
2020 int x, y;
2021 const int color= block->color[plane_index];
2022 for(y=0; y < b_h; y++){
2023 for(x=0; x < b_w; x++){
2024 dst[x + y*stride]= color;
2025 }
2026 }
2027 }else{
2028 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
2029 int mx= block->mx*scale;
2030 int my= block->my*scale;
2031 sx += (mx>>4) - 2;
2032 sy += (my>>4) - 2;
2033 src += sx + sy*stride;
2034 if( (unsigned)sx >= w - b_w - 4
2035 || (unsigned)sy >= h - b_h - 4){
2036 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+5, b_h+5, sx, sy, w, h);
2037 src= tmp + MB_SIZE;
2038 }
2039 mc_block(dst, src, tmp, stride, b_w, b_h, mx&15, my&15);
2040 }
2041}
2042
2043static always_inline int same_block(BlockNode *a, BlockNode *b){
2044 return !((a->mx - b->mx) | (a->my - b->my) | a->type | b->type);
2045}
2046
2047//FIXME name clenup (b_w, block_w, b_width stuff)
2048static always_inline void add_yblock(SnowContext *s, DWTELEM *dst, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){
2049 const int b_width = s->b_width << s->block_max_depth;
2050 const int b_height= s->b_height << s->block_max_depth;
2051 const int b_stride= b_width;
2052 BlockNode *lt= &s->block[b_x + b_y*b_stride];
2053 BlockNode *rt= lt+1;
2054 BlockNode *lb= lt+b_stride;
2055 BlockNode *rb= lb+1;
2056 uint8_t *block[4];
2057 uint8_t tmp[src_stride*(b_h+5)]; //FIXME align
791e7b83
MN
2058 int x,y;
2059
ff158dc9
MN
2060 if(b_x<0){
2061 lt= rt;
2062 lb= rb;
2063 }else if(b_x + 1 >= b_width){
2064 rt= lt;
2065 rb= lb;
791e7b83 2066 }
ff158dc9
MN
2067 if(b_y<0){
2068 lt= lb;
2069 rt= rb;
2070 }else if(b_y + 1 >= b_height){
2071 lb= lt;
2072 rb= rt;
2073 }
2074
2075 if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
2076 obmc -= src_x;
2077 b_w += src_x;
2078 src_x=0;
2079 }else if(src_x + b_w > w){
2080 b_w = w - src_x;
2081 }
2082 if(src_y<0){
2083 obmc -= src_y*obmc_stride;
2084 b_h += src_y;
2085 src_y=0;
2086 }else if(src_y + b_h> h){
2087 b_h = h - src_y;
791e7b83 2088 }
620ab797 2089
ff158dc9 2090 if(b_w<=0 || b_h<=0) return;
155ec6ed 2091
ff158dc9
MN
2092assert(src_stride > 7*MB_SIZE);
2093 dst += src_x + src_y*dst_stride;
2094// src += src_x + src_y*src_stride;
2095
2096 block[0]= tmp+3*MB_SIZE;
2097 pred_block(s, block[0], src, tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2098
2099 if(same_block(lt, rt)){
2100 block[1]= block[0];
791e7b83 2101 }else{
ff158dc9
MN
2102 block[1]= tmp + 4*MB_SIZE;
2103 pred_block(s, block[1], src, tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2104 }
2105
2106 if(same_block(lt, lb)){
2107 block[2]= block[0];
2108 }else if(same_block(rt, lb)){
2109 block[2]= block[1];
2110 }else{
2111 block[2]= tmp+5*MB_SIZE;
2112 pred_block(s, block[2], src, tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2113 }
791e7b83 2114
ff158dc9
MN
2115 if(same_block(lt, rb) ){
2116 block[3]= block[0];
2117 }else if(same_block(rt, rb)){
2118 block[3]= block[1];
2119 }else if(same_block(lb, rb)){
2120 block[3]= block[2];
2121 }else{
2122 block[3]= tmp+6*MB_SIZE;
2123 pred_block(s, block[3], src, tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2124 }
2125#if 0
2126 for(y=0; y<b_h; y++){
2127 for(x=0; x<b_w; x++){
2128 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2129 if(add) dst[x + y*dst_stride] += v;
2130 else dst[x + y*dst_stride] -= v;
2131 }
2132 }
2133 for(y=0; y<b_h; y++){
2134 uint8_t *obmc2= obmc + (obmc_stride>>1);
2135 for(x=0; x<b_w; x++){
2136 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2137 if(add) dst[x + y*dst_stride] += v;
2138 else dst[x + y*dst_stride] -= v;
2139 }
2140 }
2141 for(y=0; y<b_h; y++){
2142 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2143 for(x=0; x<b_w; x++){
2144 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2145 if(add) dst[x + y*dst_stride] += v;
2146 else dst[x + y*dst_stride] -= v;
2147 }
2148 }
2149 for(y=0; y<b_h; y++){
2150 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2151 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2152 for(x=0; x<b_w; x++){
2153 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2154 if(add) dst[x + y*dst_stride] += v;
2155 else dst[x + y*dst_stride] -= v;
2156 }
2157 }
2158#else
2159 for(y=0; y<b_h; y++){
2160 //FIXME ugly missue of obmc_stride
2161 uint8_t *obmc1= obmc + y*obmc_stride;
2162 uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2163 uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2164 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2165 for(x=0; x<b_w; x++){
2166 int v= obmc1[x] * block[3][x + y*src_stride]
2167 +obmc2[x] * block[2][x + y*src_stride]
2168 +obmc3[x] * block[1][x + y*src_stride]
2169 +obmc4[x] * block[0][x + y*src_stride];
2170 if(add) dst[x + y*dst_stride] += v * (256/OBMC_MAX);
2171 else dst[x + y*dst_stride] -= v * (256/OBMC_MAX);
791e7b83
MN
2172 }
2173 }
ff158dc9 2174#endif
791e7b83
MN
2175}
2176
ff158dc9 2177static always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){
791e7b83 2178 Plane *p= &s->plane[plane_index];
155ec6ed
MN
2179 const int mb_w= s->b_width << s->block_max_depth;
2180 const int mb_h= s->b_height << s->block_max_depth;
791e7b83 2181 int x, y, mb_x, mb_y;
155ec6ed
MN
2182 int block_size = MB_SIZE >> s->block_max_depth;
2183 int block_w = plane_index ? block_size/2 : block_size;
ff158dc9 2184 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
155ec6ed 2185 int obmc_stride= plane_index ? block_size : 2*block_size;
791e7b83
MN
2186 int ref_stride= s->last_picture.linesize[plane_index];
2187 uint8_t *ref = s->last_picture.data[plane_index];
2188 int w= p->width;
2189 int h= p->height;
fff6d4ea 2190 START_TIMER
791e7b83 2191
ff158dc9
MN
2192 if(s->keyframe || (s->avctx->debug&512)){
2193 for(y=0; y<h; y++){
2194 for(x=0; x<w; x++){
2195 if(add) buf[x + y*w]+= 128*256;
2196 else buf[x + y*w]-= 128*256;
2197 }
791e7b83 2198 }
ff158dc9
MN
2199
2200 return;
791e7b83
MN
2201 }
2202
ff158dc9
MN
2203 for(mb_y=0; mb_y<=mb_h; mb_y++){
2204 for(mb_x=0; mb_x<=mb_w; mb_x++){
fff6d4ea 2205 START_TIMER
ff158dc9
MN
2206
2207 add_yblock(s, buf, ref, obmc,
2208 block_w*mb_x - block_w/2,
791e7b83 2209 block_w*mb_y - block_w/2,
ff158dc9 2210 block_w, block_w,
791e7b83 2211 w, h,
ff158dc9
MN
2212 w, ref_stride, obmc_stride,
2213 mb_x - 1, mb_y - 1,
2214 add, plane_index);
2215
2216 STOP_TIMER("add_yblock")
791e7b83
MN
2217 }
2218 }
fff6d4ea
MN
2219
2220 STOP_TIMER("predict_plane")
791e7b83
MN
2221}
2222
2223static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int bias){
2224 const int level= b->level;
2225 const int w= b->width;
2226 const int h= b->height;
2227 const int qlog= clip(s->qlog + b->qlog, 0, 128);
2228 const int qmul= qexp[qlog&7]<<(qlog>>3);
da66b631
MN
2229 int x,y, thres1, thres2;
2230 START_TIMER
791e7b83
MN
2231
2232 assert(QROOT==8);
2233
93fbdb5a
MN
2234 if(s->qlog == LOSSLESS_QLOG) return;
2235
791e7b83 2236 bias= bias ? 0 : (3*qmul)>>3;
da66b631
MN
2237 thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
2238 thres2= 2*thres1;
791e7b83
MN
2239
2240 if(!bias){
2241 for(y=0; y<h; y++){
2242 for(x=0; x<w; x++){
da66b631
MN
2243 int i= src[x + y*stride];
2244
2245 if((unsigned)(i+thres1) > thres2){
2246 if(i>=0){
2247 i<<= QEXPSHIFT;
2248 i/= qmul; //FIXME optimize
2249 src[x + y*stride]= i;
2250 }else{
2251 i= -i;
2252 i<<= QEXPSHIFT;
2253 i/= qmul; //FIXME optimize
2254 src[x + y*stride]= -i;
2255 }
2256 }else
2257 src[x + y*stride]= 0;
791e7b83
MN
2258 }
2259 }
2260 }else{
2261 for(y=0; y<h; y++){
2262 for(x=0; x<w; x++){
2263 int i= src[x + y*stride];
2264
da66b631
MN
2265 if((unsigned)(i+thres1) > thres2){
2266 if(i>=0){
2267 i<<= QEXPSHIFT;
2268 i= (i + bias) / qmul; //FIXME optimize
2269 src[x + y*stride]= i;
2270 }else{
2271 i= -i;
2272 i<<= QEXPSHIFT;
2273 i= (i + bias) / qmul; //FIXME optimize
2274 src[x + y*stride]= -i;
2275 }
2276 }else
2277 src[x + y*stride]= 0;
791e7b83
MN
2278 }
2279 }
2280 }
da66b631
MN
2281 if(level+1 == s->spatial_decomposition_count){
2282// STOP_TIMER("quantize")
2283 }
791e7b83
MN
2284}
2285
2286static void dequantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride){
791e7b83
MN
2287 const int w= b->width;
2288 const int h= b->height;
2289 const int qlog= clip(s->qlog + b->qlog, 0, 128);
2290 const int qmul= qexp[qlog&7]<<(qlog>>3);
2291 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
2292 int x,y;
ea7d9cd4 2293 START_TIMER
791e7b83 2294
93fbdb5a
MN
2295 if(s->qlog == LOSSLESS_QLOG) return;
2296
791e7b83
MN
2297 assert(QROOT==8);
2298
2299 for(y=0; y<h; y++){
2300 for(x=0; x<w; x++){
2301 int i= src[x + y*stride];
2302 if(i<0){
2303 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
2304 }else if(i>0){
2305 src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT));
2306 }
2307 }
2308 }
ea7d9cd4
MN
2309 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
2310 STOP_TIMER("dquant")
2311 }
791e7b83
MN
2312}
2313
2314static void decorrelate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
2315 const int w= b->width;
2316 const int h= b->height;
2317 int x,y;
2318
2319 for(y=h-1; y>=0; y--){
2320 for(x=w-1; x>=0; x--){
2321 int i= x + y*stride;
2322
2323 if(x){
2324 if(use_median){
2325 if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
2326 else src[i] -= src[i - 1];
2327 }else{
2328 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
2329 else src[i] -= src[i - 1];
2330 }
2331 }else{
2332 if(y) src[i] -= src[i - stride];
2333 }
2334 }
2335 }
2336}
2337
2338static void correlate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
2339 const int w= b->width;
2340 const int h= b->height;
2341 int x,y;
2342
2343 for(y=0; y<h; y++){
2344 for(x=0; x<w; x++){
2345 int i= x + y*stride;
2346
2347 if(x){
2348 if(use_median){
2349 if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
2350 else src[i] += src[i - 1];
2351 }else{
2352 if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
2353 else src[i] += src[i - 1];
2354 }
2355 }else{
2356 if(y) src[i] += src[i - stride];
2357 }
2358 }
2359 }
2360}
2361
2362static void encode_header(SnowContext *s){
2363 int plane_index, level, orientation;
19aa028d 2364 uint8_t kstate[32]={0};
791e7b83 2365
19aa028d
MN
2366 put_cabac(&s->c, kstate, s->keyframe);
2367 if(s->keyframe || s->always_reset)
2368 reset_contexts(s);
791e7b83
MN
2369 if(s->keyframe){
2370 put_symbol(&s->c, s->header_state, s->version, 0);
19aa028d 2371 put_cabac(&s->c, s->header_state, s->always_reset);
791e7b83
MN
2372 put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
2373 put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
2374 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
2375 put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
791e7b83
MN
2376 put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
2377 put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
2378 put_cabac(&s->c, s->header_state, s->spatial_scalability);
2379// put_cabac(&s->c, s->header_state, s->rate_scalability);
2380
2381 for(plane_index=0; plane_index<2; plane_index++){
2382 for(level=0; level<s->spatial_decomposition_count; level++){
2383 for(orientation=level ? 1:0; orientation<4; orientation++){
2384 if(orientation==2) continue;
2385 put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
2386 }
2387 }
2388 }
2389 }
2390 put_symbol(&s->c, s->header_state, s->spatial_decomposition_type, 0);
2391 put_symbol(&s->c, s->header_state, s->qlog, 1);
2392 put_symbol(&s->c, s->header_state, s->mv_scale, 0);
2393 put_symbol(&s->c, s->header_state, s->qbias, 1);
155ec6ed 2394 put_symbol(&s->c, s->header_state, s->block_max_depth, 0);
791e7b83
MN
2395}
2396
2397static int decode_header(SnowContext *s){
2398 int plane_index, level, orientation;
19aa028d 2399 uint8_t kstate[32]={0};
791e7b83 2400
19aa028d
MN
2401 s->keyframe= get_cabac(&s->c, kstate);
2402 if(s->keyframe || s->always_reset)
2403 reset_contexts(s);
791e7b83
MN
2404 if(s->keyframe){
2405 s->version= get_symbol(&s->c, s->header_state, 0);
2406 if(s->version>0){
2407 av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
2408 return -1;
2409 }
19aa028d 2410 s->always_reset= get_cabac(&s->c, s->header_state);
791e7b83
MN
2411 s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
2412 s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
2413 s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
2414 s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
791e7b83
MN
2415 s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
2416 s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
2417 s->spatial_scalability= get_cabac(&s->c, s->header_state);
2418// s->rate_scalability= get_cabac(&s->c, s->header_state);
2419
2420 for(plane_index=0; plane_index<3; plane_index++){
2421 for(level=0; level<s->spatial_decomposition_count; level++){
2422 for(orientation=level ? 1:0; orientation<4; orientation++){
2423 int q;
2424 if (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
2425 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
2426 else q= get_symbol(&s->c, s->header_state, 1);
2427 s->plane[plane_index].band[level][orientation].qlog= q;
2428 }
2429 }
2430 }
2431 }
2432
2433 s->spatial_decomposition_type= get_symbol(&s->c, s->header_state, 0);
2434 if(s->spatial_decomposition_type > 2){
2435 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
2436 return -1;
2437 }
2438
2439 s->qlog= get_symbol(&s->c, s->header_state, 1);
2440 s->mv_scale= get_symbol(&s->c, s->header_state, 0);
2441 s->qbias= get_symbol(&s->c, s->header_state, 1);
155ec6ed 2442 s->block_max_depth= get_symbol(&s->c, s->header_state, 0);
791e7b83
MN
2443
2444 return 0;
2445}
2446
2447static int common_init(AVCodecContext *avctx){
2448 SnowContext *s = avctx->priv_data;
2449 int width, height;
2450 int level, orientation, plane_index, dec;
2451
2452 s->avctx= avctx;
2453
2454 dsputil_init(&s->dsp, avctx);
2455
2456#define mcf(dx,dy)\
2457 s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\
2458 s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
2459 mc_block ## dx ## dy;
2460
2461 mcf( 0, 0)
2462 mcf( 4, 0)
2463 mcf( 8, 0)
2464 mcf(12, 0)
2465 mcf( 0, 4)
2466 mcf( 4, 4)
2467 mcf( 8, 4)
2468 mcf(12, 4)
2469 mcf( 0, 8)
2470 mcf( 4, 8)
2471 mcf( 8, 8)
2472 mcf(12, 8)
2473 mcf( 0,12)
2474 mcf( 4,12)
2475 mcf( 8,12)
2476 mcf(12,12)
2477
2478#define mcfh(dx,dy)\
2479 s->dsp.put_pixels_tab [0][dy/4+dx/8]=\
2480 s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
2481 mc_block_hpel ## dx ## dy;
2482
2483 mcfh(0, 0)
2484 mcfh(8, 0)
2485 mcfh(0, 8)
2486 mcfh(8, 8)
2487
2488 dec= s->spatial_decomposition_count= 5;
2489 s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
2490
2491 s->chroma_h_shift= 1; //FIXME XXX
2492 s->chroma_v_shift= 1;
2493
2494// dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
2495
155ec6ed
MN
2496 width= s->avctx->width;
2497 height= s->avctx->height;
2498
2499 s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM));
2500 s->pred_buffer= av_mallocz(width*height*sizeof(DWTELEM));
791e7b83
MN
2501
2502 s->mv_scale= (s->avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
155ec6ed 2503 s->block_max_depth= (s->avctx->flags & CODEC_FLAG_4MV) ? 1 : 0;
791e7b83
MN
2504
2505 for(plane_index=0; plane_index<3; plane_index++){
2506 int w= s->avctx->width;
2507 int h= s->avctx->height;
2508
2509 if(plane_index){
2510 w>>= s->chroma_h_shift;
2511 h>>= s->chroma_v_shift;
2512 }
2513 s->plane[plane_index].width = w;
2514 s->plane[plane_index].height= h;
3bb9f096 2515//av_log(NULL, AV_LOG_DEBUG, "%d %d\n", w, h);
791e7b83
MN
2516 for(level=s->spatial_decomposition_count-1; level>=0; level--){
2517 for(orientation=level ? 1 : 0; orientation<4; orientation++){
2518 SubBand *b= &s->plane[plane_index].band[level][orientation];
2519
2520 b->buf= s->spatial_dwt_buffer;
2521 b->level= level;
2522 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
2523 b->width = (w + !(orientation&1))>>1;
2524 b->height= (h + !(orientation>1))>>1;
2525
2526 if(orientation&1) b->buf += (w+1)>>1;
2527 if(orientation>1) b->buf += b->stride>>1;
2528
2529 if(level)
2530 b->parent= &s->plane[plane_index].band[level-1][orientation];
0cea8a03
MN
2531 b->x = av_mallocz(((b->width+1) * b->height+1)*sizeof(int16_t));
2532 b->coeff= av_mallocz(((b->width+1) * b->height+1)*sizeof(DWTELEM));
791e7b83
MN
2533 }
2534 w= (w+1)>>1;
2535 h= (h+1)>>1;
2536 }
2537 }
2538
791e7b83
MN
2539 reset_contexts(s);
2540/*
2541 width= s->width= avctx->width;
2542 height= s->height= avctx->height;
2543
2544 assert(width && height);
2545*/
2546 s->avctx->get_buffer(s->avctx, &s->mconly_picture);
2547
2548 return 0;
2549}
2550
2551
2552static void calculate_vissual_weight(SnowContext *s, Plane *p){
2553 int width = p->width;
2554 int height= p->height;
39c61bbb 2555 int level, orientation, x, y;
791e7b83
MN
2556
2557 for(level=0; level<s->spatial_decomposition_count; level++){
2558 for(orientation=level ? 1 : 0; orientation<4; orientation++){
2559 SubBand *b= &p->band[level][orientation];
2560 DWTELEM *buf= b->buf;
2561 int64_t error=0;
2562
2563 memset(s->spatial_dwt_buffer, 0, sizeof(int)*width*height);
2564 buf[b->width/2 + b->height/2*b->stride]= 256*256;
46c281e8 2565 ff_spatial_idwt(s->spatial_dwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
791e7b83
MN
2566 for(y=0; y<height; y++){
2567 for(x=0; x<width; x++){
2568 int64_t d= s->spatial_dwt_buffer[x + y*width];
2569 error += d*d;
2570 }
2571 }
2572
2573 b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
46c281e8 2574// av_log(NULL, AV_LOG_DEBUG, "%d %d %d\n", level, orientation, b->qlog/*, sqrt(error)*/);
791e7b83
MN
2575 }
2576 }
2577}
2578
2579static int encode_init(AVCodecContext *avctx)
2580{
2581 SnowContext *s = avctx->priv_data;
39c61bbb 2582 int plane_index;
791e7b83 2583
2ff9ff5b
MN
2584 if(avctx->strict_std_compliance >= 0){
2585 av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it wont be decodeable with future versions!!!\n"
2586 "use vstrict=-1 to use it anyway\n");
2587 return -1;
2588 }
2589
791e7b83 2590 common_init(avctx);
155ec6ed 2591 alloc_blocks(s);
791e7b83
MN
2592
2593 s->version=0;
2594
2595 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
2596 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
2597 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
791e7b83
MN
2598 h263_encode_init(&s->m); //mv_penalty
2599
2600 for(plane_index=0; plane_index<3; plane_index++){
2601 calculate_vissual_weight(s, &s->plane[plane_index]);
2602 }
2603
2604
2605 avctx->coded_frame= &s->current_picture;
2606 switch(avctx->pix_fmt){
2607// case PIX_FMT_YUV444P:
2608// case PIX_FMT_YUV422P:
2609 case PIX_FMT_YUV420P:
2610 case PIX_FMT_GRAY8:
2611// case PIX_FMT_YUV411P:
2612// case PIX_FMT_YUV410P:
2613 s->colorspace_type= 0;
2614 break;
2615/* case PIX_FMT_RGBA32:
2616 s->colorspace= 1;
2617 break;*/
2618 default:
2619 av_log(avctx, AV_LOG_ERROR, "format not supported\n");
2620 return -1;
2621 }
2622// avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
2623 s->chroma_h_shift= 1;
2624 s->chroma_v_shift= 1;
2625 return 0;
2626}
2627
2628static int frame_start(SnowContext *s){
2629 AVFrame tmp;
64886072
MN
2630 int w= s->avctx->width; //FIXME round up to x16 ?
2631 int h= s->avctx->height;
791e7b83 2632
64886072
MN
2633 if(s->current_picture.data[0]){
2634 draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w , h , EDGE_WIDTH );
2635 draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
2636 draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
2637 }
2638
791e7b83
MN
2639 tmp= s->last_picture;
2640 s->last_picture= s->current_picture;
2641 s->current_picture= tmp;
2642
2643 s->current_picture.reference= 1;
2644 if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
2645 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
2646 return -1;
2647 }
2648
2649 return 0;
2650}
2651
2652static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
2653 SnowContext *s = avctx->priv_data;
2654 CABACContext * const c= &s->c;
2655 AVFrame *pict = data;
2656 const int width= s->avctx->width;
2657 const int height= s->avctx->height;
39c61bbb 2658 int level, orientation, plane_index;
791e7b83 2659
791e7b83
MN
2660 ff_init_cabac_encoder(c, buf, buf_size);
2661 ff_init_cabac_states(c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
2662
2663 s->input_picture = *pict;
2664
791e7b83
MN
2665 s->keyframe=avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
2666 pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
2667
93fbdb5a
MN
2668 if(pict->quality){
2669 s->qlog= rint(QROOT*log(pict->quality / (float)FF_QP2LAMBDA)/log(2));
2670 //<64 >60
2671 s->qlog += 61;
2672 }else{
2673 s->qlog= LOSSLESS_QLOG;
2674 }
791e7b83 2675
791e7b83 2676 frame_start(s);
19aa028d 2677 s->current_picture.key_frame= s->keyframe;
791e7b83
MN
2678
2679 if(pict->pict_type == P_TYPE){
2680 int block_width = (width +15)>>4;
2681 int block_height= (height+15)>>4;
2682 int stride= s->current_picture.linesize[0];
791e7b83
MN
2683
2684 assert(s->current_picture.data[0]);
2685 assert(s->last_picture.data[0]);
2686
2687 s->m.avctx= s->avctx;
2688 s->m.current_picture.data[0]= s->current_picture.data[0];
2689 s->m. last_picture.data[0]= s-> last_picture.data[0];
2690 s->m. new_picture.data[0]= s-> input_picture.data[0];
2691 s->m.current_picture_ptr= &s->m.current_picture;
2692 s->m. last_picture_ptr= &s->m. last_picture;
2693 s->m.linesize=
2694 s->m. last_picture.linesize[0]=
2695 s->m. new_picture.linesize[0]=
2696 s->m.current_picture.linesize[0]= stride;
155ec6ed 2697 s->m.uvlinesize= s->current_picture.linesize[1];
791e7b83
MN
2698 s->m.width = width;
2699 s->m.height= height;
2700 s->m.mb_width = block_width;
2701 s->m.mb_height= block_height;
2702 s->m.mb_stride= s->m.mb_width+1;
2703 s->m.b8_stride= 2*s->m.mb_width+1;
2704 s->m.f_code=1;
2705 s->m.pict_type= pict->pict_type;
2706 s->m.me_method= s->avctx->me_method;
2707 s->m.me.scene_change_score=0;
2708 s->m.flags= s->avctx->flags;
2709 s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
2710 s->m.out_format= FMT_H263;
2711 s->m.unrestricted_mv= 1;
2712
155ec6ed 2713 s->lambda = s->m.lambda= pict->quality * 3/2; //FIXME bug somewhere else
791e7b83 2714 s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
155ec6ed 2715 s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
791e7b83 2716
791e7b83
MN
2717 s->m.dsp= s->dsp; //move
2718 ff_init_me(&s->m);
791e7b83 2719 }
791e7b83 2720
155ec6ed
MN
2721redo_frame:
2722
791e7b83
MN
2723 s->qbias= pict->pict_type == P_TYPE ? 2 : 0;
2724
2725 encode_header(s);
155ec6ed
MN
2726 encode_blocks(s);
2727
791e7b83
MN
2728 for(plane_index=0; plane_index<3; plane_index++){
2729 Plane *p= &s->plane[plane_index];
2730 int w= p->width;
2731 int h= p->height;
2732 int x, y;
39c61bbb 2733// int bits= put_bits_count(&s->c.pb);
791e7b83
MN
2734
2735 //FIXME optimize
791e7b83
MN
2736 if(pict->data[plane_index]) //FIXME gray hack
2737 for(y=0; y<h; y++){
2738 for(x=0; x<w; x++){
2739 s->spatial_dwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<8;
2740 }
2741 }
2742 predict_plane(s, s->spatial_dwt_buffer, plane_index, 0);
155ec6ed
MN
2743
2744 if( plane_index==0
2745 && pict->pict_type == P_TYPE
2746 && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
2747 ff_init_cabac_encoder(c, buf, buf_size);
2748 ff_init_cabac_states(c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
2749 pict->pict_type= FF_I_TYPE;
2750 s->keyframe=1;
2751 reset_contexts(s);
2752 goto redo_frame;
2753 }
2754
93fbdb5a
MN
2755 if(s->qlog == LOSSLESS_QLOG){
2756 for(y=0; y<h; y++){
2757 for(x=0; x<w; x++){
2758 s->spatial_dwt_buffer[y*w + x]= (s->spatial_dwt_buffer[y*w + x] + 127)>>8;
2759 }
2760 }
2761 }
791e7b83 2762
46c281e8 2763 ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
93fbdb5a 2764
791e7b83
MN
2765 for(level=0; level<s->spatial_decomposition_count; level++){
2766 for(orientation=level ? 1 : 0; orientation<4; orientation++){
2767 SubBand *b= &p->band[level][orientation];
2768
2769 quantize(s, b, b->buf, b->stride, s->qbias);
2770 if(orientation==0)
2771 decorrelate(s, b, b->buf, b->stride, pict->pict_type == P_TYPE, 0);
2772 encode_subband(s, b, b->buf, b->parent ? b->parent->buf : NULL, b->stride, orientation);
2773 assert(b->parent==NULL || b->parent->stride == b->stride*2);
2774 if(orientation==0)
2775 correlate(s, b, b->buf, b->stride, 1, 0);
2776 }
2777 }
2778// av_log(NULL, AV_LOG_DEBUG, "plane:%d bits:%d\n", plane_index, put_bits_count(&s->c.pb) - bits);
2779
2780 for(level=0; level<s->spatial_decomposition_count; level++){
2781 for(orientation=level ? 1 : 0; orientation<4; orientation++){
2782 SubBand *b= &p->band[level][orientation];
2783
2784 dequantize(s, b, b->buf, b->stride);
2785 }
2786 }
93fbdb5a 2787
46c281e8 2788 ff_spatial_idwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
93fbdb5a
MN
2789 if(s->qlog == LOSSLESS_QLOG){
2790 for(y=0; y<h; y++){
2791 for(x=0; x<w; x++){
2792 s->spatial_dwt_buffer[y*w + x]<<=8;
2793 }
2794 }
2795 }
791e7b83 2796 predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
791e7b83
MN
2797 //FIXME optimize
2798 for(y=0; y<h; y++){
2799 for(x=0; x<w; x++){
2800 int v= (s->spatial_dwt_buffer[y*w + x]+128)>>8;
2801 if(v&(~255)) v= ~(v>>31);
2802 s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]= v;
2803 }
2804 }
2805 if(s->avctx->flags&CODEC_FLAG_PSNR){
2806 int64_t error= 0;
2807
2808 if(pict->data[plane_index]) //FIXME gray hack
2809 for(y=0; y<h; y++){
2810 for(x=0; x<w; x++){
93fbdb5a 2811 int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
791e7b83
MN
2812 error += d*d;
2813 }
2814 }
791e7b83
MN
2815 s->avctx->error[plane_index] += error;
2816 s->avctx->error[3] += error;
2817 }
2818 }
2819
2820 if(s->last_picture.data[0])
2821 avctx->release_buffer(avctx, &s->last_picture);
2822
2823 emms_c();
2824
2825 return put_cabac_terminate(c, 1);
2826}
2827
2828static void common_end(SnowContext *s){
7b49c309
MN
2829 int plane_index, level, orientation;
2830
791e7b83 2831 av_freep(&s->spatial_dwt_buffer);
791e7b83
MN
2832
2833 av_freep(&s->m.me.scratchpad);
2834 av_freep(&s->m.me.map);
2835 av_freep(&s->m.me.score_map);
155ec6ed
MN
2836
2837 av_freep(&s->block);
7b49c309
MN
2838
2839 for(plane_index=0; plane_index<3; plane_index++){
2840 for(level=s->spatial_decomposition_count-1; level>=0; level--){
2841 for(orientation=level ? 1 : 0; orientation<4; orientation++){
2842 SubBand *b= &s->plane[plane_index].band[level][orientation];
2843
2844 av_freep(&b->x);
0cea8a03 2845 av_freep(&b->coeff);
7b49c309
MN
2846 }
2847 }
2848 }
791e7b83
MN
2849}
2850
2851static int encode_end(AVCodecContext *avctx)
2852{
2853 SnowContext *s = avctx->priv_data;
2854
2855 common_end(s);
2856
2857 return 0;
2858}
2859
2860static int decode_init(AVCodecContext *avctx)
2861{
2862// SnowContext *s = avctx->priv_data;
2863
2864 common_init(avctx);
2865
2866 return 0;
2867}
2868
2869static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){
2870 SnowContext *s = avctx->priv_data;
2871 CABACContext * const c= &s->c;
791e7b83
MN
2872 int bytes_read;
2873 AVFrame *picture = data;
39c61bbb 2874 int level, orientation, plane_index;
791e7b83
MN
2875
2876
2877 /* no supplementary picture */
2878 if (buf_size == 0)
2879 return 0;
2880
2881 ff_init_cabac_decoder(c, buf, buf_size);
2882 ff_init_cabac_states(c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
2883
791e7b83
MN
2884 s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
2885 decode_header(s);
155ec6ed 2886 if(!s->block) alloc_blocks(s);
791e7b83
MN
2887
2888 frame_start(s);
2889 //keyframe flag dupliaction mess FIXME
2890 if(avctx->debug&FF_DEBUG_PICT_INFO)
2891 av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
2892
155ec6ed 2893 decode_blocks(s);
791e7b83
MN
2894
2895 for(plane_index=0; plane_index<3; plane_index++){
2896 Plane *p= &s->plane[plane_index];
2897 int w= p->width;
2898 int h= p->height;
2899 int x, y;
2900
2901if(s->avctx->debug&2048){
2902 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
2903 predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
2904
2905 for(y=0; y<h; y++){
2906 for(x=0; x<w; x++){
2907 int v= (s->spatial_dwt_buffer[y*w + x]+128)>>8;
2908 if(v&(~255)) v= ~(v>>31);
2909 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
2910 }
2911 }
2912}
2913 for(level=0; level<s->spatial_decomposition_count; level++){
2914 for(orientation=level ? 1 : 0; orientation<4; orientation++){
2915 SubBand *b= &p->band[level][orientation];
2916
2917 decode_subband(s, b, b->buf, b->parent ? b->parent->buf : NULL, b->stride, orientation);
ea7d9cd4 2918 if(orientation==0){
791e7b83 2919 correlate(s, b, b->buf, b->stride, 1, 0);
ea7d9cd4
MN
2920 dequantize(s, b, b->buf, b->stride);
2921 assert(b->buf == s->spatial_dwt_buffer);
2922 }
791e7b83
MN
2923 }
2924 }
2925
46c281e8 2926 ff_spatial_idwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
93fbdb5a
MN
2927 if(s->qlog == LOSSLESS_QLOG){
2928 for(y=0; y<h; y++){
2929 for(x=0; x<w; x++){
2930 s->spatial_dwt_buffer[y*w + x]<<=8;
2931 }
2932 }
2933 }
791e7b83 2934 predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
791e7b83
MN
2935
2936 //FIXME optimize
2937 for(y=0; y<h; y++){
2938 for(x=0; x<w; x++){
2939 int v= (s->spatial_dwt_buffer[y*w + x]+128)>>8;
2940 if(v&(~255)) v= ~(v>>31);
2941 s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]= v;
2942 }
2943 }
2944 }
2945
2946 emms_c();
2947
2948 if(s->last_picture.data[0])
2949 avctx->release_buffer(avctx, &s->last_picture);
2950
2951if(!(s->avctx->debug&2048))
2952 *picture= s->current_picture;
2953else
2954 *picture= s->mconly_picture;
2955
2956 *data_size = sizeof(AVFrame);
2957
2958 bytes_read= get_cabac_terminate(c);
2959 if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n");
2960
2961 return bytes_read;
2962}
2963
2964static int decode_end(AVCodecContext *avctx)
2965{
2966 SnowContext *s = avctx->priv_data;
2967
2968 common_end(s);
2969
2970 return 0;
2971}
2972
2973AVCodec snow_decoder = {
2974 "snow",
2975 CODEC_TYPE_VIDEO,
2976 CODEC_ID_SNOW,
2977 sizeof(SnowContext),
2978 decode_init,
2979 NULL,
2980 decode_end,
2981 decode_frame,
2982 0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
2983 NULL
2984};
2985
2986AVCodec snow_encoder = {
2987 "snow",
2988 CODEC_TYPE_VIDEO,
2989 CODEC_ID_SNOW,
2990 sizeof(SnowContext),
2991 encode_init,
2992 encode_frame,
2993 encode_end,
2994};
2995
2996
2997#if 0
2998#undef malloc
2999#undef free
3000#undef printf
3001
3002int main(){
3003 int width=256;
3004 int height=256;
3005 int buffer[2][width*height];
3006 SnowContext s;
3007 int i;
3008 s.spatial_decomposition_count=6;
3009 s.spatial_decomposition_type=1;
3010
3011 printf("testing 5/3 DWT\n");
3012 for(i=0; i<width*height; i++)
3013 buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
3014
46c281e8
MN
3015 ff_spatial_dwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3016 ff_spatial_idwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
791e7b83
MN
3017
3018 for(i=0; i<width*height; i++)
3019 if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
3020
3021 printf("testing 9/7 DWT\n");
3022 s.spatial_decomposition_type=0;
3023 for(i=0; i<width*height; i++)
3024 buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
3025
46c281e8
MN
3026 ff_spatial_dwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3027 ff_spatial_idwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
791e7b83
MN
3028
3029 for(i=0; i<width*height; i++)
3030 if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
3031
3032 printf("testing AC coder\n");
3033 memset(s.header_state, 0, sizeof(s.header_state));
3034 ff_init_cabac_encoder(&s.c, buffer[0], 256*256);
3035 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
3036
3037 for(i=-256; i<256; i++){
3038START_TIMER
3039 put_symbol(&s.c, s.header_state, i*i*i/3*ABS(i), 1);
3040STOP_TIMER("put_symbol")
3041 }
3042 put_cabac_terminate(&s.c, 1);
3043
3044 memset(s.header_state, 0, sizeof(s.header_state));
3045 ff_init_cabac_decoder(&s.c, buffer[0], 256*256);
3046 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
3047
3048 for(i=-256; i<256; i++){
3049 int j;
3050START_TIMER
3051 j= get_symbol(&s.c, s.header_state, 1);
3052STOP_TIMER("get_symbol")
3053 if(j!=i*i*i/3*ABS(i)) printf("fsck: %d != %d\n", i, j);
3054 }
3055{
3056int level, orientation, x, y;
3057int64_t errors[8][4];
3058int64_t g=0;
3059
3060 memset(errors, 0, sizeof(errors));
3061 s.spatial_decomposition_count=3;
3062 s.spatial_decomposition_type=0;
3063 for(level=0; level<s.spatial_decomposition_count; level++){
3064 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3065 int w= width >> (s.spatial_decomposition_count-level);
3066 int h= height >> (s.spatial_decomposition_count-level);
3067 int stride= width << (s.spatial_decomposition_count-level);
3068 DWTELEM *buf= buffer[0];
3069 int64_t error=0;
3070
3071 if(orientation&1) buf+=w;
3072 if(orientation>1) buf+=stride>>1;
3073
3074 memset(buffer[0], 0, sizeof(int)*width*height);
3075 buf[w/2 + h/2*stride]= 256*256;
46c281e8 3076 ff_spatial_idwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
791e7b83
MN
3077 for(y=0; y<height; y++){
3078 for(x=0; x<width; x++){
3079 int64_t d= buffer[0][x + y*width];
3080 error += d*d;
3081 if(ABS(width/2-x)<9 && ABS(height/2-y)<9 && level==2) printf("%8lld ", d);
3082 }
3083 if(ABS(height/2-y)<9 && level==2) printf("\n");
3084 }
3085 error= (int)(sqrt(error)+0.5);
3086 errors[level][orientation]= error;
3087 if(g) g=ff_gcd(g, error);
3088 else g= error;
3089 }
3090 }
3091 printf("static int const visual_weight[][4]={\n");
3092 for(level=0; level<s.spatial_decomposition_count; level++){
3093 printf(" {");
3094 for(orientation=0; orientation<4; orientation++){
3095 printf("%8lld,", errors[level][orientation]/g);
3096 }
3097 printf("},\n");
3098 }
3099 printf("};\n");
3100 {
3101 int level=2;
3102 int orientation=3;
3103 int w= width >> (s.spatial_decomposition_count-level);
3104 int h= height >> (s.spatial_decomposition_count-level);
3105 int stride= width << (s.spatial_decomposition_count-level);
3106 DWTELEM *buf= buffer[0];
3107 int64_t error=0;
3108
3109 buf+=w;
3110 buf+=stride>>1;
3111
3112 memset(buffer[0], 0, sizeof(int)*width*height);
3113#if 1
3114 for(y=0; y<height; y++){
3115 for(x=0; x<width; x++){
3116 int tab[4]={0,2,3,1};
3117 buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
3118 }
3119 }
46c281e8 3120 ff_spatial_dwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
791e7b83
MN
3121#else
3122 for(y=0; y<h; y++){
3123 for(x=0; x<w; x++){
3124 buf[x + y*stride ]=169;
3125 buf[x + y*stride-w]=64;
3126 }
3127 }
46c281e8 3128 ff_spatial_idwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
791e7b83
MN
3129#endif
3130 for(y=0; y<height; y++){
3131 for(x=0; x<width; x++){
3132 int64_t d= buffer[0][x + y*width];
3133 error += d*d;
3134 if(ABS(width/2-x)<9 && ABS(height/2-y)<9) printf("%8lld ", d);
3135 }
3136 if(ABS(height/2-y)<9) printf("\n");
3137 }
3138 }
3139
3140}
3141 return 0;
3142}
3143#endif
3144